Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 40 additions & 62 deletions prqlc/prqlc-parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,9 @@ fn keyword<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>>
just("import"),
just("enum"),
))
.to_slice()
.then_ignore(end_expr())
// TODO: possibly we can avoid an allocation by using `.map(TokenKind::Keyword)`
.map(|x| TokenKind::Keyword(x.to_string()))
.map(|s: &str| TokenKind::Keyword(s.to_string()))
}

fn param<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
Expand All @@ -213,7 +213,8 @@ fn param<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
any()
.filter(|c: &char| c.is_alphanumeric() || *c == '_' || *c == '.')
.repeated()
.collect::<String>(),
.to_slice()
.map(|s: &str| s.to_string()),
)
.map(TokenKind::Param)
}
Expand Down Expand Up @@ -283,14 +284,10 @@ pub fn ident_part<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserError<
// .then(text::ascii::ident())
any()
.filter(|c: &char| c.is_alphanumeric() || *c == '_')
.repeated()
.collect::<Vec<char>>(),
.repeated(),
)
.map(|(first, rest)| {
let mut chars = vec![first];
chars.extend(rest);
chars.into_iter().collect::<String>()
});
.to_slice()
.map(|s: &str| s.to_string());

let backtick = none_of('`')
.repeated()
Expand All @@ -301,10 +298,8 @@ pub fn ident_part<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserError<
}

// Date/time components
fn digits<'a>(count: usize) -> impl Parser<'a, ParserInput<'a>, Vec<char>, ParserError<'a>> {
chumsky::text::digits(10)
.exactly(count)
.collect::<Vec<char>>()
fn digits<'a>(count: usize) -> impl Parser<'a, ParserInput<'a>, &'a str, ParserError<'a>> {
chumsky::text::digits(10).exactly(count).to_slice()
}

fn date_inner<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserError<'a>> {
Expand All @@ -316,25 +311,26 @@ fn date_inner<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserError<'a>>
.then(just('-'))
.then(text::digits(10).exactly(2))
.to_slice()
// TODO: can change this to return the slice and avoid the allocation
// TODO: Returning &str instead of String would require changing Literal::Date
// to use Cow<'a, str> or a similar approach, which is a larger refactoring
.map(|s: &str| s.to_owned())
}

fn time_inner<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserError<'a>> {
// Helper function for parsing time components with separators
fn time_component<'p>(
separator: char,
component_parser: impl Parser<'p, ParserInput<'p>, Vec<char>, ParserError<'p>>,
component_parser: impl Parser<'p, ParserInput<'p>, &'p str, ParserError<'p>>,
) -> impl Parser<'p, ParserInput<'p>, String, ParserError<'p>> {
just(separator)
.then(component_parser)
.map(move |(sep, comp)| format!("{}{}", sep, String::from_iter(comp)))
.map(move |(sep, comp): (char, &str)| format!("{}{}", sep, comp))
.or_not()
.map(|opt| opt.unwrap_or_default())
}

// Hours (required)
let hours = digits(2).map(String::from_iter);
let hours = digits(2).map(|s: &str| s.to_string());

// Minutes and seconds (optional) - with colon separator
let minutes = time_component(':', digits(2));
Expand All @@ -348,18 +344,18 @@ fn time_inner<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserError<'a>>
.repeated()
.at_least(1)
.at_most(6)
.collect::<Vec<char>>(),
.to_slice(),
);

// Timezone (optional): either 'Z' or '+/-HH:MM'
let timezone = choice((
just('Z').map(|c| c.to_string()),
one_of("-+")
.then(digits(2).then(just(':').or_not().then(digits(2))).map(
|(hrs, (_opt_colon, mins))| {
|(hrs, (_opt_colon, mins)): (&str, (Option<char>, &str))| {
// Always format as -0800 without colon for SQL compatibility, regardless of input format
// We need to handle both -08:00 and -0800 input formats but standardize the output
format!("{}{}", String::from_iter(hrs), String::from_iter(mins))
format!("{}{}", hrs, mins)
},
))
.map(|(sign, offset)| format!("{}{}", sign, offset)),
Expand Down Expand Up @@ -463,26 +459,21 @@ fn number<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
}

// Parse integer part
let integer = parse_integer().map(|chars| chars.into_iter().collect::<String>());
let integer = parse_integer();

// Parse fractional part
let fraction_digits = any()
.filter(|c: &char| c.is_ascii_digit())
.then(
any()
.filter(|c: &char| c.is_ascii_digit() || *c == '_')
.repeated()
.collect::<Vec<char>>(),
.repeated(),
)
.map(|(first, rest)| {
let mut chars = vec![first];
chars.extend(rest);
chars
});
.to_slice();

let frac = just('.')
.then(fraction_digits)
.map(|(dot, digits)| format!("{}{}", dot, String::from_iter(digits)));
.map(|(dot, digits): (char, &str)| format!("{}{}", dot, digits));

// Parse exponent
let exp_digits = one_of("+-")
Expand All @@ -491,21 +482,13 @@ fn number<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
any()
.filter(|c: &char| c.is_ascii_digit())
.repeated()
.at_least(1)
.collect::<Vec<char>>(),
.at_least(1),
)
.map(|(sign_opt, digits)| {
let mut s = String::new();
if let Some(sign) = sign_opt {
s.push(sign);
}
s.push_str(&String::from_iter(digits));
s
});
.to_slice();

let exp = one_of("eE")
.then(exp_digits)
.map(|(e, digits)| format!("{}{}", e, digits));
.map(|(e, digits): (char, &str)| format!("{}{}", e, digits));

// Combine all parts into a number using the helper function
integer
Expand All @@ -529,25 +512,18 @@ fn number<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
})
}

fn parse_integer<'a>() -> impl Parser<'a, ParserInput<'a>, Vec<char>, ParserError<'a>> {
fn parse_integer<'a>() -> impl Parser<'a, ParserInput<'a>, &'a str, ParserError<'a>> {
// Handle both multi-digit numbers (can't start with 0) and single digit 0
choice((
any()
.filter(|c: &char| c.is_ascii_digit() && *c != '0')
.then(
any()
.filter(|c: &char| c.is_ascii_digit() || *c == '_')
.repeated()
.collect::<Vec<char>>(),
.repeated(),
)
// TODO: there's a few of these, which seems unlikely to be the
// idomatic approach. I tried `.to_slice()` but couldn't get it to work
.map(|(first, rest)| {
let mut chars = vec![first];
chars.extend(rest);
chars
}),
just('0').map(|c| vec![c]),
.to_slice(),
just('0').to_slice(),
))
}

Expand All @@ -562,10 +538,14 @@ fn raw_string<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>
any()
.filter(move |c: &char| *c != '\'' && *c != '"' && *c != '\n' && *c != '\r')
.repeated()
.collect::<Vec<char>>(),
.to_slice(),
)
.then(choice((just('\''), just('"'))))
.map(|(((_, _), chars), _)| Literal::RawString(chars.into_iter().collect()))
.map(
|(((_, _open_quote), s), _close_quote): (((&str, char), &str), char)| {
Literal::RawString(s.to_string())
},
)
}

fn boolean<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
Expand Down Expand Up @@ -593,18 +573,16 @@ fn value_and_unit<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError
));

// Parse the integer value followed by a unit
parse_integer()
.map(|chars| chars.into_iter().filter(|c| *c != '_').collect::<String>())
.then(unit)
.then_ignore(end_expr())
.map(|(number_str, unit_str): (String, &str)| {
// Parse the number, defaulting to 1 if parsing fails
let n = number_str.parse::<i64>().unwrap_or(1);
parse_integer().then(unit).then_ignore(end_expr()).map(
|(number_str, unit_str): (&str, &str)| {
// Parse the number (removing underscores), defaulting to 1 if parsing fails
let n = number_str.replace('_', "").parse::<i64>().unwrap_or(1);
Literal::ValueAndUnit(ValueAndUnit {
n,
unit: unit_str.to_string(),
})
})
},
)
}

pub fn quoted_string<'a>(
Expand Down