Skip to content

Commit 84b41ea

Browse files
author
Gentle
committed
change comment detection logic
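en_GB contains the rule `COMPOUNDRULE #*0{`. Unfortunately, any word starting with `#` was treated as the start of a comment, so the value of this rule was silently dropped. The new logic skips only whole lines that start with `#` (after leading whitespace) and instead allows unparsed trailing data at the end of each line.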
1 parent dfa77ad commit 84b41ea

File tree

1 file changed

+15
-59
lines changed

1 file changed

+15
-59
lines changed

src/aff/parser.rs

Lines changed: 15 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -550,13 +550,6 @@ fn parse_compound_syllable<'aff>(
550550
}))
551551
}
552552
};
553-
let remaining_words = words.count();
554-
if remaining_words > 0 {
555-
return Err(lines.error(ParseDictionaryErrorKind::MismatchedArity {
556-
expected: 2,
557-
actual: 2 + remaining_words,
558-
}));
559-
}
560553

561554
cx.options.compound_syllable_max = max
562555
.parse::<u16>()
@@ -656,14 +649,6 @@ fn parse_compound_pattern_table(cx: &mut AffLineParser, lines: &mut Lines) -> Pa
656649
};
657650
let replacement = words.next();
658651

659-
let remaining_words = words.count();
660-
if remaining_words > 0 {
661-
return Err(lines.error(ParseDictionaryErrorKind::MismatchedArity {
662-
expected: 3,
663-
actual: 3 + remaining_words,
664-
}));
665-
}
666-
667652
let (first_word_end, first_word_flag) = split_word_and_flagset_naive(first_word_end);
668653
let (second_word_begin, second_word_flag) = split_word_and_flagset_naive(second_word_begin);
669654
let first_word_flag = (!first_word_flag.is_empty())
@@ -742,7 +727,7 @@ fn parse_map(cx: &mut AffLineParser, lines: &mut Lines) -> ParseResult {
742727

743728
/// A helper type that means "words on a line split by whitespace with comments
744729
/// dropped." This is a concretion of `impl Iterator<Item = &'a str>`.
745-
type Words<'text> = TakeWhile<SplitWhitespace<'text>, for<'b, 'c> fn(&'b &'c str) -> bool>;
730+
type Words<'text> = SplitWhitespace<'text>;
746731

747732
struct Lines<'text> {
748733
lines: Peekable<Enumerate<core::str::Lines<'text>>>,
@@ -754,10 +739,11 @@ impl<'text> Lines<'text> {
754739
fn new(text: &'text str, source: ParseDictionaryErrorSource) -> Self {
755740
let text = text.strip_prefix('\u{feff}').unwrap_or(text);
756741
let mut lines = text.lines().enumerate().peekable();
757-
let words = lines.peek().map(|(_line_no, line)| {
758-
line.split_whitespace()
759-
.take_while((|word| !word.starts_with('#')) as for<'b, 'c> fn(&'b &'c str) -> bool)
760-
});
742+
while lines
743+
.next_if(|(_line_no, line)| line.trim_start().starts_with('#'))
744+
.is_some()
745+
{}
746+
let words = lines.peek().map(|(_line_no, line)| line.split_whitespace());
761747

762748
Self {
763749
lines,
@@ -772,10 +758,15 @@ impl<'text> Lines<'text> {
772758

773759
fn advance_line(&mut self) {
774760
self.lines.next();
775-
self.words = self.lines.peek().map(|(_line_no, line)| {
776-
line.split_whitespace()
777-
.take_while((|word| !word.starts_with('#')) as for<'b, 'c> fn(&'b &'c str) -> bool)
778-
});
761+
while self
762+
.lines
763+
.next_if(|(_line_no, line)| line.trim_start().starts_with('#'))
764+
.is_some()
765+
{}
766+
self.words = self
767+
.lines
768+
.peek()
769+
.map(|(_line_no, line)| line.split_whitespace());
779770
}
780771

781772
fn line(&mut self) -> Option<&str> {
@@ -813,19 +804,6 @@ impl<'text> Lines<'text> {
813804
}
814805

815806
fn parse_bool(&mut self) -> Result<bool> {
816-
// Boolean flags are specified by just the key. For example if you see `COMPLEXPREFIXES`
817-
// as a line, `complex_prefixes` is true. Otherwise it's false.
818-
let count = self
819-
.words
820-
.take()
821-
.map(|words| words.count())
822-
.unwrap_or_default();
823-
if count > 0 {
824-
return Err(self.error(ParseDictionaryErrorKind::MismatchedArity {
825-
expected: 0,
826-
actual: count,
827-
}));
828-
}
829807
Ok(true)
830808
}
831809

@@ -872,13 +850,6 @@ impl<'text> Lines<'text> {
872850
}))
873851
}
874852
};
875-
let remaining_words = words.count();
876-
if remaining_words > 0 {
877-
return Err(self.error(ParseDictionaryErrorKind::MismatchedArity {
878-
expected: 1,
879-
actual: 1 + remaining_words,
880-
}));
881-
}
882853

883854
f(word).map_err(|kind| self.error(kind))?;
884855
}
@@ -932,13 +903,6 @@ impl<'text> Lines<'text> {
932903
}))
933904
}
934905
};
935-
let remaining_words = words.count();
936-
if remaining_words > 0 {
937-
return Err(self.error(ParseDictionaryErrorKind::MismatchedArity {
938-
expected: 2,
939-
actual: 2 + remaining_words,
940-
}));
941-
}
942906

943907
f(word1, word2).map_err(|kind| self.error(kind))?;
944908
}
@@ -1006,14 +970,6 @@ impl<'text> Lines<'text> {
1006970
}
1007971
};
1008972

1009-
let remaining_words = words.count();
1010-
if remaining_words > 0 {
1011-
return Err(self.error(ParseDictionaryErrorKind::MismatchedArity {
1012-
expected: 3,
1013-
actual: 3 + remaining_words,
1014-
}));
1015-
}
1016-
1017973
for row in 1..=row_count {
1018974
// Each row takes the shape:
1019975
// PFX flag stripping prefix [condition [morphological_fields...]]

0 commit comments

Comments
 (0)