diff --git a/libraries/bot-dialogs/pom.xml b/libraries/bot-dialogs/pom.xml
index 078b32912..0d4cd4ed9 100644
--- a/libraries/bot-dialogs/pom.xml
+++ b/libraries/bot-dialogs/pom.xml
@@ -158,4 +158,15 @@
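+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <argLine>-Dfile.encoding=UTF-8</argLine>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>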
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateExtractor.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateExtractor.java
index 76b57dab3..532d7947f 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateExtractor.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateExtractor.java
@@ -108,6 +108,25 @@ private boolean validateMatch(Match match, String text) {
isValidMatch = startsWithBasicDate(subText);
}
}
+
+ // Expressions with mixed separators are not considered valid dates, e.g. "30/4.85" (unless one of them is a comma, as in "30/4, 2016")
+ MatchGroup dayGroup = match.getGroup("day");
+ MatchGroup monthGroup = match.getGroup("month");
+ if (!StringUtility.isNullOrEmpty(dayGroup.value) && !StringUtility.isNullOrEmpty(monthGroup.value)) {
+ String noDateText = match.value.replace(yearGroup.value, "")
+ .replace(monthGroup.value, "").replace(dayGroup.value, "");
+ String[] separators = {"/", "\\", "-", "."};
+ int separatorCount = 0;
+ for (String separator : separators) {
+ if (noDateText.contains(separator)) {
+ separatorCount++;
+ }
+ if (separatorCount > 1) {
+ isValidMatch = false;
+ break;
+ }
+ }
+ }
}
return isValidMatch;
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateTimeExtractor.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateTimeExtractor.java
index 8fbdd8b89..f54bbbcb4 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateTimeExtractor.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateTimeExtractor.java
@@ -128,11 +128,27 @@ public List timeOfTodayAfter(String input, LocalDateTime reference) {
Match[] matches = RegExpUtility.getMatches(this.config.getSimpleTimeOfTodayAfterRegex(), input);
for (Match match : matches) {
+ // @TODO Remove when lookbehinds are handled correctly
+ if (isDecimal(match, input)) {
+ continue;
+ }
+
ret.add(new Token(match.index, match.index + match.length));
}
return ret;
}
+
+ // Reports whether a digit-led match sits directly after a ',' or '.' that follows a digit, i.e. it is the fractional part of a number (e.g. the "24" in 123.24).
+ private boolean isDecimal(Match match, String text) {
+ if (match.index <= 1) {
+ return false;
+ }
+
+ char separator = text.charAt(match.index - 1);
+ boolean afterSeparator = separator == ',' || separator == '.';
+ return afterSeparator && Character.isDigit(text.charAt(match.index - 2)) && Character.isDigit(match.value.charAt(0));
+ }
public List timeOfTodayBefore(String input, LocalDateTime reference) {
List ret = new ArrayList<>();
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseTimeExtractor.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseTimeExtractor.java
index 3bdcbf876..75b9547a6 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseTimeExtractor.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseTimeExtractor.java
@@ -88,6 +88,11 @@ public final List basicRegexMatch(String text) {
Match[] matches = RegExpUtility.getMatches(regex, text);
for (Match match : matches) {
+
+ // @TODO Remove when lookbehinds are handled correctly
+ if (isDecimal(match, text)) {
+ continue;
+ }
// @TODO Workaround to avoid incorrect partial-only matches. Remove after time regex reviews across languages.
String lth = match.getGroup("lth").value;
@@ -102,6 +107,17 @@ public final List basicRegexMatch(String text) {
return ret;
}
+
+ // True when the match begins with a digit and is immediately preceded by "<digit>," or "<digit>.", as in the "24" of 123.24.
+ private boolean isDecimal(Match match, String text) {
+ int start = match.index;
+ if (start < 2) {
+ return false;
+ }
+
+ char previous = text.charAt(start - 1);
+ return (previous == '.' || previous == ',') && Character.isDigit(text.charAt(start - 2)) && Character.isDigit(match.value.charAt(0));
+ }
private List atRegexMatch(String text) {
List ret = new ArrayList<>();
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/BaseDateTime.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/BaseDateTime.java
index 3add7bfae..bd4925600 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/BaseDateTime.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/BaseDateTime.java
@@ -17,7 +17,7 @@
public class BaseDateTime {
- public static final String HourRegex = "(?2[0-4]|[0-1]?\\d)(h)?";
+ public static final String HourRegex = "(?2[0-4]|[0-1]?\\d)(h)?";
public static final String TwoDigitHourRegex = "(?[0-1]\\d|2[0-4])(h)?";
@@ -36,6 +36,8 @@ public class BaseDateTime {
public static final String IllegalYearRegex = "([-])({FourDigitYearRegex})([-])"
.replace("{FourDigitYearRegex}", FourDigitYearRegex);
+ public static final String CheckDecimalRegex = "(?![,.]\\d)";
+
public static final String RangeConnectorSymbolRegex = "(--|-|—|——|~|–)";
public static final String BaseAmDescRegex = "(am\\b|a\\s*\\.\\s*m\\s*\\.|a[\\.]?\\s*m\\b)";
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java
index f91dc7b5b..c86b81be3 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java
@@ -119,7 +119,7 @@ public class EnglishDateTime {
public static final String OfPrepositionRegex = "(\\bof\\b)";
- public static final String TwoDigitYearRegex = "\\b(?([0-24-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b"
+ public static final String TwoDigitYearRegex = "\\b(?([0-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b"
.replace("{AmDescRegex}", AmDescRegex)
.replace("{PmDescRegex}", PmDescRegex);
@@ -134,7 +134,10 @@ public class EnglishDateTime {
public static final String RelativeMonthRegex = "(?((day\\s+)?of\\s+)?{RelativeRegex}\\s+month)\\b"
.replace("{RelativeRegex}", RelativeRegex);
- public static final String WrittenMonthRegex = "(((the\\s+)?month of\\s+)?(?apr(il)?|aug(ust)?|dec(ember)?|feb(ruary)?|jan(uary)?|july?|june?|mar(ch)?|may|nov(ember)?|oct(ober)?|sept(ember)?|sept?))";
+ public static final String MonthRegex = "\\b(?apr(il)?|aug(ust)?|dec(ember)?|feb(ruary)?|jan(uary)?|july?|june?|mar(ch)?|may|nov(ember)?|oct(ober)?|sept(ember)?|sep)(?!\\p{L})";
+
+ public static final String WrittenMonthRegex = "(((the\\s+)?month of\\s+)?{MonthRegex})"
+ .replace("{MonthRegex}", MonthRegex);
public static final String MonthSuffixRegex = "(?(?:(in|of|on)\\s+)?({RelativeMonthRegex}|{WrittenMonthRegex}))"
.replace("{RelativeMonthRegex}", RelativeMonthRegex)
@@ -186,13 +189,14 @@ public class EnglishDateTime {
public static final String SpecialYearPrefixes = "(calendar|(?fiscal|school))";
- public static final String OneWordPeriodRegex = "\\b((((the\\s+)?month of\\s+)?({StrictRelativeRegex}\\s+)?(?apr(il)?|aug(ust)?|dec(ember)?|feb(ruary)?|jan(uary)?|july?|june?|mar(ch)?|may|nov(ember)?|oct(ober)?|sept(ember)?|sept?))|(month|year) to date|(?((un)?till?|to)\\s+date)|({RelativeRegex}\\s+)?(my\\s+)?((?working\\s+week|workweek)|week(end)?|month|(({SpecialYearPrefixes}\\s+)?year))(?!((\\s+of)?\\s+\\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\\s+to\\s+date))(\\s+{AfterNextSuffixRegex})?)\\b"
+ public static final String OneWordPeriodRegex = "\\b((((the\\s+)?month of\\s+)?({StrictRelativeRegex}\\s+)?{MonthRegex})|(month|year) to date|(?((un)?till?|to)\\s+date)|({RelativeRegex}\\s+)?(my\\s+)?((?working\\s+week|workweek)|week(end)?|month|(({SpecialYearPrefixes}\\s+)?year))(?!((\\s+of)?\\s+\\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\\s+to\\s+date))(\\s+{AfterNextSuffixRegex})?)\\b"
.replace("{StrictRelativeRegex}", StrictRelativeRegex)
.replace("{RelativeRegex}", RelativeRegex)
.replace("{AfterNextSuffixRegex}", AfterNextSuffixRegex)
.replace("{SpecialYearPrefixes}", SpecialYearPrefixes)
.replace("{BaseDateTime.BaseAmDescRegex}", BaseDateTime.BaseAmDescRegex)
- .replace("{BaseDateTime.BasePmDescRegex}", BaseDateTime.BasePmDescRegex);
+ .replace("{BaseDateTime.BasePmDescRegex}", BaseDateTime.BasePmDescRegex)
+ .replace("{MonthRegex}", MonthRegex);
public static final String MonthNumWithYear = "\\b(({BaseDateTime.FourDigitYearRegex}(\\s*)[/\\-\\.](\\s*){MonthNumRegex})|({MonthNumRegex}(\\s*)[/\\-](\\s*){BaseDateTime.FourDigitYearRegex}))\\b"
.replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex)
@@ -274,8 +278,6 @@ public class EnglishDateTime {
public static final String MonthOfRegex = "(month)(\\s*)(of)";
- public static final String MonthRegex = "(?apr(il)?|aug(ust)?|dec(ember)?|feb(ruary)?|jan(uary)?|july?|june?|mar(ch)?|may|nov(ember)?|oct(ober)?|sept(ember)?|sept?)";
-
public static final String DateYearRegex = "(?{BaseDateTime.FourDigitYearRegex}|(?(?:3[0-1]|[1-2]\\d|0?[1-9])(?:th|nd|rd|st)?)[\\.]?(\\s+|\\s*[-,/]\\s*|\\s+of\\s+){MonthRegex}[\\.]?)\\b"
+ .replace("{DayPrefix}", DayPrefix)
.replace("{DayRegex}", DayRegex)
.replace("{MonthRegex}", MonthRegex)
- .replace("{DateExtractorYearTermRegex}", DateExtractorYearTermRegex);
+ .replace("{DateExtractorYearTermRegex}", DateExtractorYearTermRegex)
+ .replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex);
public static final String DateExtractor4 = "\\b{MonthNumRegex}\\s*[/\\\\\\-]\\s*{DayRegex}[\\.]?\\s*[/\\\\\\-]\\s*{DateYearRegex}"
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{DayRegex}", DayRegex)
.replace("{DateYearRegex}", DateYearRegex);
- public static final String DateExtractor5 = "\\b{DayRegex}\\s*[/\\\\\\-\\.]\\s*({MonthNumRegex}|{MonthRegex})\\s*[/\\\\\\-\\.]\\s*{DateYearRegex}(?!\\s*[/\\\\\\-\\.]\\s*\\d+)"
+ public static final String DateExtractor5 = "\\b({DayPrefix}(\\s*,)?\\s+)?{DayRegex}\\s*[/\\\\\\-\\.]\\s*({MonthNumRegex}|{MonthRegex})\\s*[/\\\\\\-\\.]\\s*{DateYearRegex}(?!\\s*[/\\\\\\-\\.]\\s*\\d+)"
+ .replace("{DayPrefix}", DayPrefix)
.replace("{DayRegex}", DayRegex)
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{MonthRegex}", MonthRegex)
.replace("{DateYearRegex}", DateYearRegex);
- public static final String DateExtractor6 = "(?<={DatePreposition}\\s+)({StrictRelativeRegex}\\s+)?({WeekDayRegex}\\s+)?{MonthNumRegex}[\\-\\.]{DayRegex}(?![%])\\b"
+ public static final String DateExtractor6 = "(?<={DatePreposition}\\s+)({StrictRelativeRegex}\\s+)?({DayPrefix}\\s+)?{MonthNumRegex}[\\-\\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b"
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{DayRegex}", DayRegex)
- .replace("{WeekDayRegex}", WeekDayRegex)
+ .replace("{DayPrefix}", DayPrefix)
.replace("{DatePreposition}", DatePreposition)
- .replace("{StrictRelativeRegex}", StrictRelativeRegex);
+ .replace("{StrictRelativeRegex}", StrictRelativeRegex)
+ .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex);
- public static final String DateExtractor7L = "\\b({WeekDayRegex}\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\\b"
+ public static final String DateExtractor7L = "\\b({DayPrefix}(\\s*,)?\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\\b"
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{DayRegex}", DayRegex)
- .replace("{WeekDayRegex}", WeekDayRegex)
+ .replace("{DayPrefix}", DayPrefix)
.replace("{DateExtractorYearTermRegex}", DateExtractorYearTermRegex);
- public static final String DateExtractor7S = "\\b({WeekDayRegex}\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}(?![%])\\b"
+ public static final String DateExtractor7S = "\\b({DayPrefix}(\\s*,)?\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b"
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{DayRegex}", DayRegex)
- .replace("{WeekDayRegex}", WeekDayRegex);
+ .replace("{DayPrefix}", DayPrefix)
+ .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex);
- public static final String DateExtractor8 = "(?<={DatePreposition}\\s+)({StrictRelativeRegex}\\s+)?({WeekDayRegex}\\s+)?{DayRegex}[\\\\\\-]{MonthNumRegex}(?![%])\\b"
+ public static final String DateExtractor8 = "(?<={DatePreposition}\\s+)({StrictRelativeRegex}\\s+)?({DayPrefix}\\s+)?{DayRegex}[\\\\\\-]{MonthNumRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b"
.replace("{DayRegex}", DayRegex)
.replace("{MonthNumRegex}", MonthNumRegex)
- .replace("{WeekDayRegex}", WeekDayRegex)
+ .replace("{DayPrefix}", DayPrefix)
.replace("{DatePreposition}", DatePreposition)
- .replace("{StrictRelativeRegex}", StrictRelativeRegex);
+ .replace("{StrictRelativeRegex}", StrictRelativeRegex)
+ .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex);
- public static final String DateExtractor9L = "\\b({WeekDayRegex}\\s+)?{DayRegex}\\s*/\\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\\b"
+ public static final String DateExtractor9L = "\\b({DayPrefix}(\\s*,)?\\s+)?{DayRegex}\\s*/\\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\\b"
.replace("{DayRegex}", DayRegex)
.replace("{MonthNumRegex}", MonthNumRegex)
- .replace("{WeekDayRegex}", WeekDayRegex)
+ .replace("{DayPrefix}", DayPrefix)
.replace("{DateExtractorYearTermRegex}", DateExtractorYearTermRegex);
- public static final String DateExtractor9S = "\\b({WeekDayRegex}\\s+)?{DayRegex}\\s*/\\s*{MonthNumRegex}(?![%])\\b"
+ public static final String DateExtractor9S = "\\b({DayPrefix}(\\s*,)?\\s+)?{DayRegex}\\s*/\\s*{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])\\b"
.replace("{DayRegex}", DayRegex)
.replace("{MonthNumRegex}", MonthNumRegex)
- .replace("{WeekDayRegex}", WeekDayRegex);
+ .replace("{DayPrefix}", DayPrefix)
+ .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex);
- public static final String DateExtractorA = "\\b({WeekDayRegex}\\s+)?{BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*({MonthNumRegex}|{MonthRegex})\\s*[/\\\\\\-\\.]\\s*{DayRegex}"
+ public static final String DateExtractorA = "\\b({DayPrefix}(\\s*,)?\\s+)?(({BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*({MonthNumRegex}|{MonthRegex})\\s*[/\\\\\\-\\.]\\s*{DayRegex})|({MonthRegex}\\s*[/\\\\\\-\\.]\\s*{BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*(the\\s+)?(?(?:3[0-1]|[1-2]\\d|0?[1-9])(?:th|nd|rd|st)?))|({DayRegex}\\s*[/\\\\\\-\\.]\\s*{BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*{MonthRegex}))"
.replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex)
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{MonthRegex}", MonthRegex)
.replace("{DayRegex}", DayRegex)
- .replace("{WeekDayRegex}", WeekDayRegex);
+ .replace("{DayPrefix}", DayPrefix);
public static final String OfMonth = "^\\s*(day\\s+)?of\\s*{MonthRegex}"
.replace("{MonthRegex}", MonthRegex);
@@ -417,11 +429,11 @@ public class EnglishDateTime {
public static final String DeltaMinuteNumRegex = "(?ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|twenty|thirty|forty|fifty|one|two|three|five|eight)";
- public static final String PmRegex = "(?(((?:at|in|around|on|for)\\s+(the\\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\\s+(the\\s+)?night))";
+ public static final String PmRegex = "(?(((?:at|in|around|circa|on|for)\\s+(the\\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\\s+(the\\s+)?night))";
- public static final String PmRegexFull = "(?((?:at|in|around|on|for)\\s+(the\\s+)?)?(afternoon|evening|(mid)?night|lunchtime))";
+ public static final String PmRegexFull = "(?((?:at|in|around|circa|on|for)\\s+(the\\s+)?)?(afternoon|evening|(mid)?night|lunchtime))";
- public static final String AmRegex = "(?((?:at|in|around|on|for)\\s+(the\\s+)?)?(morning))";
+ public static final String AmRegex = "(?((?:at|in|around|circa|on|for)\\s+(the\\s+)?)?(morning))";
public static final String LunchRegex = "\\blunchtime\\b";
@@ -471,7 +483,7 @@ public class EnglishDateTime {
.replace("{MidafternoonRegex}", MidafternoonRegex)
.replace("{MiddayRegex}", MiddayRegex);
- public static final String AtRegex = "\\b(?:(?:(?<=\\bat\\s+)(?:{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\\.\\d)(\\s*((?a)|(?p)))?|{MidTimeRegex}))|{MidTimeRegex})\\b"
+ public static final String AtRegex = "\\b(?:(?:(?<=\\b(at|(at)?\\s*around|circa)\\s+)(?:{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\\.\\d)(\\s*((?a)|(?p)))?|{MidTimeRegex}))|{MidTimeRegex})\\b"
.replace("{WrittenTimeRegex}", WrittenTimeRegex)
.replace("{HourNumRegex}", HourNumRegex)
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex)
@@ -530,7 +542,7 @@ public class EnglishDateTime {
.replace("{BasicTime}", BasicTime)
.replace("{DescRegex}", DescRegex);
- public static final String TimeRegex6 = "{BasicTime}(\\s*{DescRegex})?\\s+{TimeSuffix}\\b"
+ public static final String TimeRegex6 = "({BasicTime})(\\s*{DescRegex})?\\s+{TimeSuffix}\\b"
.replace("{BasicTime}", BasicTime)
.replace("{DescRegex}", DescRegex)
.replace("{TimeSuffix}", TimeSuffix);
@@ -609,7 +621,7 @@ public class EnglishDateTime {
public static final String SuffixAfterRegex = "\\b(((at)\\s)?(or|and)\\s+(above|after|later|greater)(?!\\s+than))\\b";
- public static final String PrepositionRegex = "(?^(at|on|of)(\\s+the)?$)";
+ public static final String PrepositionRegex = "(?^(,\\s*)?(at|on|of)(\\s+the)?$)";
public static final String LaterEarlyRegex = "((?early(\\s+|-))|(?late(r?\\s+|-)))";
@@ -652,7 +664,7 @@ public class EnglishDateTime {
public static final String TimeOfTodayAfterRegex = "^\\s*(,\\s*)?(in\\s+)?{DateTimeSpecificTimeOfDayRegex}"
.replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex);
- public static final String TimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+(at|around|in|on))?\\s*$"
+ public static final String TimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+(at|around|circa|in|on))?\\s*$"
.replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex);
public static final String SimpleTimeOfTodayAfterRegex = "(?week|month|year|decade|weekend)\\b"
.replace("{ReferencePrefixRegex}", ReferencePrefixRegex);
- public static final String ConnectorRegex = "^(-|,|for|t|around|@)$";
+ public static final String ConnectorRegex = "^(-|,|for|t|around|circa|@)$";
public static final String FromToRegex = "(\\b(from).+(to|and|or)\\b.+)";
@@ -830,7 +842,7 @@ public class EnglishDateTime {
public static final String UnspecificDatePeriodRegex = "^(week|month|year)$";
- public static final String PrepositionSuffixRegex = "\\b(on|in|at|around|from|to)$";
+ public static final String PrepositionSuffixRegex = "\\b(on|in|at|around|circa|from|to)$";
public static final String FlexibleDayRegex = "(?([A-Za-z]+\\s)?[A-Za-z\\d]+)";
@@ -901,7 +913,7 @@ public class EnglishDateTime {
public static final String DateNumberConnectorRegex = "^\\s*(?\\s+at)\\s*$";
- public static final String DecadeRegex = "(?(?:nough|twen|thir|fou?r|fif|six|seven|eight|nine)ties|two\\s+thousands)";
+ public static final String DecadeRegex = "(?(?:nough|twen|thir|fou?r|fif|six|seven|eigh|nine)ties|two\\s+thousands)";
public static final String DecadeWithCenturyRegex = "(the\\s+)?(((?\\d|1\\d|2\\d)?(')?(?\\d0)(')?(\\s)?s\\b)|(({CenturyRegex}(\\s+|-)(and\\s+)?)?{DecadeRegex})|({CenturyRegex}(\\s+|-)(and\\s+)?(?tens|hundreds)))"
.replace("{CenturyRegex}", CenturyRegex)
@@ -1386,6 +1398,7 @@ public class EnglishDateTime {
.put("\\b(a|one) second\\b", "\\b(? MorningTermList = Arrays.asList("morning");
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishTimeZone.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishTimeZone.java
index 89b29c012..dd5e8a470 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishTimeZone.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishTimeZone.java
@@ -88,7 +88,7 @@ public class EnglishTimeZone {
.put("esat", -180)
.put("est", -300)
.put("estm", -300)
- .put("et", -240)
+ .put("et", -300)
.put("fjst", 780)
.put("fjt", 720)
.put("get", 240)
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java
index 0ab5214d3..0cee2693f 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java
@@ -27,9 +27,9 @@ public class FrenchDateTime {
public static final String RangeConnectorRegex = "(?de la|au|[aà]|et(\\s*la)?|--|-|—|——)";
- public static final String RelativeRegex = "(?prochaine?|de|du|ce(tte)?|l[ae]|derni[eè]re|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))";
+ public static final String RelativeRegex = "(?prochaine?|de|du|ce(tte)?|l[ae]|derni[eè]re|hier|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))";
- public static final String StrictRelativeRegex = "(?prochaine?|derni[eè]re|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))";
+ public static final String StrictRelativeRegex = "(?prochaine?|derni[eè]re|hier|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))";
public static final String NextSuffixRegex = "(?prochaines?|prochain|suivante)\\b";
@@ -39,9 +39,9 @@ public class FrenchDateTime {
public static final String RangePrefixRegex = "(du|depuis|des?|entre)";
- public static final String DayRegex = "(?01|02|03|04|05|06|07|08|09|10|11e?|12e?|13e?|14e?|15e?|16e?|17e?|18e?|19e?|1er|1|21e?|20e?|22e?|23e?|24e?|25e?|26e?|27e?|28e?|29e?|2e?|30e?|31e?|3e?|4e?|5e?|6e?|7e?|8e?|9e?)(?=\\b|t)";
+ public static final String DayRegex = "(?(?:3[0-1]|[1-2]\\d|0?[1-9])(e(r)?)?)(?=\\b|t)";
- public static final String MonthNumRegex = "(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\\b";
+ public static final String MonthNumRegex = "(?1[0-2]|(0)?[1-9])\\b";
public static final String SpecialDescRegex = "(p\\b)";
@@ -60,7 +60,7 @@ public class FrenchDateTime {
.replace("{AmPmDescRegex}", AmPmDescRegex)
.replace("{SpecialDescRegex}", SpecialDescRegex);
- public static final String TwoDigitYearRegex = "\\b(?([0-24-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b"
+ public static final String TwoDigitYearRegex = "\\b(?([0-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b"
.replace("{AmDescRegex}", AmDescRegex)
.replace("{PmDescRegex}", PmDescRegex);
@@ -212,10 +212,11 @@ public class FrenchDateTime {
.replace("{YearRegex}", YearRegex)
.replace("{TwoDigitYearRegex}", TwoDigitYearRegex);
- public static final String DateExtractor1 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{MonthRegex}\\s*[/\\\\\\.\\-]?\\s*{DayRegex}\\b"
+ public static final String DateExtractor1 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{MonthRegex}\\s*[/\\\\\\.\\-]?\\s*{DayRegex}(\\s*[/\\\\\\.\\-]?\\s*{BaseDateTime.FourDigitYearRegex})?\\b"
.replace("{WeekDayRegex}", WeekDayRegex)
.replace("{MonthRegex}", MonthRegex)
- .replace("{DayRegex}", DayRegex);
+ .replace("{DayRegex}", DayRegex)
+ .replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex);
public static final String DateExtractor2 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}(\\s+|\\s*,\\s*|\\s+){MonthRegex}\\s*[\\.\\-]?\\s*{DateYearRegex}\\b"
.replace("{WeekDayRegex}", WeekDayRegex)
@@ -223,12 +224,13 @@ public class FrenchDateTime {
.replace("{DayRegex}", DayRegex)
.replace("{DateYearRegex}", DateYearRegex);
- public static final String DateExtractor3 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})"
+ public static final String BasicTime = "(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(:|\\s*h\\s*){BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})"
.replace("{WrittenTimeRegex}", WrittenTimeRegex)
.replace("{HourNumRegex}", HourNumRegex)
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex)
@@ -348,7 +356,7 @@ public class FrenchDateTime {
public static final String RestrictedTimeUnitRegex = "(?huere|minute)\\b";
- public static final String ConnectNumRegex = "{BaseDateTime.HourRegex}(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)\\s*{DescRegex}"
+ public static final String ConnectNumRegex = "{BaseDateTime.HourRegex}(?[0-5][0-9])\\s*{DescRegex}"
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex)
.replace("{DescRegex}", DescRegex);
@@ -446,7 +454,7 @@ public class FrenchDateTime {
public static final String TimeOfDayRegex = "\\b(?((((dans\\s+(l[ea])?\\s+)?((?d[eé]but(\\s+|-)|t[oô]t(\\s+|-)(l[ea]\\s*)?)|(?fin\\s*|fin de(\\s+(la)?)|tard\\s*))?(matin([ée]e)?|((d|l)?'?)apr[eè]s[-|\\s*]midi|nuit|soir([eé]e)?)))|(((\\s+(l[ea])?\\s+)?)jour(n[eé]e)?))s?)\\b";
- public static final String SpecificTimeOfDayRegex = "\\b(({RelativeRegex}\\s+{TimeOfDayRegex})|({TimeOfDayRegex}\\s*({NextSuffixRegex}))\\b|\\bsoir|\\bdu soir)s?\\b"
+ public static final String SpecificTimeOfDayRegex = "\\b(({RelativeRegex}\\s+{TimeOfDayRegex})|({TimeOfDayRegex}\\s*({NextSuffixRegex}))\\b|\\b(du )?soir)s?\\b"
.replace("{TimeOfDayRegex}", TimeOfDayRegex)
.replace("{RelativeRegex}", RelativeRegex)
.replace("{NextSuffixRegex}", NextSuffixRegex);
@@ -470,7 +478,7 @@ public class FrenchDateTime {
public static final String TimeOfTodayAfterRegex = "^\\s*(,\\s*)?(en|dans|du\\s+)?{DateTimeSpecificTimeOfDayRegex}"
.replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex);
- public static final String TimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]|pour))?\\s*$"
+ public static final String TimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]|vers|pour))?\\s*$"
.replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex);
public static final String SimpleTimeOfTodayAfterRegex = "({HourNumRegex}|{BaseDateTime.HourRegex})\\s*(,\\s*)?(en|[àa]\\s+)?{DateTimeSpecificTimeOfDayRegex}"
@@ -478,7 +486,7 @@ public class FrenchDateTime {
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex)
.replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex);
- public static final String SimpleTimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]|vers))?\\s*({HourNumRegex}|{BaseDateTime.HourRegex})"
+ public static final String SimpleTimeOfTodayBeforeRegex = "{DateTimeSpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]|vers|pour))?\\s*({HourNumRegex}|{BaseDateTime.HourRegex})"
.replace("{DateTimeSpecificTimeOfDayRegex}", DateTimeSpecificTimeOfDayRegex)
.replace("{HourNumRegex}", HourNumRegex)
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex);
@@ -581,7 +589,7 @@ public class FrenchDateTime {
public static final String SinceRegex = "\\b(depuis)\\b";
- public static final String AroundRegex = "^[.]";
+ public static final String AroundRegex = "\\b(vers)\\b";
public static final String AgoPrefixRegex = "\\b(y a)\\b";
@@ -640,7 +648,7 @@ public class FrenchDateTime {
public static final String RelativeDayRegex = "\\b(((la\\s+)?{RelativeRegex}\\s+journ[ée]e))\\b"
.replace("{RelativeRegex}", RelativeRegex);
- public static final String ConnectorRegex = "^(,|pour|t|vers)$";
+ public static final String ConnectorRegex = "^(,|pour|t|vers|le)$";
public static final String ConnectorAndRegex = "\\b(et\\s*(le|las?)?)\\b.+";
@@ -1188,6 +1196,7 @@ public class FrenchDateTime {
public static final ImmutableMap AmbiguityFiltersDict = ImmutableMap.builder()
.put("^([eé]t[eé])$", "(? AmbiguityTimeFiltersDict = ImmutableMap.builder()
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java
index 111190643..7f5fbdbfd 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java
@@ -28,9 +28,9 @@ public class PortugueseDateTime {
public static final String RangeConnectorRegex = "(?(e\\s*(([àa]s?)|o)?)|{BaseDateTime.RangeConnectorSymbolRegex})"
.replace("{BaseDateTime.RangeConnectorSymbolRegex}", BaseDateTime.RangeConnectorSymbolRegex);
- public static final String DayRegex = "(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)(?=\\b|t)";
+ public static final String DayRegex = "(?(?:3[0-1]|[1-2]\\d|0?[1-9]))(?=\\b|t)";
- public static final String MonthNumRegex = "(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\\b";
+ public static final String MonthNumRegex = "(?1[0-2]|(0)?[1-9])\\b";
public static final String AmDescRegex = "({BaseDateTime.BaseAmDescRegex})"
.replace("{BaseDateTime.BaseAmDescRegex}", BaseDateTime.BaseAmDescRegex);
@@ -45,13 +45,15 @@ public class PortugueseDateTime {
.replace("{AmDescRegex}", AmDescRegex)
.replace("{PmDescRegex}", PmDescRegex);
+ public static final String OclockRegex = "(?em\\s+ponto)";
+
public static final String OfPrepositionRegex = "(\\bd(o|a|e)s?\\b)";
public static final String AfterNextSuffixRegex = "\\b(que\\s+vem|passad[oa])\\b";
public static final String RangePrefixRegex = "((de(sde)?|das?|entre)\\s+(a(s)?\\s+)?)";
- public static final String TwoDigitYearRegex = "\\b(?([0-24-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b"
+ public static final String TwoDigitYearRegex = "\\b(?([0-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d)))\\b"
.replace("{AmDescRegex}", AmDescRegex)
.replace("{PmDescRegex}", PmDescRegex);
@@ -176,7 +178,9 @@ public class PortugueseDateTime {
public static final String OnRegex = "(?<=\\b(em|no)\\s+)({DayRegex}s?)\\b"
.replace("{DayRegex}", DayRegex);
- public static final String RelaxedOnRegex = "(?<=\\b(em|n[oa]|d[oa])\\s+)(dia\\s+)?((?10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)s?)\\b";
+ public static final String RelaxedOnRegex = "(?<=\\b(em|n[oa]|d[oa])\\s+)(dia\\s+)?({DayRegex}s?)\\b(?!\\s*[/\\\\\\-\\.,:\\s]\\s*(\\d|{MonthRegex}))"
+ .replace("{DayRegex}", DayRegex)
+ .replace("{MonthRegex}", MonthRegex);
public static final String ThisRegex = "\\b(([nd]?es[st][ea]\\s*){WeekDayRegex})|({WeekDayRegex}\\s*([nd]?es[st]a\\s+semana))\\b"
.replace("{WeekDayRegex}", WeekDayRegex);
@@ -195,7 +199,12 @@ public class PortugueseDateTime {
public static final String WeekDayAndDayOfMonthRegex = ".^";
- public static final String WeekDayAndDayRegex = ".^";
+ public static final String WeekDayAndDayRegex = "\\b{WeekDayRegex}\\s+({DayRegex})(?!([-:/]|\\.\\d|(\\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\\b"
+ .replace("{WeekDayRegex}", WeekDayRegex)
+ .replace("{DayRegex}", DayRegex)
+ .replace("{AmDescRegex}", AmDescRegex)
+ .replace("{PmDescRegex}", PmDescRegex)
+ .replace("{OclockRegex}", OclockRegex);
public static final String WeekDayOfMonthRegex = "(?(n?[ao]\\s+)?(?primeir[ao]|1[ao]|segund[ao]|2[ao]|terceir[ao]|3[ao]|[qc]uart[ao]|4[ao]|quint[ao]|5[ao]|[uú]ltim[ao])\\s+{WeekDayRegex}\\s+{MonthSuffixRegex})"
.replace("{WeekDayRegex}", WeekDayRegex)
@@ -225,18 +234,19 @@ public class PortugueseDateTime {
.replace("{YearRegex}", YearRegex)
.replace("{TwoDigitYearRegex}", TwoDigitYearRegex);
- public static final String DateExtractor1 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}?((\\s*(de)|[/\\\\\\.\\-])\\s*)?{MonthRegex}\\b"
+ public static final String DateExtractor1 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}((\\s*(de)|[/\\\\\\.\\- ])\\s*)?{MonthRegex}\\b"
.replace("{WeekDayRegex}", WeekDayRegex)
.replace("{DayRegex}", DayRegex)
.replace("{MonthRegex}", MonthRegex);
- public static final String DateExtractor2 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}\\s*([\\.\\-]|de)?\\s*{MonthRegex}(\\s*(,|de)\\s*){DateYearRegex}\\b"
+ public static final String DateExtractor2 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?({DayRegex}\\s*([/\\.\\-]|de)?\\s*{MonthRegex}(\\s*([,./-]|de)\\s*){DateYearRegex}|{BaseDateTime.FourDigitYearRegex}\\s*[/\\.\\- ]\\s*{DayRegex}\\s*[/\\.\\- ]\\s*{MonthRegex})\\b"
.replace("{MonthRegex}", MonthRegex)
.replace("{DayRegex}", DayRegex)
.replace("{DateYearRegex}", DateYearRegex)
- .replace("{WeekDayRegex}", WeekDayRegex);
+ .replace("{WeekDayRegex}", WeekDayRegex)
+ .replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex);
- public static final String DateExtractor3 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}(\\s+|\\s*,\\s*|\\s+de\\s+|\\s*-\\s*){MonthRegex}((\\s+|\\s*(,|de)\\s*){DateYearRegex})?\\b"
+ public static final String DateExtractor3 = "\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{MonthRegex}(\\s*[/\\.\\- ]\\s*|\\s+de\\s+){DayRegex}((\\s*[/\\.\\- ]\\s*|\\s+de\\s+){DateYearRegex})?\\b"
.replace("{DayRegex}", DayRegex)
.replace("{MonthRegex}", MonthRegex)
.replace("{WeekDayRegex}", WeekDayRegex)
@@ -253,28 +263,34 @@ public class PortugueseDateTime {
.replace("{DayRegex}", DayRegex)
.replace("{DateYearRegex}", DateYearRegex);
- public static final String DateExtractor6 = "(?<=\\b(em|no|o)\\s+){MonthNumRegex}[\\-\\.]{DayRegex}\\b"
+ public static final String DateExtractor6 = "(?<=\\b(em|no|o)\\s+){MonthNumRegex}[\\-\\.]{DayRegex}{BaseDateTime.CheckDecimalRegex}\\b"
.replace("{MonthNumRegex}", MonthNumRegex)
- .replace("{DayRegex}", DayRegex);
+ .replace("{DayRegex}", DayRegex)
+ .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex);
- public static final String DateExtractor7 = "\\b{MonthNumRegex}\\s*/\\s*{DayRegex}((\\s+|\\s*(,|de)\\s*){DateYearRegex})?\\b"
+ public static final String DateExtractor7 = "\\b{MonthNumRegex}\\s*/\\s*{DayRegex}((\\s+|\\s*(,|de)\\s*){DateYearRegex})?{BaseDateTime.CheckDecimalRegex}\\b"
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{DayRegex}", DayRegex)
- .replace("{DateYearRegex}", DateYearRegex);
+ .replace("{DateYearRegex}", DateYearRegex)
+ .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex);
- public static final String DateExtractor8 = "(?<=\\b(em|no|o)\\s+){DayRegex}[\\\\\\-]{MonthNumRegex}\\b"
+ public static final String DateExtractor8 = "(?<=\\b(em|no|o)\\s+){DayRegex}[\\\\\\-]{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}\\b"
.replace("{MonthNumRegex}", MonthNumRegex)
- .replace("{DayRegex}", DayRegex);
+ .replace("{DayRegex}", DayRegex)
+ .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex);
- public static final String DateExtractor9 = "\\b{DayRegex}\\s*/\\s*{MonthNumRegex}((\\s+|\\s*(,|de)\\s*){DateYearRegex})?\\b"
+ public static final String DateExtractor9 = "\\b{DayRegex}\\s*/\\s*{MonthNumRegex}((\\s+|\\s*(,|de)\\s*){DateYearRegex})?{BaseDateTime.CheckDecimalRegex}\\b"
.replace("{DayRegex}", DayRegex)
.replace("{MonthNumRegex}", MonthNumRegex)
- .replace("{DateYearRegex}", DateYearRegex);
+ .replace("{DateYearRegex}", DateYearRegex)
+ .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex);
- public static final String DateExtractor10 = "\\b{YearRegex}\\s*[/\\\\\\-\\.]\\s*{MonthNumRegex}\\s*[/\\\\\\-\\.]\\s*{DayRegex}(?!\\s*[/\\\\\\-\\.]\\s*\\d+)"
+ public static final String DateExtractor10 = "\\b({YearRegex}\\s*[/\\\\\\-\\.]\\s*({MonthNumRegex}|{MonthRegex})\\s*[/\\\\\\-\\.]\\s*{DayRegex}|{MonthRegex}\\s*[/\\\\\\-\\.]\\s*{BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*{DayRegex}|{DayRegex}\\s*[/\\\\\\-\\.]\\s*{BaseDateTime.FourDigitYearRegex}\\s*[/\\\\\\-\\.]\\s*{MonthRegex})(?!\\s*[/\\\\\\-\\.:]\\s*\\d+)"
.replace("{YearRegex}", YearRegex)
.replace("{MonthNumRegex}", MonthNumRegex)
- .replace("{DayRegex}", DayRegex);
+ .replace("{MonthRegex}", MonthRegex)
+ .replace("{DayRegex}", DayRegex)
+ .replace("{BaseDateTime.FourDigitYearRegex}", BaseDateTime.FourDigitYearRegex);
public static final String DateExtractor11 = "(?<=\\b(dia)\\s+){DayRegex}"
.replace("{DayRegex}", DayRegex);
@@ -285,8 +301,6 @@ public class PortugueseDateTime {
public static final String DeltaMinuteNumRegex = "(?um|dois|tr[êe]s|[qc]uatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|catorze|quatorze|quinze|dez[ea]sseis|dez[ea]sete|dezoito|dez[ea]nove|vinte|trinta|[qc]uarenta|cin[qc]uenta)";
- public static final String OclockRegex = "(?em\\s+ponto)";
-
public static final String PmRegex = "(?((pela|de|da|\\b[àa]\\b|na)\\s+(tarde|noite)))|((depois\\s+do|ap[óo]s\\s+o)\\s+(almo[çc]o|meio dia|meio-dia))";
public static final String AmRegex = "(?(pela|de|da|na)\\s+(manh[ãa]|madrugada))";
@@ -329,7 +343,7 @@ public class PortugueseDateTime {
.replace("{WrittenTimeRegex}", WrittenTimeRegex)
.replace("{OclockRegex}", OclockRegex);
- public static final String ConnectNumRegex = "({BaseDateTime.HourRegex}(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)\\s*{DescRegex})"
+ public static final String ConnectNumRegex = "({BaseDateTime.HourRegex}(?[0-5][0-9])\\s*{DescRegex})"
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex)
.replace("{DescRegex}", DescRegex);
@@ -401,29 +415,29 @@ public class PortugueseDateTime {
.replace("{BaseDateTime.MinuteRegex}", BaseDateTime.MinuteRegex)
.replace("{DescRegex}", DescRegex);
- public static final String PrepositionRegex = "(?([àa]s?|em|por|pelo|pela|no|na|de|d[oa]?)?$)";
+ public static final String PrepositionRegex = "(?([àa]s?|em|por|pel[ao]|n[ao]|de|d[ao]?)?$)";
public static final String NowRegex = "\\b(?((logo|exatamente)\\s+)?agora(\\s+mesmo)?|neste\\s+momento|(assim\\s+que|t[ãa]o\\s+cedo\\s+quanto)\\s+(poss[ií]vel|possas?|possamos)|o\\s+mais\\s+(cedo|r[aá]pido)\\s+poss[íi]vel|recentemente|previamente)\\b";
- public static final String SuffixRegex = "^\\s*((e|a|em|por|pelo|pela|no|na|de)\\s+)?(manh[ãa]|madrugada|meio\\s*dia|tarde|noite)\\b";
+ public static final String SuffixRegex = "^\\s*((e|a|em|por|pel[ao]|n[ao]|de)\\s+)?(manh[ãa]|madrugada|meio\\s*dia|tarde|noite)\\b";
public static final String TimeOfDayRegex = "\\b(?manh[ãa]|madrugada|tarde|noite|((depois\\s+do|ap[óo]s\\s+o)\\s+(almo[çc]o|meio dia|meio-dia)))\\b";
public static final String SpecificTimeOfDayRegex = "\\b(((((a)?\\s+|[nd]?es[st]a|seguinte|pr[oó]xim[oa]|[uú]ltim[oa])\\s+)?{TimeOfDayRegex}))\\b"
.replace("{TimeOfDayRegex}", TimeOfDayRegex);
- public static final String TimeOfTodayAfterRegex = "^\\s*(,\\s*)?([àa]|em|por|pelo|pela|de|no|na?\\s+)?{SpecificTimeOfDayRegex}"
+ public static final String TimeOfTodayAfterRegex = "^\\s*(,\\s*)?([àa]|em|por|pel[ao]|de|no|na?\\s+)?{SpecificTimeOfDayRegex}"
.replace("{SpecificTimeOfDayRegex}", SpecificTimeOfDayRegex);
- public static final String TimeOfTodayBeforeRegex = "({SpecificTimeOfDayRegex}(\\s*,)?(\\s+(a\\s+la(s)?|para))?\\s*)"
+ public static final String TimeOfTodayBeforeRegex = "({SpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]s|para))?\\s*)"
.replace("{SpecificTimeOfDayRegex}", SpecificTimeOfDayRegex);
- public static final String SimpleTimeOfTodayAfterRegex = "({HourNumRegex}|{BaseDateTime.HourRegex})\\s*(,\\s*)?((en|de(l)?)?\\s+)?{SpecificTimeOfDayRegex}"
+ public static final String SimpleTimeOfTodayAfterRegex = "({HourNumRegex}|{BaseDateTime.HourRegex})\\s*(,\\s*)?{SpecificTimeOfDayRegex}"
.replace("{HourNumRegex}", HourNumRegex)
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex)
.replace("{SpecificTimeOfDayRegex}", SpecificTimeOfDayRegex);
- public static final String SimpleTimeOfTodayBeforeRegex = "({SpecificTimeOfDayRegex}(\\s*,)?(\\s+(a\\s+la|para))?\\s*({HourNumRegex}|{BaseDateTime.HourRegex}))"
+ public static final String SimpleTimeOfTodayBeforeRegex = "({SpecificTimeOfDayRegex}(\\s*,)?(\\s+([àa]s|((cerca|perto|ao\\s+redor|por\\s+volta)\\s+(de|das))))?\\s*({HourNumRegex}|{BaseDateTime.HourRegex}))"
.replace("{SpecificTimeOfDayRegex}", SpecificTimeOfDayRegex)
.replace("{HourNumRegex}", HourNumRegex)
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex);
@@ -434,13 +448,13 @@ public class PortugueseDateTime {
public static final String UnspecificEndOfRangeRegex = "^[.]";
- public static final String UnitRegex = "(?anos|ano|meses|m[êe]s|semanas|semana|dias|dia|horas|hora|h|hr|hrs|hs|minutos|minuto|mins|min|segundos|segundo|segs|seg)\\b";
+ public static final String UnitRegex = "(?anos?|meses|m[êe]s|semanas?|dias?|horas?|hrs?|hs?|minutos?|mins?|segundos?|segs?)\\b";
- public static final String ConnectorRegex = "^(,|t|para [ao]|para as|pras|cerca de|cerca das|perto de|perto das|quase)$";
+ public static final String ConnectorRegex = "^(,|t|para [ao]|para as|pras|(cerca|perto|ao\\s+redor|por\\s+volta)\\s+(de|das)|quase)$";
- public static final String TimeHourNumRegex = "(?vinte e um|vinte e dois|vinte e tr[êe]s|vinte e quatro|zero|um|uma|dois|duas|tr[êe]s|quatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|quatorze|catorze|quinze|dez[ea]sseis|dez[ea]ssete|dezoito|dez[ea]nove|vinte)";
+ public static final String TimeHourNumRegex = "(?vinte( e (um|dois|tr[êe]s|quatro))?|zero|uma?|dois|duas|tr[êe]s|quatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|quatorze|catorze|quinze|dez([ea]sseis|[ea]ssete|oito|[ea]nove))";
- public static final String PureNumFromTo = "((desde|de|da|das)\\s+(a(s)?\\s+)?)?({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\\s*(?{DescRegex}))?\\s*{TillRegex}\\s*({BaseDateTime.HourRegex}|{TimeHourNumRegex})\\s*(?{PmRegex}|{AmRegex}|{DescRegex})?"
+ public static final String PureNumFromTo = "(((desde|de|da|das)\\s+(a(s)?\\s+)?)?({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\\s*(?{DescRegex}))?\\s*{TillRegex}(?{DescRegex}))?\\s*{TillRegex})\\s*({BaseDateTime.HourRegex}|{TimeHourNumRegex})\\s*(?{PmRegex}|{AmRegex}|{DescRegex})?"
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex)
.replace("{TimeHourNumRegex}", TimeHourNumRegex)
.replace("{DescRegex}", DescRegex)
@@ -459,7 +473,7 @@ public class PortugueseDateTime {
public static final String SpecificTimeBetweenAnd = "^[.]";
- public static final String TimeUnitRegex = "(?horas|hora|h|minutos|minuto|mins|min|segundos|segundo|secs|sec)\\b";
+ public static final String TimeUnitRegex = "(?horas?|h|minutos?|mins?|segundos?|se[cg]s?)\\b";
public static final String TimeFollowedUnit = "^\\s*{TimeUnitRegex}"
.replace("{TimeUnitRegex}", TimeUnitRegex);
@@ -505,7 +519,7 @@ public class PortugueseDateTime {
public static final String InexactNumberUnitRegex = "\\b(poucos|pouco|algum|alguns|v[áa]rios)\\s+{UnitRegex}"
.replace("{UnitRegex}", UnitRegex);
- public static final String HolidayRegex1 = "\\b(?sexta-feira santa|sexta-feira da paix[ãa]o|quarta-feira de cinzas|carnaval|dia (de|de los) presidentes?|ano novo chin[eê]s|ano novo|v[ée]spera de ano novo|natal|v[ée]spera de natal|dia de a[cç][ãa]o de gra[çc]as|a[cç][ãa]o de gra[çc]as|yuandan|halloween|dia das bruxas|p[áa]scoa)(\\s+(d[eo]?\\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|[nd]?es[st][ea]|[uú]ltim[oa]?|em))\\s+ano))?\\b"
+ public static final String HolidayRegex1 = "\\b(?sexta-feira santa|sexta-feira da paix[ãa]o|quarta-feira de cinzas|carnaval|dia dos? presidentes?|ano novo chin[eê]s|ano novo|v[ée]spera de ano novo|natal|v[ée]spera de natal|dia de a[cç][ãa]o de gra[çc]as|a[cç][ãa]o de gra[çc]as|yuandan|halloween|dia das bruxas|p[áa]scoa)(\\s+(d[eo]?\\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|[nd]?es[st][ea]|[uú]ltim[oa]?|em))\\s+ano))?\\b"
.replace("{YearRegex}", YearRegex);
public static final String HolidayRegex2 = "\\b(?(dia\\s+(d[eoa]s?\\s+)?)?(martin luther king|todos os santos|s[ãa]o (patr[íi]cio|francisco|jorge|jo[ãa]o)|independ[êe]ncia))(\\s+(d[eo]?\\s+)?({YearRegex}|(?(pr[oó]xim[oa]?|[nd]?es[st][ea]|[uú]ltim[oa]?|em))\\s+ano))?\\b"
@@ -520,7 +534,7 @@ public class PortugueseDateTime {
public static final String SinceRegex = "(desde(\\s+(as?|o))?)";
- public static final String AroundRegex = "^[.]";
+ public static final String AroundRegex = "(?:\\b(?:cerca|perto|ao\\s+redor|por\\s+volta)\\s*?\\b)(\\s+(de|das))?";
public static final String PeriodicRegex = "\\b(?di[áa]ri[ao]|diariamente|mensalmente|semanalmente|quinzenalmente|anualmente)\\b";
@@ -566,7 +580,7 @@ public class PortugueseDateTime {
public static final String AgoRegex = "\\b(antes|atr[áa]s|no passado)\\b";
- public static final String LaterRegex = "\\b(depois d[eoa]s?|ap[óo]s (as)?|desde (as|o)|desde|no futuro|mais tarde)\\b";
+ public static final String LaterRegex = "\\b(depois d[eoa]s?|ap[óo]s (as)?|desde( (as|o))?|no futuro|mais tarde)\\b";
public static final String Tomorrow = "amanh[ãa]";
@@ -932,7 +946,7 @@ public class PortugueseDateTime {
public static final List DurationDateRestrictions = Arrays.asList();
public static final ImmutableMap AmbiguityFiltersDict = ImmutableMap.builder()
- .put("null", "null")
+ .put("^(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)$", "([$%£&!?@#])(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)|(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)([$%£&@#])")
.build();
public static final List EarlyMorningTermList = Arrays.asList("madrugada");
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java
index 019ed94cb..d8330a668 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java
@@ -36,7 +36,7 @@ public class SpanishDateTime {
public static final String DayRegex = "\\b(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)(?:\\.[º°])?(?=\\b|t)";
- public static final String MonthNumRegex = "(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\\b";
+ public static final String MonthNumRegex = "(?1[0-2]|(0)?[1-9])\\b";
public static final String OclockRegex = "(?en\\s+punto)";
@@ -68,7 +68,7 @@ public class SpanishDateTime {
public static final String RangePrefixRegex = "((de(l|sde)?|entre)(\\s+la(s)?)?)";
- public static final String TwoDigitYearRegex = "\\b(?([0-24-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d))|\\.?[º°ª])\\b"
+ public static final String TwoDigitYearRegex = "\\b(?([0-9]\\d))(?!(\\s*((\\:\\d)|{AmDescRegex}|{PmDescRegex}|\\.\\d))|\\.?[º°ª])\\b"
.replace("{AmDescRegex}", AmDescRegex)
.replace("{PmDescRegex}", PmDescRegex);
@@ -308,17 +308,18 @@ public class SpanishDateTime {
.replace("{DayRegex}", DayRegex)
.replace("{MonthRegex}", MonthRegex);
- public static final String DateExtractor2 = "\\b((el\\s+d[ií]a|{WeekDayRegex})(\\s+|\\s*,\\s*))?(?cero|una|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce)\\b";
@@ -399,6 +406,11 @@ public class SpanishDateTime {
.replace("{PmRegex}", PmRegex)
.replace("{OclockRegex}", OclockRegex);
+ public static final String GeneralDescRegex = "({DescRegex}|(?{AmRegex}|{PmRegex}))"
+ .replace("{DescRegex}", DescRegex)
+ .replace("{AmRegex}", AmRegex)
+ .replace("{PmRegex}", PmRegex);
+
public static final String BasicTime = "(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex})"
.replace("{WrittenTimeRegex}", WrittenTimeRegex)
.replace("{HourNumRegex}", HourNumRegex)
@@ -415,7 +427,7 @@ public class SpanishDateTime {
.replace("{DescRegex}", DescRegex)
.replace("{MidTimeRegex}", MidTimeRegex);
- public static final String ConnectNumRegex = "({BaseDateTime.HourRegex}(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)\\s*{DescRegex})"
+ public static final String ConnectNumRegex = "({BaseDateTime.HourRegex}(?[0-5][0-9])\\s*{DescRegex})"
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex)
.replace("{DescRegex}", DescRegex);
@@ -444,8 +456,9 @@ public class SpanishDateTime {
.replace("{TimeTokenPrefix}", TimeTokenPrefix)
.replace("{TimeSuffix}", TimeSuffix);
- public static final String TimeRegex4 = "\\b(({DescRegex}?)|({BasicTime}?)({DescRegex}?)){TimePrefix}(\\s*({HourNumRegex}|{BaseDateTime.HourRegex}))?(\\s+{TensTimeRegex}(\\s*(y\\s+)?{MinuteNumRegex})?)?(\\s*({OclockRegex}|{DescRegex})|\\b)"
+ public static final String TimeRegex4 = "\\b(({DescRegex}?)|({BasicTime}\\s*)?({GeneralDescRegex}?)){TimePrefix}(\\s*({HourNumRegex}|{BaseDateTime.HourRegex}))?(\\s+{TensTimeRegex}(\\s*(y\\s+)?{MinuteNumRegex})?)?(\\s*({OclockRegex}|{DescRegex})|\\b)"
.replace("{DescRegex}", DescRegex)
+ .replace("{GeneralDescRegex}", GeneralDescRegex)
.replace("{BasicTime}", BasicTime)
.replace("{TimePrefix}", TimePrefix)
.replace("{HourNumRegex}", HourNumRegex)
@@ -500,7 +513,7 @@ public class SpanishDateTime {
public static final String SuffixRegex = "^\\s*(((y|a|en|por)\\s+la|al)\\s+)?(mañana|madrugada|medio\\s*d[ií]a|(?(({LaterEarlyRegex}\\s+)((del?|en|por)(\\s+(el|los?|las?))?\\s+)?)?(mañana|madrugada|pasado\\s+(el\\s+)?medio\\s?d[ií]a|(?(({LaterEarlyRegex}\\s+)((del?|en|por)(\\s+(el|los?|las?))?\\s+)?)?(mañana|madrugada|pasado\\s+(el\\s+)?medio\\s?d[ií]a|(?veintiuno|veintidos|veintitres|veinticuatro|cero|uno|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce|trece|catorce|quince|diecis([eé])is|diecisiete|dieciocho|diecinueve|veinte)";
+ public static final String TimeHourNumRegex = "(?veint(i(uno|dos|tres|cuatro)|e)|cero|uno|dos|tres|cuatro|cinco|seis|siete|ocho|nueve|diez|once|doce|trece|catorce|quince|dieci(s([eé])is|siete|ocho|nueve))";
public static final String PureNumFromTo = "((\\b(desde|de)\\s+(la(s)?\\s+)?)?({BaseDateTime.HourRegex}|{TimeHourNumRegex})(?!\\s+al?\\b)(\\s*(?{DescRegex}))?|(\\b(desde|de)\\s+(la(s)?\\s+)?)({BaseDateTime.HourRegex}|{TimeHourNumRegex})(\\s*(?{DescRegex}))?)\\s*{TillRegex}\\s*({BaseDateTime.HourRegex}|{TimeHourNumRegex})\\s*(?{PmRegex}|{AmRegex}|{DescRegex})?"
.replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex)
@@ -1120,7 +1133,7 @@ public class SpanishDateTime {
public static final String UnspecificDatePeriodRegex = "^[\\.]";
- public static final String PrepositionSuffixRegex = "\\b(en|el|la|cerca|desde|durante|hasta|hacia)$";
+ public static final String PrepositionSuffixRegex = "\\b(en|el|la|cerca|alrededor|desde|durante|hasta|hacia)$";
public static final String RestOfDateTimeRegex = "\\bresto\\s+((del?)\\s+)?((la|el|est[ae])\\s+)?(?(día|jornada))(\\s+de\\s+hoy)?\\b";
@@ -1156,6 +1169,7 @@ public class SpanishDateTime {
.put("^a[nñ]o$", "(? EarlyMorningTermList = Arrays.asList("madrugada");
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/ChineseNumeric.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/ChineseNumeric.java
index bf648924b..187fef6f4 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/ChineseNumeric.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/ChineseNumeric.java
@@ -162,7 +162,7 @@ public class ChineseNumeric {
public static final String DigitalNumberRegex = "((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))"
.replace("{BaseNumbers.MultiplierLookupRegex}", BaseNumbers.MultiplierLookupRegex);
- public static final String ZeroToNineFullHalfRegex = "[\\d1234567890]";
+ public static final String ZeroToNineFullHalfRegex = "[\\d]";
public static final String DigitNumRegex = "{ZeroToNineFullHalfRegex}+"
.replace("{ZeroToNineFullHalfRegex}", ZeroToNineFullHalfRegex);
@@ -311,11 +311,11 @@ public class ChineseNumeric {
.replace("{AllFloatRegex}", AllFloatRegex)
.replace("{ZeroToNineIntegerRegex}", ZeroToNineIntegerRegex);
- public static final String DoubleExponentialNotationRegex = "(?)";
+ public static final String MoreRegex = "((大于|多于|高于|超过|大於|多於|高於|超過|超过)了?|过|>)";
public static final String LessRegex = "(小于|少于|低于|小於|少於|低於|不到|不足|<)";
@@ -447,7 +447,7 @@ public class ChineseNumeric {
.replace("{LessRegex}", LessRegex)
.replace("{SpeicalCharBeforeNumber}", SpeicalCharBeforeNumber);
- public static final String MoreOrEqualSuffix = "(或|或者)\\s*(以上|之上|更[大多高])";
+ public static final String MoreOrEqualSuffix = "(或|或者)\\s*(次?以上|之上|更[大多高])";
public static final String LessOrEqual = "(({LessRegex}\\s*(或|或者)?\\s*{EqualRegex})|(至多|最多){SpeicalCharBeforeNumber}?|不{MoreRegex}|≤)"
.replace("{LessRegex}", LessRegex)
@@ -463,7 +463,7 @@ public class ChineseNumeric {
public static final String OneNumberRangeMoreRegex2 = "比\\s*(?((?!(([,,](?!\\d+))|。)).)+)\\s*更?[大多高]";
- public static final String OneNumberRangeMoreRegex3 = "(?((?!(([,,](?!\\d+))|。|[或者])).)+)\\s*(或|或者)?\\s*([多几余幾餘]|以上|之上|更[大多高])([万亿萬億]{0,2})";
+ public static final String OneNumberRangeMoreRegex3 = "(?((?!(([,,](?!\\d+))|。|[或者])).)+)\\s*(或|或者)?\\s*([多几余幾餘]|次?以上|之上|更[大多高])([万亿萬億]{0,2})";
public static final String OneNumberRangeLessRegex1 = "({LessOrEqual}|{LessRegex})\\s*(?((?!([并且而並的同時时]|([,,](?!\\d+))|。)).)+)"
.replace("{LessOrEqual}", LessOrEqual)
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java
index 9f46b23b6..ae82d423b 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java
@@ -198,7 +198,7 @@ public static String DoubleWithoutIntegralRegex(String placeholder) {
public static final String TillRegex = "((?)";
+ public static final String MoreRegex = "(?:(bigger|greater|more|higher|larger)(\\s+than)?|above|over|beyond|exceed(ed|ing)?|surpass(ed|ing)?|(?<!<|=)>)";
public static final String LessRegex = "(?:(less|lower|smaller|fewer)(\\s+than)?|below|under|(?<!>|=)<)";
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java
index bbc5ae533..98441a99b 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java
@@ -191,12 +191,113 @@ public static String DoubleWithoutIntegralRegex(String placeholder) {
public static final String DoubleCaretExponentialNotationRegex = "(((?)";
+
+ public static final String LessRegex = "(?:(less|plus\\s+(bas|petit|jeune)|moins|inf[ée]rieure?s?)(\\s+([àa]|d[e'’]|que))?|((en )?dessous)\\s+de|under|(?<!>|=)<)";
+
+ public static final String EqualRegex = "(([ée]ga(l(es)?|ux)|au\\s+nombre)(\\s+([àa]|d[e'’]))?|(?<!<|>)=)";
+
+ public static final String MoreOrEqualPrefix = "((pas\\s+{LessRegex})|(au\\s+moins|[àa] partir d[e'’]))"
+ .replace("{LessRegex}", LessRegex);
+
+ public static final String MoreOrEqual = "(?:({MoreRegex}\\s+(ou)?\\s+{EqualRegex})|({EqualRegex}\\s+(ou)?\\s+{MoreRegex})|{MoreOrEqualPrefix}(\\s+(ou)?\\s+{EqualRegex})?|({EqualRegex}\\s+(ou)?\\s+)?{MoreOrEqualPrefix}|>\\s*=|≥)"
+ .replace("{MoreRegex}", MoreRegex)
+ .replace("{EqualRegex}", EqualRegex)
+ .replace("{LessRegex}", LessRegex)
+ .replace("{MoreOrEqualPrefix}", MoreOrEqualPrefix);
+
+ public static final String MoreOrEqualSuffix = "((et|ou)\\s+(((more|greater|higher|plus(\\s+grand)?|sup[ée]rieure?s?)((?!\\s+([àa]|que))|(\\s+([àa]|que)(?!((\\s+ou\\s+[èe]ga(l(es)?|ux)\\s+[àa])?\\s*\\d+)))))|((a plus|au-dessus)\\s+d[e'’](?!\\s+than))))";
+
+ public static final String LessOrEqualPrefix = "((pas\\s+{MoreRegex})|(au\\s+plus)|(jusqu'[àa]))"
+ .replace("{MoreRegex}", MoreRegex);
+
+ public static final String LessOrEqual = "(({LessRegex}\\s+(ou)?\\s+{EqualRegex})|({EqualRegex}\\s+(ou)?\\s+{LessRegex})|{LessOrEqualPrefix}(\\s+(ou)?\\s+{EqualRegex})?|({EqualRegex}\\s+(ou)?\\s+)?{LessOrEqualPrefix}|<\\s*=|≤)"
+ .replace("{LessRegex}", LessRegex)
+ .replace("{EqualRegex}", EqualRegex)
+ .replace("{MoreRegex}", MoreRegex)
+ .replace("{LessOrEqualPrefix}", LessOrEqualPrefix);
+
+ public static final String LessOrEqualSuffix = "((et|ou)\\s+(less|lower|plus petit|moins|inf[ée]rieure?s?)((?!\\s+([àa]|de|que))|(\\s+([àa]|d[e'’]|que)(?!(\\s*\\d+)))))";
+
+ public static final String NumberSplitMark = "(?)(?!\\s*\\b(et\\s+({LessRegex}|{MoreRegex})|mais|ou|to)\\b)"
+ .replace("{MoreRegex}", MoreRegex)
+ .replace("{LessRegex}", LessRegex);
+
+ public static final String MoreRegexNoNumberSucceed = "((bigger|greater|more|plus(\\s+grand)?|sup[ée]rieure?s?)((?!\\s+([àa]|que))|\\s+(([àa]|que)(?!(\\s*\\d+))))|((au-dessus|a plus)\\s+d[e'’])(?!(\\s*\\d+)))";
+
+ public static final String LessRegexNoNumberSucceed = "((less|lower|plus petit|moins|inf[ée]rieure?s?)((?!\\s+([àa]|d[e'’]|que))|\\s+(([àa]|d[e'’]|que)(?!(\\s*\\d+))))|(((en )?dessous)\\s+d[e'’]|under)(?!(\\s*\\d+)))";
+
+ public static final String EqualRegexNoNumberSucceed = "([èe]ga(l(es)?|ux)((?!\\s+([àa]))|(\\s+([àa]|que)(?!(\\s*\\d+)))))";
+
+ public static final String OneNumberRangeMoreRegex1 = "({MoreOrEqual}|{MoreRegex})\\s*(l[ae]\\s+)?(?({NumberSplitMark}.)+)"
+ .replace("{MoreOrEqual}", MoreOrEqual)
+ .replace("{MoreRegex}", MoreRegex)
+ .replace("{NumberSplitMark}", NumberSplitMark);
+
+ public static final String OneNumberRangeMoreRegex1LB = "(?({NumberSplitMark}.)+)\\s*{MoreOrEqualSuffix}"
+ .replace("{MoreOrEqualSuffix}", MoreOrEqualSuffix)
+ .replace("{NumberSplitMark}", NumberSplitMark);
+
+ public static final String OneNumberRangeMoreSeparateRegex = "({EqualRegex}\\s+(?({NumberSplitMark}.)+)(\\s+ou\\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\\s+(?({NumberSplitMark}.)+)(\\s+ou\\s+){EqualRegexNoNumberSucceed})"
+ .replace("{EqualRegex}", EqualRegex)
+ .replace("{MoreRegex}", MoreRegex)
+ .replace("{EqualRegexNoNumberSucceed}", EqualRegexNoNumberSucceed)
+ .replace("{MoreRegexNoNumberSucceed}", MoreRegexNoNumberSucceed)
+ .replace("{NumberSplitMark}", NumberSplitMark);
+
+ public static final String OneNumberRangeLessRegex1 = "({LessOrEqual}|{LessRegex})\\s*(l[ae]\\s+)?(?({NumberSplitMark}.)+)"
+ .replace("{LessOrEqual}", LessOrEqual)
+ .replace("{LessRegex}", LessRegex)
+ .replace("{NumberSplitMark}", NumberSplitMark);
+
+ public static final String OneNumberRangeLessRegex1LB = "(?({NumberSplitMark}.)+)\\s*{LessOrEqualSuffix}"
+ .replace("{LessOrEqualSuffix}", LessOrEqualSuffix)
+ .replace("{NumberSplitMark}", NumberSplitMark);
+
+ public static final String OneNumberRangeLessSeparateRegex = "({EqualRegex}\\s+(?({NumberSplitMark}.)+)(\\s+ou\\s+){LessRegexNoNumberSucceed})|({LessRegex}\\s+(?({NumberSplitMark}.)+)(\\s+ou\\s+){EqualRegexNoNumberSucceed})"
+ .replace("{EqualRegex}", EqualRegex)
+ .replace("{LessRegex}", LessRegex)
+ .replace("{EqualRegexNoNumberSucceed}", EqualRegexNoNumberSucceed)
+ .replace("{LessRegexNoNumberSucceed}", LessRegexNoNumberSucceed)
+ .replace("{NumberSplitMark}", NumberSplitMark);
+
+ public static final String OneNumberRangeEqualRegex = "(?({NumberSplitMark}.)+)"
+ .replace("{EqualRegex}", EqualRegex)
+ .replace("{NumberSplitMark}", NumberSplitMark);
+
+ public static final String TwoNumberRangeRegex1 = "entre\\s*(l[ae]\\s+)?(?({NumberSplitMark}.)+)\\s*et\\s*(l[ae]\\s+)?(?({NumberSplitMark}.)+)"
+ .replace("{NumberSplitMark}", NumberSplitMark);
+
+ public static final String TwoNumberRangeRegex2 = "({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\\s*(et|mais|,)\\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})"
+ .replace("{OneNumberRangeMoreRegex1}", OneNumberRangeMoreRegex1)
+ .replace("{OneNumberRangeMoreRegex2}", OneNumberRangeMoreRegex2)
+ .replace("{OneNumberRangeLessRegex1}", OneNumberRangeLessRegex1)
+ .replace("{OneNumberRangeLessRegex2}", OneNumberRangeLessRegex2);
+
+ public static final String TwoNumberRangeRegex3 = "({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\\s*(et|mais|,)\\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})"
+ .replace("{OneNumberRangeMoreRegex1}", OneNumberRangeMoreRegex1)
+ .replace("{OneNumberRangeMoreRegex2}", OneNumberRangeMoreRegex2)
+ .replace("{OneNumberRangeLessRegex1}", OneNumberRangeLessRegex1)
+ .replace("{OneNumberRangeLessRegex2}", OneNumberRangeLessRegex2);
+
+ public static final String TwoNumberRangeRegex4 = "(de\\s+)?(?({NumberSplitMark}(?!\\bde\\b).)+)\\s*{TillRegex}\\s*(l[ae]\\s+)?(?({NumberSplitMark}.)+)"
+ .replace("{TillRegex}", TillRegex)
+ .replace("{NumberSplitMark}", NumberSplitMark);
+
public static final Character DecimalSeparatorChar = ',';
public static final String FractionMarkerToken = "sur";
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/extractors/GermanNumberWithUnitExtractorConfiguration.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/extractors/GermanNumberWithUnitExtractorConfiguration.java
index d066d80b6..401d9485d 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/extractors/GermanNumberWithUnitExtractorConfiguration.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/extractors/GermanNumberWithUnitExtractorConfiguration.java
@@ -46,7 +46,7 @@ public String getBuildSuffix() {
}
public String getConnectorToken() {
- return "";
+ return GermanNumericWithUnit.ConnectorToken;
}
public Pattern getCompoundUnitConnectorRegex() {
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/parsers/GermanNumberWithUnitParserConfiguration.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/parsers/GermanNumberWithUnitParserConfiguration.java
index 203f80197..f8bd445bd 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/parsers/GermanNumberWithUnitParserConfiguration.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/parsers/GermanNumberWithUnitParserConfiguration.java
@@ -8,6 +8,7 @@
import com.microsoft.recognizers.text.number.parsers.AgnosticNumberParserFactory;
import com.microsoft.recognizers.text.number.parsers.AgnosticNumberParserType;
import com.microsoft.recognizers.text.numberwithunit.parsers.BaseNumberWithUnitParserConfiguration;
+import com.microsoft.recognizers.text.numberwithunit.resources.GermanNumericWithUnit;
public abstract class GermanNumberWithUnitParserConfiguration extends BaseNumberWithUnitParserConfiguration {
@@ -26,7 +27,7 @@ public IExtractor getInternalNumberExtractor() {
@Override
public String getConnectorToken() {
- return "";
+ return GermanNumericWithUnit.ConnectorToken;
}
public GermanNumberWithUnitParserConfiguration(CultureInfo ci) {
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/BaseCurrency.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/BaseCurrency.java
index 55a95d4a6..799acd2ca 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/BaseCurrency.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/BaseCurrency.java
@@ -188,6 +188,7 @@ public class BaseCurrency {
.put("VEF", "CENTIMO")
.put("YER", "FILS")
.put("ZMW", "NGWEE")
+ .put("_XBT", "MILLIBITCOIN|SATOSHI")
.build();
public static final ImmutableMap CurrencyFractionalRatios = ImmutableMap.builder()
@@ -267,6 +268,8 @@ public class BaseCurrency {
.put("Kwartje", 4L)
.put("Dubbeltje", 10L)
.put("Stuiver", 20L)
+ .put("Millibitcoin", 1000L)
+ .put("Satoshi", 100000000L)
.build();
public static final ImmutableMap NonStandardFractionalSubunits = ImmutableMap.builder()
@@ -277,5 +280,6 @@ public class BaseCurrency {
.put("YDD", 1000L)
.put("TND", 1000L)
.put("MRO", 5L)
+ .put("_XBT", 1000L)
.build();
}
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/ChineseNumericWithUnit.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/ChineseNumericWithUnit.java
index d4086e071..edd40e839 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/ChineseNumericWithUnit.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/ChineseNumericWithUnit.java
@@ -245,6 +245,7 @@ public class ChineseNumericWithUnit {
.put("Jiao", "毛钱|毛|角钱|角")
.put("Finnish markka", "芬兰马克")
.put("Penni", "盆尼")
+ .put("Bitcoin", "₿|btc|xbt|个比特币|比特币")
.build();
public static final ImmutableMap CurrencyNameToIsoCodeMap = ImmutableMap.builder()
@@ -536,6 +537,7 @@ public class ChineseNumericWithUnit {
.put("Euro", "€")
.put("Pound", "£")
.put("Costa Rican colón", "₡")
+ .put("Bitcoin", "₿|btc|xbt")
.build();
public static final List CurrencyAmbiguousValues = Arrays.asList("元", "仙", "分", "圆", "块", "毛", "盾", "箍", "蚊", "角");
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/EnglishNumericWithUnit.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/EnglishNumericWithUnit.java
index 3ce10a864..04ed63447 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/EnglishNumericWithUnit.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/EnglishNumericWithUnit.java
@@ -47,6 +47,8 @@ public class EnglishNumericWithUnit {
.put("Acre", "-acre|acre|acres")
.build();
+ public static final List AmbiguousAreaUnitList = Arrays.asList("n/a");
+
public static final ImmutableMap CurrencySuffixList = ImmutableMap.builder()
.put("Abkhazian apsar", "abkhazian apsar|apsars")
.put("Afghan afghani", "afghan afghani|؋|afn|afghanis|afghani")
@@ -296,6 +298,9 @@ public class EnglishNumericWithUnit {
.put("Jiao", "jiao|mao")
.put("Finnish markka", "suomen markka|finnish markka|finsk mark|fim|markkaa|markka")
.put("Penni", "penniä|penni")
+ .put("Bitcoin", "bitcoin|bitcoins|btc|xbt|₿")
+ .put("Millibitcoin", "millibitcoin|millibitcoins|milibitcoin|milibitcoins")
+ .put("Satoshi", "satoshi|satoshis")
.build();
public static final ImmutableMap CurrencyNameToIsoCodeMap = ImmutableMap.builder()
@@ -484,6 +489,7 @@ public class EnglishNumericWithUnit {
.put("Ascension pound", "_AP")
.put("Alderney pound", "_ALP")
.put("Abkhazian apsar", "_AA")
+ .put("Bitcoin", "_XBT")
.build();
public static final ImmutableMap FractionalUnitNameToCodeMap = ImmutableMap.builder()
@@ -560,6 +566,8 @@ public class EnglishNumericWithUnit {
.put("Tiyin", "TIYIN")
.put("Hào", "HAO")
.put("Ngwee", "NGWEE")
+ .put("Millibitcoin", "MILLIBITCOIN")
+ .put("Satoshi", "SATOSHI")
.build();
public static final String CompoundUnitConnectorRegex = "(?<spl>and)";
@@ -608,9 +616,10 @@ public class EnglishNumericWithUnit {
.put("Pound", "£")
.put("Costa Rican colón", "₡")
.put("Turkish lira", "₺")
+ .put("Bitcoin", "₿|btc|xbt")
.build();
- public static final List AmbiguousCurrencyUnitList = Arrays.asList("din.", "kiwi", "kina", "kobo", "lari", "lipa", "napa", "para", "sfr.", "taka", "tala", "toea", "vatu", "yuan", "all", "ang", "ban", "bob", "btn", "byr", "cad", "cop", "cup", "dop", "gip", "jod", "kgs", "lak", "lei", "mga", "mop", "nad", "omr", "pul", "sar", "sbd", "scr", "sdg", "sek", "sen", "sol", "sos", "std", "try", "yer", "yen", "db");
+ public static final List AmbiguousCurrencyUnitList = Arrays.asList("din.", "kiwi", "kina", "kobo", "lari", "lipa", "napa", "para", "sfr.", "taka", "tala", "toea", "vatu", "yuan", "all", "ang", "ban", "bob", "btn", "byr", "cad", "cop", "cup", "dop", "gip", "jod", "kgs", "lak", "lei", "mga", "mop", "nad", "omr", "pul", "sar", "sbd", "scr", "sdg", "sek", "sen", "sol", "sos", "std", "try", "yer", "yen", "db", "satoshi", "satoshis");
public static final ImmutableMap InformationSuffixList = ImmutableMap.builder()
.put("Bit", "-bit|bit|bits")
@@ -627,7 +636,7 @@ public class EnglishNumericWithUnit {
.put("Petabyte", "-petabyte|-petabytes|petabyte|pB|PB|petabytes|peta byte|peta bytes|pbyte")
.build();
- public static final List AmbiguousDimensionUnitList = Arrays.asList("barrel", "barrels", "grain", "pound", "stone", "yards", "yard", "cord", "dram", "feet", "foot", "gill", "knot", "peck", "cup", "fps", "pts", "in", "dm", "\"");
+ public static final List AmbiguousDimensionUnitList = Arrays.asList("barrel", "barrels", "grain", "grains", "pound", "stone", "stones", "yards", "yard", "cord", "cords", "dram", "drachm", "drachma", "feet", "foot", "gill", "knot", "knots", "peck", "pecks", "cup", "cups", "fps", "pts", "in", "dm", "\"", "pinch", "pinches");
public static final String BuildPrefix = "(?<=(\\s|^))";
@@ -655,24 +664,29 @@ public class EnglishNumericWithUnit {
public static final List AmbiguousLengthUnitList = Arrays.asList("m", "yard", "yards", "pm", "pt", "pts");
public static final ImmutableMap SpeedSuffixList = ImmutableMap.builder()
- .put("Meter per second", "meters / second|m/s|meters per second|metres per second|meter per second|metre per second")
- .put("Kilometer per hour", "km/h|kilometres per hour|kilometers per hour|kilometer per hour|kilometre per hour")
+ .put("Meter per second", "meter/second|meters/second|meters / second|m/s|meters per second|metres per second|meter per second|metre per second")
+ .put("Kilometer per hour", "km/h|kilometres per hour|kilometers per hour|kilometer per hour|kilometre per hour|kph|kmph|km/hr")
.put("Kilometer per minute", "km/min|kilometers per minute|kilometres per minute|kilometer per minute|kilometre per minute")
.put("Kilometer per second", "km/s|kilometers per second|kilometres per second|kilometer per second|kilometre per second")
- .put("Mile per hour", "mph|mile per hour|miles per hour|mi/h|mile / hour|miles / hour|miles an hour")
- .put("Knot", "kt|knot|kn")
- .put("Foot per second", "ft/s|foot/s|foot per second|feet per second|fps")
- .put("Foot per minute", "ft/min|foot/min|foot per minute|feet per minute")
+ .put("Mile per hour", "mph|mile per hour|miles per hour|mi/h|mile / hour|miles / hour|miles an hour|mi/hr")
+ .put("Knot", "kt|knot|knots|kn")
+ .put("Foot per second", "ft/s|foot/s|feet/s|foot per second|feet per second|fps")
+ .put("Foot per minute", "ft/min|foot/min|feet/min|foot per minute|feet per minute")
.put("Yard per minute", "yards per minute|yard per minute|yards / minute|yards/min|yard/min")
.put("Yard per second", "yards per second|yard per second|yards / second|yards/s|yard/s")
+ .put("Meter per millisecond", "meter/millisecond|meters/millisecond|meter / millisecond|meters / millisecond|meter per millisecond|meters per millisecond|m/ms")
+ .put("Centimeter per millisecond", "centimeter/millisecond|centimeters/millisecond|centimeter / millisecond|centimeters / millisecond|centimeter per millisecond|centimeters per millisecond|cm/ms")
+ .put("Kilometer per millisecond", "kilometer/millisecond|kilometers/millisecond|kilometer / millisecond|kilometers / millisecond|kilometer per millisecond|kilometers per millisecond|km/ms")
.build();
+ public static final List AmbiguousSpeedUnitList = Arrays.asList("knot", "knots", "fps");
+
public static final ImmutableMap TemperatureSuffixList = ImmutableMap.builder()
- .put("F", "degrees fahrenheit|degree fahrenheit|deg fahrenheit|degs fahrenheit|fahrenheit|°f|degrees farenheit|degree farenheit|deg farenheit|degs farenheit|degrees f|degree f|deg f|degs f|farenheit|f")
+ .put("F", "degrees fahrenheit|degree fahrenheit|deg fahrenheit|degs fahrenheit|fahrenheit|°f|° f|degrees farenheit|degree farenheit|deg farenheit|degs farenheit|degrees f|degree f|deg f|degs f|farenheit|f")
.put("K", "k|K|kelvin")
.put("R", "rankine|°r")
.put("D", "delisle|°de")
- .put("C", "degrees celsius|degree celsius|deg celsius|degs celsius|celsius|degrees celcius|degree celcius|celcius|deg celcius|degs celcius|degrees centigrade|degree centigrade|centigrade|degrees centigrate|degree centigrate|degs centigrate|deg centigrate|centigrate|degrees c|degree c|deg c|degs c|°c|c")
+ .put("C", "degrees celsius|degree celsius|deg celsius|degs celsius|celsius|degrees celcius|degree celcius|celcius|deg celcius|degs celcius|degrees centigrade|degree centigrade|centigrade|degrees centigrate|degree centigrate|degs centigrate|deg centigrate|centigrate|degrees c|degree c|deg c|degs c|°c|° c|c")
.put("Degree", "degree|degrees|deg.|deg|°")
.build();
@@ -687,33 +701,49 @@ public class EnglishNumericWithUnit {
.put("Liter", "l|litre|liter|liters|litres")
.put("Deciliter", "dl|deciliter|decilitre|deciliters|decilitres")
.put("Centiliter", "cl|centiliter|centilitre|centiliters|centilitres")
- .put("Milliliter", "ml|mls|millilitre|milliliter|millilitres|milliliters")
+ .put("Milliliter", "ml|mls|millilitre|milliliter|millilitres|milliliters|cc")
.put("Cubic yard", "cubic yard|cubic yards")
.put("Cubic inch", "cubic inch|cubic inches")
.put("Cubic foot", "cubic foot|cubic feet")
.put("Cubic mile", "cubic mile|cubic miles")
.put("Fluid ounce", "fl oz|fluid ounce|fluid ounces")
- .put("Teaspoon", "teaspoon|teaspoons")
- .put("Tablespoon", "tablespoon|tablespoons")
- .put("Pint", "pint|pints")
- .put("Volume unit", "fluid dram|gill|quart|minim|cord|peck|bushel|hogshead|barrels|barrel|bbl")
+ .put("Teaspoon", "teaspoon|teaspoons|teaspoonful|teaspoonfuls|tsp|tsp.|tspn|tspn.|tea spoon|tea spoons|t.|ts.")
+ .put("Tablespoon", "tablespoon|tablespoons|tablespoonful|tablespoonfuls|tbl|tbl.|tbs|tbs.|tbsp|tbsp.|table spoon|table spoons|T.|Tb.|tbls.|tbls")
+ .put("Pint", "pint|pints|fl pt| fluid pint")
+ .put("Quart", "quart|quarts|fl qt")
+ .put("Cup", "cup|cups")
+ .put("Gill", "gill|gills")
+ .put("Pinch", "pinch|pinches")
+ .put("Fluid Dram", "fluid dram|fluid drachm|fluid drachma|fluidram|fluidrams")
+ .put("Barrel", "barrel|bbl|barrels")
+ .put("Minim", "minim")
+ .put("Cord", "cord|cords")
+ .put("Peck", "peck|pecks")
+ .put("Bushel", "bushel")
+ .put("Hogshead", "hogshead")
.build();
- public static final List AmbiguousVolumeUnitList = Arrays.asList("l", "ounce", "oz", "cup", "peck", "cord", "gill");
+ public static final List AmbiguousVolumeUnitList = Arrays.asList("l", "ounce", "oz", "cup", "cups", "peck", "pecks", "cord", "cords", "gill", "gills", "barrel", "barrels", "tbl", "quart", "quarts", "pinch", "t.", "T.", "Tb.", "ts.");
public static final ImmutableMap WeightSuffixList = ImmutableMap.builder()
.put("Kilogram", "kg|kilogram|kilograms|kilo|kilos")
- .put("Gram", "g|gram|grams")
+ .put("Gram", "g|gram|grams|gm")
.put("Milligram", "mg|milligram|milligrams")
- .put("Gallon", "-gallon|gallons|gallon")
+ .put("Gallon", "-gallon|gallons|gallon|gal")
.put("Metric ton", "metric tons|metric ton")
.put("Ton", "-ton|ton|tons|tonne|tonnes")
.put("Pound", "pound|pounds|lb|lbs")
.put("Ounce", "-ounce|ounce|oz|ounces")
- .put("Weight unit", "pennyweight|grain|british long ton|us short hundredweight|stone|dram")
+ .put("Grain", "grain|grains|gr")
+ .put("Pennyweight", "pennyweight")
+ .put("Long ton (British)", "british long ton|long ton (british)")
+ .put("Short ton (US)", "us short ton|short ton (us)")
+ .put("Short hundredweight (US)", "us short hundredweight|short hundredweight (us)")
+ .put("Stone", "stone")
+ .put("Dram", "dram|drachm|drachma|roman drachma|greek drachma")
.build();
- public static final List AmbiguousWeightUnitList = Arrays.asList("g", "oz", "stone", "dram", "lbs");
+ public static final List AmbiguousWeightUnitList = Arrays.asList("g", "oz", "stone", "dram", "lbs", "gal", "grain", "grains");
public static final ImmutableMap AmbiguityFiltersDict = ImmutableMap.builder()
.put("\\bm\\b", "((('|’)\\s*m)|(m\\s*('|’)))")
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/FrenchNumericWithUnit.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/FrenchNumericWithUnit.java
index c7ae6bdaa..d2abdf03f 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/FrenchNumericWithUnit.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/FrenchNumericWithUnit.java
@@ -267,6 +267,9 @@ public class FrenchNumericWithUnit {
.put("Fen", "fen")
.put("Jiao", "jiao")
.put("Mark Finlandais", "marks finlandais|mark finlandais|fim|mark")
+ .put("Bitcoin", "bitcoin|bitcoins|btc|xbt|₿")
+ .put("Millibitcoin", "millibitcoin|millibitcoins|milibitcoin|milibitcoins")
+ .put("Satoshi", "satoshi|satoshis")
.build();
public static final String CompoundUnitConnectorRegex = "(?<spl>[^.])";
@@ -300,6 +303,7 @@ public class FrenchNumericWithUnit {
.put("Yen Japonais", "¥|jpy")
.put("Euro", "€|eur")
.put("Livre", "£")
+ .put("Bitcoin", "₿|btc|xbt")
.build();
public static final List AmbiguousCurrencyUnitList = Arrays.asList("din.", "kina", "lari", "taka", "tala", "vatu", "yuan", "bob", "btn", "cop", "cup", "dop", "gip", "jod", "kgs", "lak", "mga", "mop", "nad", "omr", "sar", "sbd", "scr", "sdg", "sek", "sos", "std", "try", "yer");
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/GermanNumericWithUnit.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/GermanNumericWithUnit.java
index 5d4717a9c..a772e88d0 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/GermanNumericWithUnit.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/GermanNumericWithUnit.java
@@ -292,6 +292,8 @@ public class GermanNumericWithUnit {
.put("Jiao", "jiao")
.put("Finnish markka", "suomen markka|finnish markka|finsk mark|fim|markkaa|markka|finnische mark|finnischen mark")
.put("Penni", "penniä|penni")
+ .put("Bitcoin", "bitcoin|bitcoins|btc|xbt|₿")
+ .put("Millibitcoin", "millibitcoin|millibitcoins|milibitcoin|milibitcoins")
.build();
public static final String CompoundUnitConnectorRegex = "(?<spl>[^.])";
@@ -337,6 +339,7 @@ public class GermanNumericWithUnit {
.put("Pound", "£")
.put("Costa Rican colón", "₡")
.put("Turkish lira", "₺")
+ .put("Bitcoin", "₿|btc|xbt")
.build();
public static final List AmbiguousCurrencyUnitList = Arrays.asList("din.", "kiwi", "kina", "kobo", "lari", "lipa", "napa", "para", "sfr.", "taka", "tala", "toea", "vatu", "yuan", "ang", "ban", "bob", "btn", "byr", "cad", "cop", "cup", "dop", "gip", "jod", "kgs", "lak", "lei", "mga", "mop", "nad", "omr", "pul", "sar", "sbd", "scr", "sdg", "sek", "sen", "sol", "sos", "std", "try", "yer", "yen");
@@ -362,6 +365,8 @@ public class GermanNumericWithUnit {
public static final String BuildSuffix = "(?=(\\s|\\W|$))";
+ public static final String ConnectorToken = "-";
+
public static final ImmutableMap LengthSuffixList = ImmutableMap.builder()
.put("Kilometer", "km|kilometer|kilometern")
.put("Hectometer", "hm|hektometer|hektometern")
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/PortugueseNumericWithUnit.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/PortugueseNumericWithUnit.java
index 5e1863215..a2965662a 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/PortugueseNumericWithUnit.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/PortugueseNumericWithUnit.java
@@ -349,9 +349,278 @@ public class PortugueseNumericWithUnit {
.put("Dinar iugoslavo", "dinar iugoslavo|dinares iugoslavos|dinar jugoslavo|dinares jugoslavos|yud")
.put("Kwacha zambiano", "kwacha zambiano|kwacha zambianos|kwachas zambianos|zmw")
.put("Ngwee zambiano", "ngwee zambiano|ngwee zambianos|ngwees zambianos")
+ .put("Bitcoin", "bitcoin|bitcoins|btc|xbt|₿")
+ .put("Millibitcoin", "millibitcoin|millibitcoins|milibitcoin|milibitcoins")
+ .put("Satoshi", "satoshi|satoshis")
.build();
- public static final String CompoundUnitConnectorRegex = "(?<spl>[^.])";
+ public static final ImmutableMap CurrencyNameToIsoCodeMap = ImmutableMap.builder()
+ .put("Afegani afegão", "AFN")
+ .put("Euro", "EUR")
+ .put("Lek albanês", "ALL")
+ .put("Kwanza angolano", "AOA")
+ .put("Dram armênio", "AMD")
+ .put("Florim arubano", "AWG")
+ .put("Taka bengali", "BDT")
+ .put("Ngultrum butanês", "BTN")
+ .put("Boliviano", "BOB")
+ .put("Marco da Bósnia e Herzegovina", "BAM")
+ .put("Pula", "BWP")
+ .put("Real brasileiro", "BRL")
+ .put("Lev búlgaro", "BGN")
+ .put("Riel cambojano", "KHR")
+ .put("Escudo cabo-verdiano", "CVE")
+ .put("Colón costarriquenho", "CRC")
+ .put("Kuna croata", "HRK")
+ .put("Coroa checa", "CZK")
+ .put("Nakfa", "ERN")
+ .put("Birr etíope", "ETB")
+ .put("Dalasi gambiano", "GMD")
+ .put("Lari georgiano", "GEL")
+ .put("Cedi", "GHS")
+ .put("Quetzal guatemalteco", "GTQ")
+ .put("Gurde haitiano", "HTG")
+ .put("Lempira hondurenha", "HNL")
+ .put("Florim húngaro", "HUF")
+ .put("Rial iraniano", "IRR")
+ .put("Rial iemenita", "YER")
+ .put("Novo shekel israelense", "ILS")
+ .put("Yen", "JPY")
+ .put("Tengue cazaque", "KZT")
+ .put("Xelim queniano", "KES")
+ .put("Won norte-coreano", "KPW")
+ .put("Won sul-coreano", "KRW")
+ .put("Som quirguiz", "KGS")
+ .put("Quipe laosiano", "LAK")
+ .put("Loti do Lesoto", "LSL")
+ .put("Rand sul-africano", "ZAR")
+ .put("Pataca macaense", "MOP")
+ .put("Dinar macedônio", "MKD")
+ .put("Ariary malgaxe", "MGA")
+ .put("Kwacha do Malawi", "MWK")
+ .put("Ringuite malaio", "MYR")
+ .put("Uguia", "MRO")
+ .put("Tugrik mongol", "MNT")
+ .put("Metical moçambicao", "MZN")
+ .put("Quiate mianmarense", "MMK")
+ .put("Córdova nicaraguense", "NIO")
+ .put("Naira", "NGN")
+ .put("Lira turca", "TRY")
+ .put("Rial omanense", "OMR")
+ .put("Balboa panamenho", "PAB")
+ .put("Kina", "PGK")
+ .put("Guarani", "PYG")
+ .put("Novo Sol", "PEN")
+ .put("Złoty", "PLN")
+ .put("Rial catariano", "QAR")
+ .put("Rial saudita", "SAR")
+ .put("Tala samoano", "WST")
+ .put("São Tomé and Príncipe dobra", "STN")
+ .put("Leone serra-leonino", "SLL")
+ .put("Lilangeni", "SZL")
+ .put("Somoni tajique", "TJS")
+ .put("Baht tailandês", "THB")
+ .put("Grívnia", "UAH")
+ .put("Vatu", "VUV")
+ .put("Bolívar forte venezuelano", "VEF")
+ .put("Kwacha zambiano", "ZMW")
+ .put("Dirame marroquino", "MAD")
+ .put("Dirame dos Emirados Árabes Unidos", "AED")
+ .put("Manat azeri", "AZN")
+ .put("Manat turcomeno", "TMT")
+ .put("Xelim somali", "SOS")
+ .put("Xelim tanzaniano", "TZS")
+ .put("Xelim ugandês", "UGX")
+ .put("Leu romeno", "RON")
+ .put("Leu moldávio", "MDL")
+ .put("Rupia nepalesa", "NPR")
+ .put("Rupia paquistanesa", "PKR")
+ .put("Rupia indiana", "INR")
+ .put("Rupia seichelense", "SCR")
+ .put("Rupia maurícia", "MUR")
+ .put("Rupia maldiva", "MVR")
+ .put("Rupia do Sri Lanka", "LKR")
+ .put("Rupia indonésia", "IDR")
+ .put("Coroa dinamarquesa", "DKK")
+ .put("Coroa norueguesa", "NOK")
+ .put("Coroa islandesa", "ISK")
+ .put("Coroa sueca", "SEK")
+ .put("Franco CFA da África Ocidental", "XOF")
+ .put("Franco CFA da África Central", "XAF")
+ .put("Franco comorense", "KMF")
+ .put("Franco congolês", "CDF")
+ .put("Burundian franc", "BIF")
+ .put("Franco djibutiano", "DJF")
+ .put("Franco CFP", "XPF")
+ .put("Franco da Guiné", "GNF")
+ .put("Franco suíço", "CHF")
+ .put("Franco ruandês", "RWF")
+ .put("Rublo russo", "RUB")
+ .put("Transnistrian ruble", "PRB")
+ .put("New Belarusian ruble", "BYN")
+ .put("Dinar argelino", "DZD")
+ .put("Dinar bareinita", "BHD")
+ .put("Dinar iraquiano", "IQD")
+ .put("Dinar jordaniano", "JOD")
+ .put("Dinar kuwaitiano", "KWD")
+ .put("Dinar libio", "LYD")
+ .put("Dinar sérvio", "RSD")
+ .put("Dinar tunisiano", "TND")
+ .put("Peso argentino", "ARS")
+ .put("Chilean peso", "CLP")
+ .put("Peso colombiano", "COP")
+ .put("Peso cubano convertível", "CUC")
+ .put("Peso cubano", "CUP")
+ .put("Peso dominicano", "DOP")
+ .put("Peso mexicano", "MXN")
+ .put("Peso uruguaio", "UYU")
+ .put("Libra esterlina", "GBP")
+ .put("Libra de Santa Helena", "SHP")
+ .put("Libra egípcia", "EGP")
+ .put("Libra das Malvinas", "FKP")
+ .put("Libra de Gibraltar", "GIP")
+ .put("Libra manesa", "IMP")
+ .put("Libra de Jersey", "JEP")
+ .put("Libra libanesa", "LBP")
+ .put("Libra sul-sudanesa", "SSP")
+ .put("Libra sudanesa", "SDG")
+ .put("Libra síria", "SYP")
+ .put("Dólar estadunidense", "USD")
+ .put("Dólar australiano", "AUD")
+ .put("Dólar bahamense", "BSD")
+ .put("Dólar de Barbados", "BBD")
+ .put("Dólar de Belize", "BZD")
+ .put("Dólar bermudense", "BMD")
+ .put("Dólar de Brunei", "BND")
+ .put("Singapore dólar", "SGD")
+ .put("Dólar canadense", "CAD")
+ .put("Dólar das Ilhas Cayman", "KYD")
+ .put("Dólar neozelandês", "NZD")
+ .put("Dólar fijiano", "FJD")
+ .put("Dólar guianense", "GYD")
+ .put("Dólar de Hong Kong", "HKD")
+ .put("Dólar jamaicano", "JMD")
+ .put("Dólar liberiano", "LRD")
+ .put("Dólar namibiano", "NAD")
+ .put("Dólar das Ilhas Salomão", "SBD")
+ .put("Dólar surinamês", "SRD")
+ .put("Novo dólar taiwanês", "TWD")
+ .put("Dólar de Trinidade e Tobago", "TTD")
+ .put("Tuvaluan dólar", "TVD")
+ .put("Yuan chinês", "CNY")
+ .put("Rial", "__RI")
+ .put("Xelim", "__S")
+ .put("Som", "__SO")
+ .put("Dirame", "__DR")
+ .put("Dinar", "_DN")
+ .put("Dólar", "__D")
+ .put("Manat", "__MA")
+ .put("Rupia", "__R")
+ .put("Coroa", "__K")
+ .put("Krona", "__K")
+ .put("Franco", "__F")
+ .put("Marco", "__M")
+ .put("Rublo", "__RB")
+ .put("Peso", "__PE")
+ .put("Libra", "__P")
+ .put("Tristan da Cunha libra", "_TP")
+ .put("South Georgia and the South Sandwich Islands libra", "_SP")
+ .put("Somaliland xelim", "_SS")
+ .put("Pitcairn Islands dólar", "_PND")
+ .put("Palauan dólar", "_PD")
+ .put("Niue dólar", "_NID")
+ .put("Nauruan dólar", "_ND")
+ .put("Micronesian dólar", "_MD")
+ .put("Kiribati dólar", "_KID")
+ .put("Guernsey libra", "_GGP")
+ .put("Faroese króna", "_FOK")
+ .put("Cook Islands dólar", "_CKD")
+ .put("British Virgin Islands dólar", "_BD")
+ .put("Ascension libra", "_AP")
+ .put("Alderney libra", "_ALP")
+ .put("Abkhazian apsar", "_AA")
+ .put("Bitcoin", "_XBT")
+ .build();
+
+ public static final ImmutableMap FractionalUnitNameToCodeMap = ImmutableMap.builder()
+ .put("Jiao", "JIAO")
+ .put("Kopek", "KOPEK")
+ .put("Pul", "PUL")
+ .put("Cent", "CENT")
+ .put("Qindarke", "QINDARKE")
+ .put("Peni", "PENNY")
+ .put("Santeem", "SANTEEM")
+ .put("Cêntimo", "CENTIMO")
+ .put("Centavo", "CENT")
+ .put("Luma", "LUMA")
+ .put("Qəpik", "QƏPIK")
+ .put("Fils", "FILS")
+ .put("Poisha", "POISHA")
+ .put("Kapyeyka", "KAPYEYKA")
+ .put("Centime", "CENTIME")
+ .put("Chetrum", "CHETRUM")
+ .put("Paisa", "PAISA")
+ .put("Fening", "FENING")
+ .put("Thebe", "THEBE")
+ .put("Sen", "SEN")
+ .put("Stotinka", "STOTINKA")
+ .put("Fen", "FEN")
+ .put("Céntimo", "CENT")
+ .put("Lipa", "LIPA")
+ .put("Haléř", "HALER")
+ .put("Øre", "ØRE")
+ .put("Piastre", "PIASTRE")
+ .put("Santim", "SANTIM")
+ .put("Oyra", "OYRA")
+ .put("Butut", "BUTUT")
+ .put("Tetri", "TETRI")
+ .put("Pesewa", "PESEWA")
+ .put("Fillér", "FILLER")
+ .put("Eyrir", "EYRIR")
+ .put("Dinar", "DINAR")
+ .put("Agora", "AGORA")
+ .put("Tïın", "TIIN")
+ .put("Chon", "CHON")
+ .put("Jeon", "JEON")
+ .put("Tyiyn", "TYIYN")
+ .put("Att", "ATT")
+ .put("Sente", "SENTE")
+ .put("Dirham", "DIRHAM")
+ .put("Rappen", "RAPPEN")
+ .put("Avo", "AVO")
+ .put("Deni", "DENI")
+ .put("Iraimbilanja", "IRAIMBILANJA")
+ .put("Tambala", "TAMBALA")
+ .put("Laari", "LAARI")
+ .put("Khoums", "KHOUMS")
+ .put("Ban", "BAN")
+ .put("Möngö", "MONGO")
+ .put("Pya", "PYA")
+ .put("Kobo", "KOBO")
+ .put("Kuruş", "KURUS")
+ .put("Baisa", "BAISA")
+ .put("Centésimo", "CENTESIMO")
+ .put("Toea", "TOEA")
+ .put("Sentimo", "SENTIMO")
+ .put("Grosz", "GROSZ")
+ .put("Sene", "SENE")
+ .put("Halala", "HALALA")
+ .put("Para", "PARA")
+ .put("Öre", "ORE")
+ .put("Diram", "DIRAM")
+ .put("Satang", "SATANG")
+ .put("Seniti", "SENITI")
+ .put("Millime", "MILLIME")
+ .put("Tennesi", "TENNESI")
+ .put("Kopiyka", "KOPIYKA")
+ .put("Tiyin", "TIYIN")
+ .put("Hào", "HAO")
+ .put("Ngwee", "NGWEE")
+ .put("Millibitcoin", "MILLIBITCOIN")
+ .put("Satoshi", "SATOSHI")
+ .build();
+
+ public static final String CompoundUnitConnectorRegex = "(?<spl>e)";
public static final ImmutableMap CurrencyPrefixList = ImmutableMap.builder()
.put("Dólar", "$")
@@ -385,6 +654,7 @@ public class PortugueseNumericWithUnit {
.put("Libra", "£")
.put("Colón costarriquenho", "₡")
.put("Lira turca", "₺")
+ .put("Bitcoin", "₿|btc|xbt")
.build();
public static final List AmbiguousCurrencyUnitList = Arrays.asList("le", "agora");
diff --git a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/SpanishNumericWithUnit.java b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/SpanishNumericWithUnit.java
index 198cdea86..24aae9db4 100644
--- a/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/SpanishNumericWithUnit.java
+++ b/libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/SpanishNumericWithUnit.java
@@ -45,7 +45,7 @@ public class SpanishNumericWithUnit {
public static final List AreaAmbiguousValues = Arrays.asList("área", "áreas");
public static final ImmutableMap CurrencySuffixList = ImmutableMap.builder()
- .put("Dólar", "dólar|dólares")
+ .put("Dólar", "dólar|dólares|dolar|dolares")
.put("Peso", "peso|pesos")
.put("Rublo", "rublo|rublos")
.put("Libra", "libra|libras")
@@ -353,9 +353,277 @@ public class SpanishNumericWithUnit {
.put("Dinar yugoslavo", "dinar yugoslavo|dinares yugoslavos|yud")
.put("Kwacha zambiano", "kwacha zambiano|kwacha zambianos|zmw")
.put("Ngwee zambiano", "ngwee zambiano|ngwee zambianos")
+ .put("Bitcoin", "bitcoin|bitcoins|btc|xbt|₿")
+ .put("Millibitcoin", "millibitcoin|millibitcoins|milibitcoin|milibitcoins")
+ .put("Satoshi", "satoshi|satoshis")
.build();
- public static final String CompoundUnitConnectorRegex = "(?<spl>[^.])";
+ public static final ImmutableMap CurrencyNameToIsoCodeMap = ImmutableMap.builder()
+ .put("Afgani afgano", "AFN")
+ .put("Euro", "EUR")
+ .put("Lek albanés", "ALL")
+ .put("Kwanza angoleño", "AOA")
+ .put("Dram armenio", "AMD")
+ .put("Florín arubeño", "AWG")
+ .put("Taka bangladeshí", "BDT")
+ .put("Ngultrum butanés", "BTN")
+ .put("Boliviano", "BOB")
+ .put("Marco bosnioherzegovino", "BAM")
+ .put("Pula", "BWP")
+ .put("Real brasileño", "BRL")
+ .put("Lev búlgaro", "BGN")
+ .put("Riel camboyano", "KHR")
+ .put("Escudo caboverdiano", "CVE")
+ .put("Colón costarricense", "CRC")
+ .put("Kuna croata", "HRK")
+ .put("Corona checa", "CZK")
+ .put("Nakfa", "ERN")
+ .put("Birr etíope", "ETB")
+ .put("Dalasi", "GMD")
+ .put("Lari georgiano", "GEL")
+ .put("Cedi", "GHS")
+ .put("Quetzal guatemalteco", "GTQ")
+ .put("Gourde haitiano", "HTG")
+ .put("Lempira hondureño", "HNL")
+ .put("Forinto húngaro", "HUF")
+ .put("Rial iraní", "IRR")
+ .put("Rial yemení", "YER")
+ .put("Nuevo shéquel", "ILS")
+ .put("Yen", "JPY")
+ .put("Tenge kazajo", "KZT")
+ .put("Chelín keniano", "KES")
+ .put("Won norcoreano", "KPW")
+ .put("Won surcoreano", "KRW")
+ .put("Som kirguís", "KGS")
+ .put("Kip laosiano", "LAK")
+ .put("Loti", "LSL")
+ .put("Rand sudafricano", "ZAR")
+ .put("Pataca macaense", "MOP")
+ .put("Denar macedonio", "MKD")
+ .put("Ariary malgache", "MGA")
+ .put("Kwacha malauí", "MWK")
+ .put("Ringgit malayo", "MYR")
+ .put("Uguiya", "MRO")
+ .put("Tugrik mongol", "MNT")
+ .put("Metical mozambiqueño", "MZN")
+ .put("Kyat birmano", "MMK")
+ .put("Córdoba nicaragüense", "NIO")
+ .put("Naira", "NGN")
+ .put("Lira turca", "TRY")
+ .put("Rial omaní", "OMR")
+ .put("Balboa panameño", "PAB")
+ .put("Kina", "PGK")
+ .put("Guaraní", "PYG")
+ .put("Sol", "PEN")
+ .put("Złoty", "PLN")
+ .put("Riyal qatarí", "QAR")
+ .put("Riyal saudí", "SAR")
+ .put("Tala", "WST")
+ .put("São Tomé and Príncipe dobra", "STN")
+ .put("Leone", "SLL")
+ .put("Lilangeni", "SZL")
+ .put("Somoni tayiko", "TJS")
+ .put("Baht tailandés", "THB")
+ .put("Grivna", "UAH")
+ .put("Vatu", "VUV")
+ .put("Bolívar fuerte", "VEF")
+ .put("Kwacha zambiano", "ZMW")
+ .put("Dirham marroquí", "MAD")
+ .put("Dirham de los Emiratos Árabes Unidos", "AED")
+ .put("Manat azerí", "AZN")
+ .put("Manat turkmeno", "TMT")
+ .put("Chelín somalí", "SOS")
+ .put("Chelín tanzano", "TZS")
+ .put("Chelín ugandés", "UGX")
+ .put("Leu rumano", "RON")
+ .put("Leu moldavo", "MDL")
+ .put("Rupia nepalí", "NPR")
+ .put("Rupia pakistaní", "PKR")
+ .put("Rupia india", "INR")
+ .put("Rupia de Seychelles", "SCR")
+ .put("Rupia de Mauricio", "MUR")
+ .put("Rupia de Maldivas", "MVR")
+ .put("Rupia de Sri Lanka", "LKR")
+ .put("Rupia indonesia", "IDR")
+ .put("Corona danesa", "DKK")
+ .put("Corona noruega", "NOK")
+ .put("Corona islandesa", "ISK")
+ .put("Corona sueca", "SEK")
+ .put("Franco CFA de África Occidental", "XOF")
+ .put("Franco CFA de África Central", "XAF")
+ .put("Franco comorano", "KMF")
+ .put("Franco congoleño", "CDF")
+ .put("Burundian franc", "BIF")
+ .put("Franco yibutiano", "DJF")
+ .put("Franco CFP", "XPF")
+ .put("Franco guineano", "GNF")
+ .put("Franco suizo", "CHF")
+ .put("Franco ruandés", "RWF")
+ .put("Rublo ruso", "RUB")
+ .put("Transnistrian ruble", "PRB")
+ .put("New Belarusian ruble", "BYN")
+ .put("Dinar argelino", "DZD")
+ .put("Dinar bahreiní", "BHD")
+ .put("Dinar iraquí", "IQD")
+ .put("Dinar jordano", "JOD")
+ .put("Kuwaiti dinar", "KWD")
+ .put("Dinar libio", "LYD")
+ .put("Dinar serbio", "RSD")
+ .put("Dinar tunecino", "TND")
+ .put("Peso argentino", "ARS")
+ .put("Chilean peso", "CLP")
+ .put("Peso colombiano", "COP")
+ .put("Peso cubano convertible", "CUC")
+ .put("Peso cubano", "CUP")
+ .put("Peso dominicano", "DOP")
+ .put("Peso mexicano", "MXN")
+ .put("Peso uruguayo", "UYU")
+ .put("Libra esterlina", "GBP")
+ .put("Libra de Santa Helena", "SHP")
+ .put("Libra egipcia", "EGP")
+ .put("Libra malvinense", "FKP")
+ .put("Libra gibraltareña", "GIP")
+ .put("Libra manesa", "IMP")
+ .put("Libra de Jersey", "JEP")
+ .put("Libra libanesa", "LBP")
+ .put("Libra sursudanesa", "SSP")
+ .put("Libra sudanesa", "SDG")
+ .put("Libra siria", "SYP")
+ .put("Dólar estadounidense", "USD")
+ .put("Dólar australiano", "AUD")
+ .put("Dólar bahameño", "BSD")
+ .put("Dólar de Barbados", "BBD")
+ .put("Dólar beliceño", "BZD")
+ .put("Dólar bermudeño", "BMD")
+ .put("Dólar de Brunéi", "BND")
+ .put("Singapore dollar", "SGD")
+ .put("Dólar canadiense", "CAD")
+ .put("Dólar de las Islas Caimán", "KYD")
+ .put("Dólar neozelandés", "NZD")
+ .put("Dólar fiyiano", "FJD")
+ .put("Dólar guyanés", "GYD")
+ .put("Dólar de Hong Kong", "HKD")
+ .put("Dólar jamaiquino", "JMD")
+ .put("Dólar liberiano", "LRD")
+ .put("Dólar namibio", "NAD")
+ .put("Dólar de las Islas Salomón", "SBD")
+ .put("Dólar surinamés", "SRD")
+ .put("Nuevo dólar taiwanés", "TWD")
+ .put("Dólar trinitense", "TTD")
+ .put("Tuvaluan dollar", "TVD")
+ .put("Yuan chino", "CNY")
+ .put("Rial", "__RI")
+ .put("Chelín", "__S")
+ .put("Som", "__SO")
+ .put("Dirham", "__DR")
+ .put("Dinar", "_DN")
+ .put("Dólar", "__D")
+ .put("Manat", "__MA")
+ .put("Rupia", "__R")
+ .put("Corona", "__K")
+ .put("Franco", "__F")
+ .put("Marco", "__M")
+ .put("Rublo", "__RB")
+ .put("Peso", "__PE")
+ .put("Libra", "__P")
+ .put("Tristan da Cunha libra", "_TP")
+ .put("South Georgia and the South Sandwich Islands libra", "_SP")
+ .put("Somaliland chelín", "_SS")
+ .put("Pitcairn Islands dólar", "_PND")
+ .put("Palauan dólar", "_PD")
+ .put("Niue dólar", "_NID")
+ .put("Nauruan dólar", "_ND")
+ .put("Micronesian dólar", "_MD")
+ .put("Kiribati dólar", "_KID")
+ .put("Guernsey libra", "_GGP")
+ .put("Faroese corona", "_FOK")
+ .put("Cook Islands dólar", "_CKD")
+ .put("British Virgin Islands dólar", "_BD")
+ .put("Ascension libra", "_AP")
+ .put("Alderney libra", "_ALP")
+ .put("Abkhazian apsar", "_AA")
+ .put("Bitcoin", "_XBT")
+ .build();
+
+ public static final ImmutableMap FractionalUnitNameToCodeMap = ImmutableMap.builder() // Lookup from fractional currency unit name to its code; spelling variants share one code (e.g. "Kópek" and "Kopek" both map to "KOPEK").
+ .put("Jiao", "JIAO")
+ .put("Kópek", "KOPEK")
+ .put("Kopek", "KOPEK")
+ .put("Pul", "PUL")
+ .put("Cent", "CENT")
+ .put("Qindarka", "QINDARKE")
+ .put("Penique", "PENNY")
+ .put("Santeem", "SANTEEM")
+ .put("Cêntimo", "CENT")
+ .put("Centavo", "CENT")
+ .put("Luma", "LUMA")
+ .put("Qəpik", "QƏPIK")
+ .put("Fils", "FILS")
+ .put("Poisha", "POISHA")
+ .put("Centime", "CENTIME")
+ .put("Chetrum", "CHETRUM")
+ .put("Paisa", "PAISA")
+ .put("Feningas", "FENING")
+ .put("Thebe", "THEBE")
+ .put("Sen", "SEN")
+ .put("Stotinka", "STOTINKA")
+ .put("Fen", "FEN")
+ .put("Céntimo", "CENT")
+ .put("Lipa", "LIPA")
+ .put("Haléř", "HALER")
+ .put("Øre", "ØRE")
+ .put("Piastre", "PIASTRE")
+ .put("Santim", "SANTIM")
+ .put("Oyra", "OYRA")
+ .put("Butut", "BUTUT")
+ .put("Tetri", "TETRI")
+ .put("Pesewa", "PESEWA")
+ .put("Fillér", "FILLER")
+ .put("Eyrir", "EYRIR")
+ .put("Dinar", "DINAR")
+ .put("Agora", "AGORA")
+ .put("Tïın", "TIIN")
+ .put("Chon", "CHON")
+ .put("Jeon", "JEON")
+ .put("Tyiyn", "TYIYN")
+ .put("Att", "ATT")
+ .put("Sente", "SENTE")
+ .put("Dirham", "DIRHAM")
+ .put("Rappen", "RAPPEN")
+ .put("Avo", "AVO")
+ .put("Deni", "DENI")
+ .put("Iraimbilanja", "IRAIMBILANJA")
+ .put("Tambala", "TAMBALA")
+ .put("Laari", "LAARI")
+ .put("Khoums", "KHOUMS")
+ .put("Ban", "BAN")
+ .put("Möngö", "MONGO")
+ .put("Pya", "PYA")
+ .put("Kobo", "KOBO")
+ .put("Kuruş", "KURUS")
+ .put("Baisa", "BAISA")
+ .put("Centésimo", "CENTESIMO")
+ .put("Toea", "TOEA")
+ .put("Sentimo", "SENTIMO")
+ .put("Grosz", "GROSZ")
+ .put("Sene", "SENE")
+ .put("Halala", "HALALA")
+ .put("Para", "PARA")
+ .put("Öre", "ORE")
+ .put("Diram", "DIRAM")
+ .put("Satang", "SATANG")
+ .put("Seniti", "SENITI")
+ .put("Millime", "MILLIME")
+ .put("Tennesi", "TENNESI")
+ .put("Kopiyka", "KOPIYKA")
+ .put("Tiyin", "TIYIN")
+ .put("Hào", "HAO")
+ .put("Ngwee", "NGWEE")
+ .put("Millibitcoin", "MILLIBITCOIN")
+ .put("Satoshi", "SATOSHI")
+ .build();
+
+ public static final String CompoundUnitConnectorRegex = "(?y)"; // NOTE(review): "(?y)" is not an embedded flag that java.util.regex accepts; presumably a named group around the connector word "y" was lost here - confirm against the generated resource before relying on this pattern.
public static final ImmutableMap CurrencyPrefixList = ImmutableMap.builder()
.put("Dobra", "db|std")
@@ -390,6 +658,7 @@ public class SpanishNumericWithUnit {
.put("Libra", "£")
.put("Colón costarricense", "₡")
.put("Lira turca", "₺")
+ .put("Bitcoin", "₿|btc|xbt")
.build();
public static final List AmbiguousCurrencyUnitList = Arrays.asList("le", "db", "std");
diff --git a/libraries/bot-dialogs/src/test/java/com/microsoft/recognizers/text/tests/AbstractTest.java b/libraries/bot-dialogs/src/test/java/com/microsoft/recognizers/text/tests/AbstractTest.java
new file mode 100644
index 000000000..dcde7040a
--- /dev/null
+++ b/libraries/bot-dialogs/src/test/java/com/microsoft/recognizers/text/tests/AbstractTest.java
@@ -0,0 +1,325 @@
+package com.microsoft.recognizers.text.tests;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.MapperFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.microsoft.recognizers.text.*;
+import com.microsoft.recognizers.text.datetime.parsers.DateTimeParseResult;
+import com.microsoft.recognizers.text.tests.helpers.DateTimeParseResultMixIn;
+import com.microsoft.recognizers.text.tests.helpers.ExtendedModelResultMixIn;
+import com.microsoft.recognizers.text.tests.helpers.ExtractResultMixIn;
+import com.microsoft.recognizers.text.tests.helpers.ModelResultMixIn;
+import org.apache.commons.io.FileUtils;
+import org.javatuples.Pair;
+import org.junit.*;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.*;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+@RunWith(Parameterized.class)
+public abstract class AbstractTest {
+
+ private static final String SpecsPath = "Specs/.."; // root directory that enumerateTestCases lists recursively for json spec files
+ // Languages whose test cases enumerateTestCases keeps; cases in any other language are filtered out.
+ private static final List SupportedCultures = Arrays.asList("English", "Spanish", "Portuguese", "French", "German", "Chinese");
+ // NOTE(review): UTF8_BOM is not referenced in the visible part of this class; presumably the BOM-stripping helper uses it - confirm.
+ // FEFF - UTF-8 byte order mark (EF BB BF) as Unicode char representation.
+ private static final String UTF8_BOM = "\uFEFF";
+
+ protected final TestCase currentCase; // the single spec case exercised by this test instance
+ // Stores the case to run; presumably invoked by the Parameterized runner (see @RunWith on the class) once per case - confirm in subclasses.
+ public AbstractTest(TestCase currentCase) {
+ this.currentCase = currentCase;
+ }
+
+    private static Map<String, Integer> testCounter; // cases started, per "recognizerName-language-modelName" key
+    private static Map<String, Integer> passCounter; // cases whose recognizeAndAssert completed without throwing
+    private static Map<String, Integer> failCounter; // cases that threw anything other than an assumption violation
+    private static Map<String, Integer> skipCounter; // cases skipped via AssumptionViolatedException
+
+ @BeforeClass
+ public static void before() { // replaces all four tallies with fresh, empty insertion-ordered maps
+ testCounter = new LinkedHashMap<>();
+ passCounter = new LinkedHashMap<>();
+ failCounter = new LinkedHashMap<>();
+ skipCounter = new LinkedHashMap<>();
+ }
+
+    /**
+     * Prints a summary table with one row per counted key, in the order the keys were first counted.
+     * Keys whose cases were all skipped (total not greater than skipped) get no row.
+     */
+    @AfterClass
+    public static void after() {
+        // Rows are built in a single pass; a LinkedHashMap keeps them in testCounter's iteration order.
+        Map<String, String> rows = new LinkedHashMap<>();
+        for (Map.Entry<String, Integer> tally : testCounter.entrySet()) {
+            String key = tally.getKey();
+            int total = tally.getValue();
+            int skipped = skipCounter.getOrDefault(key, 0);
+            if (total > skipped) {
+                int passed = passCounter.getOrDefault(key, 0);
+                int failed = failCounter.getOrDefault(key, 0);
+                // Column order follows the header written by print(): total, passed, skipped, failed.
+                rows.put(key, String.format("|%7d |%7d |%7d |%7d ", total, passed, skipped, failed));
+            }
+        }
+        print(rows);
+    }
+
+    /** Writes the table header, then one line per entry (value, "|| ", key) in the map's iteration order. */
+    private static void print(Map<String, String> map) {
+        System.out.println("| TOTAL | Passed | Skipped | Failed || Key");
+        // Each value is an already formatted run of count columns; the key is appended as the last column.
+        map.forEach((key, row) -> System.out.println(row + "|| " + key));
+    }
+
+    /** Adds one to the started-cases tally for this case's recognizerName-language-modelName key. */
+    private void count(TestCase testCase) {
+        String bucket = testCase.recognizerName + "-" + testCase.language + "-" + testCase.modelName;
+        testCounter.merge(bucket, 1, Integer::sum);
+    }
+
+    /** Adds one to the passed tally for this case's recognizerName-language-modelName key. */
+    private void countPass(TestCase testCase) {
+        String bucket = testCase.recognizerName + "-" + testCase.language + "-" + testCase.modelName;
+        passCounter.merge(bucket, 1, Integer::sum);
+    }
+
+    /** Adds one to the skipped tally for this case's recognizerName-language-modelName key. */
+    private void countSkip(TestCase testCase) {
+        String bucket = testCase.recognizerName + "-" + testCase.language + "-" + testCase.modelName;
+        skipCounter.merge(bucket, 1, Integer::sum);
+    }
+
+    /** Adds one to the failed tally for this case's recognizerName-language-modelName key. */
+    private void countFail(TestCase testCase) {
+        String bucket = testCase.recognizerName + "-" + testCase.language + "-" + testCase.modelName;
+        failCounter.merge(bucket, 1, Integer::sum);
+    }
+
+    /** Runs the current spec case and tallies it as passed, skipped or failed for the end-of-class summary. */
+    @Test
+    public void test() {
+        count(currentCase);
+        // Cases rejected by isJavaSupported are reported as skipped (assumption violation), not as failures.
+        if (!isJavaSupported(this.currentCase.notSupported)) {
+            countSkip(currentCase);
+            throw new AssumptionViolatedException("Test case with input '" + this.currentCase.input + "' not supported.");
+        }
+
+        if (this.currentCase.debug) {
+            // Add breakpoint here to stop on those TestCases marked with "Debug": true
+            System.out.println("Debug Break!");
+        }
+
+        try {
+            recognizeAndAssert(currentCase);
+            countPass(this.currentCase);
+        } catch (AssumptionViolatedException ex) {
+            countSkip(currentCase);
+            throw ex;
+        } catch (Throwable err) {
+            countFail(currentCase);
+            throw err;
+        }
+    }
+
+ // TODO Override in specific models. Subclasses return the results that recognizeAndAssert hands to assertResults for comparison with the case's expected results.
+ protected abstract List recognize(TestCase currentCase);
+
+    /** Default check: obtains the results from recognize and compares them with the case's expected results. */
+    protected void recognizeAndAssert(TestCase currentCase) {
+        assertResults(currentCase, recognize(currentCase));
+    }
+
+ public static void assertResults(TestCase currentCase, List results) { // compares results with the expected results of currentCase
+ assertResultsWithKeys(currentCase, results, Collections.emptyList()); // empty list: no additional resolution keys are compared
+ }
+
+    /**
+     * Asserts that results match the expected results of currentCase, position by position.
+     * Compares the result count, then typeName, text, the resolution value (only when expected has one) and every key in testResolutionKeys.
+     */
+    public static void assertResultsWithKeys(TestCase currentCase, List<? extends ModelResult> results, List<String> testResolutionKeys) {
+        List<ModelResult> expectedResults = readExpectedResults(ModelResult.class, currentCase.results);
+        Assert.assertEquals(getMessage(currentCase, "\"Result Count\""), expectedResults.size(), results.size());
+        // Both lists have the same size past this point, so one index addresses the matching pair.
+        for (int i = 0; i < expectedResults.size(); i++) {
+            ModelResult expected = expectedResults.get(i);
+            ModelResult actual = results.get(i);
+            Assert.assertEquals(getMessage(currentCase, "typeName"), expected.typeName, actual.typeName);
+            Assert.assertEquals(getMessage(currentCase, "text"), expected.text, actual.text);
+            // The resolution value is checked only when the expected result carries one.
+            if (expected.resolution.containsKey(ResolutionKey.Value)) {
+                Assert.assertEquals(getMessage(currentCase, "resolution.value"),
+                    expected.resolution.get(ResolutionKey.Value), actual.resolution.get(ResolutionKey.Value));
+            }
+            // Caller-selected resolution keys are always compared, including when absent on either side.
+            for (String key : testResolutionKeys) {
+                Assert.assertEquals(getMessage(currentCase, key), expected.resolution.get(key), actual.resolution.get(key));
+            }
+        }
+    }
+
+    /**
+     * Collects the test cases of every json spec under SpecsPath whose path contains the recognizerType directory and whose model name contains modelName; cases rejected by isJavaSupported(notSupportedByDesign) or written for a language outside SupportedCultures are dropped.
+     */
+    public static Collection<TestCase> enumerateTestCases(String recognizerType, String modelName) {
+        // Plain concatenation: this is a path fragment, not a format string, so a '%' in it must stay literal.
+        String recognizerTypePath = File.separator + recognizerType + File.separator;
+        ObjectMapper mapper = new ObjectMapper();
+        mapper.configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_PROPERTIES, true);
+        // Map json to TestCases
+        return FileUtils.listFiles(new File(SpecsPath), new String[]{"json"}, true)
+            .stream().filter(f -> f.getPath().contains(recognizerTypePath))
+            .map(f -> parseSpecFile(f, mapper))
+            .flatMap(Arrays::stream)
+            // Ignore tests with NotSupportedByDesign = Java
+            .filter(ts -> isJavaSupported(ts.notSupportedByDesign))
+            // Filter supported languages only
+            .filter(ts -> SupportedCultures.contains(ts.language))
+            .filter(ts -> ts.modelName.contains(modelName))
+            .collect(Collectors.toCollection(ArrayList::new));
+    }
+
+ public static TestCase[] parseSpecFile(File f, ObjectMapper mapper) {
+
+ List paths = Arrays.asList(f.toPath().toString().split(Pattern.quote(File.separator)));
+ List testInfo = paths.subList(paths.size() - 3, paths.size());
+
+ try {
+
+ // Workaround to consume a possible UTF-8 BOM byte
+ // https://stackoverflow.com/questions/4897876/reading-utf-8-bom-marker
+ String contents = new String(Files.readAllBytes(f.toPath()));
+ String json = StringUtf8Bom(contents);
+
+ TestCase[] tests = mapper.readValue(json, TestCase[].class);
+ Arrays.stream(tests).forEach(t -> {
+ t.recognizerName = testInfo.get(0);
+ t.language = testInfo.get(1);
+ t.modelName = testInfo.get(2).split(Pattern.quote("."))[0];
+ });
+
+ return tests;
+
+ } catch (IOException ex) {
+
+ System.out.println("Error reading Spec file: " + f.toString() + " | " + ex.getMessage());
+
+ // @TODO: This should cause a test run failure.
+ return new TestCase[0];
+ }
+ }
+
+    /**
+     * Converts result into an instance of extractorResultClass by writing it to JSON and reading it back
+     * with a case-insensitive mapper that has ExtractResultMixIn registered for ExtractResult.
+     *
+     * @return the converted object, or null when the JSON round trip fails (the stack trace is printed)
+     */
+    public static <T> T parseExtractResult(Class<T> extractorResultClass, Object result) {
+        // Deserializer
+        ObjectMapper mapper = new ObjectMapper();
+        mapper.configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_PROPERTIES, true);
+        mapper.addMixIn(ExtractResult.class, ExtractResultMixIn.class);
+        try {
+            return mapper.readValue(mapper.writeValueAsString(result), extractorResultClass);
+        } catch (IOException e) {
+            // JsonProcessingException is an IOException, so this one handler covers both write and read failures.
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    /**
+     * Converts result into an instance of dateTimeParseResultClass by writing it to JSON and reading it back
+     * with a case-insensitive mapper that has DateTimeParseResultMixIn registered for DateTimeParseResult.
+     *
+     * @return the converted object, or null when the JSON round trip fails (the stack trace is printed)
+     */
+    public static <T> T parseDateTimeParseResult(Class<T> dateTimeParseResultClass, Object result) {
+        // Deserializer
+        ObjectMapper mapper = new ObjectMapper();
+        mapper.configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_PROPERTIES, true);
+        mapper.addMixIn(DateTimeParseResult.class, DateTimeParseResultMixIn.class);
+        try {
+            return mapper.readValue(mapper.writeValueAsString(result), dateTimeParseResultClass);
+        } catch (IOException e) {
+            // JsonProcessingException is an IOException, so this one handler covers both write and read failures.
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    /**
+     * Converts result into an instance of modelResultClass by writing it to JSON and reading it back with a
+     * case-insensitive mapper that has mix-ins registered for ModelResult and ExtendedModelResult.
+     *
+     * @return the converted object, or null when the JSON round trip fails (the stack trace is printed)
+     */
+    public static <T> T parseResult(Class<T> modelResultClass, Object result) {
+        // Deserializer
+        ObjectMapper mapper = new ObjectMapper();
+        mapper.configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_PROPERTIES, true);
+        mapper.addMixIn(ModelResult.class, ModelResultMixIn.class);
+        mapper.addMixIn(ExtendedModelResult.class, ExtendedModelResultMixIn.class);
+        try {
+            return mapper.readValue(mapper.writeValueAsString(result), modelResultClass);
+        } catch (IOException e) {
+            // JsonProcessingException is an IOException, so this one handler covers both write and read failures.
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+ public static List readExpectedResults(Class modelResultClass, List