441 lines
15 KiB
C++
441 lines
15 KiB
C++
/*
|
||
* Copyright (C) 2018 The Android Open Source Project
|
||
*
|
||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
* you may not use this file except in compliance with the License.
|
||
* You may obtain a copy of the License at
|
||
*
|
||
* http://www.apache.org/licenses/LICENSE-2.0
|
||
*
|
||
* Unless required by applicable law or agreed to in writing, software
|
||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
* See the License for the specific language governing permissions and
|
||
* limitations under the License.
|
||
*/
|
||
|
||
#include "annotator/grammar/dates/utils/date-match.h"
|
||
|
||
#include <algorithm>
|
||
|
||
#include "annotator/grammar/dates/utils/date-utils.h"
|
||
#include "annotator/types.h"
|
||
#include "utils/strings/append.h"
|
||
|
||
// Meridiem values produced by GetMeridiemValue() for the MERIDIEM component.
static constexpr int kAM = 0;  // Ante meridiem.
static constexpr int kPM = 1;  // Post meridiem.
|
||
|
||
namespace libtextclassifier3 {
|
||
namespace dates {
|
||
|
||
namespace {
|
||
static int GetMeridiemValue(const TimespanCode& timespan_code) {
|
||
switch (timespan_code) {
|
||
case TimespanCode_AM:
|
||
case TimespanCode_MIDNIGHT:
|
||
// MIDNIGHT [3] -> AM
|
||
return kAM;
|
||
case TimespanCode_TONIGHT:
|
||
// TONIGHT [11] -> PM
|
||
case TimespanCode_NOON:
|
||
// NOON [2] -> PM
|
||
case TimespanCode_PM:
|
||
return kPM;
|
||
case TimespanCode_TIMESPAN_CODE_NONE:
|
||
default:
|
||
TC3_LOG(WARNING) << "Failed to extract time span code.";
|
||
}
|
||
return NO_VAL;
|
||
}
|
||
|
||
static int GetRelativeCount(const RelativeParameter* relative_parameter) {
|
||
for (const int interpretation :
|
||
*relative_parameter->day_of_week_interpretation()) {
|
||
switch (interpretation) {
|
||
case RelativeParameter_::Interpretation_NEAREST_LAST:
|
||
case RelativeParameter_::Interpretation_PREVIOUS:
|
||
return -1;
|
||
case RelativeParameter_::Interpretation_SECOND_LAST:
|
||
return -2;
|
||
case RelativeParameter_::Interpretation_SECOND_NEXT:
|
||
return 2;
|
||
case RelativeParameter_::Interpretation_COMING:
|
||
case RelativeParameter_::Interpretation_SOME:
|
||
case RelativeParameter_::Interpretation_NEAREST:
|
||
case RelativeParameter_::Interpretation_NEAREST_NEXT:
|
||
return 1;
|
||
case RelativeParameter_::Interpretation_CURRENT:
|
||
return 0;
|
||
}
|
||
}
|
||
return 0;
|
||
}
|
||
} // namespace
|
||
|
||
using strings::JoinStrings;
|
||
using strings::SStringAppendF;
|
||
|
||
std::string DateMatch::DebugString() const {
|
||
std::string res;
|
||
#if !defined(NDEBUG)
|
||
if (begin >= 0 && end >= 0) {
|
||
SStringAppendF(&res, 0, "[%u,%u)", begin, end);
|
||
}
|
||
|
||
if (HasDayOfWeek()) {
|
||
SStringAppendF(&res, 0, "%u", day_of_week);
|
||
}
|
||
|
||
if (HasYear()) {
|
||
int year_output = year;
|
||
if (HasBcAd() && bc_ad == BCAD_BC) {
|
||
year_output = -year;
|
||
}
|
||
SStringAppendF(&res, 0, "%u/", year_output);
|
||
} else {
|
||
SStringAppendF(&res, 0, "____/");
|
||
}
|
||
|
||
if (HasMonth()) {
|
||
SStringAppendF(&res, 0, "%u/", month);
|
||
} else {
|
||
SStringAppendF(&res, 0, "__/");
|
||
}
|
||
|
||
if (HasDay()) {
|
||
SStringAppendF(&res, 0, "%u ", day);
|
||
} else {
|
||
SStringAppendF(&res, 0, "__ ");
|
||
}
|
||
|
||
if (HasHour()) {
|
||
SStringAppendF(&res, 0, "%u:", hour);
|
||
} else {
|
||
SStringAppendF(&res, 0, "__:");
|
||
}
|
||
|
||
if (HasMinute()) {
|
||
SStringAppendF(&res, 0, "%u:", minute);
|
||
} else {
|
||
SStringAppendF(&res, 0, "__:");
|
||
}
|
||
|
||
if (HasSecond()) {
|
||
if (HasFractionSecond()) {
|
||
SStringAppendF(&res, 0, "%u.%lf ", second, fraction_second);
|
||
} else {
|
||
SStringAppendF(&res, 0, "%u ", second);
|
||
}
|
||
} else {
|
||
SStringAppendF(&res, 0, "__ ");
|
||
}
|
||
|
||
if (HasTimeSpanCode() && TimespanCode_TIMESPAN_CODE_NONE < time_span_code &&
|
||
time_span_code <= TimespanCode_MAX) {
|
||
SStringAppendF(&res, 0, "TS=%u ", time_span_code);
|
||
}
|
||
|
||
if (HasTimeZoneCode() && time_zone_code != -1) {
|
||
SStringAppendF(&res, 0, "TZ= %u ", time_zone_code);
|
||
}
|
||
|
||
if (HasTimeZoneOffset()) {
|
||
SStringAppendF(&res, 0, "TZO=%u ", time_zone_offset);
|
||
}
|
||
|
||
if (HasRelativeDate()) {
|
||
const RelativeMatch* rm = relative_match;
|
||
SStringAppendF(&res, 0, (rm->is_future_date ? "future " : "past "));
|
||
if (rm->day_of_week != NO_VAL) {
|
||
SStringAppendF(&res, 0, "DOW:%d ", rm->day_of_week);
|
||
}
|
||
if (rm->year != NO_VAL) {
|
||
SStringAppendF(&res, 0, "Y:%d ", rm->year);
|
||
}
|
||
if (rm->month != NO_VAL) {
|
||
SStringAppendF(&res, 0, "M:%d ", rm->month);
|
||
}
|
||
if (rm->day != NO_VAL) {
|
||
SStringAppendF(&res, 0, "D:%d ", rm->day);
|
||
}
|
||
if (rm->week != NO_VAL) {
|
||
SStringAppendF(&res, 0, "W:%d ", rm->week);
|
||
}
|
||
if (rm->hour != NO_VAL) {
|
||
SStringAppendF(&res, 0, "H:%d ", rm->hour);
|
||
}
|
||
if (rm->minute != NO_VAL) {
|
||
SStringAppendF(&res, 0, "M:%d ", rm->minute);
|
||
}
|
||
if (rm->second != NO_VAL) {
|
||
SStringAppendF(&res, 0, "S:%d ", rm->second);
|
||
}
|
||
}
|
||
|
||
SStringAppendF(&res, 0, "prio=%d ", priority);
|
||
SStringAppendF(&res, 0, "conf-score=%lf ", annotator_priority_score);
|
||
|
||
if (IsHourAmbiguous()) {
|
||
std::vector<int8> values;
|
||
GetPossibleHourValues(&values);
|
||
std::string str_values;
|
||
|
||
for (unsigned int i = 0; i < values.size(); ++i) {
|
||
SStringAppendF(&str_values, 0, "%u,", values[i]);
|
||
}
|
||
SStringAppendF(&res, 0, "amb=%s ", str_values.c_str());
|
||
}
|
||
|
||
std::vector<std::string> tags;
|
||
if (is_inferred) {
|
||
tags.push_back("inferred");
|
||
}
|
||
if (!tags.empty()) {
|
||
SStringAppendF(&res, 0, "tag=%s ", JoinStrings(",", tags).c_str());
|
||
}
|
||
#endif // !defined(NDEBUG)
|
||
return res;
|
||
}
|
||
|
||
void DateMatch::GetPossibleHourValues(std::vector<int8>* values) const {
|
||
TC3_CHECK(values != nullptr);
|
||
values->clear();
|
||
if (HasHour()) {
|
||
int8 possible_hour = hour;
|
||
values->push_back(possible_hour);
|
||
for (int count = 1; count < ambiguous_hour_count; ++count) {
|
||
possible_hour += ambiguous_hour_interval;
|
||
if (possible_hour >= 24) {
|
||
possible_hour -= 24;
|
||
}
|
||
values->push_back(possible_hour);
|
||
}
|
||
}
|
||
}
|
||
|
||
// Returns the relative qualifier of this match:
//   - UNSPECIFIED when the match carries no relative date,
//   - PAST when the relative match has HAS_IS_FUTURE set and is_future_date is
//     false,
//   - FUTURE in every other relative case.
DatetimeComponent::RelativeQualifier DateMatch::GetRelativeQualifier() const {
  if (!HasRelativeDate()) {
    return DatetimeComponent::RelativeQualifier::UNSPECIFIED;
  }

  const bool explicitly_past =
      (relative_match->existing & RelativeMatch::HAS_IS_FUTURE) &&
      !relative_match->is_future_date;
  return explicitly_past ? DatetimeComponent::RelativeQualifier::PAST
                         : DatetimeComponent::RelativeQualifier::FUTURE;
}
|
||
|
||
// Embed RelativeQualifier information of DatetimeComponent as a sign of
|
||
// relative counter field of datetime component i.e. relative counter is
|
||
// negative when relative qualifier RelativeQualifier::PAST.
|
||
int GetAdjustedRelativeCounter(
|
||
const DatetimeComponent::RelativeQualifier& relative_qualifier,
|
||
const int relative_counter) {
|
||
if (DatetimeComponent::RelativeQualifier::PAST == relative_qualifier) {
|
||
return -relative_counter;
|
||
}
|
||
return relative_counter;
|
||
}
|
||
|
||
// Builds a DatetimeComponent of the given type from an absolute and a relative
// value, either of which may be NO_VAL.
//   - Both NO_VAL: returns an empty Optional.
//   - A missing absolute value is stored as 0.
//   - A missing relative value is stored as a relative count of 0 with an
//     UNSPECIFIED qualifier; otherwise the given qualifier is kept and the
//     count is sign-adjusted via GetAdjustedRelativeCounter().
Optional<DatetimeComponent> CreateDatetimeComponent(
    const DatetimeComponent::ComponentType& component_type,
    const DatetimeComponent::RelativeQualifier& relative_qualifier,
    const int absolute_value, const int relative_value) {
  const bool has_absolute = (absolute_value != NO_VAL);
  const bool has_relative = (relative_value != NO_VAL);
  if (!has_absolute && !has_relative) {
    return Optional<DatetimeComponent>();
  }

  const DatetimeComponent::RelativeQualifier effective_qualifier =
      has_relative ? relative_qualifier
                   : DatetimeComponent::RelativeQualifier::UNSPECIFIED;
  const int effective_value = has_absolute ? absolute_value : 0;
  const int effective_count =
      has_relative
          ? GetAdjustedRelativeCounter(relative_qualifier, relative_value)
          : 0;

  return Optional<DatetimeComponent>(
      DatetimeComponent(component_type, effective_qualifier, effective_value,
                        effective_count));
}
|
||
|
||
// Builds the DAY_OF_WEEK component from an absolute day of week and an
// optional relative match.
//   - Without a relative match only the absolute value is used.
//   - With a relative match the relative value starts as its day_of_week.
//     If HAS_DAY_OF_WEEK is set:
//       * a standalone relative day of week supplies the absolute value when
//         the absolute day of week is DOW_NONE;
//       * if HAS_WEEK is also set the relative value becomes 1;
//       * otherwise the count comes from the nonterminal's relative parameter
//         (when it has day-of-week interpretations): a negative count is
//         stored as its magnitude with a PAST qualifier, a positive count
//         gets a FUTURE qualifier, zero keeps the incoming qualifier.
Optional<DatetimeComponent> CreateDayOfWeekComponent(
    const RelativeMatch* relative_match,
    const DatetimeComponent::RelativeQualifier& relative_qualifier,
    const DayOfWeek& absolute_day_of_week) {
  DatetimeComponent::RelativeQualifier qualifier = relative_qualifier;
  int absolute_value = absolute_day_of_week;
  int relative_value = NO_VAL;

  if (relative_match != nullptr) {
    relative_value = relative_match->day_of_week;
  }

  if (relative_match != nullptr &&
      (relative_match->existing & RelativeMatch::HAS_DAY_OF_WEEK)) {
    if (relative_match->IsStandaloneRelativeDayOfWeek() &&
        absolute_day_of_week == DayOfWeek_DOW_NONE) {
      absolute_value = relative_match->day_of_week;
    }

    // Check if the relative date has day of week with week period.
    if (relative_match->existing & RelativeMatch::HAS_WEEK) {
      relative_value = 1;
    } else {
      const NonterminalValue* nonterminal =
          relative_match->day_of_week_nonterminal;
      TC3_CHECK(nonterminal != nullptr);
      TC3_CHECK(nonterminal->relative_parameter());
      const RelativeParameter* parameter = nonterminal->relative_parameter();
      if (parameter->day_of_week_interpretation()) {
        const int signed_count = GetRelativeCount(parameter);
        if (signed_count < 0) {
          // Keep the magnitude; the direction moves into the qualifier.
          relative_value = -signed_count;
          qualifier = DatetimeComponent::RelativeQualifier::PAST;
        } else {
          relative_value = signed_count;
          if (signed_count > 0) {
            qualifier = DatetimeComponent::RelativeQualifier::FUTURE;
          }
        }
      }
    }
  }

  return CreateDatetimeComponent(DatetimeComponent::ComponentType::DAY_OF_WEEK,
                                 qualifier, absolute_value, relative_value);
}
|
||
|
||
// Resolve the year’s ambiguity.
|
||
// If the year in the date has 4 digits i.e. DD/MM/YYYY then there is no
|
||
// ambiguity, the year value is YYYY but certain format i.e. MM/DD/YY is
|
||
// ambiguous e.g. in {April/23/15} year value can be 15 or 1915 or 2015.
|
||
// Following heuristic is used to resolve the ambiguity.
|
||
// - For YYYY there is nothing to resolve.
|
||
// - For all YY years
|
||
// - Value less than 50 will be resolved to 20YY
|
||
// - Value greater or equal 50 will be resolved to 19YY
|
||
static int InterpretYear(int parsed_year) {
|
||
if (parsed_year == NO_VAL) {
|
||
return parsed_year;
|
||
}
|
||
if (parsed_year < 100) {
|
||
if (parsed_year < 50) {
|
||
return parsed_year + 2000;
|
||
}
|
||
return parsed_year + 1900;
|
||
}
|
||
return parsed_year;
|
||
}
|
||
|
||
// Returns the datetime component of the requested type for this match, or an
// empty Optional when the match has no value for it (or the type is not
// handled here).
//   - YEAR/MONTH/DAY_OF_MONTH/HOUR/MINUTE/SECOND combine the absolute field
//     with the corresponding field of the relative match, if there is one.
//     The year is passed through InterpretYear() first.
//   - DAY_OF_WEEK is delegated to CreateDayOfWeekComponent().
//   - MERIDIEM is derived from the time span code and has no relative part.
//   - ZONE_OFFSET is emitted only when a time zone offset is present.
//   - WEEK has no absolute part; only the relative week is used.
Optional<DatetimeComponent> DateMatch::GetDatetimeComponent(
    const DatetimeComponent::ComponentType& component_type) const {
  const bool has_relative_match = (relative_match != nullptr);

  // Shared tail of most cases: same type and qualifier, different values.
  const auto make_component = [this, &component_type](
                                  const int absolute_value,
                                  const int relative_value) {
    return CreateDatetimeComponent(component_type, GetRelativeQualifier(),
                                   absolute_value, relative_value);
  };

  switch (component_type) {
    case DatetimeComponent::ComponentType::YEAR:
      return make_component(
          InterpretYear(year),
          has_relative_match ? relative_match->year : NO_VAL);

    case DatetimeComponent::ComponentType::MONTH:
      return make_component(
          month, has_relative_match ? relative_match->month : NO_VAL);

    case DatetimeComponent::ComponentType::DAY_OF_MONTH:
      return make_component(
          day, has_relative_match ? relative_match->day : NO_VAL);

    case DatetimeComponent::ComponentType::HOUR:
      return make_component(
          hour, has_relative_match ? relative_match->hour : NO_VAL);

    case DatetimeComponent::ComponentType::MINUTE:
      return make_component(
          minute, has_relative_match ? relative_match->minute : NO_VAL);

    case DatetimeComponent::ComponentType::SECOND:
      return make_component(
          second, has_relative_match ? relative_match->second : NO_VAL);

    case DatetimeComponent::ComponentType::DAY_OF_WEEK:
      return CreateDayOfWeekComponent(relative_match, GetRelativeQualifier(),
                                      day_of_week);

    case DatetimeComponent::ComponentType::MERIDIEM:
      return make_component(GetMeridiemValue(time_span_code), NO_VAL);

    case DatetimeComponent::ComponentType::ZONE_OFFSET: {
      if (!HasTimeZoneOffset()) {
        return Optional<DatetimeComponent>();
      }
      return Optional<DatetimeComponent>(DatetimeComponent(
          component_type, DatetimeComponent::RelativeQualifier::UNSPECIFIED,
          time_zone_offset, /*arg_relative_count=*/0));
    }

    case DatetimeComponent::ComponentType::WEEK:
      return make_component(
          NO_VAL, HasRelativeDate() ? relative_match->week : NO_VAL);

    default:
      return Optional<DatetimeComponent>();
  }
}
|
||
|
||
bool DateMatch::IsValid() const {
|
||
if (!HasYear() && HasBcAd()) {
|
||
return false;
|
||
}
|
||
if (!HasMonth() && HasYear() && (HasDay() || HasDayOfWeek())) {
|
||
return false;
|
||
}
|
||
if (!HasDay() && HasDayOfWeek() && (HasYear() || HasMonth())) {
|
||
return false;
|
||
}
|
||
if (!HasDay() && !HasDayOfWeek() && HasHour() && (HasYear() || HasMonth())) {
|
||
return false;
|
||
}
|
||
if (!HasHour() && (HasMinute() || HasSecond() || HasFractionSecond())) {
|
||
return false;
|
||
}
|
||
if (!HasMinute() && (HasSecond() || HasFractionSecond())) {
|
||
return false;
|
||
}
|
||
if (!HasSecond() && HasFractionSecond()) {
|
||
return false;
|
||
}
|
||
// Check whether day exists in a month, to exclude cases like "April 31".
|
||
if (HasDay() && HasMonth() && day > GetLastDayOfMonth(year, month)) {
|
||
return false;
|
||
}
|
||
return (HasDateFields() || HasTimeFields() || HasRelativeDate());
|
||
}
|
||
|
||
void DateMatch::FillDatetimeComponents(
|
||
std::vector<DatetimeComponent>* datetime_component) const {
|
||
static const std::vector<DatetimeComponent::ComponentType>*
|
||
kDatetimeComponents = new std::vector<DatetimeComponent::ComponentType>{
|
||
DatetimeComponent::ComponentType::ZONE_OFFSET,
|
||
DatetimeComponent::ComponentType::MERIDIEM,
|
||
DatetimeComponent::ComponentType::SECOND,
|
||
DatetimeComponent::ComponentType::MINUTE,
|
||
DatetimeComponent::ComponentType::HOUR,
|
||
DatetimeComponent::ComponentType::DAY_OF_MONTH,
|
||
DatetimeComponent::ComponentType::DAY_OF_WEEK,
|
||
DatetimeComponent::ComponentType::WEEK,
|
||
DatetimeComponent::ComponentType::MONTH,
|
||
DatetimeComponent::ComponentType::YEAR};
|
||
|
||
for (const DatetimeComponent::ComponentType& component_type :
|
||
*kDatetimeComponents) {
|
||
Optional<DatetimeComponent> date_time =
|
||
GetDatetimeComponent(component_type);
|
||
if (date_time.has_value()) {
|
||
datetime_component->emplace_back(date_time.value());
|
||
}
|
||
}
|
||
}
|
||
|
||
std::string DateRangeMatch::DebugString() const {
|
||
std::string res;
|
||
// The method is only called for debugging purposes.
|
||
#if !defined(NDEBUG)
|
||
if (begin >= 0 && end >= 0) {
|
||
SStringAppendF(&res, 0, "[%u,%u)\n", begin, end);
|
||
}
|
||
SStringAppendF(&res, 0, "from: %s \n", from.DebugString().c_str());
|
||
SStringAppendF(&res, 0, "to: %s\n", to.DebugString().c_str());
|
||
#endif // !defined(NDEBUG)
|
||
return res;
|
||
}
|
||
|
||
} // namespace dates
|
||
} // namespace libtextclassifier3
|