/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "utils/grammar/rules-utils.h"

#include <algorithm>
#include <vector>

namespace libtextclassifier3::grammar {

// Collects the locales declared by each rules shard.
//
// Returns one entry per shard of `rules->rules()`, in shard order. The entry
// for a shard that declares no locale is left empty. Returns an empty vector
// when `rules` or its shard list is null.
std::vector<std::vector<Locale>> ParseRulesLocales(const RulesSet* rules) {
  if (rules == nullptr || rules->rules() == nullptr) {
    return {};
  }
  const int num_shards = static_cast<int>(rules->rules()->size());
  std::vector<std::vector<Locale>> locales(num_shards);
  for (int i = 0; i < num_shards; i++) {
    const grammar::RulesSet_::Rules* rules_shard = rules->rules()->Get(i);
    if (rules_shard->locale() == nullptr) {
      continue;
    }
    for (const LanguageTag* tag : *rules_shard->locale()) {
      locales[i].push_back(Locale::FromLanguageTag(tag));
    }
  }
  return locales;
}

// Selects the rules shards that apply to the given locales.
//
// `shard_locales[i]` holds the locales of shard `i` (the shape produced by
// ParseRulesLocales). A shard is selected when its locale list is empty or
// when Locale::IsAnyLocaleSupported reports a match between `locales` and the
// shard's locales (with a default of false).
//
// Returns the selected shards in shard order; returns an empty vector when
// `rules` or its shard list is null.
std::vector<const grammar::RulesSet_::Rules*> SelectLocaleMatchingShards(
    const RulesSet* rules,
    const std::vector<std::vector<Locale>>& shard_locales,
    const std::vector<Locale>& locales) {
  std::vector<const grammar::RulesSet_::Rules*> shards;
  if (rules == nullptr || rules->rules() == nullptr) {
    return shards;
  }
  // Never index past the end of either container, even if the caller passes
  // a `shard_locales` that was not built from this `rules` instance.
  const int num_shards =
      std::min(static_cast<int>(shard_locales.size()),
               static_cast<int>(rules->rules()->size()));
  for (int i = 0; i < num_shards; i++) {
    if (shard_locales[i].empty() ||
        Locale::IsAnyLocaleSupported(locales,
                                     /*supported_locales=*/shard_locales[i],
                                     /*default_value=*/false)) {
      shards.push_back(rules->rules()->Get(i));
    }
  }
  return shards;
}

// Removes derivations whose codepoint span is fully covered by another
// derivation of the same rule.
//
// The result is ordered by rule id, then by increasing span start, then by
// decreasing span end; it is not in input order. When two derivations of the
// same rule have identical spans, only the first one in that order is kept.
std::vector<Derivation> DeduplicateDerivations(
    const std::vector<Derivation>& derivations) {
  std::vector<Derivation> sorted_candidates = derivations;
  std::stable_sort(
      sorted_candidates.begin(), sorted_candidates.end(),
      [](const Derivation& a, const Derivation& b) {
        // Sort by id.
        if (a.rule_id != b.rule_id) {
          return a.rule_id < b.rule_id;
        }

        // Sort by increasing start.
        if (a.match->codepoint_span.first != b.match->codepoint_span.first) {
          return a.match->codepoint_span.first <
                 b.match->codepoint_span.first;
        }

        // Sort by decreasing end.
        return a.match->codepoint_span.second >
               b.match->codepoint_span.second;
      });

  // Deduplicate by overlap.
  std::vector<Derivation> result;
  const int num_candidates = static_cast<int>(sorted_candidates.size());
  for (int i = 0; i < num_candidates; i++) {
    const Derivation& candidate = sorted_candidates[i];
    bool eliminated = false;

    // Due to the sorting above, the candidate can only be completely
    // intersected by a match before it in the sorted order.
    for (int j = i - 1; j >= 0; j--) {
      // Entries of one rule are contiguous, so a different rule id means
      // there is nothing further back that could cover the candidate.
      if (sorted_candidates[j].rule_id != candidate.rule_id) {
        break;
      }
      if (sorted_candidates[j].match->codepoint_span.first <=
              candidate.match->codepoint_span.first &&
          sorted_candidates[j].match->codepoint_span.second >=
              candidate.match->codepoint_span.second) {
        eliminated = true;
        break;
      }
    }

    if (!eliminated) {
      result.push_back(candidate);
    }
  }
  return result;
}

// Checks the assertion nodes in the match tree rooted at `match`.
//
// Returns false as soon as a visited node of type Match::kAssertionMatch is
// marked negative, and true if no such node is visited.
bool VerifyAssertions(const Match* match) {
  bool result = true;
  grammar::Traverse(match, [&result](const Match* node) {
    if (node->type != Match::kAssertionMatch) {
      // Only keep validating if all checks so far passed.
      // NOTE(review): the return value is handed back to Traverse; presumably
      // it controls whether traversal continues - confirm against Traverse.
      return result;
    }

    // Positive assertions are by definition fulfilled,
    // fail if the assertion is negative.
    // NOTE(review): the cast target is taken to be `const AssertionMatch*`,
    // matching the Match::kAssertionMatch tag - confirm against the Match
    // type declarations.
    if (static_cast<const AssertionMatch*>(node)->negative) {
      result = false;
    }
    return result;
  });
  return result;
}

}  // namespace libtextclassifier3::grammar