Android11/external/libtextclassifier/native/utils/grammar/rules-utils.cc

/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "utils/grammar/rules-utils.h"

namespace libtextclassifier3::grammar {

std::vector<std::vector<Locale>> ParseRulesLocales(const RulesSet* rules) {
  if (rules == nullptr || rules->rules() == nullptr) {
    return {};
  }
  std::vector<std::vector<Locale>> locales(rules->rules()->size());
  for (int i = 0; i < rules->rules()->size(); i++) {
    const grammar::RulesSet_::Rules* rules_shard = rules->rules()->Get(i);
    if (rules_shard->locale() == nullptr) {
      continue;
    }
    for (const LanguageTag* tag : *rules_shard->locale()) {
      locales[i].push_back(Locale::FromLanguageTag(tag));
    }
  }
  return locales;
}

// Selects the rules shards of `rules` that apply to the given `locales`.
//
// `shard_locales[i]` holds the locales of the i-th shard (presumably the
// output of ParseRulesLocales for the same `rules` -- confirm at call sites).
// A shard is selected when its locale list is empty, or when
// Locale::IsAnyLocaleSupported reports a match between `locales` and the
// shard's locales.
//
// Returns an empty vector when `rules` is null or carries no shards.
std::vector<const grammar::RulesSet_::Rules*> SelectLocaleMatchingShards(
    const RulesSet* rules,
    const std::vector<std::vector<Locale>>& shard_locales,
    const std::vector<Locale>& locales) {
  std::vector<const grammar::RulesSet_::Rules*> shards;
  // Treat a missing rules set like ParseRulesLocales does: nothing to select.
  if (rules == nullptr || rules->rules() == nullptr) {
    return shards;
  }

  // Only visit indices valid for both containers, so that a `shard_locales`
  // longer than the shard list can never index past the end of the shards.
  const int num_shards = static_cast<int>(rules->rules()->size());
  const int num_locale_sets = static_cast<int>(shard_locales.size());
  for (int i = 0; i < num_shards && i < num_locale_sets; i++) {
    if (shard_locales[i].empty() ||
        Locale::IsAnyLocaleSupported(locales,
                                     /*supported_locales=*/shard_locales[i],
                                     /*default_value=*/false)) {
      shards.push_back(rules->rules()->Get(i));
    }
  }
  return shards;
}

std::vector<Derivation> DeduplicateDerivations(
    const std::vector<Derivation>& derivations) {
  std::vector<Derivation> sorted_candidates = derivations;
  std::stable_sort(
      sorted_candidates.begin(), sorted_candidates.end(),
      [](const Derivation& a, const Derivation& b) {
        // Sort by id.
        if (a.rule_id != b.rule_id) {
          return a.rule_id < b.rule_id;
        }

        // Sort by increasing start.
        if (a.match->codepoint_span.first != b.match->codepoint_span.first) {
          return a.match->codepoint_span.first < b.match->codepoint_span.first;
        }

        // Sort by decreasing end.
        return a.match->codepoint_span.second > b.match->codepoint_span.second;
      });

  // Deduplicate by overlap.
  std::vector<Derivation> result;
  for (int i = 0; i < sorted_candidates.size(); i++) {
    const Derivation& candidate = sorted_candidates[i];
    bool eliminated = false;

    // Due to the sorting above, the candidate can only be completely
    // intersected by a match before it in the sorted order.
    for (int j = i - 1; j >= 0; j--) {
      if (sorted_candidates[j].rule_id != candidate.rule_id) {
        break;
      }
      if (sorted_candidates[j].match->codepoint_span.first <=
              candidate.match->codepoint_span.first &&
          sorted_candidates[j].match->codepoint_span.second >=
              candidate.match->codepoint_span.second) {
        eliminated = true;
        break;
      }
    }

    if (!eliminated) {
      result.push_back(candidate);
    }
  }
  return result;
}

// Checks the assertion nodes visited while traversing the tree under `match`.
//
// Returns false as soon as a visited assertion node is marked negative, and
// true if none is.
bool VerifyAssertions(const Match* match) {
  bool all_fulfilled = true;
  grammar::Traverse(match, [&all_fulfilled](const Match* node) {
    // Positive assertions are fulfilled by definition; only a negative
    // assertion node makes the verification fail. The cast is evaluated only
    // for assertion nodes because of the short-circuiting `&&`.
    const bool is_negative_assertion =
        node->type == Match::kAssertionMatch &&
        static_cast<const AssertionMatch*>(node)->negative;
    if (is_negative_assertion) {
      all_fulfilled = false;
    }
    // Report whether all checks so far passed (presumably Traverse uses this
    // to decide whether to keep descending -- confirm against its contract).
    return all_fulfilled;
  });
  return all_fulfilled;
}

}  // namespace libtextclassifier3::grammar