// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
include "bit-vector.fbs";
include "buffer.fbs";
include "codepoint-range.fbs";
include "entity-data.fbs";
include "experimental.fbs";
include "flatbuffers.fbs";
include "intent-config.fbs";
include "normalization.fbs";
include "resources.fbs";
include "rules.fbs";
include "tokenizer.fbs";

file_identifier "TC2 ";
// The possible model modes, represents a bit field.
namespace libtextclassifier3;
enum ModeFlag : int {
  NONE = 0,
  ANNOTATION = 1,
  CLASSIFICATION = 2,
  ANNOTATION_AND_CLASSIFICATION = 3,
  SELECTION = 4,
  ANNOTATION_AND_SELECTION = 5,
  CLASSIFICATION_AND_SELECTION = 6,
  ALL = 7,
}
// Enum for specifying the annotation usecase.
namespace libtextclassifier3;
enum AnnotationUsecase : int {
  // Results are optimized for Smart{Select,Share,Linkify}.
  ANNOTATION_USECASE_SMART = 0,

  // Results are optimized for using TextClassifier as an infrastructure that
  // annotates as much as possible.
  ANNOTATION_USECASE_RAW = 1,
}
// Token types recognized by the rule-based datetime extractors.
namespace libtextclassifier3;
enum DatetimeExtractorType : int {
  UNKNOWN_DATETIME_EXTRACTOR_TYPE = 0,
  AM = 1,
  PM = 2,
  JANUARY = 3,
  FEBRUARY = 4,
  MARCH = 5,
  APRIL = 6,
  MAY = 7,
  JUNE = 8,
  JULY = 9,
  AUGUST = 10,
  SEPTEMBER = 11,
  OCTOBER = 12,
  NOVEMBER = 13,
  DECEMBER = 14,
  NEXT = 15,
  NEXT_OR_SAME = 16,
  LAST = 17,
  NOW = 18,
  TOMORROW = 19,
  YESTERDAY = 20,
  PAST = 21,
  FUTURE = 22,
  DAY = 23,
  WEEK = 24,
  MONTH = 25,
  YEAR = 26,
  MONDAY = 27,
  TUESDAY = 28,
  WEDNESDAY = 29,
  THURSDAY = 30,
  FRIDAY = 31,
  SATURDAY = 32,
  SUNDAY = 33,
  DAYS = 34,
  WEEKS = 35,
  MONTHS = 36,

  // TODO(zilka): Make the following 3 values singular for consistency.
  HOURS = 37,
  MINUTES = 38,
  SECONDS = 39,

  YEARS = 40,
  DIGITS = 41,
  SIGNEDDIGITS = 42,
  ZERO = 43,
  ONE = 44,
  TWO = 45,
  THREE = 46,
  FOUR = 47,
  FIVE = 48,
  SIX = 49,
  SEVEN = 50,
  EIGHT = 51,
  NINE = 52,
  TEN = 53,
  ELEVEN = 54,
  TWELVE = 55,
  THIRTEEN = 56,
  FOURTEEN = 57,
  FIFTEEN = 58,
  SIXTEEN = 59,
  SEVENTEEN = 60,
  EIGHTEEN = 61,
  NINETEEN = 62,
  TWENTY = 63,
  THIRTY = 64,
  FORTY = 65,
  FIFTY = 66,
  SIXTY = 67,
  SEVENTY = 68,
  EIGHTY = 69,
  NINETY = 70,
  HUNDRED = 71,
  THOUSAND = 72,
  NOON = 73,
  MIDNIGHT = 74,
}
// Types of capturing groups in datetime regex patterns; the ith entry in
// DatetimeModelPattern_.Regex.groups uses these to type the ith group.
namespace libtextclassifier3;
enum DatetimeGroupType : int {
  GROUP_UNKNOWN = 0,
  GROUP_UNUSED = 1,
  GROUP_YEAR = 2,
  GROUP_MONTH = 3,
  GROUP_DAY = 4,
  GROUP_HOUR = 5,
  GROUP_MINUTE = 6,
  GROUP_SECOND = 7,
  GROUP_AMPM = 8,
  GROUP_RELATIONDISTANCE = 9,
  GROUP_RELATION = 10,
  GROUP_RELATIONTYPE = 11,

  // Dummy groups serve just as an inflator of the selection. E.g. we might want
  // to select more text than was contained in an envelope of all extractor
  // spans.
  GROUP_DUMMY1 = 12,
  GROUP_DUMMY2 = 13,

  GROUP_ABSOLUTETIME = 14,
}
// Options for the model that predicts text selection.
namespace libtextclassifier3;
table SelectionModelOptions {
  // If true, before the selection is returned, the unpaired brackets contained
  // in the predicted selection are stripped from the both selection ends.
  // The bracket codepoints are defined in the Unicode standard:
  // http://www.unicode.org/Public/UNIDATA/BidiBrackets.txt
  strip_unpaired_brackets:bool = true;

  // Number of hypothetical click positions on either side of the actual click
  // to consider in order to enforce symmetry.
  symmetry_context_size:int;

  // Number of examples to bundle in one batch for inference.
  batch_size:int = 1024;

  // Whether to always classify a suggested selection or only on demand.
  always_classify_suggested_selection:bool = false;
}
// Options for the model that classifies a text selection.
namespace libtextclassifier3;
table ClassificationModelOptions {
  // Limits for phone numbers.
  phone_min_num_digits:int = 7;

  phone_max_num_digits:int = 15;

  // Limits for addresses.
  address_min_num_tokens:int;

  // Maximum number of tokens to attempt a classification (-1 is unlimited).
  max_num_tokens:int = -1;
}
// Options for post-checks, checksums and verification to apply on a match.
namespace libtextclassifier3;
table VerificationOptions {
  // If true, the matched number must pass a Luhn checksum check.
  verify_luhn_checksum:bool = false;

  // Lua verifier to use.
  // Index of the lua verifier in the model (-1 means no Lua verification).
  lua_verifier:int = -1;
}
// Behaviour of rule capturing groups.
// This specifies how the text and span of a capturing group, in a regular
// expression or from a capturing match in a grammar rule, should be handled.
namespace libtextclassifier3;
table CapturingGroup {
  // If true, the span of the capturing group will be used to
  // extend the selection.
  extend_selection:bool = true;

  // If set, the text of the capturing group will be used to set a field in
  // the classification result entity data.
  entity_field_path:FlatbufferFieldPath;

  // If set, the flatbuffer entity data will be merged with the
  // classification result entity data.
  serialized_entity_data:string;

  // If set, normalization to apply before text is used in entity data.
  normalization_options:NormalizationOptions;

  // Entity data (unserialized form of serialized_entity_data).
  entity_data:EntityData;
}
// List of regular expression matchers to check.
namespace libtextclassifier3.RegexModel_;
table Pattern {
  // The name of the collection of a match.
  collection_name:string;

  // The pattern to check.
  pattern:string;

  // The modes for which to apply the patterns.
  enabled_modes:ModeFlag = ALL;

  // The final score to assign to the results of this pattern.
  target_classification_score:float = 1;

  // Priority score used for conflict resolution with the other models.
  priority_score:float = 0;

  // If true, will use an approximate matching implementation implemented
  // using Find() instead of the true Match(). This approximate matching will
  // use the first Find() result and then check that it spans the whole input.
  use_approximate_matching:bool = false;

  // Compressed form of the pattern; used instead of `pattern` when set.
  compressed_pattern:CompressedBuffer;

  // Verification to apply on a match.
  verification_options:VerificationOptions;

  // Behaviour of capturing groups of this pattern.
  capturing_group:[CapturingGroup];

  // Entity data to set for a match.
  serialized_entity_data:string;

  // Entity data (unserialized form of serialized_entity_data).
  entity_data:EntityData;
}
// Regular-expression based annotation model.
namespace libtextclassifier3;
table RegexModel {
  // The patterns to run.
  patterns:[RegexModel_.Pattern];

  // If true, will compile the regexes only on first use.
  lazy_regex_compilation:bool = true;

  // Lua scripts for match verification.
  // The verifier can access:
  // * `context`: The context as a string.
  // * `match`: The groups of the regex match as an array, each group gives
  //   * `begin`: span start
  //   * `end`: span end
  //   * `text`: the text
  // The verifier is expected to return a boolean, indicating whether the
  // verification succeeded or not.
  lua_verifier:[string];
}
// List of regex patterns.
namespace libtextclassifier3.DatetimeModelPattern_;
table Regex {
  // The pattern to match.
  pattern:string;

  // The ith entry specifies the type of the ith capturing group.
  // This is used to decide how the matched content has to be parsed.
  groups:[DatetimeGroupType];

  // Compressed form of the pattern; used instead of `pattern` when set.
  compressed_pattern:CompressedBuffer;
}
// A set of datetime regexes together with the locales they apply to and the
// scores/modes of the results they produce.
namespace libtextclassifier3;
table DatetimeModelPattern {
  // The regexes to run.
  regexes:[DatetimeModelPattern_.Regex];

  // List of locale indices in DatetimeModel that represent the locales that
  // these patterns should be used for. If empty, can be used for all locales.
  locales:[int];

  // The final score to assign to the results of this pattern.
  target_classification_score:float = 1;

  // Priority score used for conflict resolution with the other models.
  priority_score:float = 0;

  // The modes for which to apply the patterns.
  enabled_modes:ModeFlag = ALL;

  // The annotation usecases for which to apply the patterns.
  // This is a flag field for values of AnnotationUsecase.
  enabled_annotation_usecases:uint = 4294967295;
}
// A pattern for one DatetimeExtractorType token, per set of locales.
namespace libtextclassifier3;
table DatetimeModelExtractor {
  // The token type this extractor pattern recognizes.
  extractor:DatetimeExtractorType;

  // The pattern to match.
  pattern:string;

  // Indices into DatetimeModel.locales this extractor applies to.
  locales:[int];

  // Compressed form of the pattern; used instead of `pattern` when set.
  compressed_pattern:CompressedBuffer;
}
// Rule-based datetime annotation model.
namespace libtextclassifier3;
table DatetimeModel {
  // List of BCP 47 locale strings representing all locales supported by the
  // model. The individual patterns refer back to them using an index.
  locales:[string];

  // The patterns to run.
  patterns:[DatetimeModelPattern];

  // The extractors the patterns are built from.
  extractors:[DatetimeModelExtractor];

  // If true, will use the extractors for determining the match location as
  // opposed to using the location where the global pattern matched.
  use_extractors_for_locating:bool = true;

  // List of locale ids, rules of whose are always run, after the requested
  // ones.
  default_locales:[int];

  // If true, will generate the alternative interpretations for ambiguous
  // datetime expressions.
  generate_alternative_interpretations_when_ambiguous:bool = false;

  // If true, will compile the regexes only on first use.
  lazy_regex_compilation:bool = true;

  // If true, will give only future dates (when the day is not specified).
  prefer_future_for_unspecified_date:bool = false;
}
// Configuration for the tokenizer.
namespace libtextclassifier3;
table GrammarTokenizerOptions {
  tokenization_type:TokenizationType = ICU;

  // If true, white space tokens will be kept when using the icu tokenizer.
  icu_preserve_whitespace_tokens:bool = false;

  // Codepoint ranges that determine what role the different codepoints play
  // during tokenization. The ranges must not overlap.
  tokenization_codepoint_config:[TokenizationCodepointRange];

  // A set of codepoint ranges to use in the mixed tokenization mode to identify
  // stretches of tokens to re-tokenize using the internal tokenizer.
  internal_tokenizer_codepoint_ranges:[CodepointRange];

  // If true, tokens will be also split when the codepoint's script_id changes
  // as defined in TokenizationCodepointRange.
  tokenize_on_script_change:bool = false;
}
// A named entry in the datetime model library.
namespace libtextclassifier3.DatetimeModelLibrary_;
table Item {
  // Name under which the model is looked up.
  key:string;

  value:DatetimeModel;
}
// A set of named DateTime models.
namespace libtextclassifier3;
table DatetimeModelLibrary {
  models:[DatetimeModelLibrary_.Item];
}
// Classification result to instantiate for a rule match.
namespace libtextclassifier3.GrammarModel_;
table RuleClassificationResult {
  // The name of the collection.
  collection_name:string;

  // The score.
  target_classification_score:float = 1;

  // The priority score used for conflict resolution with the other models.
  priority_score:float = 0;

  // Behaviour of capturing matches.
  capturing_group:[CapturingGroup];

  // Entity data to set for a match.
  serialized_entity_data:string;

  // Enabled modes.
  enabled_modes:ModeFlag = ALL;

  // Entity data (unserialized form of serialized_entity_data).
  entity_data:EntityData;
}
// Configuration for grammar based annotators.
namespace libtextclassifier3;
table GrammarModel {
  // The grammar rules.
  rules:grammar.RulesSet;

  // Classification results, indexed by the callback parameter of rule matches.
  rule_classification_result:[GrammarModel_.RuleClassificationResult];

  // Number of tokens in the context to use for classification and text
  // selection suggestion.
  // A value -1 uses the full context.
  context_left_num_tokens:int;

  context_right_num_tokens:int;

  // Grammar specific tokenizer options.
  tokenizer_options:GrammarTokenizerOptions;
}
// Map entry: quantity string -> power-of-ten exponent.
namespace libtextclassifier3.MoneyParsingOptions_;
table QuantitiesNameToExponentEntry {
  key:string (key);

  value:int;
}
// Options for parsing money amounts.
namespace libtextclassifier3;
table MoneyParsingOptions {
  // Separators (codepoints) marking decimal or thousand in the money amount.
  separators:[int];

  // Mapping between a quantity string (e.g. "million") and the power of 10
  // it multiplies the amount with (e.g. 6 in case of "million").
  // NOTE: The entries need to be sorted by key since we use LookupByKey.
  quantities_name_to_exponent:[MoneyParsingOptions_.QuantitiesNameToExponentEntry];
}
// Map entry: collection name -> priority score factor.
namespace libtextclassifier3.ModelTriggeringOptions_;
table CollectionToPriorityEntry {
  key:string (key);

  value:float;
}
// Options controlling the output of the Tensorflow Lite models.
namespace libtextclassifier3;
table ModelTriggeringOptions {
  // Lower bound threshold for filtering annotation model outputs.
  min_annotate_confidence:float = 0;

  // The modes for which to enable the models.
  enabled_modes:ModeFlag = ALL;

  // Comma-separated list of locales (BCP 47 tags) that dictionary
  // classification supports.
  dictionary_locales:string;

  // Comma-separated list of locales (BCP 47 tags) that the model supports, that
  // are used to prevent triggering on input in unsupported languages. If
  // empty, the model will trigger on all inputs.
  locales:string;

  // Priority score assigned to the "other" class from ML model.
  other_collection_priority_score:float = -1000;

  // Priority score assigned to knowledge engine annotations.
  knowledge_priority_score:float = 0;

  reserved_7:int16 (deprecated);

  // Apply a factor to the priority score for entities that are added to this
  // map. Key: collection type e.g. "address", "phone"..., Value: float number.
  // NOTE: The entries here need to be sorted since we use LookupByKey.
  collection_to_priority:[ModelTriggeringOptions_.CollectionToPriorityEntry];
}
// Options controlling the output of the classifier.
namespace libtextclassifier3;
table OutputOptions {
  // Lists of collection names that will be filtered out at the output:
  // - For annotation, the spans of given collection are simply dropped.
  // - For classification, the result is mapped to the class "other".
  // - For selection, the spans of given class are returned as
  //   single-selection.
  filtered_collections_annotation:[string];

  filtered_collections_classification:[string];

  filtered_collections_selection:[string];
}
// Pruning configuration for the compressed embedding matrix.
namespace libtextclassifier3.Model_;
table EmbeddingPruningMask {
  // If true, use pruning mask. In this case, we use mask
  // pruning_mask to determine the mapping of hashed-charactergrams.
  enabled:bool;

  // Packing of the binary pruning mask into uint64 values.
  pruning_mask:[ulong] (force_align: 16);

  // Number of buckets before pruning.
  full_num_buckets:int;

  // Index of row of compressed embedding matrix to which all pruned buckets
  // are mapped.
  pruned_row_bucket_id:int;
}
// Options for resolving conflicts between overlapping annotations.
namespace libtextclassifier3.Model_;
table ConflictResolutionOptions {
  // If true, will prioritize the longest annotation during conflict
  // resolution.
  prioritize_longest_annotation:bool = false;

  // If true, the annotator will perform conflict resolution between the
  // different sub-annotators also in the RAW mode. If false, no conflict
  // resolution will be performed in RAW mode.
  do_conflict_resolution_in_raw_mode:bool = true;
}
// Top-level annotator model; the root type of this schema.
namespace libtextclassifier3;
table Model {
  // Comma-separated list of locales supported by the model as BCP 47 tags.
  locales:string;

  version:int;

  // A name for the model that can be used for e.g. logging.
  name:string;

  selection_feature_options:FeatureProcessorOptions;

  classification_feature_options:FeatureProcessorOptions;

  // Tensorflow Lite models.
  selection_model:[ubyte] (force_align: 16);

  classification_model:[ubyte] (force_align: 16);

  embedding_model:[ubyte] (force_align: 16);

  // Options for the different models.
  selection_options:SelectionModelOptions;

  classification_options:ClassificationModelOptions;

  regex_model:RegexModel;

  datetime_model:DatetimeModel;

  // Options controlling the output of the models.
  triggering_options:ModelTriggeringOptions;

  // Global switch that controls if SuggestSelection(), ClassifyText() and
  // Annotate() will run. If a mode is disabled it returns empty/no-op results.
  enabled_modes:ModeFlag = ALL;

  // If true, will snap the selections that consist only of whitespaces to the
  // containing suggested span. Otherwise, no suggestion is proposed, since the
  // selections are not part of any token.
  snap_whitespace_selections:bool = true;

  // Global configuration for the output of SuggestSelection(), ClassifyText()
  // and Annotate().
  output_options:OutputOptions;

  // Configures how Intents should be generated on Android.
  android_intent_options:AndroidIntentFactoryOptions;

  intent_options:IntentFactoryModel;

  // Model resources.
  resources:ResourcePool;

  // Schema data for handling entity data.
  entity_data_schema:[ubyte];

  number_annotator_options:NumberAnnotatorOptions;

  duration_annotator_options:DurationAnnotatorOptions;

  // Comma-separated list of locales (BCP 47 tags) that the model supports, that
  // are used to prevent triggering on input in unsupported languages. If
  // empty, the model will trigger on all inputs.
  triggering_locales:string;

  embedding_pruning_mask:Model_.EmbeddingPruningMask;

  reserved_25:int16 (deprecated);

  contact_annotator_options:ContactAnnotatorOptions;

  money_parsing_options:MoneyParsingOptions;

  translate_annotator_options:TranslateAnnotatorOptions;

  grammar_model:GrammarModel;

  conflict_resolution_options:Model_.ConflictResolutionOptions;

  experimental_model:ExperimentalModel;

  pod_ner_model:PodNerModel;

  vocab_model:VocabModel;
}
// Method for selecting the center token.
namespace libtextclassifier3.FeatureProcessorOptions_;
enum CenterTokenSelectionMethod : int {
  // Invalid option.
  DEFAULT_CENTER_TOKEN_METHOD = 0,

  // Use click indices to determine the center token.
  CENTER_TOKEN_FROM_CLICK = 1,

  // Use selection indices to get a token range, and select the middle of it
  // as the center token.
  CENTER_TOKEN_MIDDLE_OF_SELECTION = 2,
}
// Bounds-sensitive feature extraction configuration.
namespace libtextclassifier3.FeatureProcessorOptions_;
table BoundsSensitiveFeatures {
  // Enables the extraction of bounds-sensitive features, instead of the click
  // context features.
  enabled:bool;

  // The numbers of tokens to extract in specific locations relative to the
  // bounds.
  // Immediately before the span.
  num_tokens_before:int;

  // Inside the span, aligned with the beginning.
  num_tokens_inside_left:int;

  // Inside the span, aligned with the end.
  num_tokens_inside_right:int;

  // Immediately after the span.
  num_tokens_after:int;

  // If true, also extracts the tokens of the entire span and adds up their
  // features forming one "token" to include in the extracted features.
  include_inside_bag:bool;

  // If true, includes the selection length (in the number of tokens) as a
  // feature.
  include_inside_length:bool;

  // If true, for selection, single token spans are not run through the model
  // and their score is assumed to be zero.
  score_single_token_spans_as_zero:bool;
}
// Options for the feature processor that converts text into model features.
namespace libtextclassifier3;
table FeatureProcessorOptions {
  // Number of buckets used for hashing charactergrams.
  num_buckets:int = -1;

  // Size of the embedding.
  embedding_size:int = -1;

  // Number of bits for quantization for embeddings.
  embedding_quantization_bits:int = 8;

  // Context size defines the number of words to the left and to the right of
  // the selected word to be used as context. For example, if context size is
  // N, then we take N words to the left and N words to the right of the
  // selected word as its context.
  context_size:int = -1;

  // Maximum number of words of the context to select in total.
  max_selection_span:int = -1;

  // Orders of charactergrams to extract. E.g., 2 means character bigrams, 3
  // character trigrams etc.
  chargram_orders:[int];

  // Maximum length of a word, in codepoints.
  max_word_length:int = 20;

  // If true, will use the unicode-aware functionality for extracting features.
  unicode_aware_features:bool = false;

  // Whether to extract the token case feature.
  extract_case_feature:bool = false;

  // Whether to extract the selection mask feature.
  extract_selection_mask_feature:bool = false;

  // List of regexps to run over each token. For each regexp, if there is a
  // match, a dense feature of 1.0 is emitted. Otherwise -1.0 is used.
  regexp_feature:[string];

  // Whether to remap all digits to a single number.
  remap_digits:bool = false;

  // Whether to lower-case each token before generating hashgrams.
  lowercase_tokens:bool;

  // If true, the selection classifier output will contain only the selections
  // that are feasible (e.g., those that are shorter than max_selection_span),
  // if false, the output will be a complete cross-product of possible
  // selections to the left and possible selections to the right, including the
  // infeasible ones.
  // NOTE: Exists mainly for compatibility with older models that were trained
  // with the non-reduced output space.
  selection_reduced_output_space:bool = true;

  // Collection names.
  collections:[string];

  // An index of collection in collections to be used if a collection name can't
  // be mapped to an id.
  default_collection:int = -1;

  // If true, will split the input by lines, and only use the line that contains
  // the clicked token.
  only_use_line_with_click:bool = false;

  // If true, will split tokens that contain the selection boundary, at the
  // position of the boundary.
  // E.g. "foo{bar}@google.com" -> "foo", "bar", "@google.com"
  split_tokens_on_selection_boundaries:bool = false;

  // Codepoint ranges that determine how different codepoints are tokenized.
  // The ranges must not overlap.
  tokenization_codepoint_config:[TokenizationCodepointRange];

  center_token_selection_method:FeatureProcessorOptions_.CenterTokenSelectionMethod;

  // If true, span boundaries will be snapped to containing tokens and not
  // required to exactly match token boundaries.
  snap_label_span_boundaries_to_containing_tokens:bool;

  // A set of codepoint ranges supported by the model.
  supported_codepoint_ranges:[CodepointRange];

  // A set of codepoint ranges to use in the mixed tokenization mode to identify
  // stretches of tokens to re-tokenize using the internal tokenizer.
  internal_tokenizer_codepoint_ranges:[CodepointRange];

  // Minimum ratio of supported codepoints in the input context. If the ratio
  // is lower than this, the feature computation will fail.
  min_supported_codepoint_ratio:float = 0;

  // Used for versioning the format of features the model expects.
  // - feature_version == 0:
  //   For each token the features consist of:
  //    - chargram embeddings
  //    - dense features
  //   Chargram embeddings for tokens are concatenated first together,
  //   and at the end, the dense features for the tokens are concatenated
  //   to it. So the resulting feature vector has two regions.
  feature_version:int = 0;

  tokenization_type:TokenizationType = INTERNAL_TOKENIZER;

  icu_preserve_whitespace_tokens:bool = false;

  // List of codepoints that will be stripped from beginning and end of
  // predicted spans.
  ignored_span_boundary_codepoints:[int];

  bounds_sensitive_features:FeatureProcessorOptions_.BoundsSensitiveFeatures;

  // List of allowed charactergrams. The extracted charactergrams are filtered
  // using this list, and charactergrams that are not present are interpreted as
  // out-of-vocabulary.
  // If no allowed_chargrams are specified, all charactergrams are allowed.
  // The field is typed as bytes type to allow non-UTF8 chargrams.
  allowed_chargrams:[string];

  // If true, tokens will be also split when the codepoint's script_id changes
  // as defined in TokenizationCodepointRange.
  tokenize_on_script_change:bool = false;

  // If true, the pipe character '|' will be used as a newline character when
  // splitting lines.
  use_pipe_character_for_newline:bool = true;
}
// Options for the number/percentage annotator.
namespace libtextclassifier3;
table NumberAnnotatorOptions {
  // If true, number and percentage annotations will be produced.
  enabled:bool = false;

  // Score to assign to the annotated numbers and percentages in the annotator.
  score:float = 1;

  // Number priority score used for conflict resolution with the other models.
  priority_score:float = 0;

  // The modes in which to enable number and percentage annotations.
  enabled_modes:ModeFlag = ALL;

  // The annotation usecases for which to produce number annotations.
  // This is a flag field for values of AnnotationUsecase.
  enabled_annotation_usecases:uint = 4294967295;

  // [Deprecated] A list of codepoints that can form a prefix of a valid number.
  allowed_prefix_codepoints:[int];

  // [Deprecated] A list of codepoints that can form a suffix of a valid number.
  allowed_suffix_codepoints:[int];

  // [Deprecated] List of codepoints that will be stripped from beginning of
  // predicted spans.
  ignored_prefix_span_boundary_codepoints:[int];

  // [Deprecated] List of codepoints that will be stripped from end of predicted
  // spans.
  ignored_suffix_span_boundary_codepoints:[int];

  // [Deprecated] If true, percent annotations will be produced.
  enable_percentage:bool = false;

  // Zero separated and ordered list of suffixes that mark a percent.
  percentage_pieces_string:string;

  // [Deprecated] List of suffixes offsets in the percent_pieces_string string.
  percentage_pieces_offsets:[int];

  // Priority score for the percentage annotation.
  percentage_priority_score:float = 1;

  // Float number priority score used for conflict resolution with the other
  // models.
  float_number_priority_score:float = 0;

  // The maximum number of digits an annotated number can have. Requirement:
  // the value should be less or equal to 20.
  max_number_of_digits:int = 20;

  // The annotation usecases for which to produce percentage annotations.
  // This is a flag field for values of AnnotationUsecase.
  percentage_annotation_usecases:uint = 2;
}
// DurationAnnotator is so far tailored for English and Japanese only.
namespace libtextclassifier3;
table DurationAnnotatorOptions {
  // If true, duration annotations will be produced.
  enabled:bool = false;

  // Score to assign to the annotated durations from the annotator.
  score:float = 1;

  // Priority score used for conflict resolution with the other models.
  priority_score:float = 0;

  // The modes in which to enable duration annotations.
  enabled_modes:ModeFlag = ALL;

  // The annotation usecases for which to produce duration annotations.
  enabled_annotation_usecases:uint = 4294967295;

  // Durations typically look like XX hours and XX minutes etc... The list of
  // strings below enumerate variants of "hours", "minutes", etc. in these
  // expressions. These are verbatim strings that are matched against tokens in
  // the input.
  week_expressions:[string];

  day_expressions:[string];

  hour_expressions:[string];

  minute_expressions:[string];

  second_expressions:[string];

  // List of expressions that don't break a duration expression (can become
  // a part of it) but have no semantic meaning.
  filler_expressions:[string];

  // List of expressions that mean half of a unit of duration (e.g. "half an
  // hour").
  half_expressions:[string];

  // Set of codepoints that can split the Annotator tokens to sub-tokens for
  // sub-token matching.
  sub_token_separator_codepoints:[int];

  // If this is true, unit must be associated with quantity. For example, a
  // phrase "minute" is not parsed as one minute duration if this is true.
  require_quantity:bool;

  // If this is true, dangling quantity is included in the annotation. For
  // example, "10 minutes 20" is interpreted as 10 minutes and 20 seconds.
  enable_dangling_quantity_interpretation:bool = true;
}
// Options for the contact annotator.
namespace libtextclassifier3;
table ContactAnnotatorOptions {
  // Supported for English genitives only so far.
  enable_declension:bool;

  // For each language there is a customized list of supported declensions.
  language:string;
}
// Language-detection algorithm used by the translate annotator.
namespace libtextclassifier3.TranslateAnnotatorOptions_;
enum Algorithm : int {
  DEFAULT_ALGORITHM = 0,
  BACKOFF = 1,
}
// Backoff is the algorithm shipped with Android Q.
namespace libtextclassifier3.TranslateAnnotatorOptions_;
table BackoffOptions {
  // The minimum size of text to prefer for detection (in codepoints).
  min_text_size:int = 20;

  // For reducing the score when text is less than the preferred size.
  penalize_ratio:float = 1;

  // Original detection score to surrounding text detection score ratios.
  subject_text_score_ratio:float = 0.4;
}
// Options for the translate annotator.
namespace libtextclassifier3;
table TranslateAnnotatorOptions {
  enabled:bool = false;

  // Score to assign to the classification results.
  score:float = 1;

  // Priority score used for conflict resolution with the other models.
  priority_score:float;

  algorithm:TranslateAnnotatorOptions_.Algorithm;

  backoff_options:TranslateAnnotatorOptions_.BackoffOptions;
}
// A collection (entity type) the POD NER model can produce.
namespace libtextclassifier3.PodNerModel_;
table Collection {
  // Collection's name (e.g., "location", "person").
  name:string;

  // Priority scores used for conflict resolution with the other annotators
  // when the annotation is made over a single/multi token text.
  single_token_priority_score:float;

  multi_token_priority_score:float;
}
// BOISE tagging-scheme position of a token within a mention span.
namespace libtextclassifier3.PodNerModel_.Label_;
enum BoiseType : int {
  NONE = 0,
  BEGIN = 1,
  // No label.
  O = 2,
  INTERMEDIATE = 3,
  SINGLE = 4,
  END = 5,
}
// Type of mention: named (NAM) or nominal (NOM).
namespace libtextclassifier3.PodNerModel_.Label_;
enum MentionType : int {
  UNDEFINED = 0,
  NAM = 1,
  NOM = 2,
}
// A possible output label of the NER model.
namespace libtextclassifier3.PodNerModel_;
table Label {
  boise_type:Label_.BoiseType;

  mention_type:Label_.MentionType;

  // Points to the collections array in PodNerModel.
  collection_id:int;
}
// Configuration for the POD NER (named entity recognition) annotator.
namespace libtextclassifier3;
table PodNerModel {
  tflite_model:[ubyte];

  word_piece_vocab:[ubyte];

  lowercase_input:bool = true;

  // Index of mention_logits tensor in the output of the tflite model. Can
  // be found in the textproto output after model is converted to tflite.
  logits_index_in_output_tensor:int = 0;

  // Whether to append a period at the end of an input that doesn't already
  // end in punctuation.
  append_final_period:bool = false;

  // Priority score used for conflict resolution with the other models. Used
  // only if collections_array is empty.
  priority_score:float = 0;

  // Maximum number of wordpieces supported by the model.
  max_num_wordpieces:int = 128;

  // In case of long text (number of wordpieces greater than the max) we use
  // sliding window approach, this determines the number of overlapping
  // wordpieces between two consecutive windows. This overlap enables context
  // for each word NER annotates.
  sliding_window_num_wordpieces_overlap:int = 20;

  reserved_9:int16 (deprecated);

  // The possible labels the ner model can output. If empty the default labels
  // will be used.
  labels:[PodNerModel_.Label];

  // If the ratio of unknown wordpieces in the input text is greater than this
  // maximum, the text won't be annotated.
  max_ratio_unknown_wordpieces:float = 0.1;

  // Possible collections for labeled entities.
  collections:[PodNerModel_.Collection];

  // Minimum word-length and wordpieces-length required for the text to be
  // annotated.
  min_number_of_tokens:int = 1;

  min_number_of_wordpieces:int = 1;
}
// Configuration for the vocabulary ("Define") annotator.
namespace libtextclassifier3;
table VocabModel {
  // A trie that stores a list of vocabs that triggers "Define". A id is
  // returned when looking up a vocab from the trie and the id can be used
  // to access more information about that vocab. The marisa trie library
  // requires 8-byte alignment because the first thing in a marisa trie is a
  // 64-bit integer.
  vocab_trie:[ubyte] (force_align: 8);

  // A bit vector that tells if the vocab should trigger "Define" for users of
  // beginner proficiency only. To look up the bit vector, use the id returned
  // by the trie.
  beginner_level:BitVectorData;

  // A sorted list of indices of vocabs that should not trigger "Define" if
  // its leading character is in upper case. The indices are those returned by
  // trie. You may perform binary search to look up an index.
  do_not_trigger_in_upper_case:BitVectorData;

  // Comma-separated list of locales (BCP 47 tags) that the model supports, that
  // are used to prevent triggering on input in unsupported languages. If
  // empty, the model will trigger on all inputs.
  triggering_locales:string;

  // The final score to assign to the results of the vocab model.
  target_classification_score:float = 1;

  // Priority score used for conflict resolution with the other models.
  priority_score:float = 0;
}
root_type libtextclassifier3.Model;