Shaltiel committed on
Commit
88c6751
1 Parent(s): 0dd6a49

Upload BertForJointParsing.py

Browse files
Files changed (1) hide show
  1. BertForJointParsing.py +19 -0
BertForJointParsing.py CHANGED
@@ -187,6 +187,25 @@ class BertForJointParsing(BertPreTrainedModel):
187
  )
188
 
189
  def predict(self, sentences: Union[str, List[str]], tokenizer: BertTokenizerFast, padding='longest', truncation=True, compute_syntax_mst=True, per_token_ner=False, output_style: Literal['json', 'ud', 'iahlt_ud'] = 'json'):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  is_single_sentence = isinstance(sentences, str)
191
  if is_single_sentence:
192
  sentences = [sentences]
 
187
  )
188
 
189
  def predict(self, sentences: Union[str, List[str]], tokenizer: BertTokenizerFast, padding='longest', truncation=True, compute_syntax_mst=True, per_token_ner=False, output_style: Literal['json', 'ud', 'iahlt_ud'] = 'json'):
190
+ """
191
+ Predicts various linguistic features using the DictaBERT model.
192
+
193
+ This function takes a sentence or a list of sentences in Hebrew and applies the BERT model to predict multiple linguistic attributes simultaneously. These include syntax, named entity recognition (NER), morphological analysis, lexical information, and text segmentation.
194
+
195
+ Parameters:
196
+ sentences (Union[str, List[str]]): A single sentence or a list of sentences in Hebrew.
197
+ tokenizer (BertTokenizerFast): The tokenizer used for preprocessing the input sentences.
198
+ padding (str, optional): The strategy for padding sentences. Defaults to 'longest'.
199
+ truncation (bool, optional): Flag to enable or disable truncation. Defaults to True.
200
+ compute_syntax_mst (bool, optional): If True, computes the maximum spanning tree for syntax prediction. Defaults to True.
201
+ per_token_ner (bool, optional): If True, performs NER for each token. Defaults to False.
202
+ output_style (Literal['json', 'ud', 'iahlt_ud'], optional): The format of the output. Choices are 'json', 'ud' (Universal Dependencies), or 'iahlt_ud' (UD in the style of IAHLT). Defaults to 'json'.
203
+
204
+ Returns:
205
+ Depending on the output_style chosen, returns the linguistic analysis in the specified format.
206
+
207
+ This function is integral to comprehensive linguistic analysis in applications involving Hebrew text and supports a variety of NLP tasks.
208
+ """
209
  is_single_sentence = isinstance(sentences, str)
210
  if is_single_sentence:
211
  sentences = [sentences]