nickmuchi committed on
Commit
cac7541
1 Parent(s): 262e678

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +7 -2
functions.py CHANGED
@@ -132,7 +132,7 @@ def load_models():
132
  ner_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english")
133
  emb_tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-xl')
134
  sent_pipe = pipeline("text-classification",model=q_model, tokenizer=q_tokenizer)
135
- sum_pipe = pipeline("summarization",model="philschmid/flan-t5-base-samsum")
136
  ner_pipe = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, grouped_entities=True)
137
  cross_encoder = CrossEncoder('cross-encoder/mmarco-mMiniLMv2-L12-H384-v1') #cross-encoder/ms-marco-MiniLM-L-12-v2
138
  sbert = SentenceTransformer('all-MiniLM-L6-v2')
@@ -366,7 +366,12 @@ def chunk_and_preprocess_text(text, model_name= 'philschmid/flan-t5-base-samsum'
366
  def summarize_text(text_to_summarize,max_len,min_len):
367
  '''Summarize text with HF model'''
368
 
369
- summarized_text = sum_pipe(text_to_summarize,max_length=max_len,min_length=min_len,do_sample=False)
 
 
 
 
 
370
  summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
371
 
372
  return summarized_text
 
132
  ner_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english")
133
  emb_tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-xl')
134
  sent_pipe = pipeline("text-classification",model=q_model, tokenizer=q_tokenizer)
135
+ sum_pipe = pipeline("summarization",model="philschmid/flan-t5-base-samsum",clean_up_tokenization_spaces=True)
136
  ner_pipe = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, grouped_entities=True)
137
  cross_encoder = CrossEncoder('cross-encoder/mmarco-mMiniLMv2-L12-H384-v1') #cross-encoder/ms-marco-MiniLM-L-12-v2
138
  sbert = SentenceTransformer('all-MiniLM-L6-v2')
 
366
  def summarize_text(text_to_summarize,max_len,min_len):
367
  '''Summarize text with HF model'''
368
 
369
+ summarized_text = sum_pipe(text_to_summarize,
370
+ max_length=max_len,
371
+ min_length=min_len,
372
+ do_sample=False,
373
+ early_stopping=True,
374
+ num_beams=4)
375
  summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
376
 
377
  return summarized_text