EE21 committed on
Commit
d4396fe
1 Parent(s): c602055

Update abstractive_model.py

Browse files
Files changed (1) hide show
  1. abstractive_model.py +13 -5
abstractive_model.py CHANGED
@@ -4,9 +4,17 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  tokenizer = AutoTokenizer.from_pretrained("EE21/BART-ToSSimplify")
5
  model = AutoModelForSeq2SeqLM.from_pretrained("EE21/BART-ToSSimplify")
6
 
7
- # Define the abstractive summarization function
8
- def summarize_with_bart(input_text):
9
- inputs = tokenizer.encode("summarize: " + input_text, return_tensors="pt", max_length=1024, truncation=True)
10
- summary_ids = model.generate(inputs, max_length=500, min_length=300, num_beams=4)
11
- summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
12
  return summary
 
4
  tokenizer = AutoTokenizer.from_pretrained("EE21/BART-ToSSimplify")
5
  model = AutoModelForSeq2SeqLM.from_pretrained("EE21/BART-ToSSimplify")
6
 
7
+ # Define a function to summarize text with minimum length constraint
8
+ def summarize_with_bart(input_text, max_summary_tokens=200, min_summary_tokens=100, do_sample=False):
9
+ # Tokenize the input text and return input_ids as PyTorch tensors
10
+ inputs = tokenizer(input_text, return_tensors="pt").input_ids
11
+ # Generate the summary with minimum and maximum length constraints
12
+ outputs = model.generate(inputs,
13
+ max_length=max_summary_tokens,
14
+ min_length=min_summary_tokens,
15
+ do_sample=do_sample)
16
+
17
+ # Decode the generated token IDs back into text
18
+ summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
19
+
20
  return summary