EE21 committed on
Commit 2f8382c
1 Parent(s): da1f55e

Update abstractive_model.py

Files changed (1)
  1. abstractive_model.py +14 -4
abstractive_model.py CHANGED
@@ -1,12 +1,22 @@
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
-# Load the BART tokenizer and model
+# Load the fine-tuned BART tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("EE21/BART-ToSSimplify")
 model = AutoModelForSeq2SeqLM.from_pretrained("EE21/BART-ToSSimplify")
 
-# Define the abstractive summarization function
-def summarize_with_bart(input_text):
+# Load BART-large-cnn
+pipe = pipeline("summarization", model="facebook/bart-large-cnn")
+
+# Define the abstractive summarization function (fine-tuned BART)
+def summarize_with_bart_ft(input_text):
     inputs = tokenizer.encode("summarize: " + input_text, return_tensors="pt", max_length=1024, truncation=True)
     summary_ids = model.generate(inputs, max_length=200, min_length=50, num_beams=1, early_stopping=False, length_penalty=1)
     summary = tokenizer.decode(summary_ids[0], skip_special_tokens=False)
     return summary
+
+# Define the abstractive summarization function (BART-large-cnn)
+def summarize_with_bart(input_text):
+    inputs = tokenizer.encode("summarize: " + input_text, return_tensors="pt", max_length=1024, truncation=True)
+    summary_ids = model.generate(inputs, max_length=200, min_length=50, length_penalty=2.0, num_beams=2, early_stopping=True)
+    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+    return summary
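
Note on the new code: as committed, summarize_with_bart still encodes with the fine-tuned EE21/BART-ToSSimplify tokenizer and generates with its model; the facebook/bart-large-cnn pipeline stored in pipe is never called. Also, summarize_with_bart_ft decodes with skip_special_tokens=False, so its output keeps special tokens such as <s> and </s>. Below is a minimal sketch of how the second function might route through the pipeline instead; the length settings are carried over from the diff, and the truncation handling is an assumption, not part of the commit.

# Sketch only, not part of the commit: use the bart-large-cnn pipeline
# loaded above as `pipe` instead of the fine-tuned model/tokenizer.
def summarize_with_bart(input_text):
    # The pipeline tokenizes internally; truncation=True trims inputs
    # that exceed the model's maximum input length.
    result = pipe(input_text, max_length=200, min_length=50, truncation=True)
    # The summarization pipeline returns a list of dicts keyed by "summary_text".
    return result[0]["summary_text"]

Example call (document.txt is a hypothetical input file):

text = open("document.txt").read()
print(summarize_with_bart_ft(text))  # fine-tuned BART
print(summarize_with_bart(text))     # bart-large-cnn via the pipeline sketch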