Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
from langdetect import detect | |
from text_extractor import TextExtractor | |
# Checkpoint used for summarization. It is loaded exactly once below; the same
# tokenizer/model objects back both the `summarizer` pipeline and the manual
# `model.generate` fallback in `summarize`, rather than instantiating the
# checkpoint a second time inside `pipeline(...)`.
_SUMMARIZATION_CHECKPOINT = "facebook/bart-large-cnn"

tokenizer = AutoTokenizer.from_pretrained(_SUMMARIZATION_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_SUMMARIZATION_CHECKPOINT)
summarizer = pipeline(task="summarization", model=model, tokenizer=tokenizer)

# Translation pipelines, one per direction (English -> French, French -> English).
translator_to_french = pipeline(
    task="translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr"
)
translator_to_english = pipeline(
    task="translation_fr_to_en", model="Helsinki-NLP/opus-mt-fr-en"
)
def summarize(doc: str, target_language: str) -> str:
    """Summarize a document and optionally translate the summary.

    The text is extracted with ``TextExtractor``, summarized to at most about
    half of its word count, and then translated between English and French
    when the detected language of the summary differs from the requested one.

    Args:
        doc: The document handed over by the ``gr.File`` input; it is passed
            unchanged to ``TextExtractor``.
            NOTE(review): depending on the Gradio version this may be a path
            or a temp-file wrapper rather than a plain ``str`` -- confirm
            against ``TextExtractor``.
        target_language: ``"English"`` or ``"French"`` (case-insensitive), or
            ``None`` to skip translation.

    Returns:
        The summary text, translated if needed. An empty string is returned
        when no text could be extracted from the document.
    """
    import logging

    from langdetect.lang_detect_exception import LangDetectException

    text_extractor = TextExtractor(doc)
    text, word_count = text_extractor.get_text()

    # Nothing to summarize: bail out instead of asking the model for a
    # zero-length generation.
    if not text or not word_count:
        return ""

    # Target roughly half of the input length, but never 0 (a one-word
    # document would otherwise request max_length=0).
    summary_length = max(1, int(word_count / 2))

    try:
        summary = summarizer(text, max_length=summary_length, do_sample=False)[0][
            "summary_text"
        ]
    except Exception:
        # Best-effort fallback: retry with the input explicitly truncated to
        # the tokenizer's maximum length. The original failure is logged so
        # it is not lost.
        logging.getLogger(__name__).warning(
            "Summarization pipeline failed; retrying with truncated input",
            exc_info=True,
        )
        inputs = tokenizer(
            text,
            truncation=True,
            max_length=tokenizer.model_max_length,
            return_tensors="pt",
        )
        summary_ids = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            num_beams=4,
            max_length=summary_length,
            early_stopping=True,
        )
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # No translation requested: language detection is unnecessary.
    if target_language is None:
        return summary

    try:
        detected_lang = detect(summary)
    except LangDetectException:
        # langdetect could not identify the language of the summary; return
        # it untranslated rather than failing the request.
        return summary

    requested = str(target_language).lower()
    if detected_lang == "fr" and requested == "english":
        summary = translator_to_english(summary)[0]["translation_text"]
    elif detected_lang == "en" and requested == "french":
        summary = translator_to_french(summary)[0]["translation_text"]
    return summary
# Gradio UI: a file upload plus a target-language selector feeding `summarize`,
# with the resulting summary shown in a text box.
app = gr.Interface(
    fn=summarize,
    inputs=[
        gr.File(
            label="Document to summarize",
            # File extensions are written with a leading dot, which is the
            # form Gradio documents for `file_types`.
            file_types=[".pdf", ".docx", ".doc", ".txt", ".odt", ".dot", ".dotx"],
        ),
        gr.Radio(
            label="Translate summary to", choices=["English", "French"], value="English"
        ),
    ],
    outputs=gr.Textbox(label="Summary"),
    # One value per input component, in the same order as `inputs`, so that
    # running an example calls `summarize` with both of its arguments.
    examples=[
        ["data/pd-file-example.pdf", "English"],
        ["data/doc-file-example.docx", "English"],
        ["data/text-file-example.txt", "English"],
    ],
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    app.launch()