"""Gradio app that suggests newsroom headlines with fine-tuned seq2seq models."""

import functools
import re

import gradio as gr
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
)

# Model used for "Singapore News" and for any unrecognised headline category.
DEFAULT_MODEL = "chinhon/pegasus-newsroom-headline_writer_oct22"

# Headline category (as shown in the dropdown) -> Hugging Face model id.
# The dropdown choices are derived from this mapping so the two cannot drift.
MODEL_BY_CATEGORY = {
    "Singapore News": DEFAULT_MODEL,
    "International News": "chinhon/pegasus-newsroom_wires_hdwriter42k",
    "Commentary": "chinhon/bart-large-commentaries_hdwriter",
    "News in Malay": "chinhon/pegasus-newsroom-malay_headlines",
}

# Upper bound on tokenizer/model pairs kept in memory at once. Each checkpoint
# is large, so the cache is deliberately bounded; raise this value if the host
# has enough RAM to keep every model resident.
MODEL_CACHE_SIZE = 2

# Newlines, tabs and advertisement markers are each replaced by one space.
_NOISE_RE = re.compile(r"[\n\t]|ADVERTISEMENT|ADVERTISING")
_MULTI_SPACE_RE = re.compile(r" +")


def clean_text(text: str) -> str:
    """Normalise pasted article text before it is tokenized.

    Drops non-ASCII characters (e.g. Chinese characters), replaces newlines,
    tabs and the markers ``ADVERTISEMENT`` / ``ADVERTISING`` with a space,
    collapses runs of spaces and strips surrounding whitespace.

    Args:
        text: Raw text pasted by the user. ``None`` or an empty string is
            accepted.

    Returns:
        The cleaned text, or ``""`` when there is nothing to clean.
    """
    if not text:
        return ""

    ascii_only = text.encode("ascii", errors="ignore").decode("ascii")
    without_noise = _NOISE_RE.sub(" ", ascii_only)
    return _MULTI_SPACE_RE.sub(" ", without_noise).strip()


@functools.lru_cache(maxsize=MODEL_CACHE_SIZE)
def _load_model(model_name: str):
    """Load and memoise the tokenizer/model pair for ``model_name``.

    Loading a checkpoint is by far the slowest step of a request, so recently
    used pairs are kept in a small LRU cache instead of being reloaded on
    every call.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return tokenizer, model


def newsroom_hd(hdchoice: str, text: str) -> str:
    """Generate a headline for ``text`` with the model chosen by ``hdchoice``.

    Args:
        hdchoice: Headline category selected in the UI. Unknown values fall
            back to the default (Singapore News) model.
        text: Article text pasted by the user.

    Returns:
        The suggested headline, or ``""`` when ``text`` is empty after
        cleaning (no model is loaded in that case).
    """
    input_text = clean_text(text)
    if not input_text:
        return ""

    model_name = MODEL_BY_CATEGORY.get(hdchoice, DEFAULT_MODEL)
    tokenizer, model = _load_model(model_name)

    # The article is the model *input*, so it is encoded with a plain
    # tokenizer call; the deprecated `as_target_tokenizer()` context is meant
    # for encoding labels and is not needed here.
    batch = tokenizer(
        input_text, truncation=True, padding="longest", return_tensors="pt"
    )
    generated = model.generate(**batch)
    headlines = tokenizer.batch_decode(generated, skip_special_tokens=True)
    return headlines[0]


gradio_ui = gr.Interface(
    fn=newsroom_hd,
    title="Generate Newsroom Headlines With AI",
    description=(
        "**How to use**: Select the type of headline you wish to generate, "
        "paste in a relevant amount of text, and click submit."
    ),
    article=(
        "**Note**: Paste in as much text as you think necessary, though "
        "there's an automatic cut-off of about 500 words for some models "
        "and about 850 words for others. \n"
        "If you copy-and-paste directly from a website, take note to remove "
        "unrelated text such as those for advertisements and recommended "
        "links."
    ),
    inputs=[
        gr.Dropdown(
            label="Select the type of headlines you would like to generate",
            choices=list(MODEL_BY_CATEGORY),
            value="Singapore News",
        ),
        gr.Textbox(label="Paste text here"),
    ],
    outputs=gr.Textbox(label="Suggested Headline"),
)

# Requests are queued so long-running generations are processed in order.
gradio_ui.queue().launch()