headline_writer / app.py
1 import gradio as gr
2 import re
3
4 from transformers import (
5 AutoTokenizer,
6 AutoModelForSeq2SeqLM,
7 )
8
def clean_text(text):
    """Normalise raw article text before it is handed to the tokenizer.

    The steps run in this order:

    1. Drop every character that is not ASCII.
    2. Remove URLs (any run of non-whitespace that starts with ``http``).
    3. Turn newlines and tabs into single spaces.
    4. Collapse runs of spaces into one and trim surrounding whitespace.

    Args:
        text: Raw text to clean. ``None`` and the empty string are accepted.

    Returns:
        The cleaned, ASCII-only string; ``""`` when there is nothing to clean.
    """
    if not text:
        return ""

    # errors="ignore" silently discards *all* non-ASCII characters (CJK text,
    # accented letters, curly quotes, ...), not only Chinese ones.
    text = text.encode("ascii", errors="ignore").decode("ascii")
    text = re.sub(r"http\S+", "", text)
    # One pass covers newlines and tabs. A separate "\n\n" rule is unnecessary:
    # each newline becomes a space and the runs are collapsed just below.
    text = re.sub(r"[\n\t]", " ", text)
    # The final strip() also removes the leading/trailing spaces, so no
    # separate strip(" ") is needed beforehand.
    return re.sub(r" +", " ", text).strip()
22
23
model_name = "chinhon/headline_writer"

# Tokenizer/model pairs already loaded in this process, keyed by checkpoint
# name, so that repeated requests do not reload the weights.
_loaded_checkpoints = {}


def _load_checkpoint(name):
    """Return the ``(tokenizer, model)`` pair for *name*, loading it at most once."""
    if name not in _loaded_checkpoints:
        _loaded_checkpoints[name] = (
            AutoTokenizer.from_pretrained(name),
            AutoModelForSeq2SeqLM.from_pretrained(name),
        )
    return _loaded_checkpoints[name]


def headline_writer(text):
    """Generate a headline for a news story.

    The text is cleaned with ``clean_text``, tokenized (truncated to 1024
    tokens), passed through the seq2seq model named by ``model_name`` and the
    first generated sequence is decoded back to a string.

    Args:
        text: The opening paragraphs of the story.

    Returns:
        The suggested headline, or ``""`` when the cleaned input is empty.
    """
    input_text = clean_text(text)
    if not input_text:
        # Nothing left after cleaning; skip model loading and generation.
        return ""

    tokenizer, model = _load_checkpoint(model_name)

    # The article is the *source* sequence, so it is encoded with the plain
    # tokenizer call; as_target_tokenizer() is intended for labels only.
    batch = tokenizer(
        input_text,
        truncation=True,
        max_length=1024,
        padding="longest",
        return_tensors="pt",
    )

    raw_write = model.generate(**batch)

    # NOTE(review): the original passed min_length=200 and length_penalty=50.5
    # to batch_decode. Those are generation options, not decoding options, so
    # they are dropped here rather than moved to generate(), which keeps the
    # generated output unchanged. Pass tuned values to generate() if length
    # control is actually wanted.
    headline = tokenizer.batch_decode(raw_write, skip_special_tokens=True)

    return headline[0]
49
50
# Multi-line box where the user pastes the opening paragraphs of the story.
story_box = gr.inputs.Textbox(
    lines=20,
    label="Paste the first few paras of your news story here",
)

# Read-only box that shows the string returned by headline_writer.
headline_box = gr.outputs.Textbox(label="Suggested Headline")

# Wire the headline generator to the two text boxes.
gradio_ui = gr.Interface(
    fn=headline_writer,
    inputs=story_box,
    outputs=headline_box,
    title="Generate News Headlines with AI",
    description="Too busy or tired to write a headline? Try this instead.",
    theme="darkdefault",
)

# Start the web app with request queuing enabled.
gradio_ui.launch(enable_queue=True)
63