File size: 1,600 Bytes
e37d0f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
965073a
e37d0f5
47f895c
e37d0f5
 
47f895c
e37d0f5
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import gradio as gr
import re

from transformers import (
    BertTokenizer,
    BartForConditionalGeneration,
    pipeline
)

#DEFINE TEXT CLEANING FUNCTION
def clean_text(text: str) -> str:
    """Normalise raw article text before it is handed to the headline model.

    The steps run in this order:

    1. Remove URLs (anything starting with ``http``).
    2. Replace the boilerplate markers ``ADVERTISEMENT`` / ``ADVERTISING``
       with a space.
    3. Turn newlines, carriage returns and tabs into spaces.
    4. Collapse runs of spaces and trim surrounding whitespace.

    Args:
        text: Raw text, e.g. an article pasted into the UI.

    Returns:
        The cleaned, single-line string (empty if nothing is left).
    """
    # Restrict the URL body to printable ASCII. Chinese prose has no spaces
    # between words, so a greedy ``\S+`` would delete every character that
    # follows a URL up to the next whitespace, i.e. real article content.
    text = re.sub(r"http[\x21-\x7e]+", "", text)
    text = re.sub(r"ADVERTIS(?:EMENT|ING)", " ", text)
    # A single pass covers "\n", "\t" and the "\r" of CRLF line endings; a
    # separate "\n\n" pass could never match once every "\n" is replaced.
    text = re.sub(r"[\r\n\t]", " ", text)
    # Collapse multiple spaces; the final strip() also trims the edges.
    return re.sub(r" +", " ", text).strip()

# Define headline writer function
# Checkpoint identifier shared by the tokenizer and the model below.
model_name = "chinhon/bart-large-chinese-cnhdwriter"

# Tokenizer is created with a 512-token maximum length.
tokenizer = BertTokenizer.from_pretrained(
    model_name,
    model_max_length=512,
)
model = BartForConditionalGeneration.from_pretrained(model_name)

# Single generation pipeline reused by every request; truncation is enabled
# so over-long inputs are cut rather than rejected.
text2text_generator = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
)

def cn_text(text):
    """Generate a headline suggestion for a piece of Chinese news text.

    The text is cleaned with ``clean_text`` and passed to the module-level
    ``text2text_generator`` pipeline; the ``generated_text`` field of the
    first prediction is returned.

    Args:
        text: Raw article text as entered by the user.

    Returns:
        The generated headline, or an empty string when there is no usable
        input (``None``, empty, or nothing left after cleaning).
    """
    # Guard against a missing value so clean_text() is never called on None.
    if not text:
        return ""

    input_text = clean_text(text)

    # Nothing left to summarise: skip the model instead of letting it
    # generate a headline from an empty prompt.
    if not input_text:
        return ""

    # NOTE(review): length_penalty is documented as a beam-search setting;
    # whether it takes effect depends on the model's generation config.
    prediction = text2text_generator(
        input_text,
        max_length=128,
        length_penalty=50.0,
    )

    # Only the first candidate is shown in the UI.
    return prediction[0].get("generated_text")


# 3. Define Gradio UI
# Build the two text widgets first, then wire them to cn_text.
_story_box = gr.Textbox(lines=20, label="Paste Chinese text here")
_headline_box = gr.Textbox(label="Suggested Headline")

gradio_ui = gr.Interface(
    fn=cn_text,
    inputs=_story_box,
    outputs=_headline_box,
    title="Chinese News Headlines Generator",
    description="Too busy or tired to write a headline for your Chinese news story? Try this instead.",
    theme="huggingface",
)

# Start the web app with request queuing switched on.
gradio_ui.launch(enable_queue=True)