File size: 1,600 Bytes
e37d0f5 965073a e37d0f5 47f895c e37d0f5 47f895c e37d0f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import gradio as gr
import re
from transformers import (
BertTokenizer,
BartForConditionalGeneration,
pipeline
)
#DEFINE TEXT CLEANING FUNCTION
def clean_text(text):
    """Normalize pasted article text into a single clean line.

    Steps, in order:
      1. Remove URLs (``http`` followed by any run of non-whitespace).
      2. Replace the literal markers ``ADVERTISEMENT`` / ``ADVERTISING``
         with a space.
      3. Replace newlines, carriage returns and tabs with a space.
      4. Collapse runs of spaces and trim surrounding whitespace.

    Args:
        text: Raw text to clean. ``None`` or an empty string is treated
            as empty input.

    Returns:
        The cleaned string (``""`` for empty input).
    """
    if not text:
        return ""

    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"ADVERTIS(?:EMENT|ING)", " ", text)
    # "\r" is handled together with "\n" and "\t" so that Windows-style
    # "\r\n" line endings do not leave stray carriage returns in the text.
    text = re.sub(r"[\r\n\t]", " ", text)
    # Collapse the runs of spaces produced by the substitutions above.
    return re.sub(r" +", " ", text).strip()
# Define headline writer function
# Checkpoint identifier passed to both from_pretrained() calls below.
model_name = "chinhon/bart-large-chinese-cnhdwriter"

# Tokenizer and model are created once, at module import, and handed to the
# pipeline object that cn_text() calls.
tokenizer = BertTokenizer.from_pretrained(model_name, model_max_length=512)
model = BartForConditionalGeneration.from_pretrained(model_name)

text2text_generator = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
)
def cn_text(text):
    """Generate a headline suggestion for the given news text.

    The text is cleaned with ``clean_text`` and passed to the module-level
    ``text2text_generator`` pipeline; the ``generated_text`` of the first
    result is returned.

    Args:
        text: Raw article text as entered by the user.

    Returns:
        The generated headline, or ``""`` when there is nothing to
        summarize (empty, ``None`` or whitespace-only input).
    """
    # Nothing was submitted: skip cleaning and inference entirely.
    if not text:
        return ""

    input_text = clean_text(text)
    # Input that cleans down to nothing (only whitespace/URLs) would make the
    # model produce a headline for no content, so return early instead.
    if not input_text:
        return ""

    # max_length and length_penalty are forwarded to the pipeline call
    # unchanged from the original settings.
    prediction = text2text_generator(
        input_text,
        max_length=128,
        length_penalty=50.0,
    )
    # Only the first result is used; .get() keeps the original behavior of
    # yielding None if the key is missing.
    return prediction[0].get("generated_text")
#3 Define Gradio UI
# Web UI: one multi-line textbox for the article, one textbox for the
# headline returned by cn_text().
gradio_ui = gr.Interface(
    fn=cn_text,
    inputs=gr.Textbox(lines=20, label="Paste Chinese text here"),
    outputs=gr.Textbox(label="Suggested Headline"),
    title="Chinese News Headlines Generator",
    description="Too busy or tired to write a headline for your Chinese news story? Try this instead.",
    theme="huggingface",
)

# NOTE(review): theme="huggingface" and launch(enable_queue=True) depend on
# the installed Gradio version -- confirm they are still supported before
# upgrading Gradio.
gradio_ui.launch(enable_queue=True)
|