# import sentencepiece before transformers to avoid crashes
import sentencepiece

# for text generation
from transformers import pipeline

generator = pipeline("text-generation", model="distilgpt2")

# for NER (grouped_entities merges word-piece tokens into whole entities)
ner = pipeline("ner", grouped_entities=True)  # usage: ner('text')

# for summarization
summarizer = pipeline("summarization")  # usage: summarizer('text')

# for POS tagging
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('brown')
from textblob import TextBlob
# usage: blob = TextBlob(text); POS_List = blob.tags

# for zh -> en translation
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-zh-en", use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-zh-en")

import jionlp as jio  # traditional -> simplified Chinese conversion
# usage: simtext = jio.tra2sim(tra_text, mode='char')

import gradio as gr


def TextProcessor(txt):
    """Dispatch *txt* to one of several NLP tasks based on its content.

    Routing rules (in order):
    - First character's code point above 'z' (122): assumed Chinese text;
      convert traditional -> simplified and translate zh -> en.
    - Single word: return its POS tag.
    - Contains an ellipsis ("..." or "…"): text generation, seeded with
      the text before the ellipsis.
    - Otherwise: extract nouns and verbs from the POS tags.

    Returns a string or a tuple of results (Gradio renders either as text).
    """
    txt = str(txt)
    # Guard: empty input would crash ord(txt[0]) below.
    if not txt:
        return "Please enter some text."

    # Code points above ASCII 'z' (122) are treated as Chinese.
    # NOTE(review): this heuristic also matches accented Latin, emoji, etc.
    if ord(txt[0]) > 122:
        # convert traditional characters to simplified before translating
        sim_text = jio.tra2sim(txt, mode='char')
        # Pipeline is rebuilt on every call; fine for a demo, but could be
        # hoisted to module level to avoid the per-request setup cost.
        zh2en_trans = pipeline("translation_zh_to_en", model=model, tokenizer=tokenizer)
        results = zh2en_trans(sim_text)[0]['translation_text']
    elif len(txt.split()) < 2:
        # a single word -> return its POS tag
        blob = TextBlob(txt)
        POS_List = blob.tags
        results = POS_List[0][1]
    elif "..." in txt or "…" in txt:
        # BUGFIX: the original condition was `if "..." or "…" in str(txt):`,
        # which is always truthy ("..." is a non-empty string), so this
        # branch fired unconditionally and the phrase-extraction branch
        # below was unreachable.
        # Drop the trailing ellipsis (assumed to be the final 3 characters;
        # a single-char "…" loses 2 extra chars — TODO confirm intent) and
        # continue the text with the generator.
        text = txt[0:-3]
        txt_generation = generator(text, max_length=50, num_return_sequences=1)
        results = txt_generation[0]["generated_text"]
    else:
        # noun/verb extraction from POS tags
        # (summarization and NER are disabled; re-enable by uncommenting)
        #txt_summarization = summarizer(txt)
        #result_01 = txt_summarization[0]
        #result_02 = ner(txt)
        blob = TextBlob(txt)
        POS_List = blob.tags
        # Penn Treebank noun tags start with "N", verb tags with "V"
        noun_phrases = [np for np in POS_List if "N" in np[1][0]]
        result_03 = noun_phrases
        verb_phrases = [vp for vp in POS_List if "V" in vp[1][0]]
        result_04 = verb_phrases
        results = ("noun_phrases:", result_03, "verb_phrases:", result_04)
        #"Summary:", result_01['summary_text'], "NER:", result_02,
    return results


final = gr.Interface(fn=TextProcessor, inputs="text", outputs="text")
final.launch()