# Hugging Face Space (status: Sleeping) — multi-task NLP text-processing demo
# import sentencepiece before transformers to avoid crashes
import sentencepiece

# for text generation
from transformers import pipeline
generator = pipeline("text-generation", model="distilgpt2")  # remember to set length and number of returns when calling

# for NER; usage: ner('text')
ner = pipeline("ner", grouped_entities=True)

# for summarization; usage: summarizer('text')
summarizer = pipeline("summarization")

# for POS tagging (TextBlob needs these NLTK corpora)
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('brown')
from textblob import TextBlob  # blob = TextBlob(text); POS_List = blob.tags

# for zh -> en translation
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-zh-en", use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-zh-en")

# traditional -> simplified Chinese; usage: jio.tra2sim(tra_text, mode='char')
import jionlp as jio
import gradio as gr
def TextProcessor(txt):
    """Dispatch input text to one of four NLP tasks.

    - First char outside the ASCII letter range (> 122): treat as Chinese,
      convert traditional -> simplified, then translate zh -> en.
    - Single English word: return its POS tag.
    - English text containing an ellipsis ("..." or "…"): text generation.
    - Other English text: extract noun and verb phrases from POS tags.

    Returns a string (translation / POS tag / generated text) or a tuple
    of labeled noun/verb phrase lists.
    """
    txt = str(txt)
    # guard: empty input would crash ord(txt[0])
    if not txt:
        return ""
    # ASCII code greater than 122 ('z') is treated as zh
    if ord(txt[0]) > 122:
        # convert to zh_sim before translating
        sim_text = jio.tra2sim(txt, mode='char')
        zh2en_trans = pipeline("translation_zh_to_en", model=model, tokenizer=tokenizer)
        return zh2en_trans(sim_text)[0]['translation_text']
    # single word -> POS tag; otherwise treat as sentence(s)
    if len(txt.split()) < 2:
        blob = TextBlob(txt)
        return blob.tags[0][1]
    # BUG FIX: original `if "..." or "…" in str(txt)` was always True
    # (non-empty literal "..." is truthy), so the phrase-extraction
    # branch below was unreachable.
    if "..." in txt or "…" in txt:
        # strip the trailing ellipsis before generating; handles both the
        # 3-char "..." and the 1-char "…" (original always cut 3 chars)
        prompt = txt.rstrip().rstrip(".").rstrip("…")
        txt_generation = generator(prompt, max_length=50, num_return_sequences=1)
        return txt_generation[0]["generated_text"]
    # otherwise: noun and verb phrases (summarization/NER left disabled,
    # matching the original commented-out code)
    # txt_summarization = summarizer(txt)
    # result_02 = ner(txt)
    blob = TextBlob(txt)
    pos_list = blob.tags
    # tags starting with "N" are nouns, with "V" are verbs
    noun_phrases = [np for np in pos_list if "N" in np[1][0]]
    verb_phrases = [vp for vp in pos_list if "V" in vp[1][0]]
    return ("noun_phrases:", noun_phrases, "verb_phrases:", verb_phrases)
# build and launch the Gradio UI: one text box in, one text box out
final = gr.Interface(fn=TextProcessor, inputs="text", outputs="text")
final.launch()