# import sentencepiece before transformers to avoid crashes
import sentencepiece

# for text generation
from transformers import pipeline

generator = pipeline("text-generation", model="distilgpt2")

# for NER (grouped_entities merges word-piece tokens into whole entities)
ner = pipeline("ner", grouped_entities=True)  # usage: ner('text')

# for summarization
summarizer = pipeline("summarization")  # usage: summarizer('text')

# for POS tagging
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('brown')
from textblob import TextBlob
# usage: blob = TextBlob(text); POS_List = blob.tags

# for zh -> en translation
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-zh-en", use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-zh-en")

import jionlp as jio  # traditional -> simplified Chinese conversion
# usage: simtext = jio.tra2sim(tra_text, mode='char')

import gradio as gr


def TextProcessor(txt):
    """Dispatch *txt* to one of several NLP tasks based on its content.

    Routing rules (in order):
    - First character's code point above 'z' (122): assumed Chinese text;
      convert traditional -> simplified and translate zh -> en.
    - Single word: return its POS tag.
    - Contains an ellipsis ("..." or "…"): text generation, seeded with
      the text before the ellipsis.
    - Otherwise: extract nouns and verbs from the POS tags.

    Returns a string or a tuple of results (Gradio renders either as text).
    """
    txt = str(txt)
    # Guard: empty input would crash ord(txt[0]) below.
    if not txt:
        return "Please enter some text."

    # Code points above ASCII 'z' (122) are treated as Chinese.
    # NOTE(review): this heuristic also matches accented Latin, emoji, etc.
    if ord(txt[0]) > 122:
        # convert traditional characters to simplified before translating
        sim_text = jio.tra2sim(txt, mode='char')
        # Pipeline is rebuilt on every call; fine for a demo, but could be
        # hoisted to module level to avoid the per-request setup cost.
        zh2en_trans = pipeline("translation_zh_to_en", model=model, tokenizer=tokenizer)
        results = zh2en_trans(sim_text)[0]['translation_text']
    elif len(txt.split()) < 2:
        # a single word -> return its POS tag
        blob = TextBlob(txt)
        POS_List = blob.tags
        results = POS_List[0][1]
    elif "..." in txt or "…" in txt:
        # BUGFIX: the original condition was `if "..." or "…" in str(txt):`,
        # which is always truthy ("..." is a non-empty string), so this
        # branch fired unconditionally and the phrase-extraction branch
        # below was unreachable.
        # Drop the trailing ellipsis (assumed to be the final 3 characters;
        # a single-char "…" loses 2 extra chars — TODO confirm intent) and
        # continue the text with the generator.
        text = txt[0:-3]
        txt_generation = generator(text, max_length=50, num_return_sequences=1)
        results = txt_generation[0]["generated_text"]
    else:
        # noun/verb extraction from POS tags
        # (summarization and NER are disabled; re-enable by uncommenting)
        #txt_summarization = summarizer(txt)
        #result_01 = txt_summarization[0]
        #result_02 = ner(txt)
        blob = TextBlob(txt)
        POS_List = blob.tags
        # Penn Treebank noun tags start with "N", verb tags with "V"
        noun_phrases = [np for np in POS_List if "N" in np[1][0]]
        result_03 = noun_phrases
        verb_phrases = [vp for vp in POS_List if "V" in vp[1][0]]
        result_04 = verb_phrases
        results = ("noun_phrases:", result_03, "verb_phrases:", result_04)
        #"Summary:", result_01['summary_text'], "NER:", result_02,
    return results


final = gr.Interface(fn=TextProcessor, inputs="text", outputs="text")
final.launch()