"""Gradio demo that paraphrases financial text and returns the most readable variant.

Candidate paraphrases come from a T5 paraphrasing model; each candidate is
scored by a pickled classifier applied to FinBERT sentence embeddings.
"""
import pickle

import gradio as gr
import lightgbm  # noqa: F401  # NOTE(review): unused here; presumably needed to unpickle the classifier -- confirm
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# A paraphrase is only offered when its readability score exceeds this value.
READABILITY_THRESHOLD = 0.6

# NOTE(security): pickle executes arbitrary code on load. This is only
# acceptable because the file is a model artifact shipped with the app;
# never point this at a file from an untrusted source.
with open("lr_clf_finread_new.pkl", "rb") as clf_file:
    lr_clf_finbert = pickle.load(clf_file)

model_read = SentenceTransformer('ProsusAI/finbert')


def get_readability(text):
    """Return the readability score of *text*.

    The text is embedded with ``model_read`` and the score is the
    classifier's predicted probability for column index 1, rounded to four
    decimals.

    NOTE(review): column 1 is presumably the "readable" class (the earlier
    version of this function mapped a predicted label of 1 to 'readable');
    confirm against the training code.
    """
    embedding = model_read.encode([text])
    return round(lr_clf_finbert.predict_proba(embedding)[0, 1], 4)


# Reference : https://huggingface.co/humarin/chatgpt_paraphraser_on_T5_base
tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")


def paraphrase(
    question,
    num_beams=5,
    num_beam_groups=5,
    num_return_sequences=5,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=128
):
    """Generate paraphrases of *question* with the T5 paraphraser.

    Args:
        question: Text to paraphrase; it is prefixed with ``'paraphrase: '``
            before tokenization and truncated to ``max_length`` tokens.
        num_beams, num_beam_groups, num_return_sequences, repetition_penalty,
        diversity_penalty, no_repeat_ngram_size, temperature, max_length:
            Forwarded unchanged to ``model.generate``.

    Returns:
        The decoded generated sequences, with special tokens removed.
    """
    input_ids = tokenizer(
        f'paraphrase: {question}',
        return_tensors="pt",
        padding="longest",
        max_length=max_length,
        truncation=True,
    ).input_ids

    outputs = model.generate(
        input_ids,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams,
        num_beam_groups=num_beam_groups,
        max_length=max_length,
        diversity_penalty=diversity_penalty,
    )

    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


def get_most_raedable_paraphrse(text):
    """Return a message containing the most readable paraphrase of *text*.

    The input itself competes with its paraphrases. A paraphrase is returned
    only if it scores highest and its score exceeds ``READABILITY_THRESHOLD``;
    otherwise an apology message is returned.
    """
    candidates = paraphrase(text)
    # The original text goes last, so on a tie a generated paraphrase wins.
    candidates.append(text)

    scored = [(candidate, get_readability(candidate)) for candidate in candidates]
    # max() keeps the first of several equal maxima, i.e. the earliest candidate.
    best, best_score = max(scored, key=lambda pair: pair[1])

    if best != text and best_score > READABILITY_THRESHOLD:
        return "The most redable version of text that I can think of is:\n" + best
    # This branch used to evaluate the message without assigning it, so the
    # function raised UnboundLocalError instead of answering.
    return "Sorry! I am not confident. As per my best knowledge, you already have the most readable version of the text!"


def set_example_text(example_text):
    """Fill the input textbox with the text of the clicked example row."""
    # gr.update is used instead of gr.Textbox.update, which Gradio 4 removed.
    return gr.update(value=example_text[0])


with gr.Blocks() as demo:
    # NOTE(review): the literal is kept exactly as it appears in the source;
    # if "Financial Language Simplifier" was meant to be a subtitle on its own
    # line, split the string after "FinLanSer" -- confirm.
    gr.Markdown(
        """ # FinLanSer Financial Language Simplifier """)
    text = gr.Textbox(label="Enter text you want to simply (make more readable)")
    greet_btn = gr.Button("Simplify/Make Readable")
    output = gr.Textbox(label="Output Box")
    greet_btn.click(fn=get_most_raedable_paraphrse, inputs=text, outputs=output, api_name="get_most_raedable_paraphrse")
    example_text = gr.Dataset(
        components=[text],
        samples=[
            ['Inflation is the rate of increase in prices over a given period of time. Inflation is typically a broad measure, such as the overall increase in prices or the increase in the cost of living in a country.'],
            ['Legally assured line of credit with a bank'],
            ['A mutual fund is a type of financial vehicle made up of a pool of money collected from many investors to invest in securities like stocks, bonds, money market instruments'],
        ],
    )
    example_text.click(fn=set_example_text, inputs=example_text, outputs=example_text.components)

demo.launch()