Spaces:
Runtime error
Runtime error
File size: 4,985 Bytes
3f9aebf 4816a7c 9afb236 4816a7c 3f9aebf 4816a7c 16346aa 3f9aebf 4816a7c 1d228e3 4816a7c 3f9aebf 711306c 4816a7c 1d228e3 4816a7c 3f9aebf 7504e3d 4816a7c 1d228e3 4816a7c ce0b273 4816a7c 16346aa 4816a7c 671160e 4816a7c 671160e 1d228e3 4816a7c d008982 4816a7c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, MarianMTModel, MarianTokenizer, pipeline
import nltk.data
import pandas as pd
import matplotlib.pyplot as plt
nltk.download('punkt')
import gradio as gr
from gradio.mix import Parallel
# --- Summarization: T5 checkpoint for Indonesian text -----------------------
tokenizer_t5 = T5Tokenizer.from_pretrained(
    "panggi/t5-base-indonesian-summarization-cased"
)
model_t5 = T5ForConditionalGeneration.from_pretrained(
    "panggi/t5-base-indonesian-summarization-cased"
)

# --- Checkpoint names consumed by the two pipelines built below -------------
pretrained_sentiment = "ProsusAI/finbert"
pretrained_ner = "51la5/roberta-large-NER"

# --- Sentence splitting: NLTK Punkt, English model --------------------------
sentence_tokenizer = nltk.data.load("tokenizers/punkt/english.pickle")

# --- Translation: Marian Indonesian -> English ------------------------------
tokenizer_translate = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-id-en")
model_translate = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-id-en")
# Alternative translation checkpoint, currently disabled:
#   MarianMTModel.from_pretrained("wolfrage89/annual_report_translation_id_en")

# --- Pipelines --------------------------------------------------------------
# return_all_scores=True asks for a score per label instead of only the best
# one; sentiment_analysis() relies on that shape.
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model=pretrained_sentiment,
    tokenizer=pretrained_sentiment,
    return_all_scores=True,
)
# grouped_entities=True yields entries carrying an "entity_group" key, which
# ner() reads.
ner_pipeline = pipeline(
    task="ner",
    model=pretrained_ner,
    tokenizer=pretrained_ner,
    grouped_entities=True,
)
# Sample Indonesian inputs; the UI in the __main__ block passes this list to
# gr.Examples so a user can fill the input textbox with one of them.
examples = [
"Perusahaan industri e-commerce Indonesia, Bukalapak telah memberhentikan puluhan karyawan dari beberapa function; Berlawanan dengan PHK sebelumnya, perusahaan mengontrak jajaran pekerja kantornya, harian Kompas melaporkan.",
"Dengan pabrik produksi baru, perusahaan akan meningkatkan kapasitasnya untuk memenuhi peningkatan permintaan yang diharapkan dan akan meningkatkan penggunaan bahan baku dan oleh karena itu meningkatkan profitabilitas produksi.",
"Lifetree didirikan pada tahun 2000, dan pendapatannya meningkat rata-rata 40% dengan margin di akhir 30-an."
]
def get_translation(text):
    """Translate a single text with the Marian id->en model.

    Args:
        text: The string to translate. It is tokenized as a one-item batch
            and truncated to at most 104 tokens before generation.

    Returns:
        The decoded translation of that one item, with special tokens
        stripped.
    """
    encoded_batch = tokenizer_translate(
        [text],
        return_tensors='pt',
        max_length=104,
        truncation=True,
    )
    generated_batch = model_translate.generate(**encoded_batch)
    # Only one text was submitted, so only the first sequence is relevant.
    first_sequence = generated_batch[0]
    return tokenizer_translate.decode(first_sequence, skip_special_tokens=True)
def summ_t5(text):
    """Summarize ``text`` with the T5 summarization checkpoint.

    Args:
        text: The document to summarize.

    Returns:
        The decoded summary string (first generated sequence, special tokens
        removed).
    """
    # Decoding settings, kept together so they are easy to scan and tune.
    generation_options = {
        "max_length": 100,
        "num_beams": 2,
        "repetition_penalty": 2.5,
        "length_penalty": 1.0,
        "early_stopping": True,
        "no_repeat_ngram_size": 2,
        "use_cache": True,
    }
    encoded_text = tokenizer_t5.encode(text, return_tensors='pt')
    generated = model_t5.generate(encoded_text, **generation_options)
    return tokenizer_t5.decode(generated[0], skip_special_tokens=True)
def sentiment_analysis(text):
    """Score ``text`` with the sentiment pipeline.

    Args:
        text: The text to classify.

    Returns:
        A dict mapping each label reported for the first pipeline result to
        its score.
    """
    predictions = sentiment_pipeline(text)
    label_scores = {}
    # The first element holds the per-label entries for the submitted text.
    for entry in predictions[0]:
        label_scores[entry["label"]] = entry["score"]
    return label_scores
def ner(text):
    """Run the NER pipeline on ``text`` and package the result for display.

    Each entity returned by the pipeline is modified in place: its
    ``entity_group`` value is duplicated under an ``entity`` key
    (presumably the key the highlighted-text output expects -- confirm
    against the Gradio component).

    Args:
        text: The text to tag.

    Returns:
        A dict with the original ``text`` and the list of ``entities``.
    """
    entities = ner_pipeline(text)
    for entity in entities:
        entity.update({'entity': entity['entity_group']})
    return dict(text=text, entities=entities)
def sentiment_df(text):
    """Build a per-sentence sentiment table for ``text``.

    The text is split into sentences, every sentence is translated with
    ``get_translation``, and every translation is scored with
    ``sentiment_analysis``. For each sentence the highest-scoring label is
    kept (the first one wins on ties) together with its score rounded to
    three decimals.

    Args:
        text: The full input text.

    Returns:
        A pandas DataFrame with the columns ``Text`` (original sentence),
        ``Eng`` (translation), ``Label`` and ``Score``.
    """
    sentences = sentence_tokenizer.tokenize(text)
    translations = [get_translation(sentence) for sentence in sentences]
    predictions = [sentiment_analysis(translated) for translated in translations]

    # Winning label per sentence, then the score that belongs to it.
    top_labels = [max(scores, key=scores.get) for scores in predictions]
    top_scores = [
        round(scores[label], 3)
        for scores, label in zip(predictions, top_labels)
    ]

    table = pd.DataFrame(columns=['Text', 'Eng', 'Label', 'Score'])
    column_values = (sentences, translations, top_labels, top_scores)
    for column, values in zip(('Text', 'Eng', 'Label', 'Score'), column_values):
        table[column] = values
    return table
def run(text):
    """Run the whole analysis for one input text.

    Steps: summarize the text, translate the summary, score the translated
    summary's sentiment, tag entities on the (untranslated) summary, and
    build the per-sentence sentiment table for the full text.

    Args:
        text: The raw input text.

    Returns:
        A 4-tuple ``(summary, summary_sentiment, summary_entities,
        sentence_table)``, in the order the UI's output components are wired.
    """
    summary = summ_t5(text)
    translated_summary = get_translation(summary)
    summary_sentiment = sentiment_analysis(translated_summary)
    summary_entities = ner(summary)
    sentence_table = sentiment_df(text)
    return summary, summary_sentiment, summary_entities, sentence_table
if __name__ == "__main__":
    # Build and launch the Gradio UI.
    # NOTE(review): the nesting of the layout below was reconstructed (the
    # original indentation was not available) -- confirm it matches the
    # intended page layout.
    with gr.Blocks() as demo:
        gr.Markdown("""<h1 style="text-align:center">Financial Statement Analysis - Indonesia</h1>""")
        gr.Markdown("\nCreator: Wira Indra Kusuma\n")
        with gr.Row():
            # Left column: input text, trigger button and clickable examples.
            with gr.Column():
                input_text = gr.Textbox(label="Input Text")
                # The caption of a Button is its `value`; passing `label`
                # does not set the caption, so the button would show the
                # default text instead of "Analyze".
                analyze_button = gr.Button(value="Analyze")
                examples_bar = gr.Examples(examples=examples, inputs=input_text)
            # Right column: one component per value returned by run().
            with gr.Column():
                summ_output = gr.Textbox(label="Article Summary")
                ner_output = gr.HighlightedText(label="NER of Summary")
                sent_output = gr.Label(label="Sentiment of Summary")
                # NOTE(review): max_rows is given a (count, mode) tuple, which
                # looks like the shape used by `row_count` -- confirm which
                # parameter was intended for the installed Gradio version.
                dataframe_component = gr.DataFrame(
                    type="pandas",
                    label="Dataframe",
                    max_rows=(20, 'fixed'),
                    overflow_row_behaviour='paginate',
                    wrap=True,
                )
        # Output order must match the tuple returned by run():
        # summary, sentiment, entities, dataframe.
        analyze_button.click(
            run,
            inputs=input_text,
            outputs=[summ_output, sent_output, ner_output, dataframe_component],
        )
    demo.launch()