Nighter committed on
Commit
5d8b19c
1 Parent(s): e7097c8

Upload 5 files

Browse files
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
4
+ from tensorflow.keras.preprocessing.text import text_to_word_sequence
5
+ import pickle
6
+ import re
7
+ from tensorflow.keras.models import load_model
8
+
9
# Load the long-answer model: the pickled tokenizer plus the Keras model that
# predict_answer() uses to score (sentence, question) pairs.
# NOTE(review): pickle.load can execute arbitrary code from the file it reads;
# this is only acceptable because the pickle is shipped with the repository.
with open('lstm-qa-long-answers-model/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)
long_answer_model = load_model('lstm-qa-long-answers-model/model.h5')
13
+
14
def clean_text(text):
    """Strip markup and unsupported characters from *text*.

    Three passes are applied in order:

    1. remove ``<...>`` tag-like spans (shortest match, single line),
    2. remove bracketed numeric citation markers such as ``[12]``,
    3. remove every character that is not an ASCII letter, a digit,
       whitespace, a parenthesis, a period or a comma.

    Returns the cleaned string.
    """
    text = re.sub(r'<.*?>', '', text)
    # Citation markers must go before the final pass; otherwise the brackets
    # would be stripped and the bare digits left behind.
    text = re.sub(r'\[\d+\]', '', text)
    # NOTE(review): this pass also drops non-ASCII letters (e.g. accented
    # names). Presumably it mirrors the preprocessing used to fit the
    # tokenizer - confirm before widening the character class.
    return re.sub(r'[^a-zA-Z0-9\s().,]', '', text)
19
+
20
def remove_parentheses(text):
    """Return *text* with every ``(...)`` group removed.

    A group runs from an opening parenthesis to the first closing one, so
    nesting is not understood: in ``"a (b (c) d) e"`` only ``"(b (c)"`` is
    removed. Surrounding whitespace is left untouched.
    """
    return re.sub(r'\([^)]*\)', '', text)
23
+
24
def split_into_sentences(text):
    """Split *text* into sentences on periods.

    The text is cut at every ``.`` (together with any whitespace that
    follows it). Each piece is stripped, and pieces that are empty after
    stripping are discarded, so the result never contains blank entries.

    Returns a list of non-empty, stripped strings (possibly empty).
    """
    # Strip first, then filter: a fragment made only of whitespace must not
    # survive as an empty "sentence".
    stripped = (piece.strip() for piece in re.split(r'\.\s*', text))
    return [piece for piece in stripped if piece]
27
+
28
def predict_answer(context, question):
    """Pick the sentence of *context* that the long-answer model scores highest.

    The context is split with ``split_into_sentences``; each sentence is
    cleaned with ``clean_text``, tokenised with the module-level
    ``tokenizer`` and scored against the question by ``long_answer_model``.

    Args:
        context: Passage to search for an answer sentence.
        question: Question to score every sentence against.

    Returns:
        A ``(score, sentence)`` tuple, where ``score`` is a plain float and
        ``sentence`` is the cleaned text of the best-scoring sentence.
        ``(0.0, "")`` is returned when the context yields no sentences.
    """
    max_sentence_length = 300

    # The question is the same for every candidate, so encode it once
    # instead of once per sentence.
    # NOTE(review): no maxlen is given here, so the padded width follows the
    # question length - confirm the model accepts variable-length questions.
    question_seq = tokenizer.texts_to_sequences([question])
    padded_question = pad_sequences(question_seq, padding='post')

    best_score = None
    best_sentence = ""

    for sentence in split_into_sentences(context):
        clean_sentence = clean_text(sentence)
        sentence_seq = tokenizer.texts_to_sequences([clean_sentence])
        padded_sentence = pad_sequences(
            sentence_seq,
            maxlen=max_sentence_length,
            padding='post',
            truncating='post',
        )

        prediction = long_answer_model.predict([padded_sentence, padded_question])[0]
        # Convert to a plain Python number so callers never receive an array.
        # Assumes the model emits a single value per pair, which the
        # original `score > best_score` comparison already required.
        score = prediction.item()

        # Accept the first candidate unconditionally so that a model whose
        # scores are all <= 0 still yields its best sentence.
        if best_score is None or score > best_score:
            best_score = score
            best_sentence = clean_sentence

    if best_score is None:
        return 0.0, ""
    return best_score, best_sentence
49
+
50
# Load the short-answer model: an extractive question-answering pipeline.
# The checkpoint is fetched by name, so the first start needs network access.
# NOTE(review): from_tf=True presumably means the checkpoint only ships
# TensorFlow weights - confirm against the model repository.
short_answer_model = pipeline(
    "question-answering",
    model="Nighter/QA_wiki_data_short_answer",
    from_tf=True,
)
52
+
53
def answer_questions(context, question):
    """Answer *question* from *context* with both models.

    The long answer is the best-scoring sentence from ``predict_answer``;
    the short answer is the span extracted by ``short_answer_model`` from
    the context with parenthesised text removed.

    Args:
        context: Passage that should contain the answer.
        question: Question to answer.

    Returns:
        A ``(short_answer, short_score, long_answer, long_score)`` tuple.
        When either input is empty or blank, ``("", 0.0, "", 0.0)`` is
        returned without running any model.
    """
    # The question-answering pipeline rejects empty inputs; return an empty
    # result instead of surfacing an error in the UI.
    if not context or not context.strip() or not question or not question.strip():
        return "", 0.0, "", 0.0

    long_score, long_answer = predict_answer(context, question)
    short_answer_result = short_answer_model(
        question=question,
        context=remove_parentheses(context),
    )
    return (
        short_answer_result['answer'],
        short_answer_result['score'],
        long_answer,
        long_score,
    )
66
+
67
+
68
# Gradio UI: inputs on the left, the two answers with their scores on the
# right.
# NOTE(review): the nesting below was reconstructed from the layout calls
# because the original indentation was lost - confirm against the deployed app.
with gr.Blocks() as app:
    gr.Markdown("<center> <h1>Question Answering with Short and Long Answer Models </h1> </center><hr>")
    with gr.Row():
        with gr.Column():
            context_input = gr.Textbox(lines=7, label="Context", placeholder="Input Context here...")
            question_input = gr.Textbox(label="Question", placeholder="Input Question here...")
            submit_btn = gr.Button("Submit")
            gr.ClearButton([context_input, question_input])
        with gr.Column():
            with gr.Row():
                with gr.Column(scale=4):
                    # NOTE(review): label probably means "DistilBERT"; left
                    # unchanged because it is user-facing text.
                    short_answer_output = gr.Textbox(lines=5, label="Distril Bert Short Answer")
                with gr.Column(scale=1):
                    short_score_output = gr.Number(label="Short Answer Score")
            with gr.Row():
                with gr.Column(scale=4):
                    long_answer_output = gr.Textbox(lines=5, label="LSTM Long Answer")
                with gr.Column(scale=1):
                    long_score_output = gr.Number(label="Long Answer Score")

    # Output order must match the tuple returned by answer_questions.
    submit_btn.click(
        fn=answer_questions,
        inputs=[context_input, question_input],
        outputs=[short_answer_output, short_score_output, long_answer_output, long_score_output],
    )
    # A directory path: the example rows are read from examples/log.csv.
    examples = 'examples'
    gr.Examples(
        examples,
        [context_input, question_input],
        [short_answer_output, short_score_output, long_answer_output, long_score_output],
        answer_questions,
    )

if __name__ == "__main__":
    app.launch()
examples/log.csv ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ context_input, question_input
2
+ "Mark Elliot Zuckerberg is an American business magnate, computer programmer, internet entrepreneur, and philanthropist. He co-founded the social media service Facebook and its parent company Meta Platforms (formerly Facebook, Inc.), of which he is executive chairman, chief executive officer and controlling shareholder.","Who is Mark Zuckerberg ?"
3
+ "Pierre Aveline (1656–1722), was a French engraver, print-publisher and print-seller.Aveline was born in Paris and the head of a family of artists, including his two sons Pierre-Alexandre Aveline and Antoine Aveline. He is best known for his reproductions of other artists' works. He died in Paris on 23 May 1722.","When did Pierre Aveline die ?"
4
+ "Vượng was born on 5 August 1968 in Hanoi; his paternal family has origins in Hà Tĩnh in north-central Vietnam.[4] His father served in the Vietnamese Army's air defence division, and his mother is a Hai Phonger, who had a tea shop, which left the family with a very meager income.[3] He grew up in Hanoi and graduated from Kim Lien High School in 1985.","Which school did Vuong graduate from ?"
5
+ "William Henry Gates III (born October 28, 1955) is an American businessman, investor, philanthropist, and writer best known for co-founding the software giant Microsoft, along with his childhood friend Paul Allen.[2][3] During his career at Microsoft, Gates held the positions of chairman, chief executive officer (CEO), president, and chief software architect, while also being its largest individual shareholder until May 2014.[4] He was a major entrepreneur of the microcomputer revolution of the 1970s and 1980s.","Who is William Henry Gates ?"
6
+ "Elon Musk was born in Pretoria, South Africa, and briefly attended the University of Pretoria before immigrating to Canada at age 18, acquiring citizenship through his Canadian-born mother. Two years later, he matriculated at Queen's University in Kingston, Ontario. Musk later transferred to the University of Pennsylvania, and received bachelor's degrees in economics and physics there. He moved to California in 1995 to attend Stanford University. However, Musk dropped out after two days and, with his brother Kimbal, co-founded online city guide software company Zip2. The startup was acquired by Compaq for $307 million in 1999, and with $12 million of the money he made, that same year Musk co-founded X.com, a direct bank. X.com merged with Confinity in 2000 to form PayPal","Where was Elon Musk born ?"
7
+ "The Industrial University of Ho Chi Minh City (IUH), formerly known as Ho Chi Minh University of Industry (esquire: HUI) (Vietnamese: Trường Đại học Công nghiệp Thành phố Hồ Chí Minh)[1] (esquire: ĐHCN TP. HCM), is a university in Go Vap District, Ho Chi Minh City, Vietnam. It is one of technical universities in Ho Chi Minh City. The university has 2,000 employees, including about 1,600 teachers and 200 guest trainers who are invited from universities, scientific institutes, and industry. University of Industry School, Ho Chi Minh City is working under the management of the Ministry of Industry and Trade. The total number of students in the university in 2011 was approximately 129,000. (According to the report of the Conference of the academic year (2010–2011) by Dr. Dean. Anh Tuan Tran)","What was number of students in the university in 2011 ?"
lstm-qa-long-answers-model/model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07d140c9980acd24aa833d83f02356c04fb50c3a96229df3dd29eb4295a441ab
3
+ size 109328824
lstm-qa-long-answers-model/tokenizer.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5062b0e3232b8903e43048a8172c808c27b7f8d4ce9023ce387792c1eaaec440
3
+ size 26592138
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch
4
+ tensorflow
5
+ keras_preprocessing