MarkNCI
committed on
Commit
•
861f214
1
Parent(s):
3c67833
Remove debug
Browse files- app.py +70 -0
- requirements.txt +5 -0
- requirements.txt.txt +5 -0
app.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""assessment3_Mark_Hayden.ipynb
|
3 |
+
|
4 |
+
Automatically generated by Colab.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1gTZtBnC7kvWELzlIiTOkRd1aJaeXxJtm
|
8 |
+
|
9 |
+
**Part 1:**
|
10 |
+
This section of the notebook is used for pulling the Enron dataset down from Kaggle.
|
11 |
+
It contains the preprocessing steps I used. I have saved the output as a pickle file for Part 2 below.
|
12 |
+
"""
|
13 |
+
|
14 |
+
import pandas as pd
|
15 |
+
import numpy as np
|
16 |
+
import os
|
17 |
+
import email
|
18 |
+
import pickle
|
19 |
+
from transformers import pipeline
|
20 |
+
import gradio as gr
|
21 |
+
|
22 |
+
#path = '/tmp/gradio/tmp1biredw9'
|
23 |
+
#os.makedirs(path, exist_ok=True)
|
24 |
+
|
25 |
+
with open('file.pkl', 'rb') as file:
|
26 |
+
# Call load method to deserialize
|
27 |
+
df = pickle.load(file)
|
28 |
+
|
29 |
+
# QA pipeline setup
|
30 |
+
from transformers import AutoTokenizer, TFAutoModelForQuestionAnswering
|
31 |
+
model = TFAutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad",return_dict=False)
|
32 |
+
tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
|
33 |
+
nlp = pipeline("question-answering", model=model, tokenizer=tokenizer)
|
34 |
+
|
35 |
+
# Flag every email by whether its lower-cased text matches each keyword
# pattern. The lower-casing is done once and shared by all patterns.
lowered_emails = df['emails'].str.lower()
keyword_patterns = (
    ('bankrupt', r'\bbankrupt\b'),
    ('litigation', r'\blitigation\b'),
    ('fraud', r'\bfraud\b'),
    ('talking_points', r'\btalking\spoint\b'),
    ('shutdown', r'\bshutdown\b'),
)
for flag_name, pattern in keyword_patterns:
    df[flag_name] = lowered_emails.str.contains(pattern)

# Creating dataframe for Gradio: keep only the emails that matched at least
# one keyword (a missing flag value compares unequal to True and is dropped).
flag_columns = ['bankrupt', 'litigation', 'fraud', 'talking_points', 'shutdown']
gradio_df = df.loc[df[flag_columns].eq(True).any(axis=1)]

# Index the selected emails and their keyword flags by 1-based position so the
# interface can look them up as 'example_<n>' / 'tag<n>'.
selected_emails = gradio_df['emails']
selected_flags = gradio_df[flag_columns]
row_count = gradio_df.shape[0]
example_questions = {
    'example_' + str(pos + 1): selected_emails.iloc[pos] for pos in range(row_count)
}
tags = {
    'tag' + str(pos + 1): selected_flags.iloc[pos] for pos in range(row_count)
}
|
51 |
+
|
52 |
+
import gradio as gr
|
53 |
+
# creating the function
|
54 |
+
def func(context, question, tags=None):
    """Answer ``question`` from the text in ``context``.

    Args:
        context: Passage to search for the answer (e.g. an email body).
        question: Natural-language question about ``context``.
        tags: Ignored. The interface in this file declares three input
            components and Gradio passes one positional argument per input,
            so this optional parameter absorbs the third value instead of
            letting the call fail with a ``TypeError``.

    Returns:
        The ``'answer'`` entry of the question-answering pipeline result.
    """
    result = nlp(question=question, context=context)
    return result['answer']
|
57 |
+
|
58 |
+
question = 'please pose a question'
|
59 |
+
|
60 |
+
# creating the interface
|
61 |
+
# Build the example rows programmatically rather than spelling out all 81 by
# hand. Capping at the number of available emails avoids a KeyError when fewer
# than 81 emails matched the keyword filters.
example_rows = [
    [example_questions[f'example_{n}'], question, tags[f'tag{n}']]
    for n in range(1, min(81, len(example_questions)) + 1)
]

app = gr.Interface(
    fn=func,
    inputs=['textbox', 'text', 'text'],
    outputs='textbox',
    title='Question Answering bot',
    theme='dark-grass',
    description='Input context and question, then get answers!',
    # Pass None (no examples) instead of an empty list when nothing matched.
    examples=example_rows or None,
)
|
66 |
+
|
67 |
+
# launching the app
|
68 |
+
app.launch()
|
69 |
+
|
70 |
+
app.close()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
gradio
|
3 |
+
git+https://github.com/huggingface/transformers.git
|
4 |
+
tensorflow
|
5 |
+
tf-keras
|
requirements.txt.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
gradio
|
3 |
+
git+https://github.com/huggingface/transformers.git
|
4 |
+
tensorflow
|
5 |
+
tf-keras
|