Spaces:
Build error
Build error
File size: 5,019 Bytes
3d8edaf ea00796 3d8edaf 834345a 3d8edaf 834345a 833c58b 3d8edaf 834de5e 3d8edaf 2639906 3d8edaf 834de5e 3d8edaf 834345a 3d8edaf 834345a 3d8edaf 834345a c0c151c 834345a 3d8edaf 2639906 5ad0224 3d8edaf e557652 661c2a9 3d8edaf 58555b2 2639906 a701044 79c52cc a701044 661c2a9 3d8edaf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
import streamlit as st
from predict import run_prediction
from io import StringIO
import json
# Configure the Streamlit page with the "wide" layout option.
st.set_page_config(layout="wide")
# Applied as a decorator: a bare ``st.cache(...)`` call on its own line builds
# the caching wrapper and immediately throws it away.
@st.cache(show_spinner=False, persist=True)
def load_questions():
    """Return the lines of ``data/questions.txt`` as a list of strings.

    Each entry is one line of the file exactly as ``readlines`` yields it,
    i.e. including its trailing newline when the file has one.

    Raises:
        OSError: if the file cannot be opened.
    """
    with open('data/questions.txt') as f:
        return f.readlines()
def load_questions_short():
    """Return the lines of ``data/questions_short.txt`` as a list of strings.

    Lines are returned unmodified, so each one still carries its trailing
    newline when the file has one.
    """
    with open('data/questions_short.txt') as handle:
        # Iterating a text file yields the same lines readlines() would.
        short_labels = list(handle)
    return short_labels
# Applied as a decorator: a bare ``st.cache(...)`` call on its own line builds
# the caching wrapper and immediately throws it away.
@st.cache(show_spinner=False, persist=True)
def load_contracts():
    """Return the contract texts stored in ``data/test.json``.

    For every entry under the top-level ``'data'`` key, the ``'context'`` of
    its first paragraph is taken and all runs of whitespace are collapsed to
    single spaces.

    Returns:
        A list with one whitespace-normalized string per entry, in file order.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open('data/test.json') as json_file:
        data = json.load(json_file)
    # split() with no argument splits on any whitespace run, so join/split
    # normalizes newlines, tabs and repeated spaces in one pass.
    return [
        ' '.join(entry['paragraphs'][0]['context'].split())
        for entry in data['data']
    ]
# Read both question lists up front: `questions` holds the full wording and
# `questions_short` the matching short labels (looked up by position later).
questions, questions_short = load_questions(), load_questions_short()
# Loading the bundled test contracts is currently disabled:
# contracts = load_contracts()
### DEFINE SIDEBAR
# Markdown blurb shown under the sidebar title. The text lines stay at
# column 0 so the string passed to markdown() carries no leading indentation.
_SIDEBAR_BLURB = """
Process text with [Huggingface](https://huggingface.co) models and visualize the results.
This model uses a pretrained snapshot trained on the [Atticus](https://www.atticusprojectai.org/) Dataset - CUAD
"""
st.sidebar.title("Interactive Contract Analysis")
st.sidebar.markdown(_SIDEBAR_BLURB)
st.sidebar.header("Contract Selection")
# select contract
# Display labels and the files behind them are parallel lists: the label at
# position k is backed by the file at position k.
contracts_drop = [
    'contract 1',
    'contract 2',
    'contract 3',
]
contracts_files = [
    'contract-1.txt',
    'contract-2.txt',
    'contract-3.txt',
]
contract = st.sidebar.selectbox('Please Select a Contract', contracts_drop)
idx = contracts_drop.index(contract)
# Default contract text; an upload handled further down may replace it.
with open(f'data/{contracts_files[idx]}') as f:
    contract_data = f.read()
# upload contract
user_upload = st.sidebar.file_uploader('Please upload your own', type=['docx', 'pdf', 'txt'],
                                       accept_multiple_files=False)
print(user_upload)
# process upload
# A successfully parsed txt/pdf upload replaces the contract text selected
# above. If a txt or pdf upload cannot be parsed, a warning is shown and
# contract_data is left exactly as it was (never empty or half-filled).
if user_upload is not None:
    print(user_upload.name, user_upload.type)
    # Dispatch on the file name's extension, case-insensitively.
    extension = user_upload.name.split('.')[-1].lower()
    if extension == 'txt':
        print('text file uploaded')
        try:
            # The upload is raw bytes; decode it as UTF-8 text.
            contract_data = user_upload.getvalue().decode("utf-8")
        except UnicodeDecodeError:
            st.warning('Unable to read text file, please try another file')
    elif extension == 'pdf':
        import PyPDF4
        try:
            # Extracting Text from PDFs
            pdfReader = PyPDF4.PdfFileReader(user_upload)
            print(pdfReader.numPages)
            # Collect per-page text first so a failure part-way through does
            # not leave a partial document behind.
            page_texts = []
            for i in range(pdfReader.numPages):
                print(i)
                page_texts.append(pdfReader.getPage(i).extractText())
        except Exception:
            # Exception (not a bare except) so KeyboardInterrupt/SystemExit
            # still propagate.
            st.warning('Unable to read PDF, please try another file')
        else:
            contract_data = ''.join(page_texts)
    elif extension == 'docx':
        import docx2txt
        contract_data = docx2txt.process(user_upload)
    else:
        st.warning('Unknown uploaded file type, please try again')
# Offer 1-3 results. The options are strings, so the selected value is a
# string as well and is converted with int() where a number is needed.
results_drop = [str(count) for count in range(1, 4)]
number_results = st.sidebar.selectbox('Select number of results', results_drop)
### DEFINE MAIN PAGE
st.header("Legal Contract Review Demo")
st.write("This demo uses the CUAD dataset for Contract Understanding.")
# Text area pre-filled with the current contract text; whatever it returns
# is what gets analyzed.
paragraph = st.text_area(
    label="Contract",
    value=contract_data,
    height=300,
)
# The user chooses among the short labels; the full question stored at the
# same position in `questions` is the one used for prediction.
questions_drop = questions_short
question_short = st.selectbox(
    'Choose one of the 41 queries from the CUAD dataset:',
    questions_drop,
)
idxq = questions_drop.index(question_short)
question = questions[idxq]
# Run the model when the user presses "Analyze" and show the answer(s).
if st.button('Analyze'):
    # Strip before testing: a whitespace-only contract, or a question that is
    # nothing but the newline kept by readlines(), must count as empty.
    if paragraph.strip() and question.strip():
        print('getting predictions')
        with st.spinner(text='Analysis in progress...'):
            predictions = run_prediction([question], paragraph, 'akdeniz27/roberta-base-cuad',
                                         n_best_size=int(number_results))
        if predictions['0'] == "":
            answer = 'No answer found in document'
        elif number_results == '1':
            answer = f"Answer: {predictions['0']}"
        else:
            # NOTE(review): nbest.json is presumably written by run_prediction
            # into the working directory -- confirm against predict.py.
            with open("nbest.json") as jf:
                data = json.load(jf)
            # Slice instead of indexing 0..n-1 so that fewer candidates than
            # requested cannot raise IndexError.
            candidates = data['0'][:int(number_results)]
            answer = "".join(
                f"Answer {rank}: {candidate['text']} -- \n"
                f"Probability: {round(candidate['probability']*100,1)}%\n\n"
                for rank, candidate in enumerate(candidates, start=1)
            ) or 'No answer found in document'
        st.success(answer)
    else:
        st.write("Unable to call model, please select question and contract")
|