# Install Streamlit, pyngrok, and the other app dependencies
!pip install -q streamlit
!pip install -q pyngrok
!pip install -q pdfplumber
!pip install -q transformers
!pip install -q tabula-py
!pip install -q openai
# Write the Streamlit app script
%%writefile app.py
import streamlit as st
import pdfplumber
import torch
from transformers import RobertaTokenizer, RobertaModel
import nltk
import openai
from torch import nn
import torch.nn.functional as F
from nltk.tokenize import sent_tokenize

# Download the 'punkt' sentence tokenizer
nltk.download('punkt')
openai.api_key = 'YOUR_OPENAI_API_KEY'  # Replace with your actual OpenAI API key; never commit a real key
# Define the model architecture: a RoBERTa encoder with a binary
# classification head over the pooled first-token representation
class Bert_model(nn.Module):
    def __init__(self, hidden_size, dropout_rate):
        super(Bert_model, self).__init__()
        self.hidden_size = hidden_size
        self.bert = RobertaModel.from_pretrained('deepset/roberta-base-squad2')
        self.cls_prj = nn.Linear(hidden_size, hidden_size, bias=True)
        self.cls_dropout = nn.Dropout(dropout_rate)
        self.cls_final = nn.Linear(hidden_size, 2, bias=True)

    def forward(self, input_ids, attention_mask):
        bert_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        bert_sequence_output = bert_outputs.last_hidden_state
        # Use the first token's hidden state as the pooled representation
        bert_pooled_output = bert_sequence_output[:, 0, :]
        pooled_output = self.cls_prj(bert_pooled_output)
        pooled_output = self.cls_dropout(pooled_output)
        logits = self.cls_final(pooled_output)
        return logits
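# Shape note (inferred from the usage further below): input_ids and
# attention_mask are (batch, seq_len) tensors; forward() returns (batch, 2)
# logits, and class 1 is treated as the "relevant sentence" class.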
# Load the trained model weights
model_path = "/content/model.pt"  # Replace with your actual model path
device = "cuda" if torch.cuda.is_available() else "cpu"
state_dict = torch.load(model_path, map_location=device)

# Instantiate the model architecture
model = Bert_model(hidden_size=768, dropout_rate=0.1)  # hidden_size must match the saved model
# Wrapping in DataParallel before loading matches a checkpoint saved from a DataParallel model
model = nn.DataParallel(model)
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()

# Load the tokenizer
tokenizer = RobertaTokenizer.from_pretrained('deepset/roberta-base-squad2')
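# Note: Streamlit reruns this whole script on every user interaction, so the
# model and tokenizer loads above repeat each time; wrapping them in one of
# Streamlit's caching decorators would avoid the reloads (a suggested tweak,
# not part of the original app).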
# Function to extract and tokenize PDF text
def preprocess_pdf(pdf_path, tokenizer):
    with pdfplumber.open(pdf_path) as pdf:
        # Skip the first two pages; guard against pages with no extractable text
        text = " ".join([page.extract_text() or "" for page in pdf.pages[2:]])
    tokenized_text = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=512,
        truncation=True,
        padding='max_length',
        return_attention_mask=True
    )
    input_ids = torch.tensor([tokenized_text['input_ids']])
    attention_mask = torch.tensor([tokenized_text['attention_mask']])
    return input_ids, attention_mask, text
# Translate text via the OpenAI chat API (pre-1.0 openai client interface)
def translate_text(text, target_language):
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that translates English text to other languages."},
            {"role": "user", "content": f'Translate the following English text to {target_language}: "{text}"'},
        ],
    )
    return response.choices[0].message['content']
# Ask the model for a definition of a term
def explain_term(term):
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that provides definitions."},
            {"role": "user", "content": f"Explain the term: {term}"},
        ],
    )
    return response.choices[0].message['content']
# Streamlit UI: upload a PDF file
st.title('FinQA (Financial Question-Answering)')
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

# Select the answer language
language = st.selectbox('Select your language', ['English', 'French', 'Chinese', 'Korean', 'Spanish', 'German', 'Japanese'])
if uploaded_file is not None:
    with open("temp.pdf", "wb") as f:
        f.write(uploaded_file.getbuffer())
    input_ids, attention_mask, text = preprocess_pdf("temp.pdf", tokenizer)
    st.write('File successfully uploaded and processed')

    # Ask a question about the uploaded document
    question = st.text_input("Enter your question:")
    if question:
        # Score every sentence in the document against the question
        sentences = sent_tokenize(text)
        predictions = []
        for sentence in sentences:
            inputs = tokenizer.encode_plus(question, sentence, return_tensors='pt', padding='max_length', truncation=True, max_length=512)
            input_ids = inputs['input_ids'].to(device)
            attention_mask = inputs['attention_mask'].to(device)
            with torch.no_grad():
                outputs = model(input_ids, attention_mask)
            probabilities = F.softmax(outputs, dim=1)
            max_value, max_index = torch.max(probabilities, dim=1)
            prediction = max_index.item()
            predictions.append((sentence, prediction, probabilities[0].tolist()))
        # Rank sentences by the probability of the "relevant" class and keep the top 13
        predictions.sort(key=lambda pair: pair[2][1], reverse=True)
        top_sentences = predictions[:13]
        # Uncomment to inspect the retrieved sentences:
        # st.write("Top Relevant Sentences:")
        # for sentence, prediction, probabilities in top_sentences:
        #     st.write(f"Sentence: {sentence}, Prediction: {prediction}, Probability: {probabilities[prediction]}")
        # Build the chat context from the top-ranked sentences
        chat_history = "\n".join([sentence[0] for sentence in top_sentences])

        # Answer the question with the OpenAI API, grounding it in the retrieved sentences
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that reads short paragraphs and answers the question based on them."},
                {"role": "user", "content": chat_history},
                {"role": "user", "content": question},
            ]
        )
        if language != 'English':
            response_content = translate_text(response.choices[0].message['content'], language)
        else:
            response_content = response.choices[0].message['content']
        st.text("Answer: " + response_content)
# Term definition lookup, independent of the uploaded file
term = st.text_input("Enter a term you want to define:")
if term:
    # Define the term using the OpenAI API
    definition = explain_term(term)
    if language != 'English':
        definition = translate_text(definition, language)
    st.text("Definition: " + definition)