"""FinChat: a Streamlit app that answers questions about an uploaded PDF.

Pipeline, as implemented below:
  1. Extract the text of the uploaded PDF with pdfplumber.
  2. Split the text into sentences and score every (question, sentence)
     pair with a fine-tuned RoBERTa binary classifier.
  3. Send the highest-scoring sentences plus the question to the OpenAI
     chat API and display the (optionally translated) answer.
A separate input lets the user ask for the definition of a term.
"""
import os
import tempfile

import nltk
import openai
import pdfplumber
import streamlit as st
import torch
import torch.nn.functional as F
from nltk.tokenize import sent_tokenize
from torch import nn
from transformers import RobertaModel, RobertaTokenizer

# Startup diagnostics kept from the original script.
print(os.listdir('.'))
nltk.download('punkt')

PRETRAINED_NAME = 'deepset/roberta-base-squad2'
CHAT_MODEL = "gpt-4-1106-preview"
MAX_TOKENS = 512        # maximum sequence length fed to the encoder
TOP_K_SENTENCES = 13    # number of ranked sentences passed to the chat model
BATCH_SIZE = 16         # (question, sentence) pairs scored per forward pass


class Bert_model(nn.Module):
    """RoBERTa encoder followed by a two-layer, two-class head.

    The head reads the hidden state at sequence position 0, applies a
    linear projection, dropout, and a final linear layer with 2 outputs.
    """

    def __init__(self, hidden_size, dropout_rate):
        """Build the encoder and the classification head.

        Args:
            hidden_size: Width of the encoder's hidden states (and of the
                projection layer).
            dropout_rate: Dropout probability applied after the projection.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.bert = RobertaModel.from_pretrained(PRETRAINED_NAME)
        self.cls_prj = nn.Linear(hidden_size, hidden_size, bias=True)
        self.cls_dropout = nn.Dropout(dropout_rate)
        self.cls_final = nn.Linear(hidden_size, 2, bias=True)

    def forward(self, input_ids, attention_mask):
        """Return the 2-way logits for each sequence in the batch."""
        bert_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        bert_sequence_output = bert_outputs.last_hidden_state
        # Use the hidden state of the first token as the sequence summary.
        bert_pooled_output = bert_sequence_output[:, 0, :]
        pooled_output = self.cls_prj(bert_pooled_output)
        pooled_output = self.cls_dropout(pooled_output)
        logits = self.cls_final(pooled_output)
        return logits


model_path = "model.pt"


@st.cache_resource
def _load_retriever(checkpoint_path):
    """Load the classifier, tokenizer and device once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    keeps the checkpoint and tokenizer from being reloaded on each rerun.

    Returns:
        A ``(model, tokenizer, device)`` tuple with the model in eval mode.
    """
    # Fall back to CPU so the app still starts on hosts without CUDA.
    target = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    state_dict = torch.load(checkpoint_path, map_location=target)
    net = Bert_model(hidden_size=768, dropout_rate=0.1)
    # Wrapped before load_state_dict exactly as the original script did;
    # presumably the checkpoint was saved from a DataParallel model.
    net = nn.DataParallel(net)
    net.load_state_dict(state_dict)
    net = net.to(target)
    net.eval()
    tok = RobertaTokenizer.from_pretrained(PRETRAINED_NAME)
    return net, tok, target


model, tokenizer, device = _load_retriever(model_path)


def preprocess_pdf(pdf_path, tokenizer, skip_pages=2):
    """Extract a PDF's text and tokenize it.

    Args:
        pdf_path: Path of the PDF to read.
        tokenizer: Tokenizer exposing ``encode_plus``.
        skip_pages: Number of leading pages to ignore (default 2, the value
            that was previously hard-coded).

    Returns:
        ``(input_ids, attention_mask, text)`` where the two tensors have a
        leading batch dimension of 1 and ``text`` is the page texts joined
        by single spaces.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may yield None for a page without a text layer;
        # treat that as an empty string instead of failing in join().
        text = " ".join(
            page.extract_text() or "" for page in pdf.pages[skip_pages:]
        )

    tokenized_text = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=MAX_TOKENS,
        padding='max_length',
        truncation=True,  # keep the encoding within max_length for long documents
        return_attention_mask=True,
    )
    input_ids = torch.tensor([tokenized_text['input_ids']])
    attention_mask = torch.tensor([tokenized_text['attention_mask']])
    return input_ids, attention_mask, text


def _chat(messages):
    """Send ``messages`` to the chat model and return the reply text."""
    response = openai.ChatCompletion.create(model=CHAT_MODEL, messages=messages)
    return response.choices[0].message['content']


def translate_text(text, target_language):
    """Translate English ``text`` into ``target_language`` via the chat API."""
    return _chat([
        {"role": "system", "content": "You are a helpful assistant that translates English text to other languages."},
        {"role": "user", "content": f'Translate the following English text to {target_language}: "{text}"'},
    ])


def explain_term(term):
    """Return a chat-model explanation of ``term``."""
    return _chat([
        {"role": "system", "content": "You are a helpful assistant that provides definitions."},
        {"role": "user", "content": f"Explain the term: {term}"},
    ])


def _extract_uploaded_text(uploaded_file):
    """Return the text of an uploaded PDF.

    The upload is spooled to a uniquely named temporary file (so concurrent
    sessions cannot overwrite each other) which is removed afterwards.
    """
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(uploaded_file.getbuffer())
        tmp_path = tmp.name
    try:
        _, _, text = preprocess_pdf(tmp_path, tokenizer)
    finally:
        os.remove(tmp_path)
    return text


def _rank_sentences(question, sentences):
    """Score each sentence against ``question`` and rank by relevance.

    Returns:
        A list of ``(sentence, predicted_label, [p0, p1])`` tuples sorted by
        ``p1`` (probability of class 1) in descending order.
    """
    scored = []
    for start in range(0, len(sentences), BATCH_SIZE):
        batch = sentences[start:start + BATCH_SIZE]
        inputs = tokenizer(
            [question] * len(batch),
            batch,
            return_tensors='pt',
            padding=True,  # pad to the longest pair in the batch only
            truncation=True,
            max_length=MAX_TOKENS,
        )
        with torch.no_grad():
            logits = model(
                inputs['input_ids'].to(device),
                inputs['attention_mask'].to(device),
            )
        probabilities = F.softmax(logits, dim=1)
        labels = torch.argmax(probabilities, dim=1)
        scored.extend(zip(batch, labels.tolist(), probabilities.tolist()))

    # Rank by confidence in class 1. Sorting by the 0/1 label alone (as was
    # done before) leaves all positives tied, so "top k" was merely the
    # first k positives in document order.
    scored.sort(key=lambda item: item[2][1], reverse=True)
    return scored


st.title('FinChat')

# Alternative flow (disabled): let the user supply their own OpenAI key.
# api_key = st.text_input("Enter your OpenAI API key:", type="password")
# if api_key:
#     try:
#         openai.api_key = api_key
#         openai.ChatCompletion.create(
#             model="gpt-4-1106-preview",
#             messages=[
#                 {"role": "system", "content": "You are a helpful assistant."},
#                 {"role": "user", "content": "Hello"},
#             ],
#         )
#         st.success("API key is valid!")
#     except Exception as e:
#         st.error(f"Failed to validate API key: {e}")
# else:
#     st.warning("Please enter your OpenAI API key.")

api_key = st.secrets["api_key"]
openai.api_key = api_key

uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
language = st.selectbox(
    'Select your language',
    ['English', 'French', 'Chinese', 'Korean', 'Spanish', 'German', 'Japanese'],
)

if uploaded_file is not None:
    text = _extract_uploaded_text(uploaded_file)
    st.write('File successfully uploaded and processed')

    question = st.text_input("Enter your question:")
    if question:
        sentences = sent_tokenize(text)
        if not sentences:
            # Nothing to retrieve from; avoid querying the chat model with
            # an empty context.
            st.warning("No extractable text was found in the uploaded PDF.")
        else:
            top_sentences = _rank_sentences(question, sentences)[:TOP_K_SENTENCES]
            chat_history = "\n".join(sentence for sentence, _, _ in top_sentences)
            response_content = _chat([
                {"role": "system", "content": "You are a helpful generator which read the short paragraphs and answer the question."},
                {"role": "user", "content": chat_history},
                {"role": "user", "content": question},
            ])
            if language != 'English':
                response_content = translate_text(response_content, language)
            st.text("Answer: " + response_content)

# NOTE(review): the original nesting of this section could not be recovered
# from the collapsed source; it does not depend on the upload, so it is kept
# at top level — confirm against the intended UI.
term = st.text_input("Enter a term you want to define:")
if term:
    definition = explain_term(term)
    if language != 'English':
        definition = translate_text(definition, language)
    st.text("Definition: " + definition)