"""Streamlit question-answering app for the Swinburne Bachelor of Computer Science program.

The script splits a plain-text knowledge base into overlapping chunks, indexes
them in FAISS using OpenAI embeddings, retrieves the chunks most similar to the
user's question, and runs an extractive question-answering pipeline over each
retrieved chunk, keeping the highest-scoring answer.
"""

import os

import streamlit as st
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline

# Location of the knowledge base that gets chunked and indexed.
CONTEXT_PATH = "./data/full_context.txt"

# Answers must score strictly above this value to be accepted.
MIN_ANSWER_SCORE = 0.5

# Message returned when no retrieved chunk yields a confident answer.
NO_ANSWER_MESSAGE = "No Answer found. Please ask question related to Bachelor of Computer Science program at Swinburne."

# Path of the question-answering model weights.
model_path = "/content/drive/MyDrive/Colab_Notebooks/COS30081_NLP/D_HD_Task/models/roberta_model"

# The OpenAI key is read from the environment so that no secret lives in the
# source tree. NOTE(review): any key that was ever committed to this file
# should be treated as leaked and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    st.error("OPENAI_API_KEY is not set. Export it before starting the app.")
    st.stop()


@st.cache_resource
def _build_docsearch():
    """Read the knowledge base, split it into chunks and index them in FAISS.

    Cached because Streamlit re-runs the whole script on every interaction;
    without the cache each question would re-embed the entire document.
    """
    with open(CONTEXT_PATH, "r") as context_file:
        document = context_file.read()

    # Split the text into smaller chunks for indexing.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,  # striding over the text
        length_function=len,
    )
    chunks = text_splitter.split_text(document)

    # Embed the chunks with OpenAI embeddings and build the FAISS index.
    return FAISS.from_texts(chunks, OpenAIEmbeddings())


@st.cache_resource
def _build_question_answerer():
    """Load the QA model and tokenizer and wrap them in a transformers pipeline.

    Cached for the same reason as the index: the model should be loaded once
    per server process, not once per question.
    """
    model = TFAutoModelForQuestionAnswering.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained('deepset/roberta-base-squad2')
    return pipeline("question-answering", model=model, tokenizer=tokenizer)


docsearch = _build_docsearch()
question_answerer = _build_question_answerer()


def findHighestScore(question, min_score=MIN_ANSWER_SCORE):
    """Return the best extractive answer among the chunks similar to *question*.

    Args:
        question: The user's question text.
        min_score: Score an answer must strictly exceed to be accepted.

    Returns:
        A ``(answer, score)`` tuple. When no chunk produces an answer scoring
        above ``min_score``, ``answer`` is ``''`` and ``score`` is ``min_score``.
    """
    best_answer = ''
    best_score = min_score
    for candidate in docsearch.similarity_search(question):
        result = question_answerer(question=question, context=candidate.page_content)
        if result['score'] > best_score:
            best_score = result['score']
            best_answer = result['answer']
    return best_answer, best_score


def QnAfunction(question):
    """Answer *question*, falling back to a fixed message when nothing is found.

    Args:
        question: The user's question text.

    Returns:
        ``(answer, score)`` for a confident answer, otherwise
        ``(NO_ANSWER_MESSAGE, 0)``.
    """
    answer, score = findHighestScore(question)
    if answer != '':
        return answer, score
    return NO_ANSWER_MESSAGE, 0


text = st.text_area(
    "Ask any question about the Bachelor of Computer Science program at Swinburne: "
)
# Ignore empty or whitespace-only input so it does not trigger a search.
if text and text.strip():
    ans, score = QnAfunction(text)
    # st.json treats a bare string as serialized JSON, so a plain-text answer
    # must be wrapped in a JSON-serializable object; the score is shown too.
    st.json({"answer": ans, "score": float(score)})