nickmuchi committed on
Commit
3384f62
1 Parent(s): 2b8b510

Update pages/2_Twitter_GPT_Search.py

Browse files
Files changed (1) hide show
  1. pages/2_Twitter_GPT_Search.py +29 -27
pages/2_Twitter_GPT_Search.py CHANGED
@@ -4,6 +4,7 @@ from langchain.vectorstores import FAISS
4
  from langchain.chat_models.openai import ChatOpenAI
5
  from langchain import VectorDBQA
6
  import pandas as pd
 
7
 
8
  from langchain.chat_models import ChatOpenAI
9
  from langchain.prompts.chat import (
@@ -61,40 +62,41 @@ search_input = st.text_input(
61
 
62
  sbert_model_name = st.sidebar.selectbox("Embedding Model", options=list(bi_enc_dict.keys()), key='sbox')
63
 
64
# Load the cached tweet corpus from disk. Encoding is explicit so the read
# doesn't depend on the platform's locale default (open() otherwise uses
# locale.getpreferredencoding()).
with open('tweets.txt', encoding='utf-8') as f:
    tweets = f.read()
66
 
67
@st.experimental_singleton(suppress_st_warning=True)
def process_tweets(file, embed_model, query):
    '''Answer `query` against a corpus of tweets using retrieval-augmented QA.

    Parameters
    ----------
    file : str
        Raw text of the tweet corpus (read from disk by the caller).
    embed_model : str
        Key into the module-level `bi_enc_dict` selecting the embedding model.
    query : str
        Natural-language question to answer from the corpus.

    Returns
    -------
    dict
        Output of the QA chain; the generated answer is under 'result'.

    Raises
    ------
    ValueError
        If `embed_model` resolves to a model with no embedding backend here.
    '''

    # Split tweets into chunks small enough to embed individually.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_text(file)

    model = bi_enc_dict[embed_model]

    if model == "hkunlp/instructor-large":
        # Instructor models take task-specific instructions for queries
        # vs. documents; these steer the embeddings toward finance text.
        emb = HuggingFaceInstructEmbeddings(
            model_name=model,
            query_instruction='Represent the Financial question for retrieving supporting documents: ',
            embed_instruction='Represent the Financial document for retrieval: ',
        )
    elif model == "sentence-transformers/all-mpnet-base-v2":
        emb = HuggingFaceEmbeddings(model_name=model)
    else:
        # Original code left `emb` unbound here and crashed later with a
        # confusing NameError; fail fast with a clear message instead.
        raise ValueError(f"Unsupported embedding model: {model!r}")

    # Build an in-memory FAISS index over the chunked tweets.
    docsearch = FAISS.from_texts(texts, emb)

    # `prompt` is a module-level chat prompt template defined elsewhere
    # in this file.
    chain_type_kwargs = {"prompt": prompt}
    chain = VectorDBQA.from_chain_type(
        ChatOpenAI(temperature=0),
        chain_type="stuff",
        vectorstore=docsearch,
        chain_type_kwargs=chain_type_kwargs,
    )

    result = chain({"query": query}, return_only_outputs=True)

    return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
 
100
 
 
4
  from langchain.chat_models.openai import ChatOpenAI
5
  from langchain import VectorDBQA
6
  import pandas as pd
7
+ from variables import *
8
 
9
  from langchain.chat_models import ChatOpenAI
10
  from langchain.prompts.chat import (
 
62
 
63
  sbert_model_name = st.sidebar.selectbox("Embedding Model", options=list(bi_enc_dict.keys()), key='sbox')
64
 
65
file = get_latest_file()

try:
    if search_input:
        embedding_model = bi_enc_dict[sbert_model_name]

        # Loading the embedding model and running retrieval can take a
        # while, so keep the user informed with a spinner.
        spinner_text = f"Loading {embedding_model} embedding model and Generating Response..."
        with st.spinner(text=spinner_text):
            tweets = process_tweets(file, sbert_model_name, search_input)

        answer = tweets['result']
        references = [doc.page_content for doc in tweets['source_documents']]

        ##### Semantic Search #####

        with st.expander(label='Query Result', expanded=True):
            st.write(answer)

        with st.expander(label='References from Corpus used to Generate Result'):
            for ref in references:
                st.write(ref)

    else:
        st.write('Please ensure you have entered the YouTube URL or uploaded the Earnings Call file')

except RuntimeError:
    st.write('Please ensure you have entered the YouTube URL or uploaded the Earnings Call file')
100
 
101
 
102