nickmuchi committed on
Commit
3384f62
1 Parent(s): 2b8b510

Update pages/2_Twitter_GPT_Search.py

Browse files
Files changed (1) hide show
  1. pages/2_Twitter_GPT_Search.py +29 -27
pages/2_Twitter_GPT_Search.py CHANGED
@@ -4,6 +4,7 @@ from langchain.vectorstores import FAISS
4
  from langchain.chat_models.openai import ChatOpenAI
5
  from langchain import VectorDBQA
6
  import pandas as pd
 
7
 
8
  from langchain.chat_models import ChatOpenAI
9
  from langchain.prompts.chat import (
@@ -61,40 +62,41 @@ search_input = st.text_input(
61
 
62
  sbert_model_name = st.sidebar.selectbox("Embedding Model", options=list(bi_enc_dict.keys()), key='sbox')
63
 
64
# Load the cached tweet corpus from disk. Encoding is explicit so the read
# doesn't depend on the platform's locale default (open() otherwise uses
# locale.getpreferredencoding()).
with open('tweets.txt', encoding='utf-8') as f:
    tweets = f.read()
66
 
67
@st.experimental_singleton(suppress_st_warning=True)
def process_tweets(file, embed_model, query):
    '''Answer `query` against a corpus of tweets using retrieval-augmented QA.

    Parameters
    ----------
    file : str
        Raw text of the tweet corpus (read from disk by the caller).
    embed_model : str
        Key into the module-level `bi_enc_dict` selecting the embedding model.
    query : str
        Natural-language question to answer from the corpus.

    Returns
    -------
    dict
        Output of the QA chain; the generated answer is under 'result'.

    Raises
    ------
    ValueError
        If `embed_model` resolves to a model with no embedding backend here.
    '''

    # Split tweets into chunks small enough to embed individually.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_text(file)

    model = bi_enc_dict[embed_model]

    if model == "hkunlp/instructor-large":
        # Instructor models take task-specific instructions for queries
        # vs. documents; these steer the embeddings toward finance text.
        emb = HuggingFaceInstructEmbeddings(
            model_name=model,
            query_instruction='Represent the Financial question for retrieving supporting documents: ',
            embed_instruction='Represent the Financial document for retrieval: ',
        )
    elif model == "sentence-transformers/all-mpnet-base-v2":
        emb = HuggingFaceEmbeddings(model_name=model)
    else:
        # Original code left `emb` unbound here and crashed later with a
        # confusing NameError; fail fast with a clear message instead.
        raise ValueError(f"Unsupported embedding model: {model!r}")

    # Build an in-memory FAISS index over the chunked tweets.
    docsearch = FAISS.from_texts(texts, emb)

    # `prompt` is a module-level chat prompt template defined elsewhere
    # in this file.
    chain_type_kwargs = {"prompt": prompt}
    chain = VectorDBQA.from_chain_type(
        ChatOpenAI(temperature=0),
        chain_type="stuff",
        vectorstore=docsearch,
        chain_type_kwargs=chain_type_kwargs,
    )

    result = chain({"query": query}, return_only_outputs=True)

    return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
 
100
 
 
4
  from langchain.chat_models.openai import ChatOpenAI
5
  from langchain import VectorDBQA
6
  import pandas as pd
7
+ from variables import *
8
 
9
  from langchain.chat_models import ChatOpenAI
10
  from langchain.prompts.chat import (
 
62
 
63
  sbert_model_name = st.sidebar.selectbox("Embedding Model", options=list(bi_enc_dict.keys()), key='sbox')
64
 
65
file = get_latest_file()

try:
    if search_input:
        embedding_model = bi_enc_dict[sbert_model_name]

        # Loading the embedding model and running retrieval can take a
        # while, so keep the user informed with a spinner.
        spinner_text = f"Loading {embedding_model} embedding model and Generating Response..."
        with st.spinner(text=spinner_text):
            tweets = process_tweets(file, sbert_model_name, search_input)

        answer = tweets['result']
        references = [doc.page_content for doc in tweets['source_documents']]

        ##### Semantic Search #####

        with st.expander(label='Query Result', expanded=True):
            st.write(answer)

        with st.expander(label='References from Corpus used to Generate Result'):
            for ref in references:
                st.write(ref)

    else:
        st.write('Please ensure you have entered the YouTube URL or uploaded the Earnings Call file')

except RuntimeError:
    st.write('Please ensure you have entered the YouTube URL or uploaded the Earnings Call file')
100
 
101
 
102