nickmuchi committed on
Commit
1540bd1
•
1 Parent(s): b86d577

Create 2_Twitter_GPT_Search.py

Browse files
Files changed (1) hide show
  1. pages/2_Twitter_GPT_Search.py +99 -0
pages/2_Twitter_GPT_Search.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from datetime import datetime as dt

import pandas as pd
import streamlit as st
from langchain import VectorDBQA
from langchain.chat_models import ChatOpenAI
from langchain.chat_models.openai import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings,HuggingFaceInstructEmbeddings
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
22
+
23
# Page setup for the "Financial Tweets GPT Search" Streamlit page: configures
# the page, builds the chat prompt, renders the input widgets and loads the
# tweet corpus from disk.

# Streamlit expects the page configuration to be issued before other st.* calls.
st.set_page_config(
    page_title="Tweets Question/Answering with Langchain and OpenAI",
    page_icon="🔎",
)

# System prompt for the QA chain; {context} is filled with the retrieved chunks.
system_template="""Use the following pieces of context to answer the users question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source of the document from which you got your answer.

Example of your response should be:

```
The answer is foo
SOURCES: xyz
```

Begin!
----------------
{context}"""
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)

# Timestamp (day_month_year_hour_minute) taken when the script runs.
current_time = dt.today().strftime('%d_%m_%Y_%H_%M')

st.markdown("## Financial Tweets GPT Search")

twitter_link = """
[![](https://img.shields.io/twitter/follow/nickmuchi?label=@nickmuchi&style=social)](https://twitter.com/nickmuchi)
"""

st.markdown(twitter_link)

# Display name shown in the sidebar -> Hugging Face model identifier.
bi_enc_dict = {
    'mpnet-base-v2': "sentence-transformers/all-mpnet-base-v2",
    'instructor-base': 'hkunlp/instructor-base',
}

search_input = st.text_input(
    label='Enter Your Search Query',
    value="What are the most topical risks?",
    key='search',
)

sbert_model_name = st.sidebar.selectbox(
    "Embedding Model",
    options=list(bi_enc_dict.keys()),
    key='sbox',
)

# Read the whole tweet corpus; the encoding is explicit so that non-ASCII
# characters decode the same way regardless of the platform default.
with open('tweets.txt', encoding='utf-8') as f:
    tweets = f.read()
67
def process_tweets(file, embed_model, query):
    """Answer ``query`` from the tweet text using a FAISS-backed QA chain.

    The text is split into chunks, embedded with the selected model, indexed
    in an in-memory FAISS store and queried through a "stuff" ``VectorDBQA``
    chain driven by ``ChatOpenAI`` and the module-level ``prompt``.

    Args:
        file: The text to index; it is passed directly to the splitter's
            ``split_text``.
        embed_model: Key of ``bi_enc_dict`` selecting the embedding model.
        query: The question to ask the chain.

    Returns:
        The value returned by calling the chain with
        ``return_only_outputs=True``.

    Raises:
        KeyError: If ``embed_model`` is not a key of ``bi_enc_dict``.
    """
    # Split the tweets into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_text(file)

    model = bi_enc_dict[embed_model]

    # Match the whole instructor family by prefix: the previous exact
    # comparison against "hkunlp/instructor-large" never matched the
    # configured "hkunlp/instructor-base", which left ``emb`` unassigned.
    if model.startswith("hkunlp/instructor"):
        emb = HuggingFaceInstructEmbeddings(
            model_name=model,
            query_instruction='Represent the Financial question for retrieving supporting documents: ',
            embed_instruction='Represent the Financial document for retrieval: ',
        )
    else:
        emb = HuggingFaceEmbeddings(model_name=model)

    docsearch = FAISS.from_texts(texts, emb)

    chain_type_kwargs = {"prompt": prompt}
    chain = VectorDBQA.from_chain_type(
        ChatOpenAI(temperature=0),
        chain_type="stuff",
        vectorstore=docsearch,
        chain_type_kwargs=chain_type_kwargs,
    )

    result = chain({"query": query}, return_only_outputs=True)

    return result
97
+
98
+
99
+