Shroogawh24 commited on
Commit
d9cef54
1 Parent(s): ffe2c83

Upload 3 files

Browse files
Files changed (3) hide show
  1. app (2).py +108 -0
  2. df_news (1) (1).pkl +3 -0
  3. requirements (3).txt +16 -0
app (2).py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import gradio as gr
import openai
import pandas as pd
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.schema import HumanMessage, SystemMessage
from langchain.vectorstores import FAISS
from langchain_core.output_parsers.string import StrOutputParser
from sentence_transformers import SentenceTransformer
13
+
14
+ #embeddings = OpenAIEmbeddings()
15
+
16
+ embedder = SentenceTransformer('all-mpnet-base-v2')
17
+
18
+ # Set the OpenAI API key
19
+ #openai.api_key = os.getenv("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")
20
+
21
+ # Load the FAISS index using LangChain's FAISS implementation
22
+ db = FAISS.load_local("Faiss_index", embedder, allow_dangerous_deserialization=True)
23
+ parser = StrOutputParser()
24
+
25
+ # Load your data (e.g., a DataFrame)
26
+ df = pd.read_pickle('df_news (1).pkl')
27
+
28
+ # Search function to retrieve relevant documents
29
+ def search(query):
30
+ query_embedding = embedder.embed_query(query).reshape(1, -1).astype('float32')
31
+ D, I = db.similarity_search_with_score(query_embedding, k=10)
32
+ results = []
33
+ for idx in I[0]:
34
+ if idx < 3327: # Adjust this based on your indexing
35
+ doc_index = idx
36
+ results.append({
37
+ 'type': 'metadata',
38
+ 'title': df.iloc[doc_index]['title'],
39
+ 'author': df.iloc[doc_index]['author'],
40
+ 'full_text': df.iloc[doc_index]['full_text'],
41
+ 'source': df.iloc[doc_index]['url']
42
+ })
43
+ else:
44
+ chunk_index = idx - 3327
45
+ metadata = metadata_info[chunk_index]
46
+ doc_index = metadata['index']
47
+ chunk_text = metadata['chunk']
48
+ results.append({
49
+ 'type': 'content',
50
+ 'title': df.iloc[doc_index]['title'],
51
+ 'author': df.iloc[doc_index]['author'],
52
+ 'content': chunk_text,
53
+ 'source': df.iloc[doc_index]['url']
54
+ })
55
+
56
+ return results
57
+
58
+ # Generate an answer based on the retrieved documents
59
+ def generate_answer(query):
60
+ context = search(query)
61
+ context_str = "\n\n".join([f"Title: {doc['title']}\nContent: {doc.get('content', doc.get('full_text', ''))}" for doc in context])
62
+
63
+ prompt = f"""
64
+ Answer the question based on the context below. If you can't answer the question, answer with "I don't know".
65
+ Context: {context_str}
66
+ Question: {query}
67
+ """
68
+
69
+ # Set up the ChatOpenAI model with temperature and other parameters
70
+ chat = ChatOpenAI(
71
+ model="gpt-4",
72
+ temperature=0.2,
73
+ max_tokens=1500,
74
+ api_key=openai.api_key
75
+ )
76
+
77
+ messages = [
78
+ SystemMessagePromptTemplate.from_template("You are a helpful assistant."),
79
+ HumanMessagePromptTemplate.from_template(prompt)
80
+ ]
81
+
82
+ chat_chain = LLMChain(
83
+ llm=chat,
84
+ prompt=ChatPromptTemplate.from_messages(messages)
85
+ )
86
+
87
+ # Get the response from the chat model
88
+ response = chat_chain.run(messages)
89
+ return response.strip()
90
+
91
+ # Gradio chat interface
92
+ def respond(message, history, system_message, max_tokens, temperature, top_p):
93
+ response = generate_answer(message)
94
+ yield response
95
+
96
+ # Gradio demo setup
97
+ demo = gr.ChatInterface(
98
+ respond,
99
+ additional_inputs=[
100
+ gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
101
+ gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
102
+ gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
103
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
104
+ ],
105
+ )
106
+
107
+ if __name__ == "__main__":
108
+ demo.launch()
df_news (1) (1).pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ea332af5c9e7ed45f1c7f06aa59a0295941717d7a5882d8f7f28756daa46f1d
3
+ size 76729780
requirements (3).txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio>=3.41.0
2
+ openai==0.27.0
3
+ pandas==2.0.3
4
+ sentence-transformers==2.2.2
5
+ faiss-cpu==1.7.3
6
+ langchain>=0.0.200
7
+ pydantic>=2.0.0
8
+ langchain-community
9
+ langchain_core
10
+
11
+
12
+
13
+
14
+
15
+
16
+