Asif Islam committed on
Commit
58a8048
1 Parent(s): d3f0aab

Updated with info from modern langchain docs

Browse files
app.py CHANGED
@@ -1,57 +1,53 @@
1
- import os
2
- import sys
3
- import openai
4
- from dotenv import load_dotenv, find_dotenv
5
- import gradio as gr
6
- from langchain.chains import ConversationalRetrievalChain, RetrievalQA
 
7
  from langchain.chat_models import ChatOpenAI
8
- from langchain.document_loaders import DirectoryLoader, TextLoader
 
 
9
  from langchain.embeddings import OpenAIEmbeddings
10
- from langchain.indexes import VectorstoreIndexCreator
11
- from langchain.indexes.vectorstore import VectorStoreIndexWrapper
12
- from langchain.llms import OpenAI
13
  from langchain.vectorstores import Chroma
 
14
 
15
- #load_dotenv(find_dotenv())
16
-
17
- # Enable to save to disk & reuse the model (for repeated queries on the same data)
18
- PERSIST = False
19
-
20
- query = None
21
-
22
- if PERSIST and os.path.exists("persist"):
23
- print("Reusing index...\n")
24
- vectorstore = Chroma(persist_directory="persist",
25
- embedding_function=OpenAIEmbeddings())
26
- index = VectorStoreIndexWrapper(vectorstore=vectorstore)
27
- else:
28
- loader = DirectoryLoader("data/")
29
- if PERSIST:
30
- index = VectorstoreIndexCreator(
31
- vectorstore_kwargs={"persist_directory": "persist"}).from_loaders([loader])
32
- else:
33
- index = VectorstoreIndexCreator().from_loaders([loader])
34
-
35
- chain = ConversationalRetrievalChain.from_llm(
36
- llm=ChatOpenAI(model="gpt-3.5-turbo"),
37
- retriever=index.vectorstore.as_retriever(search_kwargs={"k": 1}),
38
- )
39
 
40
- # Stores chat history: TODO: Modify behavior to be like a queue after reaching a certain max (probably RAM dependent)
41
- chat_history = []
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- async def getQuery(query: str):
45
- if not query:
46
- query = input("Prompt: ")
47
- if query in ['quit', 'q', 'exit']:
48
- sys.exit()
49
- result = chain({"question": query, "chat_history": chat_history})
50
- print(result['answer'])
51
 
52
- chat_history.append((query, result['answer']))
53
- query = None
54
- return result['answer']
55
 
56
- demo = gr.Interface(fn=getQuery, inputs="text", outputs="text", title="800_Codes_AI_Chat_Bot", flagging_options=(['Incorrect', 'Great Response']))
57
- demo.launch(share=False)
 
1
+ from langchain.prompts import (
2
+ ChatPromptTemplate,
3
+ MessagesPlaceholder,
4
+ SystemMessagePromptTemplate,
5
+ HumanMessagePromptTemplate,
6
+ )
7
+ from langchain.chains import LLMChain, ConversationalRetrievalChain
8
  from langchain.chat_models import ChatOpenAI
9
+ from langchain.memory import ConversationBufferMemory
10
+ from langchain.document_loaders import TextLoader
11
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
12
  from langchain.embeddings import OpenAIEmbeddings
 
 
 
13
  from langchain.vectorstores import Chroma
14
+ import gradio as gr
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ # LLM
18
+ llm = ChatOpenAI()
19
 
20
+ # load data
21
+ loader = TextLoader('data/codes.txt')
22
+ data = loader.load()
23
+
24
+ # Split and store into vector
25
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
26
+
27
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
28
+ all_splits = text_splitter.split_documents(data)
29
+ vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings(), persist_directory='persist')
30
+
31
+ # Prompt
32
+ prompt = ChatPromptTemplate(
33
+ messages=[
34
+ SystemMessagePromptTemplate.from_template(
35
+ "You are a nice chatbot having a conversation with a human about UF student government and its 800 codes which are about the budget and student org funding. Please reference context first when trying to respond to queries."
36
+ ),
37
+ # The `variable_name` here is what must align with memory
38
+ MessagesPlaceholder(variable_name="chat_history"),
39
+ HumanMessagePromptTemplate.from_template("{question}")
40
+ ]
41
+ )
42
 
43
+ # Notice that we `return_messages=True` to fit into the MessagesPlaceholder
44
+ # Notice that `"chat_history"` aligns with the MessagesPlaceholder name
45
+ retriever = vectorstore.as_retriever()
46
+ memory = ConversationBufferMemory(memory_key="chat_history",return_messages=True)
47
+ qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
 
 
48
 
49
+ def queryAI(query: str):
50
+ return qa({"question": query})["answer"]
 
51
 
52
+ gui = gr.Interface(fn=queryAI, inputs="text", outputs="text", title="800 Codes Bot - EXPERIMENTAL")
53
+ gui.launch()
data/{800-Codes-Amended-in-Summer-2023.txt → codes.txt} RENAMED
File without changes
flagged/log.csv CHANGED
@@ -1,2 +1,4 @@
1
  query,output,flag,username,timestamp
2
  Tell me about student org travel,"Student org travel requires prior written approval from the Student Body Treasurer and the Student Government Finance Manager. All travel authorization forms must be submitted and approved by the student organization President and Treasurer on the online activity request system at least ten business days before the trip. If prior written approval is not granted, the Student Government cannot be held responsible for any costs incurred on the trip. Exceptions to the submission timeline may be made in extreme circumstances, but require a letter of justification signed by the organizational president or treasurer and the approval of the Student Body Treasurer.",Great Response,,2023-08-18 15:14:59.713209
 
 
 
1
  query,output,flag,username,timestamp
2
  Tell me about student org travel,"Student org travel requires prior written approval from the Student Body Treasurer and the Student Government Finance Manager. All travel authorization forms must be submitted and approved by the student organization President and Treasurer on the online activity request system at least ten business days before the trip. If prior written approval is not granted, the Student Government cannot be held responsible for any costs incurred on the trip. Exceptions to the submission timeline may be made in extreme circumstances, but require a letter of justification signed by the organizational president or treasurer and the approval of the Student Body Treasurer.",Great Response,,2023-08-18 15:14:59.713209
3
+ Tell me about the 800 codes,"The 800 codes refer to administrative regulations related to salaries, secretarial, and hiring within the organization.",Incorrect,,2023-08-18 16:10:56.130656
4
+ All funds allocated to student organizations for the purpose of copies shall have a copies line,"Based on the provided information, it is unclear if every student organization budget includes a specific line item for copies. The information only mentions that expenditures from a particular line item can be used to support the copy center. It is possible that some organizations may have a separate line item for copies, while others may include it under a different category or have a shared budget for printing expenses. It would be best to consult the specific guidelines or policies of the student organization budget for more accurate information.",Incorrect,,2023-08-18 16:19:42.437307