JahanavDixit committed on
Commit
62174f8
•
1 Parent(s): 80f1af7

Upload 5 files

Browse files
Files changed (5) hide show
  1. 48lawsofpower.pdf +0 -0
  2. chainlit.md +14 -0
  3. dockerfile.txt +11 -0
  4. pdf_qa.py +98 -0
  5. requirements.txt +8 -0
48lawsofpower.pdf ADDED
Binary file (105 kB). View file
 
chainlit.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Welcome to Chainlit! 🚀🤖
2
+
3
+ Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
4
+
5
+ ## Useful Links 🔗
6
+
7
+ - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
8
+ - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
9
+
10
+ We can't wait to see what you create with Chainlit! Happy coding! 💻😊
11
+
12
+ ## Welcome screen
13
+
14
+ To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
dockerfile.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
# Image for serving the Chainlit app on port 7860.
FROM python:3.9

# Relative paths in later instructions resolve against /code.
WORKDIR /code

# Copy the dependency list on its own first so the pip layer below can be
# reused from cache when only application files change.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the files at the root of the build context (PDF, chainlit.md, app code).
COPY ./* /code/

# NOTE(review): the commit that adds this file uploads pdf_qa.py, not app.py —
# confirm that app.py exists in the repository or point this at pdf_qa.py.
# NOTE(review): pdf_qa.py builds a SpacyTextSplitter at import time, which
# presumably needs a spaCy pipeline installed in the image — TODO confirm.
CMD ["chainlit", "run", "app.py", "--port", "7860"]
pdf_qa.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import necessary modules and define env variables
2
+
3
+ from langchain.chains import RetrievalQA
4
+ from langchain_community.document_loaders import PyPDFLoader
5
+ from langchain_community.vectorstores import FAISS
6
+ from langchain.prompts.chat import (
7
+ ChatPromptTemplate
8
+ )
9
+ from langchain_community.llms import HuggingFaceHub
10
+ import tempfile
11
+ from langchain_community.embeddings import HuggingFaceEmbeddings
12
+ import os
13
+ import io
14
+ import chainlit as cl
15
+ import PyPDF2
16
+
# The Hugging Face Hub token is intentionally not assigned in code; presumably
# HUGGINGFACEHUB_API_TOKEN is supplied by the runtime environment — verify
# against the deployment settings.

# Prompt text with two placeholders: {context} and {question}.
template = """Answer the question based only on the following context from the book 48 Laws of Power:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Keyword arguments intended for the chain constructor (carries the prompt).
chain_type_kwargs = dict(prompt=prompt)

from langchain.text_splitter import SpacyTextSplitter

# spaCy-backed splitter shared by the chat-start handler; chunk_size is 1000.
text_splitter = SpacyTextSplitter(chunk_size=1000)
29
+
# Location of the book that the whole app is built around.
_PDF_PATH = "./48lawsofpower.pdf"


def _extract_pdf_chunks(pdf_path):
    """Return the PDF's text split into chunks by the module-level splitter."""
    reader = PyPDF2.PdfReader(pdf_path)
    # Join once instead of growing a string page by page; the `or ""` keeps a
    # page with no extractable text from breaking the join.
    full_text = "".join(page.extract_text() or "" for page in reader.pages)
    return text_splitter.split_text(full_text)


def _build_qa_chain(retriever):
    """Build the RetrievalQA chain that answers from the given retriever."""
    llm = HuggingFaceHub(
        repo_id="HuggingFaceH4/zephyr-7b-beta",
        model_kwargs={"temperature": 0.1, "max_new_tokens": 1024, "max_length": 728},
    )
    return RetrievalQA.from_chain_type(
        llm,
        chain_type="stuff",
        retriever=retriever,
        # The original built this prompt but never handed it to the chain, so
        # the "answer only from the context" instruction was silently dropped.
        chain_type_kwargs={"prompt": prompt},
    )


@cl.on_chat_start
async def on_chat_start():
    """Prepare a chat session: index the book and store the QA chain.

    Sends a welcome message and a progress message, builds a FAISS index over
    the PDF pages, builds the RetrievalQA chain on top of that index, and
    stores three values in the Chainlit user session:

    - "chain": the RetrievalQA chain used by the message handler,
    - "texts": the PDF text split into chunks by ``text_splitter``,
    - "metadatas": one ``{"source": "<i>-pl"}`` dict per chunk.

    The progress message is updated to "done" only after the session holds the
    chain, so a question sent right after the update always finds it.
    """
    await cl.Message(content="Hello there, Welcome to Laws of Power chat app!").send()
    msg = cl.Message(content="Processing Laws of Power...")
    await msg.send()

    # Build the FAISS vector store from the loader's page documents.
    pages = PyPDFLoader(_PDF_PATH).load_and_split()
    faiss_index = FAISS.from_documents(pages, HuggingFaceEmbeddings())

    # Raw text chunks and their per-chunk source labels, kept in the session.
    texts = _extract_pdf_chunks(_PDF_PATH)
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

    chain = _build_qa_chain(faiss_index.as_retriever())

    cl.user_session.set("metadatas", metadatas)
    cl.user_session.set("texts", texts)
    cl.user_session.set("chain", chain)

    # Let the user know that the system is ready.
    msg.content = "Processing Laws of Power done. You can now ask questions!"
    await msg.update()
79
+
80
+
@cl.on_message
async def main(message: cl.Message):
    """Answer one user message with the session's RetrievalQA chain.

    Args:
        message: The incoming Chainlit message; its ``content`` attribute is
            used as the question text.

    The chain is read from the user session under the key "chain". If it is
    missing, the user gets a short notice instead of an AttributeError. The
    answer is taken from the chain result's "result" key and is sent as a new
    message unless the callback handler reports that it already streamed the
    final answer.
    """
    chain = cl.user_session.get("chain")
    if chain is None:
        # Chat-start setup did not finish (or failed) for this session.
        await cl.Message(
            content="The document is not ready yet. Please start a new chat and try again."
        ).send()
        return

    question = message.content

    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    # Mark the answer as reached up front, as the original code did, instead
    # of waiting for the prefix tokens above.
    cb.answer_reached = True
    res = await chain.acall(question, callbacks=[cb])

    answer = res["result"]
    source_elements = []  # No source attachments are produced at the moment.
    if cb.has_streamed_final_answer:
        cb.final_stream.elements = source_elements
        await cb.final_stream.update()
    else:
        await cl.Message(content=answer, elements=source_elements).send()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
langchain
# pdf_qa.py imports langchain_community.* directly, so list it explicitly.
langchain-community
chainlit
transformers
huggingface_hub
# Backend required by HuggingFaceEmbeddings.
sentence-transformers
faiss_cpu
tiktoken
# SpacyTextSplitter also needs a spaCy pipeline installed, e.g.
#   python -m spacy download en_core_web_sm
spacy
PyPDF2
# PDF parser required by PyPDFLoader.
pypdf