Jagpreet Singh committed on
Commit
3021c33
2 Parent(s): ecacae5 cbae271

Merge pull request #2 from SinghJagpreet096/b1

Browse files
Files changed (7) hide show
  1. README.md +70 -0
  2. app.py +18 -6
  3. chainlit.md +7 -10
  4. requirements.txt +1 -1
  5. src/config.py +12 -1
  6. src/model.py +12 -6
  7. src/utils.py +5 -5
README.md CHANGED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Document Reader
2
+
3
+ Don't waste time reading lengthy Terms and Conditions! Upload your files here and ask anything you want to know.
4
+
5
+ ## Introduction
6
+
7
+ Welcome to Document Reader! This chatbot is designed to assist users with questions related to uploaded files. Users can upload text documents and inquire about their content.
8
+
9
+ ## Features
10
+
11
+ - **File Upload:** Users can upload files (text/pdf).
12
+ - **Question and Answer:** Ask questions related to the content of the uploaded file.
13
+ - **Interactive Assistance:** Receive information and insights based on the uploaded file.
14
+
15
+ ## Getting Started
16
+
17
+ Follow these steps to get started with Document Reader:
18
+
19
+ 1. Clone the repository:
20
+
21
+ ```bash
22
+ git clone https://github.com/SinghJagpreet096/Document-Reader.git
23
+ cd Document-Reader
24
+
25
+ ```
26
+ 2. Create Virtual Environment:
27
+ ```bash
28
+ python -m venv <env-name>
29
+ ```
30
+
31
+ 3. Activate venv:
32
+ ```bash
33
+ source <env-name>/bin/activate
34
+ ```
35
+ 4. Install dependencies:
36
+
37
+ ```bash
38
+ pip install -r requirements.txt
39
+ ```
40
+ 5. Create .env:
41
+ ```bash
42
+ echo 'OPENAI_API_KEY=<your-openai-api-key>' > .env
43
+ ```
44
+
45
+ 6. Run your app:
46
+
47
+ ```bash
48
+ chainlit run app.py
49
+ ```
50
+
51
+ ## Usage
52
+
53
+ 1. Open Document Reader.
54
+ 2. Click on the file upload button to share a document.
55
+ 3. Once the file is uploaded, ask questions about its content.
56
+ 4. The chatbot will provide information or insights based on the uploaded file.
57
+
58
+ Feel free to type "help" at any time for assistance.
59
+
60
+ ## Configuration
61
+
62
+ The chatbot may require configuration through environment variables. Check the `.env` file for details.
63
+
64
+
65
+
66
+
67
+
68
+ Create an OPENAI_API_KEY here https://platform.openai.com/api-keys
69
+
70
+
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import logging
3
 
4
- #pip install pypdf
5
  #export HNSWLIB_NO_NATIVE = 1
6
 
7
  from langchain.document_loaders import PyPDFDirectoryLoader, TextLoader
@@ -12,22 +12,34 @@ from langchain.memory import ChatMessageHistory, ConversationBufferMemory
12
  from langchain.chains import ConversationalRetrievalChain
13
  from langchain.chat_models import ChatOpenAI
14
  import chainlit as cl
15
-
16
  from src.utils import get_docsearch, get_source
17
 
18
  # text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
19
  # embeddings = OpenAIEmbeddings()
20
 
21
- welcome_message = """Welcome"""
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
  @cl.on_chat_start
25
  async def start():
26
- await cl.Message("test").send()
27
  files = None
28
  files = await cl.AskFileMessage(
29
  content=welcome_message,
30
  accept=["text/plain", "application/pdf"],
 
 
31
  ).send()
32
 
33
  logging.info("file uploaded")
@@ -53,8 +65,8 @@ async def start():
53
 
54
  ## create chain that uses chroma vector store
55
  chain = ConversationalRetrievalChain.from_llm(
56
- ChatOpenAI(model_name="gpt-3.5-turbo",temperature=0, streaming=True),
57
- chain_type="stuff",
58
  retriever=docsearch.as_retriever(),
59
  memory=memory,
60
  return_source_documents=True,
 
1
  import os
2
  import logging
3
 
4
+
5
  #export HNSWLIB_NO_NATIVE = 1
6
 
7
  from langchain.document_loaders import PyPDFDirectoryLoader, TextLoader
 
12
  from langchain.chains import ConversationalRetrievalChain
13
  from langchain.chat_models import ChatOpenAI
14
  import chainlit as cl
15
+ from src.config import Config
16
  from src.utils import get_docsearch, get_source
17
 
18
  # text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
19
  # embeddings = OpenAIEmbeddings()
20
 
21
+ welcome_message = """Welcome to Your Document Reader!
22
+
23
+ Here to assist you with any questions you have about a file. You can upload a file and ask me questions related to its content. Here's how you can get started:
24
+
25
+ 1. Click on the file upload button to share a document or image.
26
+ 2. Once the file is uploaded, feel free to ask me any questions about its content.
27
+ 3. I'll do my best to provide information or insights based on the uploaded file.
28
+
29
+ If you need help or have any specific queries, type "help" at any time.
30
+
31
+ Let's get the conversation started! """
32
 
33
 
34
  @cl.on_chat_start
35
  async def start():
36
+ await cl.Message("YOU ARE IN").send()
37
  files = None
38
  files = await cl.AskFileMessage(
39
  content=welcome_message,
40
  accept=["text/plain", "application/pdf"],
41
+ max_size_mb=Config.max_size_mb,
42
+ timeout=Config.timeout
43
  ).send()
44
 
45
  logging.info("file uploaded")
 
65
 
66
  ## create chain that uses chroma vector store
67
  chain = ConversationalRetrievalChain.from_llm(
68
+ ChatOpenAI(model_name=Config.model_name,temperature=Config.temperature, streaming=Config.streaming),
69
+ chain_type=Config.chain_type,
70
  retriever=docsearch.as_retriever(),
71
  memory=memory,
72
  return_source_documents=True,
chainlit.md CHANGED
@@ -1,14 +1,11 @@
1
- # Welcome to Chainlit! 🚀🤖
2
 
3
- Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
4
 
5
- ## Useful Links 🔗
 
 
6
 
7
- - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
8
- - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
9
 
10
- We can't wait to see what you create with Chainlit! Happy coding! 💻😊
11
-
12
- ## Welcome screen
13
-
14
- To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
 
1
+ Welcome to Your Document Reader!
2
 
3
+ I'm here to assist you with any questions you have about a file. You can upload a file and ask me questions related to its content. Here's how you can get started:
4
 
5
+ 1. Click on the file upload button to share a document or image.
6
+ 2. Once the file is uploaded, feel free to ask me any questions about its content.
7
+ 3. I'll do my best to provide information or insights based on the uploaded file.
8
 
9
+ If you need help or have any specific queries, type "help" at any time.
 
10
 
11
+ Let's get the conversation started!
 
 
 
 
requirements.txt CHANGED
@@ -4,4 +4,4 @@ pypdf
4
  chainlit
5
  openai
6
  chromadb
7
- tiktoken
 
4
  chainlit
5
  openai
6
  chromadb
7
+ tiktoken
src/config.py CHANGED
@@ -1,13 +1,24 @@
1
  import os
2
  from langchain.embeddings.openai import OpenAIEmbeddings
3
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
4
 
5
 
6
 
7
  class Config:
8
  temperature = 0
9
  streaming = True
 
 
10
  chain_type = "stuff"
11
  max_token_limit = 4098
12
  embeddings = OpenAIEmbeddings()
13
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 
 
 
 
 
 
 
 
 
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ChatMessageHistory, ConversationBufferMemory


class Config:
    """Central configuration for the document-reader app.

    NOTE(review): every value here is a class attribute built at import
    time; `embeddings`, `message_history`, and `memory` are therefore
    shared by all sessions in the process — confirm that is intended.
    """

    # LLM settings
    model_name = "gpt-3.5-turbo"
    temperature = 0      # deterministic answers
    streaming = True     # stream tokens back to the UI

    # File-upload limits (consumed by chainlit's AskFileMessage)
    max_size_mb = 20
    timeout = 180        # seconds to wait for an upload

    # Retrieval-chain settings
    chain_type = "stuff"
    max_token_limit = 4098

    # Shared helpers for embedding and chunking uploaded documents
    embeddings = OpenAIEmbeddings()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

    # Conversation memory (shared across sessions — see class docstring)
    message_history = ChatMessageHistory()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )
src/model.py CHANGED
@@ -2,6 +2,7 @@ from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesCha
2
  from langchain.chat_models import ChatOpenAI
3
  import logging
4
  import os
 
5
 
6
 
7
  from src.config import Config
@@ -11,16 +12,21 @@ from src.config import Config
11
 
12
 
13
  def load_model():
14
- model = ChatOpenAI(temperature=Config.temperature,
15
- streaming=Config.streaming)
 
16
  return model
17
 
18
 
19
  def load_chain(docsearch):
20
  model = load_model()
21
- chain = RetrievalQAWithSourcesChain.from_chain_type(
22
- ChatOpenAI(temperature=0, streaming=True),
23
- chain_type="stuff",
24
- retriever=docsearch.as_retriever(max_tokens_limit=4097),
 
 
 
25
  )
 
26
  return chain
 
2
  from langchain.chat_models import ChatOpenAI
3
  import logging
4
  import os
5
+ from langchain.chains import ConversationalRetrievalChain
6
 
7
 
8
  from src.config import Config
 
12
 
13
 
14
def load_model():
    """Build the chat model described by :class:`Config`.

    Returns a ``ChatOpenAI`` instance using the configured model name,
    temperature, and streaming flag.
    """
    return ChatOpenAI(
        model_name=Config.model_name,
        temperature=Config.temperature,
        streaming=Config.streaming,
    )
19
 
20
 
21
def load_chain(docsearch):
    """Create a conversational retrieval chain over *docsearch*.

    Parameters
    ----------
    docsearch : a vector store (e.g. Chroma) supporting ``as_retriever()``.

    Returns
    -------
    A ``ConversationalRetrievalChain`` configured from :class:`Config`
    that also returns source documents with each answer.
    """
    model = load_model()
    # BUG FIX: the original passed the function object `load_model` to
    # from_llm instead of the `model` instance built on the line above.
    chain = ConversationalRetrievalChain.from_llm(
        model,
        chain_type=Config.chain_type,
        retriever=docsearch.as_retriever(),
        memory=Config.memory,
        return_source_documents=True,
    )
    return chain
src/utils.py CHANGED
@@ -6,13 +6,13 @@ from langchain.vectorstores import Chroma
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain.embeddings.openai import OpenAIEmbeddings
8
  import chainlit as cl
9
-
10
 
11
  from src.config import Config
12
  import logging
13
 
14
- text_splitter = RecursiveCharacterTextSplitter()
15
- embeddings = OpenAIEmbeddings()
16
 
17
  def process_file(file: AskFileResponse):
18
  import tempfile
@@ -26,7 +26,7 @@ def process_file(file: AskFileResponse):
26
  tempfile.write(file.content)
27
  loader = Loader(tempfile.name)
28
  documents = loader.load()
29
- docs = text_splitter.split_documents(documents)
30
  for i, doc in enumerate(docs):
31
  doc.metadata["source"] = f"source_{i}"
32
  return docs
@@ -40,7 +40,7 @@ def get_docsearch(file: AskFileResponse):
40
  # Create a unique namespace for the file
41
 
42
  docsearch = Chroma.from_documents(
43
- docs, embeddings
44
  )
45
  return docsearch
46
 
 
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain.embeddings.openai import OpenAIEmbeddings
8
  import chainlit as cl
9
+ from src.config import Config
10
 
11
  from src.config import Config
12
  import logging
13
 
14
+ # text_splitter = RecursiveCharacterTextSplitter()
15
+ # embeddings = OpenAIEmbeddings()
16
 
17
  def process_file(file: AskFileResponse):
18
  import tempfile
 
26
  tempfile.write(file.content)
27
  loader = Loader(tempfile.name)
28
  documents = loader.load()
29
+ docs = Config.text_splitter.split_documents(documents)
30
  for i, doc in enumerate(docs):
31
  doc.metadata["source"] = f"source_{i}"
32
  return docs
 
40
  # Create a unique namespace for the file
41
 
42
  docsearch = Chroma.from_documents(
43
+ docs, Config.embeddings
44
  )
45
  return docsearch
46