Spaces:

singhjagpreet
/

Document-Reader

Sleeping

App Files Files Community

Jagpreet Singh committed on Dec 28, 2023

Commit

3021c33

•

2 Parent(s): ecacae5 cbae271

Merge pull request #2 from SinghJagpreet096/b1

Browse files

Files changed (7) hide show

README.md +70 -0
app.py +18 -6
chainlit.md +7 -10
requirements.txt +1 -1
src/config.py +12 -1
src/model.py +12 -6
src/utils.py +5 -5

README.md CHANGED Viewed

	@@ -0,0 +1,70 @@

+# Document Reader
+Don't waste time reading lengthy Terms and Conditions! Upload your files here and ask anything you want to know.
+## Introduction
+Welcome to Document Reader! This chatbot is designed to assist users with questions related to uploaded files. Users can upload text documents and inquire about their content.
+## Features
+- **File Upload:** Users can upload files (text/pdf).
+- **Question and Answer:** Ask questions related to the content of the uploaded file.
+- **Interactive Assistance:** Receive information and insights based on the uploaded file.
+## Getting Started
+Follow these steps to get started with Document Reader:
+1. Clone the repository:
+    ```bash
+    git clone https://github.com/SinghJagpreet096/Document-Reader.git
+    cd Document-Reader
+    ```
+2. Create Virtual Environment:
+    ```bash
+    python -m venv <env-name>
+    ```
+3. Activate venv:
+    ```bash
+    source <env-name>/bin/activate
+    ```
+4. Install dependencies:
+    ```bash
+    pip install -r requirements.txt
+    ```
+5. Create .env:
+    ```bash
+    echo 'OPENAI_API_KEY=<your-openai-api-key>' > .env
+    ```
+6. Run your app:
+    ```bash
+    chainlit run app.py
+    ```
+## Usage
+1. Open Document Reader.
+2. Click on the file upload button to share a document.
+3. Once the file is uploaded, ask questions about its content.
+4. The chatbot will provide information or insights based on the uploaded file.
+Feel free to type "help" at any time for assistance.
+## Configuration
+The chatbot may require configuration through environment variables. Check the `.env` file for details.
+Create an OpenAI API key at https://platform.openai.com/api-keys and store it in `.env` as `OPENAI_API_KEY`.

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import os
 import logging
-#pip install pypdf
 #export HNSWLIB_NO_NATIVE = 1
 from langchain.document_loaders import PyPDFDirectoryLoader, TextLoader
@@ -12,22 +12,34 @@ from langchain.memory import ChatMessageHistory, ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chat_models import ChatOpenAI
 import chainlit as cl
 from src.utils import get_docsearch, get_source
 # text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 # embeddings = OpenAIEmbeddings()
-welcome_message = """Welcome"""
 @cl.on_chat_start
 async def start():
-    await cl.Message("test").send()
     files = None
     files = await cl.AskFileMessage(
         content=welcome_message,
         accept=["text/plain", "application/pdf"],
     ).send()
     logging.info("file uploaded")
@@ -53,8 +65,8 @@ async def start():
     ## create chain that uses chroma vector store
     chain = ConversationalRetrievalChain.from_llm(
-        ChatOpenAI(model_name="gpt-3.5-turbo",temperature=0, streaming=True),
-        chain_type="stuff",
         retriever=docsearch.as_retriever(),
         memory=memory,
         return_source_documents=True,

 import os
 import logging
 #export HNSWLIB_NO_NATIVE = 1
 from langchain.document_loaders import PyPDFDirectoryLoader, TextLoader
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chat_models import ChatOpenAI
 import chainlit as cl
+from src.config import Config
 from src.utils import get_docsearch, get_source
 # text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 # embeddings = OpenAIEmbeddings()
# Greeting used as the content of the cl.AskFileMessage upload prompt.
# The wording names only text and PDF files because the prompt's `accept`
# list is ["text/plain", "application/pdf"]; images are not accepted.
welcome_message = """Welcome to Your Document Reader!
Here to assist you with any questions you have about a file. You can upload a file and ask me questions related to its content. Here's how you can get started:
1. Click on the file upload button to share a text or PDF document.
2. Once the file is uploaded, feel free to ask me any questions about its content.
3. I'll do my best to provide information or insights based on the uploaded file.
If you need help or have any specific queries, type "help" at any time.
Let's get the conversation started! """
 @cl.on_chat_start
 async def start():
+    await cl.Message("YOU ARE IN").send()
     files = None
     files = await cl.AskFileMessage(
         content=welcome_message,
         accept=["text/plain", "application/pdf"],
+        max_size_mb=Config.max_size_mb,
+        timeout=Config.timeout
     ).send()
     logging.info("file uploaded")
     ## create chain that uses chroma vector store
     chain = ConversationalRetrievalChain.from_llm(
+        ChatOpenAI(model_name=Config.model_name,temperature=Config.temperature, streaming=Config.streaming),
+        chain_type=Config.chain_type,
         retriever=docsearch.as_retriever(),
         memory=memory,
         return_source_documents=True,

chainlit.md CHANGED Viewed

@@ -1,14 +1,11 @@
-# Welcome to Chainlit! 🚀🤖
-Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
-## Useful Links 🔗
-- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
-- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
-We can't wait to see what you create with Chainlit! Happy coding! 💻😊
-## Welcome screen
-To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.

+Welcome to Your Document Reader!
+Here to assist you with any questions you have about a file. You can upload a file and ask me questions related to its content. Here's how you can get started:
+1. Click on the file upload button to share a text or PDF document.
+2. Once the file is uploaded, feel free to ask me any questions about its content.
+3. I'll do my best to provide information or insights based on the uploaded file.
+If you need help or have any specific queries, type "help" at any time.
+Let's get the conversation started!

requirements.txt CHANGED Viewed

@@ -4,4 +4,4 @@ pypdf
 chainlit
 openai
 chromadb
-tiktoken

 chainlit
 openai
 chromadb
+tiktoken

src/config.py CHANGED Viewed

@@ -1,13 +1,24 @@
 import os
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 class Config:
     temperature = 0
     streaming = True
     chain_type = "stuff"
     max_token_limit = 4098
     embeddings = OpenAIEmbeddings()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

 import os
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.memory import ChatMessageHistory, ConversationBufferMemory
 class Config:
     """Central settings and shared LangChain objects for the Document Reader app."""
     # --- Chat model settings (read when ChatOpenAI is constructed) ---
     model_name = "gpt-3.5-turbo"
     temperature = 0
     streaming = True
     max_token_limit = 4098
     # --- Upload prompt settings (passed to cl.AskFileMessage in app.py) ---
     max_size_mb = 20
     timeout = 180
     # --- Retrieval pipeline ---
     chain_type = "stuff"
     embeddings = OpenAIEmbeddings()
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
     # --- Conversation state ---
     # NOTE(review): these objects are created once, when the class body runs,
     # so every caller that uses Config.memory shares one history - confirm
     # that this is intended for multi-session use.
     message_history = ChatMessageHistory()
     memory = ConversationBufferMemory(
         chat_memory=message_history,
         memory_key="chat_history",
         output_key="answer",
         return_messages=True,
     )

src/model.py CHANGED Viewed

@@ -2,6 +2,7 @@ from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesCha
 from langchain.chat_models import ChatOpenAI
 import logging
 import os
 from src.config import Config
@@ -11,16 +12,21 @@ from src.config import Config
 def load_model():
-    model = ChatOpenAI(temperature=Config.temperature,
-                   streaming=Config.streaming)
     return model
 def load_chain(docsearch):
     model = load_model()
-    chain = RetrievalQAWithSourcesChain.from_chain_type(
-        ChatOpenAI(temperature=0, streaming=True),
-        chain_type="stuff",
-        retriever=docsearch.as_retriever(max_tokens_limit=4097),
     )
     return chain

 from langchain.chat_models import ChatOpenAI
 import logging
 import os
+from langchain.chains import ConversationalRetrievalChain
 from src.config import Config
 def load_model():
     """Build the chat model from the settings held on ``Config``.

     Returns:
         A ``ChatOpenAI`` instance created with ``Config.model_name``,
         ``Config.temperature`` and ``Config.streaming``.
     """
     llm_settings = {
         "model_name": Config.model_name,
         "temperature": Config.temperature,
         "streaming": Config.streaming,
     }
     return ChatOpenAI(**llm_settings)
 def load_chain(docsearch):
     """Create the conversational retrieval chain for an indexed document.

     Args:
         docsearch: Vector store for the uploaded file; only its
             ``as_retriever()`` method is used here.

     Returns:
         The chain built by ``ConversationalRetrievalChain.from_llm``,
         configured with ``Config.chain_type`` and ``Config.memory`` and set
         to return source documents alongside the answer.
     """
     model = load_model()
     # Hand the chain the model *instance*; the previous code passed the
     # ``load_model`` function object itself and left ``model`` unused.
     chain = ConversationalRetrievalChain.from_llm(
         model,
         chain_type=Config.chain_type,
         retriever=docsearch.as_retriever(),
         memory=Config.memory,
         return_source_documents=True,
     )
     return chain

src/utils.py CHANGED Viewed

@@ -6,13 +6,13 @@ from langchain.vectorstores import Chroma
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings.openai import OpenAIEmbeddings
 import chainlit as cl
 from src.config import Config
 import logging
-text_splitter = RecursiveCharacterTextSplitter()
-embeddings = OpenAIEmbeddings()
 def process_file(file: AskFileResponse):
     import tempfile
@@ -26,7 +26,7 @@ def process_file(file: AskFileResponse):
         tempfile.write(file.content)
         loader = Loader(tempfile.name)
         documents = loader.load()
-        docs = text_splitter.split_documents(documents)
         for i, doc in enumerate(docs):
             doc.metadata["source"] = f"source_{i}"
         return docs
@@ -40,7 +40,7 @@ def get_docsearch(file: AskFileResponse):
     # Create a unique namespace for the file
     docsearch = Chroma.from_documents(
-        docs, embeddings
     )
     return docsearch

 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings.openai import OpenAIEmbeddings
 import chainlit as cl
+from src.config import Config
 from src.config import Config
 import logging
+# text_splitter = RecursiveCharacterTextSplitter()
+# embeddings = OpenAIEmbeddings()
 def process_file(file: AskFileResponse):
     import tempfile
         tempfile.write(file.content)
         loader = Loader(tempfile.name)
         documents = loader.load()
+        docs = Config.text_splitter.split_documents(documents)
         for i, doc in enumerate(docs):
             doc.metadata["source"] = f"source_{i}"
         return docs
     # Create a unique namespace for the file
     docsearch = Chroma.from_documents(
+        docs, Config.embeddings
     )
     return docsearch