Spaces:

jjovalle99
/

DocuQuery2

Sleeping

App Files Files Community

jjovalle committed on Mar 8, 2024

Commit

32efd97

1 Parent(s): 61387c0

app

Browse files

Files changed (9) hide show

.chainlit/config.toml +97 -0
.chainlit/translations/en-US.json +155 -0
.chainlit/translations/pt-BR.json +155 -0
app.py +125 -0
chainlit.md +4 -0
poetry.lock +0 -0
pyproject.toml +26 -0
src/__init__.py +0 -0
src/loaders.py +25 -0

.chainlit/config.toml ADDED Viewed

	@@ -0,0 +1,97 @@

+[project]
+# Whether to enable telemetry (default: true). No personal data is collected.
+enable_telemetry = true
+# List of environment variables to be provided by each user to use the app.
+user_env = []
+# Duration (in seconds) during which the session is saved when the connection is lost
+session_timeout = 3600
+# Enable third-party caching (e.g. LangChain cache)
+cache = false
+# Authorized origins
+allow_origins = ["*"]
+# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
+# follow_symlink = false
+[features]
+# Show the prompt playground
+prompt_playground = false
+# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
+unsafe_allow_html = false
+# Process and display mathematical expressions. This can clash with "$" characters in messages.
+latex = false
+# Authorize users to upload files with messages
+multi_modal = true
+# Allows user to use speech to text
+[features.speech_to_text]
+    enabled = false
+    # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
+    # language = "en-US"
+[UI]
+# Name of the app and chatbot.
+name = "Chatbot"
+# Show the readme while the thread is empty.
+show_readme_as_default = true
+# Description of the app and chatbot. This is used for HTML tags.
+# description = ""
+# Large content is collapsed by default for a cleaner UI
+default_collapse_content = true
+# The default value for the expand messages settings.
+default_expand_messages = false
+# Hide the chain of thought details from the user in the UI.
+hide_cot = false
+# Link to your github repo. This will add a github button in the UI's header.
+# github = ""
+# Specify a CSS file that can be used to customize the user interface.
+# The CSS file can be served from the public directory or via an external link.
+# custom_css = "/public/test.css"
+# Specify a Javascript file that can be used to customize the user interface.
+# The Javascript file can be served from the public directory.
+# custom_js = "/public/test.js"
+# Specify a custom font url.
+# custom_font = "https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap"
+# Override default MUI light theme. (Check theme.ts)
+[UI.theme]
+    #font_family = "Inter, sans-serif"
+[UI.theme.light]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+    [UI.theme.light.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+# Override default MUI dark theme. (Check theme.ts)
+[UI.theme.dark]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+    [UI.theme.dark.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+[meta]
+generated_by = "1.0.400"

.chainlit/translations/en-US.json ADDED Viewed

	@@ -0,0 +1,155 @@

+{
+    "components": {
+        "atoms": {
+            "buttons": {
+                "userButton": {
+                    "menu": {
+                        "settings": "Settings",
+                        "settingsKey": "S",
+                        "APIKeys": "API Keys",
+                        "logout": "Logout"
+                    }
+                }
+            }
+        },
+        "molecules": {
+            "newChatButton": {
+                "newChat": "New Chat"
+            },
+            "tasklist": {
+                "TaskList": {
+                    "title": "\ud83d\uddd2\ufe0f Task List",
+                    "loading": "Loading...",
+                    "error": "An error occurred"
+                }
+            },
+            "attachments": {
+                "cancelUpload": "Cancel upload",
+                "removeAttachment": "Remove attachment"
+            },
+            "newChatDialog": {
+                "createNewChat": "Create new chat?",
+                "clearChat": "This will clear the current messages and start a new chat.",
+                "cancel": "Cancel",
+                "confirm": "Confirm"
+            },
+            "settingsModal": {
+                "expandMessages": "Expand Messages",
+                "hideChainOfThought": "Hide Chain of Thought",
+                "darkMode": "Dark Mode"
+            }
+        },
+        "organisms": {
+            "chat": {
+                "history": {
+                    "index": {
+                        "lastInputs": "Last Inputs",
+                        "noInputs": "Such empty...",
+                        "loading": "Loading..."
+                    }
+                },
+                "inputBox": {
+                    "input": {
+                        "placeholder": "Type your message here..."
+                    },
+                    "speechButton": {
+                        "start": "Start recording",
+                        "stop": "Stop recording"
+                    },
+                    "SubmitButton": {
+                        "sendMessage": "Send message",
+                        "stopTask": "Stop Task"
+                    },
+                    "UploadButton": {
+                        "attachFiles": "Attach files"
+                    },
+                    "waterMark": {
+                        "text": "Built with"
+                    }
+                },
+                "Messages": {
+                    "index": {
+                        "running": "Running",
+                        "executedSuccessfully": "executed successfully",
+                        "failed": "failed",
+                        "feedbackUpdated": "Feedback updated",
+                        "updating": "Updating"
+                    }
+                },
+                "dropScreen": {
+                    "dropYourFilesHere": "Drop your files here"
+                },
+                "index": {
+                    "failedToUpload": "Failed to upload",
+                    "cancelledUploadOf": "Cancelled upload of",
+                    "couldNotReachServer": "Could not reach the server",
+                    "continuingChat": "Continuing previous chat"
+                },
+                "settings": {
+                    "settingsPanel": "Settings panel",
+                    "reset": "Reset",
+                    "cancel": "Cancel",
+                    "confirm": "Confirm"
+                }
+            },
+            "threadHistory": {
+                "sidebar": {
+                    "filters": {
+                        "FeedbackSelect": {
+                            "feedbackAll": "Feedback: All",
+                            "feedbackPositive": "Feedback: Positive",
+                            "feedbackNegative": "Feedback: Negative"
+                        },
+                        "SearchBar": {
+                            "search": "Search"
+                        }
+                    },
+                    "DeleteThreadButton": {
+                        "confirmMessage": "This will delete the thread as well as its messages and elements.",
+                        "cancel": "Cancel",
+                        "confirm": "Confirm",
+                        "deletingChat": "Deleting chat",
+                        "chatDeleted": "Chat deleted"
+                    },
+                    "index": {
+                        "pastChats": "Past Chats"
+                    },
+                    "ThreadList": {
+                        "empty": "Empty..."
+                    },
+                    "TriggerButton": {
+                        "closeSidebar": "Close sidebar",
+                        "openSidebar": "Open sidebar"
+                    }
+                },
+                "Thread": {
+                    "backToChat": "Go back to chat",
+                    "chatCreatedOn": "This chat was created on"
+                }
+            },
+            "header": {
+                "chat": "Chat",
+                "readme": "Readme"
+            }
+        }
+    },
+    "hooks": {
+        "useLLMProviders": {
+            "failedToFetchProviders": "Failed to fetch providers:"
+        }
+    },
+    "pages": {
+        "Design": {},
+        "Env": {
+            "savedSuccessfully": "Saved successfully",
+            "requiredApiKeys": "Required API Keys",
+            "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
+        },
+        "Page": {
+            "notPartOfProject": "You are not part of this project."
+        },
+        "ResumeButton": {
+            "resumeChat": "Resume Chat"
+        }
+    }
+}

.chainlit/translations/pt-BR.json ADDED Viewed

	@@ -0,0 +1,155 @@

+{
+    "components": {
+        "atoms": {
+            "buttons": {
+                "userButton": {
+                    "menu": {
+                        "settings": "Configura\u00e7\u00f5es",
+                        "settingsKey": "S",
+                        "APIKeys": "Chaves de API",
+                        "logout": "Sair"
+                    }
+                }
+            }
+        },
+        "molecules": {
+            "newChatButton": {
+                "newChat": "Nova Conversa"
+            },
+            "tasklist": {
+                "TaskList": {
+                    "title": "\ud83d\uddd2\ufe0f Lista de Tarefas",
+                    "loading": "Carregando...",
+                    "error": "Ocorreu um erro"
+                }
+            },
+            "attachments": {
+                "cancelUpload": "Cancelar envio",
+                "removeAttachment": "Remover anexo"
+            },
+            "newChatDialog": {
+                "createNewChat": "Criar novo chat?",
+                "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.",
+                "cancel": "Cancelar",
+                "confirm": "Confirmar"
+            },
+            "settingsModal": {
+                "expandMessages": "Expandir Mensagens",
+                "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento",
+                "darkMode": "Modo Escuro"
+            }
+        },
+        "organisms": {
+            "chat": {
+                "history": {
+                    "index": {
+                        "lastInputs": "\u00daltimas Entradas",
+                        "noInputs": "Vazio...",
+                        "loading": "Carregando..."
+                    }
+                },
+                "inputBox": {
+                    "input": {
+                        "placeholder": "Digite sua mensagem aqui..."
+                    },
+                    "speechButton": {
+                        "start": "Iniciar grava\u00e7\u00e3o",
+                        "stop": "Parar grava\u00e7\u00e3o"
+                    },
+                    "SubmitButton": {
+                        "sendMessage": "Enviar mensagem",
+                        "stopTask": "Parar Tarefa"
+                    },
+                    "UploadButton": {
+                        "attachFiles": "Anexar arquivos"
+                    },
+                    "waterMark": {
+                        "text": "Constru\u00eddo com"
+                    }
+                },
+                "Messages": {
+                    "index": {
+                        "running": "Executando",
+                        "executedSuccessfully": "executado com sucesso",
+                        "failed": "falhou",
+                        "feedbackUpdated": "Feedback atualizado",
+                        "updating": "Atualizando"
+                    }
+                },
+                "dropScreen": {
+                    "dropYourFilesHere": "Solte seus arquivos aqui"
+                },
+                "index": {
+                    "failedToUpload": "Falha ao enviar",
+                    "cancelledUploadOf": "Envio cancelado de",
+                    "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor",
+                    "continuingChat": "Continuando o chat anterior"
+                },
+                "settings": {
+                    "settingsPanel": "Painel de Configura\u00e7\u00f5es",
+                    "reset": "Redefinir",
+                    "cancel": "Cancelar",
+                    "confirm": "Confirmar"
+                }
+            },
+            "threadHistory": {
+                "sidebar": {
+                    "filters": {
+                        "FeedbackSelect": {
+                            "feedbackAll": "Feedback: Todos",
+                            "feedbackPositive": "Feedback: Positivo",
+                            "feedbackNegative": "Feedback: Negativo"
+                        },
+                        "SearchBar": {
+                            "search": "Buscar"
+                        }
+                    },
+                    "DeleteThreadButton": {
+                        "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.",
+                        "cancel": "Cancelar",
+                        "confirm": "Confirmar",
+                        "deletingChat": "Deletando conversa",
+                        "chatDeleted": "Conversa deletada"
+                    },
+                    "index": {
+                        "pastChats": "Conversas Anteriores"
+                    },
+                    "ThreadList": {
+                        "empty": "Vazio..."
+                    },
+                    "TriggerButton": {
+                        "closeSidebar": "Fechar barra lateral",
+                        "openSidebar": "Abrir barra lateral"
+                    }
+                },
+                "Thread": {
+                    "backToChat": "Voltar para a conversa",
+                    "chatCreatedOn": "Esta conversa foi criada em"
+                }
+            },
+            "header": {
+                "chat": "Conversa",
+                "readme": "Leia-me"
+            }
+        }
+    },
+    "hooks": {
+        "useLLMProviders": {
+            "failedToFetchProviders": "Falha ao buscar provedores:"
+        }
+    },
+    "pages": {
+        "Design": {},
+        "Env": {
+            "savedSuccessfully": "Salvo com sucesso",
+            "requiredApiKeys": "Chaves de API necess\u00e1rias",
+            "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo."
+        },
+        "Page": {
+            "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto."
+        },
+        "ResumeButton": {
+            "resumeChat": "Continuar Conversa"
+        }
+    }
+}

app.py ADDED Viewed

	@@ -0,0 +1,125 @@

+from dotenv import load_dotenv
+load_dotenv()
+import logging
+import os
+from time import perf_counter
+import chainlit as cl
+from langchain.cache import InMemoryCache
+from langchain.embeddings import CacheBackedEmbeddings
+from langchain.globals import set_llm_cache
+from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
+from langchain.schema import SystemMessage
+from langchain.schema.output_parser import StrOutputParser
+from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
+from langchain.storage import LocalFileStore
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_openai.chat_models import ChatOpenAI
+from langchain_openai.embeddings import OpenAIEmbeddings
+from langchain_pinecone import PineconeVectorStore
+from pinecone import Index
+from src.loaders import get_docs
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Set the LangChain W&B tracing flag before any chain is constructed or run.
+# NOTE(review): wandb is not among the dependencies declared in pyproject.toml;
+# confirm it is actually installed in the deployment environment.
+os.environ["LANGCHAIN_WANDB_TRACING"] = "true"
+# Build the module-level objects shared by the chat handlers below
+# (vectorstore, splitter, prompt_chain). Everything is created once at import time.
+try:
+    set_llm_cache(InMemoryCache())  # Use in-memory cache for LLM
+    # Pinecone index handle; all three settings are read from the environment and
+    # a missing variable raises KeyError, which the except clause below catches.
+    index = Index(
+        api_key=os.environ["PINECONE_API_KEY"],
+        index_name=os.environ["PINECONE_INDEX"],
+        host=os.environ["PINECONE_HOST"],
+    )
+    # File-backed byte store rooted at ./cache/, used as the document embedding cache.
+    store = LocalFileStore("./cache/")
+    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+    # Rebind `embeddings` to the cache-backed wrapper. The keyword arguments are
+    # evaluated before the rebinding, so `embeddings.model` still refers to the
+    # OpenAIEmbeddings instance and the cache namespace is the model name.
+    embeddings = CacheBackedEmbeddings.from_bytes_store(
+        underlying_embeddings=embeddings,
+        document_embedding_cache=store,
+        namespace=embeddings.model,
+    )
+    # text_key names the metadata field under which the chunk text is stored in the index.
+    vectorstore = PineconeVectorStore(
+        index=index, embedding=embeddings, text_key="text"
+    )
+    system_message = SystemMessage(content="You are a helpful assistant.")
+    # Prompt template with {context} and {question} placeholders. The continuation
+    # lines are indented inside the literal, so that whitespace is part of the prompt.
+    human_template = """Based on the following context generate an answer for the question. If the answer is not available say I dont know.
+    Context: {context}
+    Question: {question}
+    Answer:"""
+    human_message = HumanMessagePromptTemplate.from_template(template=human_template)
+    llm = ChatOpenAI(model="gpt-3.5-turbo-1106", streaming=True)
+    prompt = ChatPromptTemplate.from_messages(messages=[system_message, human_message])
+    parser = StrOutputParser()
+    # prompt -> chat model -> plain string; main() prepends the retrieval step.
+    prompt_chain = prompt | llm | parser
+    # Splitter handed to get_docs(): 500-character chunks with 100 characters of overlap.
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=500, chunk_overlap=100, length_function=len
+    )
+    logger.info("Initialization completed successfully.")
+except Exception as e:
+    # NOTE(review): the failure is logged with its traceback but not re-raised, so the
+    # module keeps loading. Names assigned inside the try block may then be undefined,
+    # and start_chat()/main() would fail later with NameError - confirm this is intended.
+    logger.exception("Failed during initialization: %s", str(e))
+@cl.on_chat_start
+async def start_chat():
+    files = await cl.AskFileMessage(
+        content="Please upload a PDF file to answer questions from.",
+        accept=["application/pdf"],
+        max_size_mb=100,
+        max_files=10,
+        timeout=60 * 60 * 24 * 7 * 365,
+    ).send()
+    out = cl.Message(content="")
+    await out.send()
+    paths = [file.path for file in files]
+    logger.info(files[0])
+    logger.info("Preparing docs: %s", paths)
+    start = perf_counter()
+    splitted_docs = get_docs(files=files, splitter=splitter)
+    end = perf_counter()
+    logger.info("Preparing docs took %s seconds.", end - start)
+    retriever = vectorstore.as_retriever(
+        search_kwargs={"filter": {"source": {"$in": paths}}}
+    )
+    logger.info("Adding documents to vector store retriever.")
+    start = perf_counter()
+    await retriever.aadd_documents(splitted_docs)
+    end = perf_counter()
+    logger.info("Adding documents took %s seconds.", end - start)
+    cl.user_session.set("retriever", retriever)
+    out.content = f"{len(files)} file(s) loaded! You can now ask questions"
+    await out.update()
+    logger.info("Files loaded and retriever updated.")
+@cl.on_message
+async def main(message: cl.Message):
+    retriever = cl.user_session.get("retriever")
+    retriever_chain = RunnableParallel(
+        {"context": retriever, "question": RunnablePassthrough()}
+    )
+    out = cl.Message(content="")
+    await out.send()
+    chain = retriever_chain | prompt_chain
+    stream = chain.astream(message.content)
+    async for chunk in stream:
+        await out.stream_token(chunk)
+    await out.update()

chainlit.md ADDED Viewed

	@@ -0,0 +1,4 @@

+# Welcome to DocuQuery 2.0: Your PDF & Text File QA Expert 📄🔍
+Hello, Knowledge Seekers! 👋 Excited to introduce you to DocuQuery, your smart assistant for navigating and extracting information from PDFs and text files.
+In this new version, Pinecone, LangChain, and W&B are used!

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml ADDED Viewed

	@@ -0,0 +1,26 @@

+[tool.poetry]
+name = "docuquery2-0-1"
+version = "0.1.0"
+description = ""
+authors = ["jjovalle <jj.ovalle@uniandes.edu.co>"]
+readme = "README.md"
+packages = [{include = "src"}]
+[tool.poetry.dependencies]
+python = "~3.11"
+chainlit = "^1.0.400"
+python-dotenv = "^1.0.1"
+langchain = "^0.1.11"
+langchain-pinecone = "^0.0.3"
+langchain-openai = "^0.0.8"
+pypdf = "^4.1.0"
+[tool.poetry.group.dev.dependencies]
+black = "^24.2.0"
+isort = "^5.13.2"
+ipykernel = "^6.29.3"
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"

src/__init__.py ADDED Viewed

File without changes

src/loaders.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from typing import List
+from chainlit.types import AskFileResponse
+from langchain.docstore.document import Document
+from pypdf import PdfReader
+def get_docs(files: List[AskFileResponse], splitter) -> List[str]:
+    docs = []
+    for file in files:
+        reader = PdfReader(file.path)
+        doc = [
+            Document(
+                page_content=page.extract_text(),
+                metadata={"source": file.path, "page": page.page_number},
+            )
+            for page in reader.pages
+        ]
+        docs.append(doc)
+    splitted_docs = [splitter.split_documents(doc) for doc in docs]
+    for doc in splitted_docs:
+        for i, chunk in enumerate(doc, start=1):
+            chunk.metadata["chunk"] = i
+    unnested_splitted_docs = [chunk for doc in splitted_docs for chunk in doc]
+    return unnested_splitted_docs