Monsia committed
Commit ed5def4
Parent: b9c7274
app.py CHANGED
@@ -4,12 +4,12 @@ from langchain.schema import StrOutputParser
 from langchain.schema.runnable import RunnableConfig, RunnablePassthrough
 from langchain_openai import ChatOpenAI
 
-import config
+import configs
 from prompts import prompt
 from utils import format_docs, process_documents
 
-doc_search = process_documents(config.DOCS_STORAGE_PATH)
-model = ChatOpenAI(name="gpt-3.5-turbo", streaming=True)
+doc_search = process_documents(configs.DOCS_STORAGE_PATH)
+model = ChatOpenAI(name=configs.CHAT_MODEL, streaming=True)
 
 
 @cl.on_chat_start
@@ -44,7 +44,7 @@ async def on_message(message: cl.Message):
 
         def on_retriever_end(self, documents, *, run_id, parent_run_id, **kwargs):
             for d in documents:
-                source_page_pair = (d.metadata["source"])
+                source_page_pair = d.metadata["source"]
                 self.sources.add(source_page_pair)  # Add unique pairs to the set
 
         def on_llm_end(self, response, *, run_id, parent_run_id, **kwargs):
@@ -64,4 +64,7 @@ async def on_message(message: cl.Message):
         ),
     ):
         await msg.stream_token(chunk)
+
+    with open(configs.HIISTORY_FILE, "a") as f:
+        f.write(f"""{message.content}[SEP]{msg.content}[END]\n\n""")
     await msg.send()
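Two details in this hunk are worth a hedged note. In langchain_openai, `ChatOpenAI(name=...)` only sets the runnable's display name for tracing; the model is selected with the `model` parameter, so the new `name=configs.CHAT_MODEL` line still runs the library's default model. Separately, the new logging lines append `question[SEP]answer[END]` records to `configs.HIISTORY_FILE` (spelled with the double "I" exactly as the commit spells it). A minimal sketch of both points; the `model=` wiring is an assumption about the intent, and `parse_history` is a hypothetical helper, not code from this repo:

```python
from langchain_openai import ChatOpenAI

import configs

# Assumption about intent: pick the model via `model`, since `name` only
# labels the run in traces and does not change which model is called.
model = ChatOpenAI(model=configs.CHAT_MODEL, streaming=True)


def parse_history(path: str) -> list[tuple[str, str]]:
    """Hypothetical reader for the question[SEP]answer[END] log format."""
    with open(path) as f:
        raw = f.read()
    pairs = []
    for record in raw.split("[END]"):
        record = record.strip()
        if "[SEP]" in record:
            question, answer = record.split("[SEP]", 1)
            pairs.append((question, answer))
    return pairs


# Usage sketch:
# for q, a in parse_history(configs.HIISTORY_FILE):
#     print(q, "->", a[:60])
```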
chainlit.md CHANGED
@@ -1,14 +1,26 @@
-# Welcome to Chainlit! 🚀🤖
-
-Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
-
-## Useful Links 🔗
-
-- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
-- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
-
-We can't wait to see what you create with Chainlit! Happy coding! 💻😊
-
-## Welcome screen
-
-To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
+# Welcome to AkwabaGPT! 🚀🤖
+
+AkwabaGPT is a multilingual assistant designed by [data354](https://data354.com/) to help visitors during the Africa Cup of Nations 2023 in Côte d'Ivoire. It provides information about the tournament, including details on stadiums, match schedules, results, top scorers, and answers other tournament-related queries.
+
+===
+
+AkwabaGPT est un assistant multilingue conçu par [data354](https://data354.com/) pour aider les visiteurs pendant la Coupe d'Afrique des Nations 2023 en Côte d'Ivoire. Il fournit des informations sur le tournoi, y compris des détails sur les stades, les horaires des matchs, les résultats, les meilleurs buteurs, et répond à d'autres questions liées au tournoi.
+
+## Query examples / Exemples de requêtes 💬🔗
+
+- **Which teams have qualified for the quarter-finals?**
+- **Who are the competition's top scorers?**
+- **List of all CIV matches**
+- **How many cards have been handed out?**
+
+===
+
+- **Quelles sont les équipes qualifiées pour les 1/4 de finales**
+- **Qui sont les meilleurs buteurs de la compétition ?**
+- **Liste moi tous les matchs de la CIV**
+- **Combien de cartons ont été distribués ?**
+
+
+> That's it ! Now you can talk with AkwabaGPT 💻😊.
+
+> Voilà, c'est fait ! Vous pouvez maintenant parler avec AkwabaGPT 💻😊.
config.py → configs.py RENAMED
@@ -2,7 +2,7 @@ import os
 
 # Model metadata
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-CHAT_MODEL = "gpt-3.5-turbo"  # or "gpt-4-0314"
+CHAT_MODEL = "gpt-4-0314"  # "gpt-3.5-turbo"
 
 # vectorstore
 STORE_FILE = "./data/chroma_db"
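The rename to `configs.py` also flips the default model to `gpt-4-0314`. If rolling back to `gpt-3.5-turbo` without a redeploy matters, one hedged variant is an environment override; the `CHAT_MODEL` environment variable name below is hypothetical, not something this commit defines:

```python
import os

# Hypothetical override: use the committed default unless the deployment
# sets CHAT_MODEL in the environment (e.g. CHAT_MODEL=gpt-3.5-turbo).
CHAT_MODEL = os.getenv("CHAT_MODEL", "gpt-4-0314")
```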
data/history.txt ADDED
New empty file (placeholder for the chat history log that app.py now appends to).
prompts.py CHANGED
@@ -1,17 +1,6 @@
 from langchain.prompts import ChatPromptTemplate
 
-template = """Tu es AkwabaGPT, un assistant multilingue conçu pour aider les visiteurs pendant la Coupe d'Afrique des Nations 2023 en Côte d'Ivoire.
-
-Ton role est de :
-- Fournir des informations sur le tournoi,
-- Donner les détails sur les matchs tels que les horaires des matchs, les résultats, les meilleurs buteurs
-- Donner les détails sur les stades
-- Donner le nombre de cartons jaunes ou rouges
-
-Ta réponse doit être basée sur les documents fournis comme contexte pour tes réponses.
-Ta réponse doit être concise et claire.
-Ne tente pas d'inventer une réponse.
-Si la question ne concerne pas la Coupe d'Afrique des Nations, il faut informer poliment que tu n'es autorisé à répondre uniquement aux questions portant sur la Coupe d'Afrique des Nations.
+template = """AkwabaGpt is a multilingual assistant designed to help visitors during the Africa Cup of Nations 2023 in Côte d'Ivoire. It provides information about the tournament, including details on stadiums, match schedules, results, top scorers, and answers other tournament-related queries. Additionally, AkwabaGpt offers general information about Côte d'Ivoire. The data comes from user-provided files, not online sources, and does not include real-time updates. When asked for a match summary, AkwabaGpt provides the names of the two teams and the scores. AkwabaGpt prioritizes accuracy and clarity in delivering information, ensuring users have a comprehensive understanding of the tournament and its surrounding context. AkwabaGpt is not designed for in-depth analysis or opinions but focuses on factual data delivery. The assistant will avoid speculation, predictions, or subjective commentary on matches or teams. It will refrain from providing personal opinions or engaging in debates. For clarity, AkwabaGpt will ask for file uploads if needed and avoid guessing or making assumptions about incomplete data.
 
 #########
 {context}
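The diff truncates the template after the `#########` / `{context}` block, so the question placeholder is not visible here. For orientation, a sketch of how a template like this typically becomes the `prompt` that app.py imports and feeds through the LCEL chain; `format_docs`, the retriever, and the `{question}` slot are assumptions based on app.py's imports, not lines shown in this diff:

```python
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

# Assumed shape: the system text ends with the retrieved context followed
# by the user's question, mirroring the "#########\n{context}" block above.
template = """AkwabaGpt is a multilingual assistant ...

#########
{context}
#########

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)


def build_chain(retriever, model, format_docs):
    # Matches app.py's imports: retrieved documents are formatted into
    # {context}, while the raw user message passes through as {question}.
    return (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )
```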
public/favicon.png ADDED
public/logo_dark.png ADDED
public/logo_light.png ADDED
utils.py CHANGED
@@ -7,7 +7,7 @@ from langchain.vectorstores.chroma import Chroma
 from langchain_community.document_loaders import TextLoader
 from langchain_openai import OpenAIEmbeddings
 
-import config
+import configs
 
 embeddings_model = OpenAIEmbeddings()
 
@@ -17,18 +17,19 @@ def process_documents(doc_storage_path: str):
     doc_directory = Path(doc_storage_path)
     docs = []  # type: List[Document]
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=config.CHUNK_SIZE, chunk_overlap=config.CHUNK_OVERLAP
+        chunk_size=configs.CHUNK_SIZE, chunk_overlap=configs.CHUNK_OVERLAP
     )
     doc_search = Chroma(
-        persist_directory=config.STORE_FILE, embedding_function=embeddings_model
+        persist_directory=configs.STORE_FILE, embedding_function=embeddings_model
     )
     for file_path in doc_directory.glob("*.txt"):
-        print(str(file_path))
         loader = TextLoader(str(file_path))
         documents = loader.load()
         docs = text_splitter.split_documents(documents)
-        doc_search.from_documents(docs, embeddings_model)
-        print(len(docs))
+        doc_search = doc_search.from_documents(
+            docs, embeddings_model, persist_directory=configs.STORE_FILE
+        )
+        doc_search.persist()
     print("doc preprocessing end.")
     return doc_search
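One caveat on this hunk: `from_documents` is a classmethod, so `doc_search.from_documents(...)` re-creates a `Chroma` handle on every loop iteration (each pointed at the same `persist_directory`, so chunks still accumulate in the persisted collection). A hedged alternative sketch that keeps a single store open and appends to it, assuming the same `configs` constants used above:

```python
from pathlib import Path

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings

import configs


def process_documents(doc_storage_path: str):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=configs.CHUNK_SIZE, chunk_overlap=configs.CHUNK_OVERLAP
    )
    # Open the persisted store once instead of re-creating it per file.
    doc_search = Chroma(
        persist_directory=configs.STORE_FILE,
        embedding_function=OpenAIEmbeddings(),
    )
    for file_path in Path(doc_storage_path).glob("*.txt"):
        chunks = splitter.split_documents(TextLoader(str(file_path)).load())
        doc_search.add_documents(chunks)  # embed and append to the same store
    doc_search.persist()  # flush once, after all files are ingested
    return doc_search
```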