Monsia committed
Commit ed5def4
Parent: b9c7274
app.py CHANGED
@@ -4,12 +4,12 @@ from langchain.schema import StrOutputParser
 from langchain.schema.runnable import RunnableConfig, RunnablePassthrough
 from langchain_openai import ChatOpenAI
 
-import config
+import configs
 from prompts import prompt
 from utils import format_docs, process_documents
 
-doc_search = process_documents(config.DOCS_STORAGE_PATH)
-model = ChatOpenAI(name="gpt-3.5-turbo", streaming=True)
+doc_search = process_documents(configs.DOCS_STORAGE_PATH)
+model = ChatOpenAI(name=configs.CHAT_MODEL, streaming=True)
 
 
 @cl.on_chat_start
@@ -44,7 +44,7 @@ async def on_message(message: cl.Message):
 
         def on_retriever_end(self, documents, *, run_id, parent_run_id, **kwargs):
             for d in documents:
-                source_page_pair = (d.metadata["source"])
+                source_page_pair = d.metadata["source"]
                 self.sources.add(source_page_pair)  # Add unique pairs to the set
 
         def on_llm_end(self, response, *, run_id, parent_run_id, **kwargs):
@@ -64,4 +64,7 @@ async def on_message(message: cl.Message):
         ),
     ):
         await msg.stream_token(chunk)
+
+    with open(configs.HIISTORY_FILE, "a") as f:
+        f.write(f"""{message.content}[SEP]{msg.content}[END]\n\n""")
     await msg.send()
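Two details in this hunk are worth a hedged note. In langchain_openai, `ChatOpenAI(name=...)` only sets the runnable's display name for tracing; the model is selected with the `model` parameter, so the new `name=configs.CHAT_MODEL` line still runs the library's default model. Separately, the new logging lines append `question[SEP]answer[END]` records to `configs.HIISTORY_FILE` (spelled with the double "I" exactly as the commit spells it). A minimal sketch of both points; the `model=` wiring is an assumption about the intent, and `parse_history` is a hypothetical helper, not code from this repo:

```python
from langchain_openai import ChatOpenAI

import configs

# Assumption about intent: pick the model via `model`, since `name` only
# labels the run in traces and does not change which model is called.
model = ChatOpenAI(model=configs.CHAT_MODEL, streaming=True)


def parse_history(path: str) -> list[tuple[str, str]]:
    """Hypothetical reader for the question[SEP]answer[END] log format."""
    with open(path) as f:
        raw = f.read()
    pairs = []
    for record in raw.split("[END]"):
        record = record.strip()
        if "[SEP]" in record:
            question, answer = record.split("[SEP]", 1)
            pairs.append((question, answer))
    return pairs


# Usage sketch:
# for q, a in parse_history(configs.HIISTORY_FILE):
#     print(q, "->", a[:60])
```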
chainlit.md CHANGED
@@ -1,14 +1,26 @@
-# Welcome to Chainlit! 🚀🤖
-
-Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
-
-## Useful Links 🔗
-
-- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
-- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
-
-We can't wait to see what you create with Chainlit! Happy coding! 💻😊
-
-## Welcome screen
-
-To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
+# Welcome to AkwabaGPT! 🚀🤖
+
+AkwabaGPT is a multilingual assistant designed by [data354](https://data354.com/) to help visitors during the Africa Cup of Nations 2023 in Côte d'Ivoire. It provides information about the tournament, including details on stadiums, match schedules, results, top scorers, and answers other tournament-related queries.
+
+===
+
+AkwabaGPT est un assistant multilingue conçu par [data354](https://data354.com/) pour aider les visiteurs pendant la Coupe d'Afrique des Nations 2023 en Côte d'Ivoire. Il fournit des informations sur le tournoi, y compris des détails sur les stades, les horaires des matchs, les résultats, les meilleurs buteurs, et répond à d'autres questions liées au tournoi.
+
+## Query examples / Exemples de requêtes 💬🔗
+
+- **Which teams have qualified for the quarter-finals?**
+- **Who are the competition's top scorers?**
+- **List of all CIV matches**
+- **How many cards have been handed out?**
+
+===
+
+- **Quelles sont les équipes qualifiées pour les 1/4 de finales**
+- **Qui sont les meilleurs buteurs de la compétition ?**
+- **Liste moi tous les matchs de la CIV**
+- **Combien de cartons ont été distribués ?**
+
+
+> That's it ! Now you can talk with AkwabaGPT 💻😊.
+
+> Voilà, c'est fait ! Vous pouvez maintenant parler avec AkwabaGPT 💻😊.
config.py → configs.py RENAMED
@@ -2,7 +2,7 @@ import os
 
 # Model metadata
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-CHAT_MODEL = "gpt-3.5-turbo"  # or "gpt-4-0314"
+CHAT_MODEL = "gpt-4-0314"  # "gpt-3.5-turbo"
 
 # vectorstore
 STORE_FILE = "./data/chroma_db"
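The rename to `configs.py` also flips the default model to `gpt-4-0314`. If rolling back to `gpt-3.5-turbo` without a redeploy matters, one hedged variant is an environment override; the `CHAT_MODEL` environment variable name below is hypothetical, not something this commit defines:

```python
import os

# Hypothetical override: use the committed default unless the deployment
# sets CHAT_MODEL in the environment (e.g. CHAT_MODEL=gpt-3.5-turbo).
CHAT_MODEL = os.getenv("CHAT_MODEL", "gpt-4-0314")
```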
data/history.txt ADDED
New empty file (placeholder for the chat history log that app.py now appends to).
prompts.py CHANGED
@@ -1,17 +1,6 @@
 from langchain.prompts import ChatPromptTemplate
 
-template = """Tu es AkwabaGPT, un assistant multilingue conçu pour aider les visiteurs pendant la Coupe d'Afrique des Nations 2023 en Côte d'Ivoire.
-
-Ton role est de :
-- Fournir des informations sur le tournoi,
-- Donner les détails sur les matchs tels que les horaires des matchs, les résultats, les meilleurs buteurs
-- Donner les détails sur les stades
-- Donner le nombre de cartons jaunes ou rouges
-
-Ta réponse doit être basée sur les documents fournis comme contexte pour tes réponses.
-Ta réponse doit être concise et claire.
-Ne tente pas d'inventer une réponse.
-Si la question ne concerne pas la Coupe d'Afrique des Nations, il faut informer poliment que tu n'es autorisé à répondre uniquement aux questions portant sur la Coupe d'Afrique des Nations.
+template = """AkwabaGpt is a multilingual assistant designed to help visitors during the Africa Cup of Nations 2023 in Côte d'Ivoire. It provides information about the tournament, including details on stadiums, match schedules, results, top scorers, and answers other tournament-related queries. Additionally, AkwabaGpt offers general information about Côte d'Ivoire. The data comes from user-provided files, not online sources, and does not include real-time updates. When asked for a match summary, AkwabaGpt provides the names of the two teams and the scores. AkwabaGpt prioritizes accuracy and clarity in delivering information, ensuring users have a comprehensive understanding of the tournament and its surrounding context. AkwabaGpt is not designed for in-depth analysis or opinions but focuses on factual data delivery. The assistant will avoid speculation, predictions, or subjective commentary on matches or teams. It will refrain from providing personal opinions or engaging in debates. For clarity, AkwabaGpt will ask for file uploads if needed and avoid guessing or making assumptions about incomplete data.
 
 #########
 {context}
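The diff truncates the template after the `#########` / `{context}` block, so the question placeholder is not visible here. For orientation, a sketch of how a template like this typically becomes the `prompt` that app.py imports and feeds through the LCEL chain; `format_docs`, the retriever, and the `{question}` slot are assumptions based on app.py's imports, not lines shown in this diff:

```python
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

# Assumed shape: the system text ends with the retrieved context followed
# by the user's question, mirroring the "#########\n{context}" block above.
template = """AkwabaGpt is a multilingual assistant ...

#########
{context}
#########

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)


def build_chain(retriever, model, format_docs):
    # Matches app.py's imports: retrieved documents are formatted into
    # {context}, while the raw user message passes through as {question}.
    return (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )
```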
public/favicon.png ADDED
public/logo_dark.png ADDED
public/logo_light.png ADDED
utils.py CHANGED
@@ -7,7 +7,7 @@ from langchain.vectorstores.chroma import Chroma
 from langchain_community.document_loaders import TextLoader
 from langchain_openai import OpenAIEmbeddings
 
-import config
+import configs
 
 embeddings_model = OpenAIEmbeddings()
 
@@ -17,18 +17,19 @@ def process_documents(doc_storage_path: str):
     doc_directory = Path(doc_storage_path)
     docs = []  # type: List[Document]
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=config.CHUNK_SIZE, chunk_overlap=config.CHUNK_OVERLAP
+        chunk_size=configs.CHUNK_SIZE, chunk_overlap=configs.CHUNK_OVERLAP
     )
     doc_search = Chroma(
-        persist_directory=config.STORE_FILE, embedding_function=embeddings_model
+        persist_directory=configs.STORE_FILE, embedding_function=embeddings_model
     )
     for file_path in doc_directory.glob("*.txt"):
-        print(str(file_path))
         loader = TextLoader(str(file_path))
         documents = loader.load()
         docs = text_splitter.split_documents(documents)
-        doc_search.from_documents(docs, embeddings_model)
-        print(len(docs))
+        doc_search = doc_search.from_documents(
+            docs, embeddings_model, persist_directory=configs.STORE_FILE
+        )
+        doc_search.persist()
     print("doc preprocessing end.")
     return doc_search
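One caveat on this hunk: `from_documents` is a classmethod, so `doc_search.from_documents(...)` re-creates a `Chroma` handle on every loop iteration (each pointed at the same `persist_directory`, so chunks still accumulate in the persisted collection). A hedged alternative sketch that keeps a single store open and appends to it, assuming the same `configs` constants used above:

```python
from pathlib import Path

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings

import configs


def process_documents(doc_storage_path: str):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=configs.CHUNK_SIZE, chunk_overlap=configs.CHUNK_OVERLAP
    )
    # Open the persisted store once instead of re-creating it per file.
    doc_search = Chroma(
        persist_directory=configs.STORE_FILE,
        embedding_function=OpenAIEmbeddings(),
    )
    for file_path in Path(doc_storage_path).glob("*.txt"):
        chunks = splitter.split_documents(TextLoader(str(file_path)).load())
        doc_search.add_documents(chunks)  # embed and append to the same store
    doc_search.persist()  # flush once, after all files are ingested
    return doc_search
```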