v 0.1.0

- app.py +7 -4
- chainlit.md +20 -8
- config.py → configs.py +1 -1
- data/history.txt +0 -0
- prompts.py +1 -12
- public/favicon.png +0 -0
- public/logo_dark.png +0 -0
- public/logo_light.png +0 -0
- utils.py +7 -6
app.py
CHANGED

@@ -4,12 +4,12 @@ from langchain.schema import StrOutputParser
 from langchain.schema.runnable import RunnableConfig, RunnablePassthrough
 from langchain_openai import ChatOpenAI
 
-import config
+import configs
 from prompts import prompt
 from utils import format_docs, process_documents
 
-doc_search = process_documents(config.DOCS_STORAGE_PATH)
-model = ChatOpenAI(name=config.CHAT_MODEL, streaming=True)
+doc_search = process_documents(configs.DOCS_STORAGE_PATH)
+model = ChatOpenAI(name=configs.CHAT_MODEL, streaming=True)
 
 
 @cl.on_chat_start
@@ -44,7 +44,7 @@ async def on_message(message: cl.Message):
 
         def on_retriever_end(self, documents, *, run_id, parent_run_id, **kwargs):
             for d in documents:
-                source_page_pair = (d.metadata["source"], d.metadata["page"])
+                source_page_pair = d.metadata["source"]
                 self.sources.add(source_page_pair)  # Add unique pairs to the set
 
         def on_llm_end(self, response, *, run_id, parent_run_id, **kwargs):
@@ -64,4 +64,7 @@ async def on_message(message: cl.Message):
         ),
     ):
         await msg.stream_token(chunk)
+
+    with open(configs.HIISTORY_FILE, "a") as f:
+        f.write(f"""{message.content}[SEP]{msg.content}[END]\n\n""")
     await msg.send()
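The history-logging block appended at the end of on_message writes each exchange as question[SEP]answer[END] followed by a blank line. Below is a minimal sketch of how such a log could be read back, assuming it lives at data/history.txt (the empty file added by this commit); load_history is a hypothetical helper, not part of the repo.

# Sketch: read back the [SEP]/[END] log that app.py now appends to.
# Assumes the log path is data/history.txt, the empty file added here;
# load_history is a hypothetical helper, not part of this commit.
def load_history(path: str = "./data/history.txt"):
    """Return (user_message, assistant_answer) pairs from the log."""
    with open(path, encoding="utf-8") as f:
        raw = f.read()
    pairs = []
    for record in raw.split("[END]"):
        record = record.strip()
        if not record:
            continue  # skip the blank padding between records
        user, _, assistant = record.partition("[SEP]")
        pairs.append((user, assistant))
    return pairs

A delimiter-based format like this stays parseable only while neither token appears inside a message; a JSONL log would avoid that caveat.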
chainlit.md
CHANGED

@@ -1,14 +1,26 @@
-# Welcome to Chainlit! 🚀🤖
+# Welcome to AkwabaGPT! 🚀🤖
 
-Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
+AkwabaGPT is a multilingual assistant designed by [data354](https://data354.com/) to help visitors during the Africa Cup of Nations 2023 in Côte d'Ivoire. It provides information about the tournament, including details on stadiums, match schedules, results, top scorers, and answers other tournament-related queries.
 
-## Useful Links 🔗
+===
 
-- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
-- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
+AkwabaGPT est un assistant multilingue conçu par [data354](https://data354.com/) pour aider les visiteurs pendant la Coupe d'Afrique des Nations 2023 en Côte d'Ivoire. Il fournit des informations sur le tournoi, y compris des détails sur les stades, les horaires des matchs, les résultats, les meilleurs buteurs, et répond à d'autres questions liées au tournoi.
 
-We can't wait to see what you create with Chainlit! Happy coding! 💻😊
+## Query examples / Exemples de requêtes 💬🔗
 
-## Welcome screen
+- **Which teams have qualified for the quarter-finals?**
+- **Who are the competition's top scorers?**
+- **List of all CIV matches**
+- **How many cards have been handed out?**
 
-To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just remove this file.
+===
+
+- **Quelles sont les équipes qualifiées pour les 1/4 de finales**
+- **Qui sont les meilleurs buteurs de la compétition ?**
+- **Liste moi tous les matchs de la CIV**
+- **Combien de cartons ont été distribués ?**
+
+
+> That's it ! Now you can talk with AkwabaGPT 💻😊.
+
+> Voilà, c'est fait ! Vous pouvez maintenant parler avec AkwabaGPT 💻😊.
config.py → configs.py
RENAMED

@@ -2,7 +2,7 @@ import os
 
 # Model metadata
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-CHAT_MODEL = "gpt-3.5-turbo"
+CHAT_MODEL = "gpt-4-0314"  # "gpt-3.5-turbo"
 
 # vectorstore
 STORE_FILE = "./data/chroma_db"
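Only a few lines of configs.py are visible in this diff, but the other modules reference more settings: configs.CHUNK_SIZE and configs.CHUNK_OVERLAP in utils.py, configs.DOCS_STORAGE_PATH and configs.HIISTORY_FILE in app.py. A hypothetical reconstruction of the full file, with every value not shown in the diff marked as an assumption:

# configs.py -- hypothetical reconstruction; only OPENAI_API_KEY, CHAT_MODEL
# and STORE_FILE appear in this diff. The remaining names are inferred from
# their usage in app.py and utils.py, and their values are assumptions.
import os

# Model metadata
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
CHAT_MODEL = "gpt-4-0314"  # "gpt-3.5-turbo"

# vectorstore
STORE_FILE = "./data/chroma_db"
CHUNK_SIZE = 1024       # assumed value, used by utils.process_documents
CHUNK_OVERLAP = 128     # assumed value, used by utils.process_documents

# data locations
DOCS_STORAGE_PATH = "./data"          # assumed value, used by app.py
HIISTORY_FILE = "./data/history.txt"  # spelling as referenced in app.py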
data/history.txt
ADDED
File without changes
prompts.py
CHANGED

@@ -1,17 +1,6 @@
 from langchain.prompts import ChatPromptTemplate
 
-template = """
-
-Ton role est de :
-- Fournir des informations sur le tournoi,
-- Donner les détails sur les matchs tels que les horaires des matchs, les résultats, les meilleurs buteurs
-- Donner les détails sur les stades
-- Donner le nombre de cartons jaunes ou rouges
-
-Ta réponse doit être basée sur les documents fournis comme contexte pour tes réponses.
-Ta réponse doit être concise et claire.
-Ne tente pas d'inventer une réponse.
-Si la question ne concerne pas la Coupe d'Afrique des Nations, il faut informer poliment que tu n'es autorisé à répondre uniquement aux questions portant sur la Coupe d'Afrique des Nations.
+template = """AkwabaGpt is a multilingual assistant designed to help visitors during the Africa Cup of Nations 2023 in Côte d'Ivoire. It provides information about the tournament, including details on stadiums, match schedules, results, top scorers, and answers other tournament-related queries. Additionally, AkwabaGpt offers general information about Côte d'Ivoire. The data comes from user-provided files, not online sources, and does not include real-time updates. When asked for a match summary, AkwabaGpt provides the names of the two teams and the scores. AkwabaGpt prioritizes accuracy and clarity in delivering information, ensuring users have a comprehensive understanding of the tournament and its surrounding context. AkwabaGpt is not designed for in-depth analysis or opinions but focuses on factual data delivery. The assistant will avoid speculation, predictions, or subjective commentary on matches or teams. It will refrain from providing personal opinions or engaging in debates. For clarity, AkwabaGpt will ask for file uploads if needed and avoid guessing or making assumptions about incomplete data.
 
 #########
 {context}
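The diff truncates prompts.py right after the {context} block, so the rest of the template and the construction of the prompt object that app.py imports are not shown. A sketch of the usual wiring, assuming a {question} placeholder follows the context section:

# Sketch only: the {question} slot and the from_template call are assumptions;
# the diff shows the template text only up to the {context} section.
from langchain.prompts import ChatPromptTemplate

template = """AkwabaGpt is a multilingual assistant ... (system text as above)

#########
{context}
#########

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# prompt.format_messages(context="...", question="...") then yields the
# message list that the model receives.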
public/favicon.png
ADDED
public/logo_dark.png
ADDED
public/logo_light.png
ADDED
utils.py
CHANGED

@@ -7,7 +7,7 @@ from langchain.vectorstores.chroma import Chroma
 from langchain_community.document_loaders import TextLoader
 from langchain_openai import OpenAIEmbeddings
 
-import config
+import configs
 
 embeddings_model = OpenAIEmbeddings()
 
@@ -17,18 +17,19 @@ def process_documents(doc_storage_path: str):
     doc_directory = Path(doc_storage_path)
     docs = []  # type: List[Document]
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=config.CHUNK_SIZE, chunk_overlap=config.CHUNK_OVERLAP
+        chunk_size=configs.CHUNK_SIZE, chunk_overlap=configs.CHUNK_OVERLAP
     )
     doc_search = Chroma(
-        persist_directory=config.STORE_FILE, embedding_function=embeddings_model
+        persist_directory=configs.STORE_FILE, embedding_function=embeddings_model
     )
     for file_path in doc_directory.glob("*.txt"):
-        print(str(file_path))
         loader = TextLoader(str(file_path))
         documents = loader.load()
         docs = text_splitter.split_documents(documents)
-        doc_search.from_documents(
-            docs, embeddings_model)
+        doc_search = doc_search.from_documents(
+            docs, embeddings_model, persist_directory=configs.STORE_FILE
+        )
+        doc_search.persist()
     print("doc preprocessing end.")
     return doc_search
 
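One thing worth noting about the new loop body: from_documents is a classmethod, so doc_search = doc_search.from_documents(...) replaces the Chroma instance on every file, and the index only accumulates because each call writes into the same persist directory. A sketch of an equivalent single-pass variant, under the same configs assumptions as above (process_documents_once is hypothetical, not part of this commit):

# Sketch of a single-pass alternative (not the committed code): split every
# file first, then build and persist the store with one from_documents call.
from pathlib import Path

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings

import configs


def process_documents_once(doc_storage_path: str) -> Chroma:
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=configs.CHUNK_SIZE, chunk_overlap=configs.CHUNK_OVERLAP
    )
    docs = []
    for file_path in Path(doc_storage_path).glob("*.txt"):
        # each .txt becomes one Document, then gets chunked
        docs.extend(splitter.split_documents(TextLoader(str(file_path)).load()))
    # one call builds the index and persists it to disk
    return Chroma.from_documents(
        docs, OpenAIEmbeddings(), persist_directory=configs.STORE_FILE
    )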