import click
import numpy as np
import os
import tiktoken
from typing import List, Tuple
import requests
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from langchain_community.chat_models import ChatOllama
from basesrc.strategies import convert_pdf_to_text, split_pages_into_chunks, vectorize
from prompt_query import generate_prompt
# load_dotenv()
# CACHE_FOLDER = os.environ["CACHE_FOLDER"]
CACHE_FOLDER = None  # "./cache" if needed (¨_^)
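
# The helpers imported from basesrc.strategies are not shown in this file.
# For orientation, here is a minimal sketch (an assumption, never called) of
# what split_pages_into_chunks is taken to do, given how it is used below:
# greedily slice each page's token stream into windows of at most
# `chunk_size` tokens and decode each window back to text.
def _split_pages_into_chunks_sketch(pages: List[str], chunk_size: int, tokenizer) -> List[str]:
    chunks: List[str] = []
    for page in pages:
        tokens = tokenizer.encode(page)
        for start in range(0, len(tokens), chunk_size):
            chunks.append(tokenizer.decode(tokens[start:start + chunk_size]))
    return chunks
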
# The usage example at the bottom of this file passes CLI flags, and main()
# is called without arguments, so the function needs click decorators.
# The --pdf_path default is an assumption: the usage example never passes it.
@click.command()
@click.option("--pdf_url", type=str, required=True, help="URL of the PDF to download.")
@click.option("--pdf_path", type=str, default="document.pdf", help="Local path where the PDF is saved.")
@click.option("--embedding_model", type=str, required=True, help="SentenceTransformer model name.")
@click.option("--llm_model", type=str, required=True, help="Ollama model name.")
@click.option("--top_k", type=int, default=5, help="Number of chunks retrieved per question.")
def main(pdf_url: str, pdf_path: str, embedding_model: str, llm_model: str, top_k: int):
    # os.makedirs(CACHE_FOLDER, exist_ok=True)
    # Download the PDF and save it locally.
    response = requests.get(pdf_url)
    with open(pdf_path, 'wb') as f:
        f.write(response.content)
    pages = convert_pdf_to_text(pdf_path)
    print(
        """
        I am Llama3, from the DREAMS TEAM, your QA assistant, here to answer your questions about documents 🙂 !
        For your information, the number of pages in your document is: """, len(pages)
    )
    # Tokenize and split the pages into 128-token chunks.
    tokenizer = tiktoken.get_encoding("cl100k_base")
    chunks = split_pages_into_chunks(pages, 128, tokenizer)
    # Embed every chunk; knowledge_base holds (chunk, embedding) pairs.
    embedding_model = SentenceTransformer(embedding_model)
    knowledge_base = vectorize(chunks, embedding_model)
    chunks, embeddings = list(zip(*knowledge_base))
    corpus_embeddings = np.vstack(embeddings)  # shape: (num_chunks, embedding_dim)
    llm_model = ChatOllama(model=llm_model)
    print('📑 Here is the content of the first page of the document 😎:\n', pages[0])
    keep_looping = True
    while keep_looping:
        try:
            question = input("Enter your question ✍️ | (or type 'exit' to quit) ✨: ")
            if question.lower() == 'exit':
                break
            # Retrieve the top_k most relevant chunks and query the LLM.
            response = generate_prompt(question, chunks, corpus_embeddings, embedding_model, llm_model, top_k)
            colored_response = f"Llama3 : {response}"  # Llama's answer
            print(colored_response)
        except KeyboardInterrupt:
            print("\nEnd of chat session 👋.")
            keep_looping = False
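
# Illustrative sketch only (never called): a minimal top-k retrieval of the
# kind prompt_query.generate_prompt is assumed to perform before prompting
# the LLM. The real implementation lives in prompt_query.py; every name and
# step here is an assumption inferred from the arguments passed above.
def _retrieve_top_k_sketch(question: str, chunks, corpus_embeddings: np.ndarray,
                           embedding_model: SentenceTransformer, top_k: int):
    # Embed the question with the same model used for the corpus.
    query_embedding = embedding_model.encode([question])  # shape: (1, dim)
    # Cosine similarity between the question and every chunk embedding.
    norms = np.linalg.norm(corpus_embeddings, axis=1) * np.linalg.norm(query_embedding)
    scores = (corpus_embeddings @ query_embedding.T).ravel() / norms
    # Keep the top_k highest-scoring chunks, best first.
    best = np.argsort(scores)[::-1][:top_k]
    return [chunks[i] for i in best]
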
if __name__ == "__main__":
    main()

"""
export CACHE_FOLDER="./cache"
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
python main.py --pdf_url "https://hellofuture.orange.com/app/uploads/2024/05/2024-Orange-white-paper-on-Mobile-Network-Technology-Evolutions-Beyond-2030.pdf" --embedding_model "Sahajtomar/french_semantic" --llm_model "llama3" --top_k 5
"""