# Think Paraguayo
"""Gradio chat application that answers questions (in Spanish) about the
book "Ñande Ypykuéra", using a quantized Mistral-7B model served by
llama-cpp-python and ColBERTv2 retrieval (RAGatouille) for grounding."""
import os
import random
import time

# Download the quantized model once; skip if it already exists on disk so
# restarts do not re-fetch a multi-GB file.
MODEL_FILE = "model.gguf"
if not os.path.exists(MODEL_FILE):
    os.system("wget https://huggingface.co/thinkPy/gua-a_v0.2-dpo_mistral-7b_GGUF/resolve/main/gua-a_v0.2-dpo_mistral-7b_q4_K_M.gguf -O model.gguf")

from llama_cpp import Llama
import gradio as gr
from ragatouille import RAGPretrainedModel
from llama_index.core import Document, SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter

# Context window / generation cap for the model (tokens).
max_seq_length = 256

# Prompt template: {context}, {question}, {answer-so-far (empty)}.
prompt = """Responde a preguntas de forma clara, amable, concisa y solamente en el lenguaje español, sobre el libro Ñande Ypykuéra.
Contexto
-------------------------
{}
-------------------------
### Pregunta:
{}

### Respuesta:
{}"""

# Initialize the LLM (CPU, 2 threads).
llm = Llama(model_path="model.gguf", n_ctx=max_seq_length, n_threads=2)

DOC_PATH = "/home/user/app/index"

RAG = RAGPretrainedModel.from_pretrained("AdrienB134/ColBERTv2.0-spanish-mmarcoES")
RAG = RAG.from_index(DOC_PATH, n_gpu=None)
# Warm-up query so the first real user request is not slowed by lazy init.
RAG.search("init", None, k=1)

# FIX: `css` was passed to gr.Blocks(css=css) below but never defined,
# which raised NameError at launch. Define it (empty = default styling).
css = ""


def reformat_rag(results_rag):
    """Extract the text content from RAG search results.

    Returns a list with a single empty string when the search returned
    nothing, so callers can always join the result safely.
    """
    if results_rag is not None:
        return [result["content"] for result in results_rag]
    else:
        return [""]


def chat_stream_completion(message, history):
    """Stream an answer to `message`, grounding the prompt in retrieved context.

    Yields the progressively growing reply string, as expected by
    gr.ChatInterface in streaming mode. `history` is unused.
    """
    context = reformat_rag(RAG.search(message, None, k=1))
    context = " \n ".join(context)
    full_prompt = prompt.format(context, message, "")
    print(full_prompt)
    response = llm.create_completion(
        prompt=full_prompt,
        temperature=0.01,
        max_tokens=max_seq_length,
        stream=True,
    )
    message_repl = ""
    for chunk in response:
        text = chunk["choices"][0]["text"]
        if text:  # skip empty deltas
            message_repl += text
            yield message_repl


def launcher():
    """Build and launch the Gradio UI: static image beside the RAG chatbot."""
    with gr.Blocks(css=css) as demo:
        gr.Markdown("# Think Paraguayo")
        gr.Markdown("## Conoce la cultura guaraní!!")
        with gr.Row(variant='panel'):
            with gr.Column(scale=1):
                gr.Image(value="think_paraguayo.jpeg", type="filepath", label="Imagen Estática")
            with gr.Column(scale=1):
                chatbot = gr.ChatInterface(
                    fn=chat_stream_completion,
                    retry_btn=None,
                    stop_btn=None,
                    undo_btn=None,
                ).queue()
        demo.launch()


if __name__ == "__main__":
    launcher()