Spaces:

MefhigosetH
/

Soy-Harry-Potter

Sleeping

App Files Community

MefhigosetH committed on Oct 20, 2024

Commit

b6a664f

1 Parent(s): 66f7e10

Implementamos Llama.cpp y HuggingFace Hub.

Browse files

Files changed (3) hide show

.gitignore +1 -0
Pipfile +2 -0
app.py +23 -22

.gitignore CHANGED Viewed

@@ -1,2 +1,3 @@
 .env
 Pipfile.lock

 .env
+*.gguf
 Pipfile.lock

Pipfile CHANGED Viewed

@@ -10,6 +10,8 @@ langchain-huggingface = "*"
 langchain = "*"
 langchain-core = "*"
 transformers = "*"
 [dev-packages]

 langchain = "*"
 langchain-core = "*"
 transformers = "*"
+llama-cpp-python = "*"
+langchain-community = "*"
 [dev-packages]

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
-from langchain_huggingface import HuggingFaceEndpoint, HuggingFacePipeline
-from langchain_core.prompts import PromptTemplate
 from langchain.globals import set_verbose, set_debug
 import os
@@ -10,17 +11,18 @@ def isDevelopmentEnv():
 def initPrompt():
-    template = """[INST]Tu eres Harry Potter, el estudiante de magia más hábil de todo el mundo mágico.
     Responde amablemente a la consulta del usuario basado en la información disponible y a las siguientes reglas:
     1. Si no sabes la respuesta, pide al usuario que intente reformular su consulta.
     2. Responde siempre en idioma Español.
     3. Da respuestas únicamente relacionadas al mundo mágico.
-    Consulta: {question}
-    [/INST]
     """
-    prompt = PromptTemplate.from_template(template)
     return prompt
@@ -30,21 +32,19 @@ def initLLM():
     Inicializamos el modelo LLM.
     Otros modelos que podríamos usar:
-        - meta-llama/Meta-Llama-3.1-8B-Instruct
-        - HuggingFaceH4/zephyr-7b-beta
     """
-    model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-    llm = HuggingFaceEndpoint(
-        repo_id = model_id,
-        task = "text-generation",
-        temperature = 0.5,
-        model_kwargs = {
-            "min_length": 200,
-            "max_length": 2000,
-            "num_return_sequences": 1
-        }
     )
     return llm
@@ -55,11 +55,12 @@ def respond(message, history):
     response = ""
     try:
-        response = llm_chain.invoke(message)
     except:
         raise gradio.Error("Se ha producido un error al interactuar con el modelo LLM.", duratio=5)
-    return response

 import gradio as gr
+from huggingface_hub import hf_hub_download
+from langchain_community.chat_models import ChatLlamaCpp
+from langchain_core.prompts import ChatPromptTemplate
 from langchain.globals import set_verbose, set_debug
 import os
 def initPrompt():
+    system_prompt = """Tu eres Harry Potter, el estudiante de magia más hábil de todo el mundo mágico.
     Responde amablemente a la consulta del usuario basado en la información disponible y a las siguientes reglas:
     1. Si no sabes la respuesta, pide al usuario que intente reformular su consulta.
     2. Responde siempre en idioma Español.
     3. Da respuestas únicamente relacionadas al mundo mágico.
     """
+    prompt = ChatPromptTemplate.from_messages([
+      ("system", system_prompt),
+      ("human", "{input}"),
+    ])
     return prompt
     Inicializamos el modelo LLM.
     Otros modelos que podríamos usar:
+        * bartowski/Llama-3.2-1B-Instruct-GGUF
+        * HuggingFaceH4/zephyr-7b-beta
     """
+    model_path = hf_hub_download(repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF", filename="Llama-3.2-1B-Instruct-Q5_K_S.gguf")
+    llm = ChatLlamaCpp(
+        model_path=model_path,
+        temperature=0.7,
+        max_tokens=500,
+        top_p=1,
+        # callback_manager=callback_manager,
+        # verbose=True,  # Verbose is required to pass to the callback manager
     )
     return llm
     response = ""
     try:
+        response = llm_chain.invoke({"input": message})
     except:
         raise gradio.Error("Se ha producido un error al interactuar con el modelo LLM.", duratio=5)
+    print(response)
+    return response.content