adrien.aribaut-gaudin committed
Commit ed437ad
1 Parent(s): b548316

updating to real llama2

Files changed (2)
  1. app.py +2 -2
  2. src/tools/llm.py +3 -2
app.py CHANGED
@@ -36,12 +36,12 @@ client_db = chromadb.Client()
 
 try:
     client_db.get_collection(name="illumio_database")
-    llm = LlmAgent(model="TheBloke/Llama-2-7b-Chat-GPTQ")
+    llm = LlmAgent(model="meta-llama/Llama-2-7b")
     retriever = Retriever(client_db, None, "illumio_database", llmagent=llm)
 except:
     print("Database is empty")
     doc = Doc(path=content_en_path_real)
-    llm = LlmAgent(model="TheBloke/Llama-2-7b-Chat-GPTQ")
+    llm = LlmAgent(model="meta-llama/Llama-2-7b")
     retriever = Retriever(client_db, doc.container, "illumio_database", llmagent=llm)
 
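A side note on the surrounding logic rather than the commit itself: the bare except: catches every exception, not just a missing collection. A minimal editorial sketch of a narrower alternative, using chromadb's get_or_create_collection and Collection.count() (Doc, Retriever, LlmAgent, and content_en_path_real are the names already used in app.py; this is not part of the commit):

import chromadb

client_db = chromadb.Client()
llm = LlmAgent(model="meta-llama/Llama-2-7b")

# get_or_create_collection returns the existing collection or creates an empty
# one, so no exception handling is needed to detect the first-run state
collection = client_db.get_or_create_collection(name="illumio_database")
if collection.count() == 0:
    print("Database is empty")
    doc = Doc(path=content_en_path_real)
    retriever = Retriever(client_db, doc.container, "illumio_database", llmagent=llm)
else:
    retriever = Retriever(client_db, None, "illumio_database", llmagent=llm)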
src/tools/llm.py CHANGED
@@ -2,16 +2,17 @@ from langchain.llms.huggingface_pipeline import HuggingFacePipeline
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
+import torch
 
 class LlmAgent:
 
-    def __init__(self, model: str = "TheBloke/Llama-2-7b-Chat-GPTQ"):
+    def __init__(self, model: str = "meta-llama/Llama-2-7b"):
         self.tokenizer = AutoTokenizer.from_pretrained(model, use_fast=True)
         self.model = AutoModelForCausalLM.from_pretrained(model,
                                                           device_map="cuda",
                                                           trust_remote_code=False,  # change depending on the model; for Llama-2 the default works
                                                           revision="main")
-        self.pipe = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer)
+        self.pipe = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer, torch_dtype=torch.float16)
 
     def generate_paragraph(self, query: str, context: {}, histo: [(str, str)], language='fr') -> str:
         locallm = HuggingFacePipeline(pipeline=self.pipe)
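Two caveats on this change are worth flagging. The transformers-format Llama-2 checkpoints on the Hub are published under an -hf suffix (e.g. meta-llama/Llama-2-7b-hf; the plain meta-llama/Llama-2-7b repo holds the original Meta-format weights), and the meta-llama repos are gated, so from_pretrained needs granted access. Also, passing torch_dtype to pipeline() after the model is already instantiated may not convert it, depending on the transformers version; passing it to from_pretrained is unambiguous and keeps the weights in half precision from load time. A minimal sketch under those assumptions (the -hf variant, access granted):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# assumption: the -hf (transformers-format) checkpoint, not the commit's model id
model_id = "meta-llama/Llama-2-7b-hf"

tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
# torch_dtype at load time keeps the weights in fp16 end to end,
# roughly halving GPU memory versus a default fp32 load
model = AutoModelForCausalLM.from_pretrained(model_id,
                                             device_map="cuda",
                                             torch_dtype=torch.float16)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)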