Commit 4de2d8b
adrien.aribaut-gaudin committed
1 parent: 2ca433f

fix: changing control.py so that it doesn't execute unnecessary functions, and changing llm.py to better prompts and better responses

Files changed:
- src/control/control.py +5 -2
- src/tools/llm.py +58 -38
src/control/control.py CHANGED
@@ -11,13 +11,16 @@ class Chatbot:
     def get_response(self, query, histo):
         histo_conversation, histo_queries = self._get_histo(histo)
         langage_of_query = self.llm.detect_language(query).lower()
-        queries = self.llm.translate(text=histo_queries)
+        if langage_of_query != "en":
+            queries = self.llm.translate(text=histo_queries)
+        else:
+            queries = histo_queries
         block_sources = self.retriever.similarity_search(query=queries)
         block_sources = self._select_best_sources(block_sources)
         sources_contents = [s.content for s in block_sources]
         context = '\n'.join(sources_contents)
         answer = self.llm.generate_paragraph(query=queries, histo=histo_conversation, context=context, language=langage_of_query)
-        answer = self.llm.generate_answer(answer=answer, query=query, histo=histo_conversation, context=context,language=langage_of_query)
+        # answer = self.llm.generate_answer(answer=answer, query=query, histo=histo_conversation, context=context,language=langage_of_query)
         # print(answer.split('bot:')[1].strip())
         # print("*************")
         # answer = self._clean_answer(answer)
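Note: the effect of this change is that the expensive translate() generation pass now runs only when the detected query language is not English, rather than unconditionally. A minimal standalone sketch of the same gating pattern (FakeLlm and the sample strings are hypothetical stand-ins for the repo's LlmAgent, for illustration only):

    # Language-gated translation, mirroring the new get_response flow.
    class FakeLlm:
        def detect_language(self, text: str) -> str:
            # Stand-in heuristic; the real agent prompts a model for this.
            return "fr" if "bonjour" in text.lower() else "en"

        def translate(self, text: str) -> str:
            # Stand-in for the pipeline-backed translation call.
            return f"<{text} translated to English>"

    def prepare_queries(llm: FakeLlm, query: str, histo_queries: str) -> str:
        language = llm.detect_language(query).lower()
        if language != "en":
            # Only pay for a generation pass when translation is needed.
            return llm.translate(histo_queries)
        return histo_queries

    print(prepare_queries(FakeLlm(), "bonjour", "bonjour tout le monde"))  # translated
    print(prepare_queries(FakeLlm(), "hello", "hello world"))              # passed through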
src/tools/llm.py CHANGED
@@ -12,13 +12,14 @@ class LlmAgent:
             device_map="cuda:0",
             trust_remote_code=False, # TO CHANGE DEPENDING ON THE MODEL; FOR THE LLAMA 2 ONE (the default) IT WORKS
             revision="main",token=token)
-        self.pipe = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer,torch_dtype=torch.float16)
+        self.pipe = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer,torch_dtype=torch.float16,max_new_tokens=256,repetition_penalty=1.1,top_k=40,top_p=0.95,temperature=0.7,do_sample=True,return_full_text=False)

     def generate_paragraph(self, query: str, context: {}, histo: [(str, str)], language='fr') -> str:
         torch.cuda.empty_cache()
-        locallm = HuggingFacePipeline(pipeline=self.pipe)
         """generates the answer"""
         template = (f'''[INST] <<SYS>>"
+        You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
+        If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
         "You are a conversation bot designed to answer to the query from users"
         "Your answer is based on the context delimited by triple backticks: "
         "\\n ``` {context} ```\\n"
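The constructor change moves all decoding parameters into the one shared pipeline and drops the LangChain HuggingFacePipeline wrapper. A hedged sketch of what that configuration does, written standalone (the model id is an assumption for illustration; the Space loads self.model and self.tokenizer with its own token):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

    model_id = "meta-llama/Llama-2-7b-chat-hf"  # assumed checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="cuda:0", torch_dtype=torch.float16)

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        max_new_tokens=256,      # cap on tokens generated per call
        repetition_penalty=1.1,  # discourage verbatim loops
        top_k=40,                # sample among the 40 most likely tokens
        top_p=0.95,              # nucleus-sampling cutoff
        temperature=0.7,
        do_sample=True,          # sampling instead of greedy decoding
        return_full_text=False,  # return only the completion, not prompt + completion
    )

    out = pipe("[INST] <<SYS>>\nAnswer concisely.\n<</SYS>>\nWhat is retrieval-augmented generation?[/INST]")
    print(out[0]["generated_text"])

return_full_text=False is what lets the rewritten methods below read pipe[0]["generated_text"] directly instead of stripping the prompt off the output.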
@@ -26,43 +27,48 @@ class LlmAgent:
         "delimited by triple backticks: "
         "\\n ``` {histo} ```\\n"
         "Your response shall be in {language} and shall be concise"
+        You should respect the following format: "
+        <response>"
         "\\n <</SYS>>"
         "\\n {query}[/INST]''')
-
-        llm_chain = LLMChain(prompt=prompt,llm=locallm)
-        p = llm_chain.predict()
+        pipe = self.pipe(template)
         # print("****************")
         # print(template)
         # print("----")
-
-
+        res = pipe[0]["generated_text"]
+        print(res)
+        return res

     def translate(self, text: str, language="en") -> str:
         torch.cuda.empty_cache()
-        locallm = HuggingFacePipeline(pipeline=self.pipe)
         """translates"""

         # languages = "`French to English" if language == "en" else "English to French"

-
-
+        template = (f'''[INST] <<SYS>>
+        You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
+        If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
+        Your task consists in translating in English\\n"
         the following text:
         <</SYS>>
         {text}[/INST]'''
         )

-
-
-
-
+        pipe = self.pipe(template)
+        # print("****************")
+        # print(template)
+        # print("----")
+        res = pipe[0]["generated_text"]
+        return res

     def generate_answer(self, query: str, answer: str, histo: str, context: str,language : str) -> str:
         torch.cuda.empty_cache()
         """provides the final answer in {language} based on the initial query and the answer in english"""
         def _cut_unfinished_sentence(s: str):
             return '.'.join(s.split('.')[:-1])
-        locallm = HuggingFacePipeline(pipeline=self.pipe)
         template = (f'''[INST] <<SYS>>
+        You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
+        If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
         Your task consists in translating the answer in {language}, if its not already the case, to the query "
         delimited by triple backticks: ```{query}``` \\n"
         \\n You don't add new content to the answer but: "
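All of the rewritten prompts follow the Llama-2-chat convention: a system block inside <<SYS>> ... <</SYS>>, with the whole turn wrapped in [INST] ... [/INST]. A tiny illustrative helper (the function name is hypothetical; the committed code inlines these f-strings):

    def build_llama2_prompt(system: str, user: str) -> str:
        # Single-turn Llama-2-chat format: system prompt inside <<SYS>>,
        # the full turn inside [INST] ... [/INST].
        return f"[INST] <<SYS>>\n{system}\n<</SYS>>\n{user}[/INST]"

    print(build_llama2_prompt(
        "Your task consists in detecting the language of the user query. "
        "Answer with the two-letter language code.",
        "Bonjour, comment allez-vous ?",
    ))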
@@ -70,21 +76,25 @@ class LlmAgent:
         ```{context}```"
         \\n 2 You are consistent and avoid redundancies with the rest of the initial"
         conversation delimited by triple backticks: ```{histo}```"
+        Your response shall respect the following format: "
+        <response>"
         You are given the answer in {language}:
         <</SYS>>
         {answer}[/INST]'''
         )
-
-
-
-        #
-
+        pipe = self.pipe(template)
+        # print("****************")
+        # print(template)
+        # print("----")
+        res = pipe[0]["generated_text"]
+        print(res)
+        return res


     def transform_parahraph_into_question(self, prompt : str, title_doc : str = '',title_para : str = '') -> str:
         torch.cuda.empty_cache()
-
-
+        max_tokens = 80
+        pipeline_modified = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer,torch_dtype=torch.float16,max_new_tokens=max_tokens,repetition_penalty=1.1,top_k=40,top_p=0.95,temperature=0.7,do_sample=True,return_full_text=False)

         prompt_template=f'''[INST] <<SYS>>
         You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
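Design note: transform_parahraph_into_question builds a second pipeline only to lower max_new_tokens to 80. The same effect should be achievable without a second pipeline, since a transformers text-generation pipeline also accepts generation kwargs at call time. A runnable sketch under that assumption (distilgpt2 stands in for the real model):

    from transformers import pipeline

    # One shared pipeline; the token budget is overridden per call.
    pipe = pipeline("text-generation", model="distilgpt2", return_full_text=False)
    out = pipe("Write a question about retrieval:", max_new_tokens=80, do_sample=True)
    print(out[0]["generated_text"])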
@@ -92,35 +102,45 @@ class LlmAgent:
         Your job is to create a question about a paragraph of a document untitled "{title_doc}".
         The paragraph title is "{title_para}".
         If you see that the question that you are creating will not respect {max_tokens} tokens, find a way to make it shorter.
-        If you see that the document paragraph seems to be code flattened, try to analyze it and create a question about it.
-        If you see that the paragraph is a table, try to create a question about it.
         If you can't create a question about the paragraph, just rephrase {title_para} so that it becomes a question.
-        Your response shall
-        "Question: <
+        Your response shall contains two questions, shall be concise and shall respect the following format:
+        "Question: <question1>\\nQuestion: <question2>\\n"
+        You should not answer to the question, just create it.
         The paragraph you need to create a question about is the following :
         <</SYS>>
         {prompt}[/INST]

         '''
-
-
-
-
-
-
-
+        pipe = pipeline_modified(prompt_template)
+        # print("****************")
+        # print(template)
+        # print("----")
+        # filter the answer to only keep the question
+        res = pipe[0]["generated_text"]
+        # res = res.split("Question: ")
+        # res1 = res[1]
+        # res2 = res[2]
+        # print(res1)
+        # print(res2)
+        print(res)
+        return res

     def detect_language(self, text: str) -> str:
         torch.cuda.empty_cache()
         """detects the language"""
-        locallm = HuggingFacePipeline(pipeline=self.pipe)
         template = (f'''[INST] <<SYS>>
+        You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
+        If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
         Your task consists in detecting the language of the user query"
         Your answer shall be the two letters code of the language"
+        and should respect the following format: "
+        <code>"
         \\n <</SYS>>"
         \\n {text}[/INST]'''
         )
-
-
-
-
+        pipe = self.pipe(template)
+        # print("****************")
+        # print(template)
+        # print("----")
+        res = pipe[0]["generated_text"]
+        return res
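The commented-out lines in transform_parahraph_into_question sketch the intended post-processing: splitting the completion on the "Question: " prefix that the prompt requests. A hypothetical helper completing that sketch (name and sample text are illustrative, not part of the commit):

    def extract_questions(generated_text: str) -> list[str]:
        # The prompt asks for "Question: <question1>\nQuestion: <question2>\n",
        # so split on the prefix and drop empty fragments.
        parts = [p.strip() for p in generated_text.split("Question:")]
        return [p for p in parts if p]

    raw = "Question: What does the retriever return?\nQuestion: How are sources ranked?\n"
    print(extract_questions(raw))  # ['What does the retriever return?', 'How are sources ranked?']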