from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

class LlmAgent:

    def __init__(self, model: str = "TheBloke/Llama-2-7B-chat-GPTQ", token: str = None):
        self.tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, token=token, legacy=False)
        self.model = AutoModelForCausalLM.from_pretrained(model,
                                                          device_map="cuda:0",
                                                          trust_remote_code=False,  # to adjust depending on the model; the default works for Llama 2
                                                          revision="main", token=token)
        self.pipe = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer,
                             torch_dtype=torch.float16, max_new_tokens=256, repetition_penalty=1.1,
                             top_k=40, top_p=0.95, temperature=0.7, do_sample=True,
                             return_full_text=False)

    def generate_paragraph(self, query: str, context: dict, histo: list[tuple[str, str]], language: str = 'fr') -> str:
        """Generates the answer to the query from the context and the conversation history."""
        torch.cuda.empty_cache()
        template = f'''[INST] <<SYS>>
                    You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
                    If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
                    You are a conversation bot designed to answer the queries from users.
                    Your answer is based on the context delimited by triple backticks:
                    ``` {context} ```
                    You are consistent and avoid redundancies with the rest of the initial conversation delimited by triple backticks:
                    ``` {histo} ```
                    Your response shall be in {language} and shall be concise.
                    You should respect the following format:
                    <response>
                    <</SYS>>
                    {query}[/INST]'''
        output = self.pipe(template)
        res = output[0]["generated_text"]
        print(res)
        return res

    def translate(self, text: str, language="en") -> str:
        """Translates the text into English."""
        torch.cuda.empty_cache()
        template = f'''[INST] <<SYS>>
                    You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
                    If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
                    Your task is to translate the following text into English:
                    <</SYS>>
                    {text}[/INST]'''
        output = self.pipe(template)
        res = output[0]["generated_text"]
        return res

    def generate_answer(self, query: str, answer: str, histo: str, context: str, language: str) -> str:
        """Provides the final answer in {language} based on the initial query and the answer in English."""
        torch.cuda.empty_cache()

        def _cut_unfinished_sentence(s: str):
            return '.'.join(s.split('.')[:-1])

        template = f'''[INST] <<SYS>>
                    You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
                    If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
                    Your task is to translate the answer into {language}, if it is not already in that language, for the query delimited by triple backticks: ```{query}```
                    You don't add new content to the answer but:
                    1. You can use some vocabulary from the context delimited by triple backticks: ```{context}```
                    2. You are consistent and avoid redundancies with the rest of the initial conversation delimited by triple backticks: ```{histo}```
                    Your response shall respect the following format:
                    <response>
                    You are given the following answer:
                    <</SYS>>
                    {answer}[/INST]'''
        output = self.pipe(template)
        res = output[0]["generated_text"]
        print(res)
        return res

    def transform_parahraph_into_question(self, prompt: str, title_doc: str = '', title_para: str = '') -> str:
        """Generates two questions about the given paragraph."""
        torch.cuda.empty_cache()
        max_tokens = 80
        pipeline_modified = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer,
                                     torch_dtype=torch.float16, max_new_tokens=max_tokens,
                                     repetition_penalty=1.1, top_k=40, top_p=0.95, temperature=0.7,
                                     do_sample=True, return_full_text=False)

        prompt_template = f'''[INST] <<SYS>>
        You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
        If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
        Your job is to create a question about a paragraph of a document entitled "{title_doc}".
        The paragraph title is "{title_para}".
        If you see that the question you are creating will not respect {max_tokens} tokens, find a way to make it shorter.
        If you can't create a question about the paragraph, just rephrase "{title_para}" so that it becomes a question.
        Your response shall contain two questions, shall be concise and shall respect the following format:
        "Question: <question1>\\nQuestion: <question2>\\n"
        You should not answer the questions, just create them.
        The paragraph you need to create a question about is the following:
        <</SYS>>
        {prompt}[/INST]'''
        output = pipeline_modified(prompt_template)
        res = output[0]["generated_text"]
        print(res)
        return res
    
    def detect_language(self, text: str) -> str:
        """Detects the language of the text."""
        torch.cuda.empty_cache()
        template = f'''[INST] <<SYS>>
                    You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
                    If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
                    Your task is to detect the language of the user query.
                    Your answer shall be the two-letter code of the language and shall respect the following format:
                    <code>
                    <</SYS>>
                    {text}[/INST]'''
        output = self.pipe(template)
        res = output[0]["generated_text"]
        return res
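

# Minimal usage sketch, assuming a CUDA GPU, the GPTQ runtime dependencies required by
# the quantized default checkpoint, and (optionally) a Hugging Face access token.
# The query and context values below are illustrative placeholders.
if __name__ == "__main__":
    agent = LlmAgent()  # defaults to "TheBloke/Llama-2-7B-chat-GPTQ"
    language = agent.detect_language("Quelle est la capitale de la France ?")
    answer = agent.generate_paragraph(
        query="Quelle est la capitale de la France ?",
        context={"doc": "Paris est la capitale et la plus grande ville de France."},
        histo=[],
        language="fr",
    )
    print(language)
    print(answer)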