from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import torch
class LlmAgent:
    """Prompt-driven helper around a locally loaded causal language model.

    The constructor loads a tokenizer and a model with ``from_pretrained`` and
    wraps them in one shared ``text-generation`` pipeline (``self.pipe``).
    Every public method formats an ``[INST] ... [/INST]`` prompt, runs it
    through that pipeline and returns the generated text.

    NOTE(review): the bare ``<>`` markers in the prompts look like truncated
    Llama-2 ``<<SYS>>`` / ``<</SYS>>`` system tags, and the "following format"
    sections are empty -- confirm against the intended prompt format. The
    prompt texts are deliberately left untouched here.
    """

    def __init__(self, model: str = "TheBloke/Llama-2-7B-chat-GPTQ", token: str = None):
        """Load the tokenizer and model, then build the shared pipeline.

        Args:
            model: Model identifier forwarded to ``from_pretrained``.
            token: Access token forwarded to ``from_pretrained``; may be None.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(
            model, use_fast=False, token=token, legacy=False
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            model,
            device_map="cuda:0",
            # Adjust per model; False works for the default Llama 2 checkpoint.
            trust_remote_code=False,
            revision="main",
            token=token,
        )
        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            torch_dtype=torch.float16,
            max_new_tokens=256,
            repetition_penalty=1.1,
            top_k=40,
            top_p=0.95,
            temperature=0.7,
            do_sample=True,
            return_full_text=False,
        )

    def _generate(self, prompt: str, **generate_kwargs) -> str:
        """Run *prompt* through the shared pipeline and return the generated text.

        Args:
            prompt: Fully formatted prompt.
            **generate_kwargs: Per-call overrides forwarded to ``self.pipe``
                (for example ``max_new_tokens``).

        Returns:
            The ``"generated_text"`` field of the first pipeline output.
        """
        # Free cached CUDA blocks before each generation, as every public
        # method did originally.
        torch.cuda.empty_cache()
        outputs = self.pipe(prompt, **generate_kwargs)
        return outputs[0]["generated_text"]

    def generate_paragraph(self, query: str, context: dict, histo: list, language='fr') -> str:
        """Generate an answer to *query* from *context* and the conversation history.

        Args:
            query: The user query, placed at the end of the prompt.
            context: Context interpolated into the prompt via its ``str()`` form.
            histo: Conversation history interpolated via its ``str()`` form
                (originally annotated as a list of ``(str, str)`` pairs).
            language: Language the prompt asks the model to answer in.

        Returns:
            The generated text (also printed to stdout).
        """
        # The continuation lines of the prompt stay at column 0 on purpose:
        # any indentation would become part of the text sent to the model.
        prompt = f'''[INST] <>"
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
"You are a conversation bot designed to answer to the query from users"
"Your answer is based on the context delimited by triple backticks: "
"\\n ``` {context} ```\\n"
" You are consistent and avoid redundancies with the rest of the initial conversation "
"delimited by triple backticks: "
"\\n ``` {histo} ```\\n"
"Your response shall be in {language} and shall be concise"
You should respect the following format: "
"
"\\n <>"
"\\n {query}[/INST]'''
        res = self._generate(prompt)
        print(res)
        return res

    def translate(self, text: str, language="en") -> str:
        """Ask the model to translate *text* into English.

        NOTE(review): ``language`` is accepted but never used -- the prompt
        always requests English. Confirm whether it should select the target
        language before changing the prompt.

        Args:
            text: Text to translate.
            language: Currently ignored.

        Returns:
            The generated text.
        """
        prompt = f'''[INST] <>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
Your task consists in translating in English\\n"
the following text:
<>
{text}[/INST]'''
        return self._generate(prompt)

    def generate_answer(self, query: str, answer: str, histo: str, context: str, language: str) -> str:
        """Produce the final answer in *language* from a previously generated answer.

        The prompt asks the model to translate *answer* into *language* without
        adding content, optionally reusing vocabulary from *context* and
        avoiding redundancy with *histo*.

        Args:
            query: The initial user query.
            answer: The answer to be translated / finalized.
            histo: Conversation history text.
            context: Context text.
            language: Target language named in the prompt.

        Returns:
            The generated text (also printed to stdout).
        """
        prompt = f'''[INST] <>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
Your task consists in translating the answer in {language}, if its not already the case, to the query "
delimited by triple backticks: ```{query}``` \\n"
\\n You don't add new content to the answer but: "
\\n 1 You can use some vocabulary from the context delimited by triple backticks: "
```{context}```"
\\n 2 You are consistent and avoid redundancies with the rest of the initial"
conversation delimited by triple backticks: ```{histo}```"
Your response shall respect the following format: "
"
You are given the answer in {language}:
<>
{answer}[/INST]'''
        res = self._generate(prompt)
        print(res)
        return res

    def transform_parahraph_into_question(self, prompt: str, title_doc: str = '', title_para: str = '') -> str:
        """Ask the model to write questions about a paragraph of a document.

        Args:
            prompt: The paragraph text the questions should be about.
            title_doc: Title of the document the paragraph belongs to.
            title_para: Title of the paragraph.

        Returns:
            The raw generated text (also printed to stdout); it is not
            post-processed or split into individual questions.
        """
        max_tokens = 80
        prompt_template = f'''[INST] <>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
Your job is to create a question about a paragraph of a document untitled "{title_doc}".
The paragraph title is "{title_para}".
If you see that the question that you are creating will not respect {max_tokens} tokens, find a way to make it shorter.
If you can't create a question about the paragraph, just rephrase {title_para} so that it becomes a question.
Your response shall contains two questions, shall be concise and shall respect the following format:
"Question: \\nQuestion: \\n"
You should not answer to the question, just create it.
The paragraph you need to create a question about is the following :
<>
{prompt}[/INST]
'''
        # Override only the generation length on the shared pipeline instead of
        # building a second, otherwise identical pipeline on every call.
        res = self._generate(prompt_template, max_new_tokens=max_tokens)
        print(res)
        return res

    def detect_language(self, text: str) -> str:
        """Ask the model for the two-letter language code of *text*.

        Args:
            text: The user query whose language should be detected.

        Returns:
            The raw generated text; it is not validated or trimmed here.
        """
        prompt = f'''[INST] <>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
Your task consists in detecting the language of the user query"
Your answer shall be the two letters code of the language"
and should respect the following format: "
"
\\n <
>"
\\n {text}[/INST]'''
        return self._generate(prompt)