adrien.aribaut-gaudin committed on
Commit
60354bb
1 Parent(s): 43c4414
Files changed (4) hide show
  1. .gitignore +2 -1
  2. requirements.txt +0 -0
  3. src/tools/llm.py +31 -22
  4. test.py +0 -7
.gitignore CHANGED
@@ -1,8 +1,9 @@
1
  config_key.py
2
 
3
 
4
- #Test folder
5
  data/Test/
 
6
 
7
  #database folder
8
  database/
 
1
  config_key.py
2
 
3
 
4
+ #Test folder + files
5
  data/Test/
6
+ test.py
7
 
8
  #database folder
9
  database/
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
src/tools/llm.py CHANGED
@@ -16,15 +16,16 @@ class LlmAgent:
16
  def generate_paragraph(self, query: str, context: {}, histo: [(str, str)], language='fr') -> str:
17
  locallm = HuggingFacePipeline(pipeline=self.pipe)
18
  """generates the answer"""
19
- template = (f"You are a conversation bot designed to answer to the query from users delimited by "
20
- f"triple backticks: "
21
- f"\\n ``` {query} ```\\n"
22
- f"Your answer is based on the context delimited by triple backticks: "
23
- f"\\n ``` {context} ```\\n"
24
- f" You are consistent and avoid redundancies with the rest of the initial conversation "
25
- f"delimited by triple backticks: "
26
- f"\\n ``` {histo} ```\\n"
27
- f"Your response shall be in {language} and shall be concise")
 
28
  prompt = PromptTemplate(input_variables=[], template=template)
29
  llm_chain = LLMChain(prompt=prompt,llm=locallm)
30
  p = llm_chain.predict()
@@ -40,8 +41,11 @@ class LlmAgent:
40
 
41
  # languages = "`French to English" if language == "en" else "English to French"
42
 
43
- tempate = (f" Your task consists in translating in English\\n"
44
- f" the following text delimited by by triple backticks: ```{text}```\n"
 
 
 
45
  )
46
 
47
  prompt = PromptTemplate(input_variables=[], template=tempate)
@@ -54,14 +58,17 @@ class LlmAgent:
54
  def _cut_unfinished_sentence(s: str):
55
  return '.'.join(s.split('.')[:-1])
56
  locallm = HuggingFacePipeline(pipeline=self.pipe)
57
- template = (f"Your task consists in translating the answer in {language}, if its not already the case, to the query "
58
- f"delimited by triple backticks: ```{query}``` \\n"
59
- f"You are given the answer in {language} delimited by triple backticks: ```{answer}```"
60
- f"\\n You don't add new content to the answer but: "
61
- f"\\n 1 You can use some vocabulary from the context delimited by triple backticks: "
62
- f"```{context}```"
63
- f"\\n 2 You are consistent and avoid redundancies with the rest of the initial"
64
- f" conversation delimited by triple backticks: ```{histo}```"
 
 
 
65
  )
66
  prompt = PromptTemplate(input_variables=[], template=template)
67
  llm_chain = LLMChain(prompt=prompt,llm=locallm,verbose=True)
@@ -101,9 +108,11 @@ class LlmAgent:
101
  def detect_language(self, text: str) -> str:
102
  """detects the language"""
103
  locallm = HuggingFacePipeline(pipeline=self.pipe)
104
- template = (f"Your task consists in detecting the language of the following text delimited by triple backticks: "
105
- f"```{text}```"
106
- f" Your answer shall be the two letters code of the language"
 
 
107
  )
108
  prompt = PromptTemplate(input_variables=[], template=template)
109
  llm_chain = LLMChain(prompt=prompt,llm=locallm,verbose=True)
 
16
  def generate_paragraph(self, query: str, context: {}, histo: [(str, str)], language='fr') -> str:
17
  locallm = HuggingFacePipeline(pipeline=self.pipe)
18
  """generates the answer"""
19
+ template = (f'''[INST] <<SYS>>"
20
+ "You are a conversation bot designed to answer to the query from users"
21
+ "Your answer is based on the context delimited by triple backticks: "
22
+ "\\n ``` {context} ```\\n"
23
+ " You are consistent and avoid redundancies with the rest of the initial conversation "
24
+ "delimited by triple backticks: "
25
+ "\\n ``` {histo} ```\\n"
26
+ "Your response shall be in {language} and shall be concise"
27
+ "\\n <</SYS>>"
28
+ "\\n {query}[/INST]''')
29
  prompt = PromptTemplate(input_variables=[], template=template)
30
  llm_chain = LLMChain(prompt=prompt,llm=locallm)
31
  p = llm_chain.predict()
 
41
 
42
  # languages = "`French to English" if language == "en" else "English to French"
43
 
44
+ tempate = (f'''[INST] <<SYS>>
45
+ Your task consists in translating in English\\n"
46
+ the following text:
47
+ <</SYS>>
48
+ {text}[/INST]'''
49
  )
50
 
51
  prompt = PromptTemplate(input_variables=[], template=tempate)
 
58
  def _cut_unfinished_sentence(s: str):
59
  return '.'.join(s.split('.')[:-1])
60
  locallm = HuggingFacePipeline(pipeline=self.pipe)
61
+ template = (f'''[INST] <<SYS>>
62
+ Your task consists in translating the answer in {language}, if its not already the case, to the query "
63
+ delimited by triple backticks: ```{query}``` \\n"
64
+ \\n You don't add new content to the answer but: "
65
+ \\n 1 You can use some vocabulary from the context delimited by triple backticks: "
66
+ ```{context}```"
67
+ \\n 2 You are consistent and avoid redundancies with the rest of the initial"
68
+ conversation delimited by triple backticks: ```{histo}```"
69
+ You are given the answer in {language}:
70
+ <</SYS>>
71
+ {answer}[/INST]'''
72
  )
73
  prompt = PromptTemplate(input_variables=[], template=template)
74
  llm_chain = LLMChain(prompt=prompt,llm=locallm,verbose=True)
 
108
  def detect_language(self, text: str) -> str:
109
  """detects the language"""
110
  locallm = HuggingFacePipeline(pipeline=self.pipe)
111
+ template = (f'''[INST] <<SYS>>
112
+ Your task consists in detecting the language of the user query"
113
+ Your answer shall be the two letters code of the language"
114
+ \\n <</SYS>>"
115
+ \\n {text}[/INST]'''
116
  )
117
  prompt = PromptTemplate(input_variables=[], template=template)
118
  llm_chain = LLMChain(prompt=prompt,llm=locallm,verbose=True)
test.py DELETED
@@ -1,7 +0,0 @@
1
- from src.model.doc import Doc
2
- from config import *
3
- from src.tools.llm import LlmAgent
4
-
5
- llmagent = LlmAgent(model="TheBloke/Llama-2-7b-Chat-GPTQ")
6
- doc = Doc(path=content_en_path_real)
7
- [llmagent.transform_parahraph_into_question(block.content, title_doc=doc.title,title_para=block.title) for block in doc.blocks]