Mohannad committed
Commit cba19c5
1 Parent(s): 5e2a02e

llama 2 13b

Files changed (1): app.py (+100, -5)
app.py CHANGED
@@ -38,6 +38,90 @@ embedding_model = PretrainedSpeakerEmbedding(
     "speechbrain/spkrec-ecapa-voxceleb",
     device=torch.device("cuda"))
 
+
+# LLAMA prep
+import torch
+import transformers
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-13b-chat-hf",
+                                          use_auth_token=True)
+
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-13b-chat-hf",
+                                             device_map='auto',
+                                             torch_dtype=torch.float16,
+                                             use_auth_token=True,
+                                             # load_in_8bit=True,
+                                             # load_in_4bit=True
+                                             )
+
+# Use a pipeline for later
+from transformers import pipeline
+
+pipe = pipeline("text-generation",
+                model=model,
+                tokenizer=tokenizer,
+                torch_dtype=torch.bfloat16,
+                device_map="auto",
+                max_new_tokens=512,
+                do_sample=True,
+                top_k=30,
+                num_return_sequences=1,
+                eos_token_id=tokenizer.eos_token_id
+                )
+
+
+import json
+import textwrap
+
+# Llama-2 chat formatting markers
+B_INST, E_INST = "[INST]", "[/INST]"
+B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
+DEFAULT_SYSTEM_PROMPT = """\
+You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
+
+If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""
+
+
+def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT):
+    # Wrap an instruction and system prompt in the Llama-2 chat template.
+    SYSTEM_PROMPT = B_SYS + new_system_prompt + E_SYS
+    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
+    return prompt_template
+
+def cut_off_text(text, prompt):
+    # Truncate `text` at the first occurrence of `prompt`, if any.
+    cutoff_phrase = prompt
+    index = text.find(cutoff_phrase)
+    if index != -1:
+        return text[:index]
+    else:
+        return text
+
+def remove_substring(string, substring):
+    return string.replace(substring, "")
+
+
+def generate(text):
+    prompt = get_prompt(text)
+    with torch.autocast('cuda', dtype=torch.bfloat16):
+        inputs = tokenizer(prompt, return_tensors="pt").to('cuda')
+        outputs = model.generate(**inputs,
+                                 max_new_tokens=512,
+                                 eos_token_id=tokenizer.eos_token_id,
+                                 pad_token_id=tokenizer.eos_token_id,
+                                 )
+        final_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+        final_outputs = cut_off_text(final_outputs, '</s>')
+        final_outputs = remove_substring(final_outputs, prompt)
+
+    return final_outputs  # , outputs
+
+def parse_text(text):
+    wrapped_text = textwrap.fill(text, width=100)
+    print(wrapped_text + '\n\n')
+    # Return the wrapped text so callers such as answer() get a value back.
+    return wrapped_text
+
+
 def segment_embedding(segment, duration, audio_file):
     audio = Audio()
     start = segment["start"]
@@ -157,11 +241,22 @@ question = 'Can she answer'
 tokenizer = AutoTokenizer.from_pretrained('UKP-SQuARE/roberta-base-pf-boolq-onnx')
 
 def answer(context, question):
-    inputs = tokenizer(question, context, padding=True, truncation=True, return_tensors='np')
-    inputs = {key: np.array(inputs[key], dtype=np.int64) for key in inputs}
-    outputs = onnx_model.run(input_feed=dict(inputs), output_names=None)
-
-    return outputs
+    # inputs = tokenizer(question, context, padding=True, truncation=True, return_tensors='np')
+    # inputs = {key: np.array(inputs[key], dtype=np.int64) for key in inputs}
+    # outputs = onnx_model.run(input_feed=dict(inputs), output_names=None)
+
+    # "{text}" is deliberately left literal (plain-string concatenation, not
+    # an f-string) so PromptTemplate can fill it in below.
+    instruction = f"conversation: '''{context}'''" + "\nBased on the conversation provided in triple quotes, answer the next question.\nQuestion: {text}"
+
+    system_prompt = "You are an expert who answers any question based on the conversation. You analyse the conversation in light of the question, then answer with yes, no, or not clear only. You output only one or two words."
+
+    template = get_prompt(instruction, system_prompt)
+    print(template)
+
+    prompt = PromptTemplate(template=template, input_variables=["text"])
+    llm_chain = LLMChain(prompt=prompt, llm=llm)
+    output = llm_chain.run(question)
+
+    return parse_text(output)
 
 
 uploaded_file = st.sidebar.file_uploader("Choose a file")
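
For orientation, a minimal sketch (not part of the commit) of what the new helpers do: get_prompt() wraps an instruction in the Llama-2 chat template, and generate() drives the model with it directly. The instruction text here is illustrative.

# Illustrative only; assumes the functions defined in the diff above.
prompt = get_prompt("Summarize the conversation.")
# prompt is now:
#   [INST]<<SYS>>
#   You are a helpful, respectful and honest assistant. ...
#   <</SYS>>
#
#   Summarize the conversation.[/INST]
print(generate("Summarize the conversation."))  # samples up to 512 new tokens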
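
Note that the rewritten answer() depends on PromptTemplate, LLMChain, and an llm object that this diff neither imports nor defines; they presumably come from LangChain setup elsewhere in app.py. A minimal sketch of that missing glue, assuming the LangChain API current at the time of the commit and reusing the pipe built above (these names are assumptions, not part of the commit):

# Assumed glue code, not in this diff.
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=pipe)  # wrap the transformers pipeline for LangChain

# Hypothetical usage: `transcript` stands in for the diarized conversation
# assembled earlier in app.py.
transcript = "SPEAKER 1: Can you hear me? SPEAKER 2: Yes, loud and clear."
answer(transcript, "Can she answer?")  # prints the template, then returns "Yes" / "No" / "Not clear"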