phamngoctukts committed on
Commit ff4edf1
1 Parent(s): deacc32

Update app.py

Files changed (1)
  1. app.py +53 -8
app.py CHANGED
@@ -18,12 +18,10 @@ import os
 tk = token = os.environ.get("HF_TOKEN")
 login(tk)
 model_id = "meta-llama/Llama-3.2-1B"
-text2text = pipeline(
-    "text2text",
-    model=model_id,
-    torch_dtype=torch.bfloat16,
-    device_map="auto"
-)
+ckpt = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+model = MllamaForConditionalGeneration.from_pretrained(ckpt,
+    torch_dtype=torch.bfloat16).to("cpu")
+processor = AutoProcessor.from_pretrained(ckpt)
 r = sr.Recognizer()
 
 @dataclass
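
Note on the hunk above: the commit replaces the `text2text` pipeline built on `meta-llama/Llama-3.2-1B` with direct loading of the Llama 3.2 11B Vision Instruct checkpoint. A minimal loading sketch, assuming a transformers release that ships the Mllama classes (4.45 or later) and that the matching imports are added near the top of app.py:

    # Loading sketch (assumption: transformers >= 4.45, which introduced MllamaForConditionalGeneration).
    import torch
    from transformers import MllamaForConditionalGeneration, AutoProcessor

    ckpt = "meta-llama/Llama-3.2-11B-Vision-Instruct"
    model = MllamaForConditionalGeneration.from_pretrained(
        ckpt,
        torch_dtype=torch.bfloat16,   # bf16 weights, as in the diff
    ).to("cpu")                       # the diff keeps the model on CPU
    processor = AutoProcessor.from_pretrained(ckpt)

Running the 11B checkpoint on CPU in bfloat16 works but is slow; the sketch mirrors the diff rather than recommending that placement.
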
@@ -85,7 +83,7 @@ def process_audio(audio:tuple, state:AppState):
         return gr.Audio(recording=False), state
     return None, state
 
-def response(state:AppState):
+def response(state:AppState, message, history, max_new_tokens=250):
     if not state.pause_detected and not state.started_talking:
         return None, AppState()
     audio_buffer = BytesIO()
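
The widened `response` signature feeds the history handling added further down, which indexes `msg[0]`/`msg[1]` and `message["files"]`. That matches the tuple-style chat history and multimodal textbox value used by Gradio chat UIs, but the UI wiring is not visible in this diff, so the shapes below are an illustrative assumption:

    # Assumed input shapes (hypothetical example values, not taken from the diff):
    message = {"text": "What is in this picture?", "files": ["/tmp/example.png"]}
    history = [
        (("/tmp/example.png",), None),           # image turn: the user part is a tuple of file paths
        ("What is in this picture?", "A cat."),  # text turn: (user text, assistant text)
    ]
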
@@ -107,7 +105,54 @@ def response(state:AppState)
     if textin != "":
         print("Đang nghĩ...")
         textout=str(text2text(textin))
-        textout = textout.replace('*','')
+
+
+        for i, msg in enumerate(history):
+            if isinstance(msg[0], tuple):
+                messages.append({"role": "user", "content": [{"type": "text", "text": history[i+1][0]}, {"type": "image"}]})
+                messages.append({"role": "assistant", "content": [{"type": "text", "text": history[i+1][1]}]})
+                images.append(Image.open(msg[0][0]).convert("RGB"))
+            elif isinstance(history[i-1], tuple) and isinstance(msg[0], str):
+                # messages are already handled
+                pass
+            elif isinstance(history[i-1][0], str) and isinstance(msg[0], str):  # text only turn
+                messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
+                messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
+
+        # add current message
+        if len(message["files"]) == 1:
+            if isinstance(message["files"][0], str):  # examples
+                image = Image.open(message["files"][0]).convert("RGB")
+            else:  # regular input
+                image = Image.open(message["files"][0]["path"]).convert("RGB")
+            images.append(image)
+            messages.append({"role": "user", "content": [{"type": "text", "text": txt}, {"type": "image"}]})
+        else:
+            messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})
+
+
+        texts = processor.apply_chat_template(messages, add_generation_prompt=True)
+
+        if images == []:
+            inputs = processor(text=texts, return_tensors="pt").to("cpu")
+        else:
+            inputs = processor(text=texts, images=images, return_tensors="pt").to("cpu")
+        streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
+        generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
+        generated_text = streamer
+
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
+        thread.start()
+        buffer = ""
+
+        for new_text in streamer:
+            buffer += new_text
+            generated_text_without_prompt = buffer
+            time.sleep(0.01)
+            yield buffer
+
+
+        textout = generated_text.replace('*','')
     state.conversation.append({"role": "user", "content": "Trợ lý: " + textout})
     if textout != "":
         print("Đang đọc...")
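
The added block turns the chat history into Mllama-style `messages` plus a list of PIL `images`, templates them with `processor.apply_chat_template`, and streams `model.generate` output from a worker thread (the printed strings "Đang nghĩ..." and "Đang đọc..." are Vietnamese for "Thinking..." and "Reading..."; "Trợ lý" means "Assistant"). Note that `messages`, `images`, and `txt` are initialised outside the visible hunk, and that `generated_text = streamer` binds the streamer object itself, so the final `.replace('*','')` needs the accumulated string instead. A condensed sketch of the streaming pattern, assuming the `model` and `processor` loaded above:

    # Streaming sketch: chat template + TextIteratorStreamer + a background generate() thread.
    # Assumptions: `model`/`processor` are the Mllama objects above; `messages` is the chat
    # list built as in the diff; `images` is a (possibly empty) list of PIL images.
    from threading import Thread
    from transformers import TextIteratorStreamer

    def stream_reply(messages, images, max_new_tokens=250):
        prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
        if images:
            inputs = processor(text=prompt, images=images, return_tensors="pt").to(model.device)
        else:
            inputs = processor(text=prompt, return_tensors="pt").to(model.device)
        streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
        Thread(target=model.generate,
               kwargs=dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)).start()
        buffer = ""
        for new_text in streamer:   # text chunks arrive while generate() runs in the thread
            buffer += new_text
            yield buffer            # partial reply, e.g. for live UI updates
        # after the loop, `buffer` holds the complete reply for any post-processing

A caller can iterate `for partial in stream_reply(messages, images)` and keep the last value as the final reply string.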
 