Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -21,7 +21,7 @@ import io
|
|
21 |
import datasets
|
22 |
|
23 |
import gradio as gr
|
24 |
-
from transformers import AutoProcessor, TextIteratorStreamer
|
25 |
from transformers import Idefics2ForConditionalGeneration
|
26 |
import tempfile
|
27 |
from streaming_stt_nemo import Model
|
@@ -30,17 +30,24 @@ import edge_tts
|
|
30 |
import asyncio
|
31 |
from transformers import pipeline
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
from gradio import Image, Textbox
|
46 |
|
@@ -307,7 +314,7 @@ def extract_images_from_msg_list(msg_list):
|
|
307 |
return all_images
|
308 |
|
309 |
|
310 |
-
@spaces.GPU(duration=
|
311 |
def model_inference(
|
312 |
user_prompt,
|
313 |
chat_history,
|
@@ -535,7 +542,7 @@ with gr.Blocks() as voice2:
|
|
535 |
outputs=[output], live=True)
|
536 |
|
537 |
with gr.Blocks() as video:
|
538 |
-
gr.Markdown(" ## Live Chat
|
539 |
gr.Markdown("### Click camera option to update image")
|
540 |
gr.Interface(
|
541 |
fn=answer_question,
|
|
|
21 |
import datasets
|
22 |
|
23 |
import gradio as gr
|
24 |
+
from transformers import AutoModel, AutoProcessor, TextIteratorStreamer
|
25 |
from transformers import Idefics2ForConditionalGeneration
|
26 |
import tempfile
|
27 |
from streaming_stt_nemo import Model
|
|
|
30 |
import asyncio
|
31 |
from transformers import pipeline
|
32 |
|
33 |
+
model = AutoModel.from_pretrained("unum-cloud/uform-gen2-dpo", trust_remote_code=True)
|
34 |
+
processor = AutoProcessor.from_pretrained("unum-cloud/uform-gen2-dpo", trust_remote_code=True)
|
35 |
+
|
36 |
+
@spaces.GPU(duration=10, queue=False)
|
37 |
+
def answer_question(image, prompt):
|
38 |
+
inputs = processor(text=[prompt], images=[image], return_tensors="pt")
|
39 |
+
with torch.inference_mode():
|
40 |
+
output = model.generate(
|
41 |
+
**inputs,
|
42 |
+
do_sample=False,
|
43 |
+
use_cache=True,
|
44 |
+
max_new_tokens=256,
|
45 |
+
eos_token_id=151645,
|
46 |
+
pad_token_id=processor.tokenizer.pad_token_id
|
47 |
+
)
|
48 |
+
prompt_len = inputs["input_ids"].shape[1]
|
49 |
+
decoded_text = processor.batch_decode(output[:, prompt_len:])[0]
|
50 |
+
return decoded_text
|
51 |
|
52 |
from gradio import Image, Textbox
|
53 |
|
|
|
314 |
return all_images
|
315 |
|
316 |
|
317 |
+
@spaces.GPU(duration=30, queue=False)
|
318 |
def model_inference(
|
319 |
user_prompt,
|
320 |
chat_history,
|
|
|
542 |
outputs=[output], live=True)
|
543 |
|
544 |
with gr.Blocks() as video:
|
545 |
+
gr.Markdown(" ## Live Chat")
|
546 |
gr.Markdown("### Click camera option to update image")
|
547 |
gr.Interface(
|
548 |
fn=answer_question,
|