KingNish committed on
Commit
de0f0d9
1 Parent(s): f62245a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -24
app.py CHANGED
@@ -21,7 +21,7 @@ import io
21
  import datasets
22
 
23
  import gradio as gr
24
- from transformers import AutoModel, AutoProcessor, TextIteratorStreamer
25
  from transformers import Idefics2ForConditionalGeneration
26
  import tempfile
27
  from streaming_stt_nemo import Model
@@ -29,15 +29,18 @@ from huggingface_hub import InferenceClient
29
  import edge_tts
30
  import asyncio
31
  from transformers import pipeline
 
 
 
32
 
33
- model = AutoModel.from_pretrained("unum-cloud/uform-gen2-dpo", trust_remote_code=True)
34
  processor = AutoProcessor.from_pretrained("unum-cloud/uform-gen2-dpo", trust_remote_code=True)
35
 
36
- @spaces.GPU(duration=10, queue=False)
37
- def generate_caption(image, prompt):
38
- inputs = processor(text=[prompt], images=[image], return_tensors="pt")
39
  with torch.inference_mode():
40
- output = model.generate(
41
  **inputs,
42
  do_sample=False,
43
  use_cache=True,
@@ -48,9 +51,9 @@ def generate_caption(image, prompt):
48
 
49
  prompt_len = inputs["input_ids"].shape[1]
50
  decoded_text = processor.batch_decode(output[:, prompt_len:])[0]
51
-
52
- yield decoded_text
53
-
54
 
55
  theme = gr.themes.Base(
56
  font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
@@ -118,24 +121,16 @@ SYSTEM_PROMPT = [
118
  {
119
  "type": "text",
120
  "text": """I am OpenGPT 4o, an exceptionally capable and versatile AI assistant meticulously crafted by KingNish. Designed to assist human users through insightful conversations, I aim to provide an unparalleled experience. My key attributes include:
121
-
122
  - **Intelligence and Knowledge:** I possess an extensive knowledge base, enabling me to offer insightful answers and intelligent responses to User queries. My understanding of complex concepts is exceptional, ensuring accurate and reliable information.
123
-
124
  - **Image Generation and Perception:** One of my standout features is the ability to generate and perceive images. Utilizing the following link structure, I create unique and contextually rich visuals:
125
-
126
  > ![](https://image.pollinations.ai/prompt/{StyleofImage}%20{OptimizedPrompt}%20{adjective}%20{charactersDetailed}%20{visualStyle}%20{genre}?width={width}&height={height}&nologo=poll&nofeed=yes&seed={random})
127
-
128
  For image generation, I replace {info inside curly braces} with specific details according to their requiremnts to create relevant visuals. The width and height parameters are adjusted as needed, often favoring HD dimensions for a superior viewing experience.
129
-
130
  For instance, if the User requests:
131
-
132
  [USER] Show me an image of A futuristic cityscape with towering skyscrapers and flying cars.
133
  [OpenGPT 4o] Generating Image you requested:
134
  ![](https://image.pollinations.ai/prompt/Photorealistic%20futuristic%20cityscape%20with%20towering%20skyscrapers%20and%20flying%20cars%20in%20the%20year%202154?width=1024&height=768&nologo=poll&nofeed=yes&seed=85432)
135
-
136
  **Bulk Image Generation with Links:** I excel at generating multiple images link simultaneously, always providing unique links and visuals. I ensure that each image is distinct and captivates the User.
137
  Note: Make sure to always provide image links starting with ! .As given in examples.
138
-
139
  **Engaging Conversations:** While my image generation skills are impressive, I also excel at natural language processing. I can engage in captivating conversations, offering informative and entertaining responses to the User.
140
  **Reasoning, Memory, and Identification:** My reasoning skills are exceptional, allowing me to make logical connections. My memory capabilities are vast, enabling me to retain context and provide consistent responses. I can identify people and objects within images or text, providing relevant insights and details.
141
  **Attention to Detail:** I am attentive to the smallest details, ensuring that my responses and generated content are of the highest quality. I strive to provide a refined and polished experience.
@@ -385,8 +380,6 @@ def model_inference(
385
  if acc_text.endswith("<end_of_utterance>"):
386
  acc_text = acc_text[:-18]
387
  yield acc_text
388
- print("Success - generated the following text:", acc_text)
389
- print("-----")
390
 
391
 
392
  FEATURES = datasets.Features(
@@ -542,15 +535,13 @@ with gr.Blocks() as voice2:
542
  outputs=[output], live=True)
543
 
544
  with gr.Blocks() as video:
545
- gr.Markdown(" ## Live Chat")
546
- gr.Markdown("### Click camera option to update image")
547
  gr.Interface(
548
- fn=generate_caption,
549
  inputs=[gr.Image(type="pil", label="Upload Image"), gr.Textbox(label="Prompt", value="what he is doing")],
550
- outputs=gr.Textbox(label="Answer"),
551
  )
552
 
553
- with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="GPT 4o DEMO") as demo:
554
  gr.Markdown("# OpenGPT 4o")
555
  gr.TabbedInterface([img, voice, video, voice2], ['💬 SuperChat','🗣️ Voice Chat','📸 Live Chat', '🗣️ Voice Chat 2'])
556
 
 
21
  import datasets
22
 
23
  import gradio as gr
24
+ from transformers import TextIteratorStreamer
25
  from transformers import Idefics2ForConditionalGeneration
26
  import tempfile
27
  from streaming_stt_nemo import Model
 
29
  import edge_tts
30
  import asyncio
31
  from transformers import pipeline
32
+ from transformers import AutoTokenizer, AutoModelForCausalLM
33
+ from transformers import AutoModel
34
+ from transformers import AutoProcessor
35
 
36
# Model and processor used by videochat(), which serves the "Live Chat" tab.
# Both are loaded once at import time from the same Hub repository.
# NOTE(review): trust_remote_code=True runs Python code shipped in that
# repository; consider pinning a revision -- confirm with the repo owner.
model3 = AutoModel.from_pretrained(
    "unum-cloud/uform-gen2-dpo",
    trust_remote_code=True,
)
processor = AutoProcessor.from_pretrained(
    "unum-cloud/uform-gen2-dpo",
    trust_remote_code=True,
)
38
 
39
+ @spaces.GPU(queue=False)
40
+ def videochat(image3, prompt3):
41
+ inputs = processor(text=[prompt3], images=[image3], return_tensors="pt")
42
  with torch.inference_mode():
43
+ output = model3.generate(
44
  **inputs,
45
  do_sample=False,
46
  use_cache=True,
 
51
 
52
  prompt_len = inputs["input_ids"].shape[1]
53
  decoded_text = processor.batch_decode(output[:, prompt_len:])[0]
54
+ if decoded_text.endswith("<|im_end|>"):
55
+ decoded_text = decoded_text[:-18]
56
+ yield acc_text
57
 
58
  theme = gr.themes.Base(
59
  font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
 
121
  {
122
  "type": "text",
123
  "text": """I am OpenGPT 4o, an exceptionally capable and versatile AI assistant meticulously crafted by KingNish. Designed to assist human users through insightful conversations, I aim to provide an unparalleled experience. My key attributes include:
 
124
  - **Intelligence and Knowledge:** I possess an extensive knowledge base, enabling me to offer insightful answers and intelligent responses to User queries. My understanding of complex concepts is exceptional, ensuring accurate and reliable information.
 
125
  - **Image Generation and Perception:** One of my standout features is the ability to generate and perceive images. Utilizing the following link structure, I create unique and contextually rich visuals:
 
126
  > ![](https://image.pollinations.ai/prompt/{StyleofImage}%20{OptimizedPrompt}%20{adjective}%20{charactersDetailed}%20{visualStyle}%20{genre}?width={width}&height={height}&nologo=poll&nofeed=yes&seed={random})
 
127
  For image generation, I replace {info inside curly braces} with specific details according to their requiremnts to create relevant visuals. The width and height parameters are adjusted as needed, often favoring HD dimensions for a superior viewing experience.
 
128
  For instance, if the User requests:
 
129
  [USER] Show me an image of A futuristic cityscape with towering skyscrapers and flying cars.
130
  [OpenGPT 4o] Generating Image you requested:
131
  ![](https://image.pollinations.ai/prompt/Photorealistic%20futuristic%20cityscape%20with%20towering%20skyscrapers%20and%20flying%20cars%20in%20the%20year%202154?width=1024&height=768&nologo=poll&nofeed=yes&seed=85432)
 
132
  **Bulk Image Generation with Links:** I excel at generating multiple images link simultaneously, always providing unique links and visuals. I ensure that each image is distinct and captivates the User.
133
  Note: Make sure to always provide image links starting with ! .As given in examples.
 
134
  **Engaging Conversations:** While my image generation skills are impressive, I also excel at natural language processing. I can engage in captivating conversations, offering informative and entertaining responses to the User.
135
  **Reasoning, Memory, and Identification:** My reasoning skills are exceptional, allowing me to make logical connections. My memory capabilities are vast, enabling me to retain context and provide consistent responses. I can identify people and objects within images or text, providing relevant insights and details.
136
  **Attention to Detail:** I am attentive to the smallest details, ensuring that my responses and generated content are of the highest quality. I strive to provide a refined and polished experience.
 
380
  if acc_text.endswith("<end_of_utterance>"):
381
  acc_text = acc_text[:-18]
382
  yield acc_text
 
 
383
 
384
 
385
  FEATURES = datasets.Features(
 
535
  outputs=[output], live=True)
536
 
537
  with gr.Blocks() as video:
 
 
538
  gr.Interface(
539
+ fn=videochat,
540
  inputs=[gr.Image(type="pil", label="Upload Image"), gr.Textbox(label="Prompt", value="what he is doing")],
541
+ outputs=gr.Textbox(label="Answer")
542
  )
543
 
544
+ with gr.Blocks(theme=theme, title="OpenGPT 4o DEMO") as demo:
545
  gr.Markdown("# OpenGPT 4o")
546
  gr.TabbedInterface([img, voice, video, voice2], ['💬 SuperChat','🗣️ Voice Chat','📸 Live Chat', '🗣️ Voice Chat 2'])
547