KingNish committed
Commit a185be6
Parent: 2217b90

Added models in Voice chat and improved UI

Files changed (1): app.py (+40 -12)

app.py CHANGED
@@ -54,9 +54,20 @@ def videochat(image3, prompt3):
     decoded_text = decoded_text[:-10]
     yield decoded_text
 
-theme = gr.themes.Base(
-    font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
-)
+theme = gr.themes.Soft(
+    primary_hue="blue",
+    secondary_hue="orange",
+    neutral_hue="gray",
+    font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif']).set(
+    body_background_fill_dark="#111111",
+    block_background_fill_dark="#111111",
+    block_border_width="1px",
+    block_title_background_fill_dark="#1e1c26",
+    input_background_fill_dark="#292733",
+    button_secondary_background_fill_dark="#24212b",
+    border_color_primary_dark="#343140",
+    background_fill_secondary_dark="#111111",
+    color_accent_soft_dark="transparent")
 
 MODEL_NAME = "openai/whisper-medium"
 BATCH_SIZE = 10
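This hunk replaces the bare gr.themes.Base with a customized gr.themes.Soft and layers dark-mode colors on top via .set(). That two-step pattern (hues and fonts in the constructor, per-property overrides afterwards) is Gradio's standard theme-builder API. A minimal sketch of how such a theme is attached to an app; the demo callback is illustrative and not part of app.py:

```python
import gradio as gr

# Build a theme from a preset, then override individual dark-mode
# properties via .set() -- the same pattern this commit uses.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="orange",
).set(
    body_background_fill_dark="#111111",
    block_border_width="1px",
)

# Any Blocks (or Interface) app opts in through the theme= argument.
with gr.Blocks(theme=theme) as demo:
    name = gr.Textbox(label="Name")
    greeting = gr.Textbox(label="Greeting")
    # Hypothetical callback, only here to make the sketch runnable.
    name.submit(lambda n: f"Hello, {n}!", inputs=name, outputs=greeting)

if __name__ == "__main__":
    demo.launch()
```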
@@ -78,18 +89,39 @@ def transcribe(inputs):
     text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"})["text"]
     return text
 
-client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+HF_TOKEN = os.environ.get("HF_TOKEN", None)
+
+def client_fn(model):
+    if "Mixtral" in model:
+        return InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+    elif "Llama" in model:
+        return InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
+    elif "Mistral" in model:
+        return InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
+    elif "Phi" in model:
+        return InferenceClient("microsoft/Phi-3-mini-4k-instruct")
+    else:
+        return InferenceClient("microsoft/Phi-3-mini-4k-instruct")
+
+def randomize_seed_fn(seed: int) -> int:
+    seed = random.randint(0, 999999)
+    return seed
 
 system_instructions1 = "[SYSTEM] Answer as Real OpenGPT 4o, Made by 'KingNish', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. You will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
 
-def model(text):
+def models(text, model="Mixtral 8x7B", seed=42):
+
+    seed = int(randomize_seed_fn(seed))
+    generator = torch.Generator().manual_seed(seed)
+
+    client = client_fn(model)
     generate_kwargs = dict(
         temperature=0.7,
         max_new_tokens=512,
         top_p=0.95,
         repetition_penalty=1,
         do_sample=True,
-        seed=42,
+        seed=seed,
     )
 
     formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
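Two notes on this hunk: client_fn routes by substring match on the display name (so "Mixtral 8x7B" hits the first branch) and falls back to Phi-3 for unknown names, and randomize_seed_fn ignores its argument and always draws a fresh random seed, so the seed=42 default is effectively cosmetic. The torch.Generator built in models() cannot influence a remote endpoint; only the seed forwarded in generate_kwargs does. For context, a hedged sketch of the streaming call these kwargs presumably feed into, using huggingface_hub's public text_generation API (the prompt is abbreviated, not the repo's real one):

```python
from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# stream=True with details=True yields token-level events that can be
# accumulated into the growing reply -- the usual pattern for this API.
stream = client.text_generation(
    "[SYSTEM] ... [USER] Hello [OpenGPT 4o]",  # abbreviated formatted_prompt
    temperature=0.7,
    max_new_tokens=512,
    top_p=0.95,
    repetition_penalty=1.0,
    do_sample=True,
    seed=42,
    stream=True,
    details=True,
    return_full_text=False,
)

output = ""
for event in stream:
    output += event.token.text
print(output)
```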
@@ -115,7 +147,7 @@ DEVICE = torch.device("cuda")
 MODELS = {
     "idefics2-8b-chatty": Idefics2ForConditionalGeneration.from_pretrained(
         "HuggingFaceM4/idefics2-8b-chatty",
-        torch_dtype=torch.bfloat16,
+        torch_dtype=torch.float16,
         _attn_implementation="flash_attention_2",
     ).to(DEVICE),
 }
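The dtype change from bfloat16 to float16 matters on pre-Ampere GPUs (compute capability below 8.0, e.g. a T4), which accelerate float16 but not bfloat16; flash_attention_2 requires one of the two half-precision dtypes either way. A defensive variant, assuming the same transformers loading call, picks the dtype from hardware capability:

```python
import torch

# bfloat16 needs an Ampere-or-newer GPU; fall back to float16 elsewhere.
if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16
else:
    dtype = torch.float16
print(f"selected dtype: {dtype}")

# The loading call from the diff would then read (sketch, not executed here):
# Idefics2ForConditionalGeneration.from_pretrained(
#     "HuggingFaceM4/idefics2-8b-chatty",
#     torch_dtype=dtype,
#     _attn_implementation="flash_attention_2",
# ).to(DEVICE)
```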
@@ -521,16 +553,12 @@ with gr.Blocks() as voice:
         autoplay=True,
         elem_classes="audio")
     gr.Interface(
-        batch=True,
-        max_batch_size=10,
         fn=respond,
         inputs=[input],
-        outputs=[output], live=True)
+        outputs=[output], api_name="translate", live=True)
 
 with gr.Blocks() as livechat:
     gr.Interface(
-        batch=True,
-        max_batch_size=10,
         fn=videochat,
         inputs=[gr.Image(type="pil",sources="webcam", label="Upload Image"), gr.Textbox(label="Prompt", value="what he is doing")],
         outputs=gr.Textbox(label="Answer")
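Dropping batch=True and max_batch_size=10 removes Gradio's request batching from both interfaces, and api_name="translate" pins the voice endpoint to a stable name in the Space's API instead of an auto-generated one. A sketch of calling that named endpoint with gradio_client; the Space id and audio file are placeholders, not taken from this commit:

```python
from gradio_client import Client, handle_file

# Placeholder Space id -- substitute the actual "user/space" handle.
client = Client("user/space")

# The voice tab's respond() presumably takes an audio input, so pass a
# local file; endpoints registered via api_name are addressed as "/<name>".
result = client.predict(
    handle_file("sample.wav"),
    api_name="/translate",
)
print(result)
```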
 