ibrahimlasfar committed on
Commit b8d38d2 · 1 Parent(s): 7250ede

Update chatbot with real-time audio/image input and model selection

Files changed (5)
  1. README.md +9 -2
  2. api/endpoints.py +33 -22
  3. main.py +74 -45
  4. utils/generation.py +61 -196
  5. utils/web_search.py +5 -5
README.md CHANGED
@@ -1,9 +1,9 @@
 ---
-title: MGZON FLAN-T5 API
+title: MGZon Chatbot
 emoji: "🤖"
 colorFrom: "blue"
 colorTo: "green"
-sdk: docker
+sdk: gradio
 app_file: main.py
 pinned: false
 ---
@@ -37,6 +37,13 @@ This model is a fine-tuned version of [MGZON/Veltrix](https://huggingface.co/MGZ
 It achieves the following results on the evaluation set:
 - Loss: nan
 
+## Features
+- Real-time voice input/output with Whisper and Parler-TTS.
+- Image capture and analysis with CLIP.
+- Web search integration with Google API.
+- Model selection for flexible query handling.
+- Enhanced UI with custom icons and responsive design.
+
 ## Model description
 
 More information needed
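The feature list above corresponds to the API and UI changes in the files below. As a quick orientation, a minimal sketch of querying the `/api/model-info` route to see which models the Space is configured with; the base URL is an assumption for a locally running instance, adjust host and port as needed:

```python
import requests

BASE_URL = "http://localhost:7860"  # assumed host/port, not part of this commit

# /api/model-info returns the configured model names (primary, secondary,
# tertiary, CLIP variants, ...) as a JSON object.
info = requests.get(f"{BASE_URL}/api/model-info", timeout=10).json()
print(info["model_name"], info["secondary_model"])
```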
api/endpoints.py CHANGED
@@ -1,21 +1,24 @@
 import os
 from fastapi import APIRouter, HTTPException, UploadFile, File
+from fastapi.responses import StreamingResponse
 from openai import OpenAI
 from api.models import QueryRequest
 from utils.generation import request_generation, select_model
 from utils.web_search import web_search
+import io
 
 router = APIRouter()
 
 HF_TOKEN = os.getenv("HF_TOKEN")
+BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:fireworks-ai")
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
 
 @router.get("/api/model-info")
 def model_info():
     return {
         "model_name": MODEL_NAME,
-        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"),
+        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "openai/gpt-oss-20b:together"),
         "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1"),
         "clip_base_model": os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32"),
         "clip_large_model": os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14"),
@@ -33,7 +36,7 @@ async def performance_stats():
 
 @router.post("/api/chat")
 async def chat_endpoint(req: QueryRequest):
-    model_name, api_endpoint = select_model(req.message)
+    model_name, api_endpoint = select_model(req.message, model_choice=req.model_choice if hasattr(req, 'model_choice') else None)
     stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
@@ -44,17 +47,16 @@ async def chat_endpoint(req: QueryRequest):
         temperature=req.temperature,
         max_new_tokens=req.max_new_tokens,
         deep_search=req.enable_browsing,
+        output_type="text"
     )
-    response = "".join(list(stream))
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"response": response}
 
-
-    # in api/endpoints.py
 @router.post("/api/audio-transcription")
 async def audio_transcription_endpoint(file: UploadFile = File(...)):
     model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
     audio_data = await file.read()
-    response = "".join(list(request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message="Transcribe audio",
@@ -64,14 +66,16 @@ async def audio_transcription_endpoint(file: UploadFile = File(...)):
         max_new_tokens=128000,
         input_type="audio",
         audio_data=audio_data,
-    )))
+        output_type="text"
+    )
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"transcription": response}
 
 @router.post("/api/text-to-speech")
 async def text_to_speech_endpoint(req: dict):
     text = req.get("text", "")
     model_name, api_endpoint = select_model("text to speech", input_type="text")
-    response = request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=text,
@@ -80,8 +84,9 @@ async def text_to_speech_endpoint(req: dict):
         temperature=0.7,
         max_new_tokens=128000,
         input_type="text",
+        output_type="speech"
     )
-    audio_data = b"".join(list(response))
+    audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
     return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
 
 @router.post("/api/code")
@@ -91,7 +96,7 @@ async def code_endpoint(req: dict):
     code = req.get("code", "")
     prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
     model_name, api_endpoint = select_model(prompt)
-    response = "".join(list(request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=prompt,
@@ -99,14 +104,16 @@ async def code_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    )))
+        output_type="text"
+    )
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"generated_code": response}
 
 @router.post("/api/analysis")
 async def analysis_endpoint(req: dict):
     message = req.get("text", "")
     model_name, api_endpoint = select_model(message)
-    response = "".join(list(request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=message,
@@ -114,24 +121,28 @@ async def analysis_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    )))
+        output_type="text"
+    )
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"analysis": response}
 
 @router.post("/api/image-analysis")
-async def image_analysis_endpoint(req: dict):
-    image_url = req.get("image_url", "")
-    task = req.get("task", "describe")
-    prompt = f"Perform the following task on the image at {image_url}: {task}"
-    model_name, api_endpoint = select_model(prompt)
-    response = "".join(list(request_generation(
+async def image_analysis_endpoint(file: UploadFile = File(...)):
+    model_name, api_endpoint = select_model("image analysis", input_type="image")
+    image_data = await file.read()
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
-        message=prompt,
+        message="Analyze this image",
         system_prompt="You are an expert in image analysis. Provide detailed descriptions or classifications based on the query.",
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    )))
+        input_type="image",
+        image_data=image_data,
+        output_type="text"
+    )
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
    return {"image_analysis": response}
 
 @router.get("/api/test-model")
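Taken together, the endpoint changes standardize on a streaming `request_generation` generator whose string chunks are joined for text responses and whose byte chunks are joined for audio. A hedged sketch of how a client might exercise the updated routes; the `QueryRequest` field names are inferred from how `chat_endpoint` reads the request (`req.message`, `req.temperature`, `req.max_new_tokens`, `req.enable_browsing`, `req.model_choice`), the authoritative schema lives in api/models.py, and the base URL is an assumption:

```python
import requests

BASE_URL = "http://localhost:7860"  # assumed; point at the running FastAPI app

# Chat: plain JSON in, joined text chunks out.
payload = {
    "message": "Summarize the MGZon chatbot features.",
    "temperature": 0.7,
    "max_new_tokens": 1024,
    "enable_browsing": False,
    "model_choice": "openai/gpt-oss-20b:together",  # optional override
}
resp = requests.post(f"{BASE_URL}/api/chat", json=payload, timeout=120)
print(resp.json()["response"])

# Transcription (and image analysis) now take multipart file uploads.
with open("sample.wav", "rb") as f:
    r = requests.post(f"{BASE_URL}/api/audio-transcription", files={"file": f}, timeout=300)
print(r.json()["transcription"])
```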
main.py CHANGED
@@ -20,7 +20,7 @@ logger.info("Files in /app/: %s", os.listdir("/app"))
 
 # Set up the client for the Hugging Face Inference API
 HF_TOKEN = os.getenv("HF_TOKEN")
-BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")  # add the backup token
+BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 if not HF_TOKEN:
     logger.error("HF_TOKEN is not set in environment variables.")
     raise ValueError("HF_TOKEN is required for Inference API.")
@@ -31,71 +31,84 @@ CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
 
 # CSS setup
 css = """
-.gradio-container { max-width: 1200px; margin: auto; }
-.chatbot { border: 1px solid #ccc; border-radius: 10px; padding: 15px; background-color: #f9f9f9; }
-.input-textbox { font-size: 16px; padding: 10px; }
+.gradio-container { max-width: 1200px; margin: auto; font-family: Arial, sans-serif; }
+.chatbot { border: 1px solid #ccc; border-radius: 12px; padding: 20px; background-color: #f0f4f8; }
+.input-textbox { font-size: 18px; padding: 12px; border-radius: 8px; }
 .upload-button::before {
-    content: '📷';
-    margin-right: 8px;
-    font-size: 22px;
+    content: '📸';
+    margin-right: 10px;
+    font-size: 24px;
 }
 .audio-input::before {
-    content: '🎤';
-    margin-right: 8px;
-    font-size: 22px;
+    content: '🎙️';
+    margin-right: 10px;
+    font-size: 24px;
 }
 .audio-output::before {
     content: '🔊';
-    margin-right: 8px;
-    font-size: 22px;
+    margin-right: 10px;
+    font-size: 24px;
+}
+.send-button {
+    background-color: #007bff;
+    color: white;
+    padding: 10px 20px;
+    border-radius: 8px;
+    cursor: pointer;
+    font-size: 16px;
+    transition: background-color 0.3s;
+}
+.send-button:hover {
+    background-color: #0056b3;
 }
 .loading::after {
     content: '';
     display: inline-block;
-    width: 16px;
-    height: 16px;
-    border: 2px solid #333;
+    width: 18px;
+    height: 18px;
+    border: 3px solid #007bff;
     border-top-color: transparent;
     border-radius: 50%;
     animation: spin 1s linear infinite;
-    margin-left: 8px;
+    margin-left: 10px;
 }
 @keyframes spin {
     to { transform: rotate(360deg); }
 }
 .output-container {
     margin-top: 20px;
-    padding: 10px;
+    padding: 15px;
     border: 1px solid #ddd;
-    border-radius: 8px;
+    border-radius: 10px;
+    background-color: #fff;
 }
 .audio-output-container {
     display: flex;
     align-items: center;
-    gap: 10px;
-    margin-top: 10px;
+    gap: 12px;
+    margin-top: 15px;
+}
+.model-selector {
+    border-radius: 8px;
+    padding: 10px;
+    font-size: 16px;
 }
 """
 
 # Function to process input (text, audio, images, files)
-def process_input(message, audio_input=None, file_input=None, history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000):
+def process_input(message, audio_input=None, image_input=None, model_choice="openai/gpt-oss-120b:cerebras", history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000, output_type="text"):
     input_type = "text"
     audio_data = None
     image_data = None
+
     if audio_input:
         input_type = "audio"
-        with open(audio_input, "rb") as f:
-            audio_data = f.read()
-        message = "Transcribe this audio"
-    elif file_input:
-        input_type = "file"
-        if file_input.endswith(('.png', '.jpg', '.jpeg')):
-            input_type = "image"
-            with open(file_input, "rb") as f:
-                image_data = f.read()
-            message = f"Analyze image: {file_input}"
-        else:
-            message = f"Analyze file: {file_input}"
+        audio_data = audio_input
+        message = "Transcribe this audio and respond accordingly"
+    elif image_input:
+        input_type = "image"
+        image_data = image_input
+        message = f"Analyze this image: {message or 'Describe the image'}"
 
     response_text = ""
     audio_response = None
@@ -109,7 +122,9 @@ def process_input(message, audio_input=None, file_input=None, history=None, syst
         max_new_tokens=max_new_tokens,
         input_type=input_type,
         audio_data=audio_data,
-        image_data=image_data
+        image_data=image_data,
+        model_choice=model_choice,
+        output_type=output_type
     ):
         if isinstance(chunk, bytes):
             audio_response = io.BytesIO(chunk)
@@ -122,7 +137,7 @@ def process_input(message, audio_input=None, file_input=None, history=None, syst
 chatbot_ui = gr.ChatInterface(
     fn=process_input,
     chatbot=gr.Chatbot(
-        label="MGZon Chatbot",
+        label="MGZon Chatbot",
         height=800,
         latex_delimiters=LATEX_DELIMS,
     ),
@@ -130,28 +145,42 @@ chatbot_ui = gr.ChatInterface(
     additional_inputs=[
         gr.Textbox(
             label="System Prompt",
-            value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image, and file inputs. For audio, transcribe using Whisper. For text-to-speech, use Parler-TTS. For images and files, analyze content appropriately. Continue generating content until the query is fully addressed, leveraging the full capacity of the model.",
+            value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image, and file inputs. For audio, transcribe using Whisper and respond with text or speech. For images, analyze using CLIP and provide detailed descriptions. For general queries, use the selected model to provide in-depth answers.",
             lines=4
         ),
         gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7),
         gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
         gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True),
         gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000),
-        gr.Audio(label="Voice Input", type="filepath", elem_classes="audio-input"),
-        gr.File(label="Upload Image/File", file_types=["image", ".pdf", ".txt"], elem_classes="upload-button"),
+        gr.Dropdown(
+            label="Model Choice",
+            choices=[
+                "openai/gpt-oss-120b:cerebras",
+                "openai/gpt-oss-20b:together",
+                "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
+                "mistralai/Mixtral-8x7B-Instruct-v0.1",
+                "openai/clip-vit-base-patch32",
+                "openai/whisper-large-v3-turbo",
+                "parler-tts/parler-tts-mini-v1"
+            ],
+            value="openai/gpt-oss-120b:cerebras",
+            elem_classes="model-selector"
+        ),
+        gr.Audio(label="Record & Send Voice", type="numpy", streaming=True, elem_classes="audio-input"),
+        gr.Image(label="Capture & Send Image", type="numpy", source="webcam", elem_classes="upload-button"),
+        gr.Radio(label="Output Type", choices=["text", "speech"], value="text")
     ],
     additional_outputs=[gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output", autoplay=True)],
     stop_btn="Stop",
     examples=[
-        ["Explain the difference between supervised and unsupervised learning in detail with examples."],
-        ["Generate a complete React component for a login form with form validation and error handling."],
-        ["Describe this image: https://example.com/image.jpg"],
-        ["Transcribe this audio: [upload audio file]."],
-        ["Convert this text to speech: Hello, welcome to MGZon!"],
-        ["Analyze this file: [upload PDF or text file]."],
+        ["Explain the history of AI in detail."],
+        ["Generate a React login component with validation."],
+        ["Describe this image: [capture image]."],
+        ["Transcribe and respond to this audio: [record audio]."],
+        ["Convert this text to speech: Welcome to MGZon!"],
     ],
     title="MGZon Chatbot",
-    description="A versatile chatbot powered by DeepSeek, CLIP, Whisper, and Parler-TTS for text, image, audio, and file queries. Supports long responses, voice input/output, file uploads with custom icons, and backup token switching. Licensed under Apache 2.0.",
+    description="A versatile chatbot powered by multiple models for text, image, and audio queries. Supports real-time voice and image input, model selection, and web search. Licensed under Apache 2.0.",
     theme="gradio/soft",
     css=css,
 )
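One detail worth flagging: with `type="numpy"`, `gr.Audio` and `gr.Image` hand `process_input` NumPy data (for audio, a `(sample_rate, samples)` tuple), while `request_generation` is annotated to accept `Optional[bytes]`. A small, hypothetical helper along these lines could bridge the two if raw WAV bytes are needed; `soundfile` is an assumed dependency and the helper is not part of this commit:

```python
import io

import numpy as np
import soundfile as sf  # assumed dependency; any WAV encoder would do


def numpy_audio_to_wav_bytes(audio_input: tuple[int, np.ndarray]) -> bytes:
    """Convert a Gradio (sample_rate, samples) tuple into WAV-encoded bytes."""
    sample_rate, samples = audio_input
    # Gradio typically delivers int16 samples; scale integer types to float32.
    if samples.dtype.kind == "i":
        samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
    else:
        samples = samples.astype(np.float32)
    buf = io.BytesIO()
    sf.write(buf, samples, sample_rate, format="WAV")
    return buf.getvalue()
```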
utils/generation.py CHANGED
@@ -15,11 +15,12 @@ import torchaudio
 from PIL import Image
 from transformers import CLIPModel, CLIPProcessor, AutoProcessor
 from parler_tts import ParlerTTSForConditionalGeneration
+from utils.web_search import web_search  # moved the import out of the function
 
 logger = logging.getLogger(__name__)
 
 # Cache setup
-cache = TTLCache(maxsize=100, ttl=600)  # cache of 100 entries with a 10-minute TTL
+cache = TTLCache(maxsize=100, ttl=600)
 
 # LATEX_DELIMS definition
 LATEX_DELIMS = [
@@ -31,11 +32,11 @@ LATEX_DELIMS = [
 
 # Set up the client for the Hugging Face Inference API
 HF_TOKEN = os.getenv("HF_TOKEN")
-BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")  # backup token
+BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
 FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co/v1"
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:fireworks-ai")
-SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
+SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "openai/gpt-oss-20b:together")
 TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
 CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32")
 CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
@@ -43,7 +44,6 @@ ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3-turbo")
 TTS_MODEL = os.getenv("TTS_MODEL", "parler-tts/parler-tts-mini-v1")
 
 def check_model_availability(model_name: str, api_base: str, api_key: str) -> tuple[bool, str]:
-    """Check that the model is available via the API, with backup-token support."""
     try:
         response = requests.get(
             f"{api_base}/models/{model_name}",
@@ -64,17 +64,18 @@ def check_model_availability(model_name: str, api_base: str, api_key: str) -> tu
             return check_model_availability(model_name, api_base, BACKUP_HF_TOKEN)
         return False, api_key
 
-def select_model(query: str, input_type: str = "text") -> tuple[str, str]:
+def select_model(query: str, input_type: str = "text", model_choice: Optional[str] = None) -> tuple[str, str]:
+    if model_choice:
+        logger.info(f"User-selected model: {model_choice}")
+        return model_choice, API_ENDPOINT if model_choice in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME] else FALLBACK_API_ENDPOINT
+
     query_lower = query.lower()
-    # Audio support
     if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
         logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
         return ASR_MODEL, FALLBACK_API_ENDPOINT
-    # Text-to-speech support
     if any(keyword in query_lower for keyword in ["text-to-speech", "tts", "تحويل نص إلى صوت"]):
         logger.info(f"Selected {TTS_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for text-to-speech")
         return TTS_MODEL, FALLBACK_API_ENDPOINT
-    # CLIP models for image-related queries
     image_patterns = [
         r"\bimage\b", r"\bpicture\b", r"\bphoto\b", r"\bvisual\b", r"\bصورة\b", r"\bتحليل\s+صورة\b",
         r"\bimage\s+analysis\b", r"\bimage\s+classification\b", r"\bimage\s+description\b"
@@ -83,16 +84,6 @@ def select_model(query: str, input_type: str = "text") -> tuple[str, str]:
         if re.search(pattern, query_lower, re.IGNORECASE):
             logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
             return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
-    # DeepSeek model for MGZon-related queries
-    mgzon_patterns = [
-        r"\bmgzon\b", r"\bmgzon\s+(products|services|platform|features|mission|technology|solutions|oauth)\b",
-        r"\bميزات\s+mgzon\b", r"\bخدمات\s+mgzon\b", r"\boauth\b"
-    ]
-    for pattern in mgzon_patterns:
-        if re.search(pattern, query_lower, re.IGNORECASE):
-            logger.info(f"Selected {SECONDARY_MODEL_NAME} with endpoint {FALLBACK_API_ENDPOINT} for MGZon-related query: {query}")
-            return SECONDARY_MODEL_NAME, FALLBACK_API_ENDPOINT
-    # Default model for general queries
     logger.info(f"Selected {MODEL_NAME} with endpoint {API_ENDPOINT} for general query: {query}")
     return MODEL_NAME, API_ENDPOINT
 
@@ -113,16 +104,13 @@ def request_generation(
     input_type: str = "text",
     audio_data: Optional[bytes] = None,
     image_data: Optional[bytes] = None,
+    output_type: str = "text"
 ) -> Generator[bytes | str, None, None]:
-    from utils.web_search import web_search  # deferred import
-
-    # Check model availability, with backup-token support
     is_available, selected_api_key = check_model_availability(model_name, api_base, api_key)
     if not is_available:
         yield f"Error: Model {model_name} is not available. Please check the model endpoint or token."
         return
 
-    # Build a cache key
     cache_key = hashlib.md5(json.dumps({
         "message": message,
         "system_prompt": system_prompt,
@@ -143,7 +131,7 @@ def request_generation(
     enhanced_system_prompt = system_prompt
 
     # Audio (ASR) handling
-    if model_name == ASR_MODEL and audio_data:
+    if model_name == ASR_MODEL and audio_data is not None:
         task_type = "audio_transcription"
         try:
             audio_file = io.BytesIO(audio_data)
@@ -158,6 +146,15 @@ def request_generation(
                 response_format="text"
             )
             yield transcription
+            if output_type == "speech":
+                tts_model = TTS_MODEL
+                tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=transcription, return_tensors="pt")
+                tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
+                audio = tts_model_instance.generate(**tts_inputs)
+                audio_file = io.BytesIO()
+                torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                audio_file.seek(0)
+                yield audio_file.read()
             cache[cache_key] = [transcription]
             return
         except Exception as e:
@@ -166,11 +163,11 @@ def request_generation(
             return
 
     # Text-to-speech (TTS) handling
-    if model_name == TTS_MODEL:
+    if model_name == TTS_MODEL or output_type == "speech":
         task_type = "text_to_speech"
         try:
-            model = ParlerTTSForConditionalGeneration.from_pretrained(model_name)
-            processor = AutoProcessor.from_pretrained(model_name)
+            model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
+            processor = AutoProcessor.from_pretrained(TTS_MODEL)
             inputs = processor(text=message, return_tensors="pt")
             audio = model.generate(**inputs)
             audio_file = io.BytesIO()
@@ -185,7 +182,7 @@ def request_generation(
             return
 
     # Image handling
-    if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
+    if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data is not None:
         task_type = "image_analysis"
         try:
             model = CLIPModel.from_pretrained(model_name)
@@ -195,8 +192,18 @@ def request_generation(
             outputs = model(**inputs)
             logits_per_image = outputs.logits_per_image
             probs = logits_per_image.softmax(dim=1)
-            yield f"Image analysis result: {probs.tolist()}"
-            cache[cache_key] = [f"Image analysis result: {probs.tolist()}"]
+            analysis = f"Image analysis result: {probs.tolist()}"
+            yield analysis
+            if output_type == "speech":
+                tts_model = TTS_MODEL
+                tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=analysis, return_tensors="pt")
+                tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
+                audio = tts_model_instance.generate(**tts_inputs)
+                audio_file = io.BytesIO()
+                torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                audio_file.seek(0)
+                yield audio_file.read()
+            cache[cache_key] = [analysis]
             return
         except Exception as e:
             logger.error(f"Image analysis failed: {e}")
@@ -206,27 +213,16 @@ def request_generation(
     # Refine the system_prompt based on the task type
     if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
         task_type = "image"
-        enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query. Continue until the query is fully addressed."
+        enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query."
     elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
         task_type = "code"
-        enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations. Support frameworks like React, Django, Flask, and others. Format code with triple backticks (```) and specify the language. Continue until the task is fully addressed."
+        enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations."
     elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
         task_type = "analysis"
-        enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights. Continue until all aspects of the query are thoroughly covered."
-    elif any(keyword in message.lower() for keyword in ["review", "مراجعة"]):
-        task_type = "review"
-        enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations. Ensure the response is complete and detailed."
-    elif any(keyword in message.lower() for keyword in ["publish", "نشر"]):
-        task_type = "publish"
-        enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices. Provide a complete and detailed response."
+        enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights."
     else:
-        enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable. Continue generating content until the query is fully answered, leveraging the full capacity of the model."
-
-    # If the query is short, encourage a detailed answer
-    if len(message.split()) < 5:
-        enhanced_system_prompt += "\nEven for short or general queries, provide a detailed, in-depth response with examples, explanations, and additional context to ensure completeness."
+        enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable."
 
-    logger.info(f"Task type detected: {task_type}")
     input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
     if chat_history:
         for msg in chat_history:
@@ -262,8 +258,6 @@ def request_generation(
         reasoning_started = False
         reasoning_closed = False
         saw_visible_output = False
-        last_tool_name = None
-        last_tool_args = None
         buffer = ""
 
         for chunk in stream:
@@ -291,16 +285,6 @@ def request_generation(
                         buffer = ""
                     continue
 
-                if chunk.choices[0].delta.tool_calls and model_name in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME]:
-                    tool_call = chunk.choices[0].delta.tool_calls[0]
-                    name = getattr(tool_call, "function", {}).get("name", None)
-                    args = getattr(tool_call, "function", {}).get("arguments", None)
-                    if name:
-                        last_tool_name = name
-                    if args:
-                        last_tool_args = args
-                    continue
-
                 if chunk.choices[0].finish_reason in ("stop", "tool_calls", "error", "length"):
                     if buffer:
                         cached_chunks.append(buffer)
@@ -313,16 +297,8 @@ def request_generation(
                         reasoning_closed = True
 
                     if not saw_visible_output:
-                        msg = "I attempted to call a tool, but tools aren't executed in this environment, so no final answer was produced."
-                        if last_tool_name:
-                            try:
-                                args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
-                            except Exception:
-                                args_text = str(last_tool_args)
-                            msg += f"\n\n• Tool requested: **{last_tool_name}**\n• Arguments: `{args_text}`"
-                        cached_chunks.append(msg)
-                        yield msg
-
+                        cached_chunks.append("No visible output produced.")
+                        yield "No visible output produced."
                     if chunk.choices[0].finish_reason == "error":
                         cached_chunks.append(f"Error: Unknown error")
                         yield f"Error: Unknown error"
@@ -335,6 +311,16 @@ def request_generation(
             cached_chunks.append(buffer)
             yield buffer
 
+        if output_type == "speech":
+            tts_model = TTS_MODEL
+            tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=buffer, return_tensors="pt")
+            tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
+            audio = tts_model_instance.generate(**tts_inputs)
+            audio_file = io.BytesIO()
+            torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+            audio_file.seek(0)
+            yield audio_file.read()
+
         cache[cache_key] = cached_chunks
 
     except Exception as e:
@@ -357,134 +343,12 @@ def request_generation(
             input_type=input_type,
             audio_data=audio_data,
            image_data=image_data,
+            output_type=output_type
         ):
             yield chunk
         return
-        if model_name == MODEL_NAME:
-            fallback_model = SECONDARY_MODEL_NAME
-            fallback_endpoint = FALLBACK_API_ENDPOINT
-            logger.info(f"Retrying with fallback model: {fallback_model} on {fallback_endpoint}")
-            try:
-                is_available, selected_api_key = check_model_availability(fallback_model, fallback_endpoint, selected_api_key)
-                if not is_available:
-                    yield f"Error: Fallback model {fallback_model} is not available."
-                    return
-                client = OpenAI(api_key=selected_api_key, base_url=fallback_endpoint, timeout=120.0)
-                stream = client.chat.completions.create(
-                    model=fallback_model,
-                    messages=input_messages,
-                    temperature=temperature,
-                    max_tokens=max_new_tokens,
-                    stream=True,
-                    tools=[],
-                    tool_choice="none",
-                )
-                for chunk in stream:
-                    if chunk.choices[0].delta.content:
-                        content = chunk.choices[0].delta.content
-                        if content == "<|channel|>analysis<|message|>":
-                            if not reasoning_started:
-                                cached_chunks.append("analysis")
-                                yield "analysis"
-                                reasoning_started = True
-                            continue
-                        if content == "<|channel|>final<|message|>":
-                            if reasoning_started and not reasoning_closed:
-                                cached_chunks.append("assistantfinal")
-                                yield "assistantfinal"
-                                reasoning_closed = True
-                            continue
-
-                        saw_visible_output = True
-                        buffer += content
-
-                        if "\n" in buffer or len(buffer) > 5000:
-                            cached_chunks.append(buffer)
-                            yield buffer
-                            buffer = ""
-                        continue
-
-                    if chunk.choices[0].finish_reason in ("stop", "error", "length"):
-                        if buffer:
-                            cached_chunks.append(buffer)
-                            yield buffer
-                            buffer = ""
-
-                        if reasoning_started and not reasoning_closed:
-                            cached_chunks.append("assistantfinal")
-                            yield "assistantfinal"
-                            reasoning_closed = True
-
-                        if not saw_visible_output:
-                            cached_chunks.append("No visible output produced.")
-                            yield "No visible output produced."
-                        if chunk.choices[0].finish_reason == "error":
-                            cached_chunks.append(f"Error: Unknown error with fallback model {fallback_model}")
-                            yield f"Error: Unknown error with fallback model {fallback_model}"
-                        elif chunk.choices[0].finish_reason == "length":
-                            cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
-                            yield "Response truncated due to token limit. Please refine your query or request continuation."
-                        break
-
-                if buffer:
-                    cached_chunks.append(buffer)
-                    yield buffer
-
-                cache[cache_key] = cached_chunks
-
-            except Exception as e2:
-                logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
-                try:
-                    is_available, selected_api_key = check_model_availability(TERTIARY_MODEL_NAME, FALLBACK_API_ENDPOINT, selected_api_key)
-                    if not is_available:
-                        yield f"Error: Tertiary model {TERTIARY_MODEL_NAME} is not available."
-                        return
-                    client = OpenAI(api_key=selected_api_key, base_url=FALLBACK_API_ENDPOINT, timeout=120.0)
-                    stream = client.chat.completions.create(
-                        model=TERTIARY_MODEL_NAME,
-                        messages=input_messages,
-                        temperature=temperature,
-                        max_tokens=max_new_tokens,
-                        stream=True,
-                        tools=[],
-                        tool_choice="none",
-                    )
-                    for chunk in stream:
-                        if chunk.choices[0].delta.content:
-                            content = chunk.choices[0].delta.content
-                            saw_visible_output = True
-                            buffer += content
-                            if "\n" in buffer or len(buffer) > 5000:
-                                cached_chunks.append(buffer)
-                                yield buffer
-                                buffer = ""
-                            continue
-                        if chunk.choices[0].finish_reason in ("stop", "error", "length"):
-                            if buffer:
-                                cached_chunks.append(buffer)
-                                yield buffer
-                                buffer = ""
-                            if not saw_visible_output:
-                                cached_chunks.append("No visible output produced.")
-                                yield "No visible output produced."
-                            if chunk.choices[0].finish_reason == "error":
-                                cached_chunks.append(f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}")
-                                yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
-                            elif chunk.choices[0].finish_reason == "length":
-                                cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
-                                yield "Response truncated due to token limit. Please refine your query or request continuation."
-                            break
-                    if buffer:
-                        cached_chunks.append(buffer)
-                        yield buffer
-                    cache[cache_key] = cached_chunks
-                except Exception as e3:
-                    logger.exception(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
-                    yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({fallback_model}), Tertiary ({TERTIARY_MODEL_NAME}). Please check your model configurations."
-                return
-        else:
-            yield f"Error: Failed to load model {model_name}: {e}"
-            return
+        yield f"Error: Failed to load model {model_name}: {e}"
+        return
 
 def format_final(analysis_text: str, visible_text: str) -> str:
     reasoning_safe = html.escape((analysis_text or "").strip())
@@ -500,12 +364,12 @@ def format_final(analysis_text: str, visible_text: str) -> str:
         f"{response}" if response else "No final response available."
     )
 
-def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None):
+def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None, model_choice=None, output_type="text"):
     if not message.strip() and not audio_data and not image_data:
-        yield "Please enter a prompt or upload a file."
+        yield "Please enter a prompt, record audio, or capture an image."
        return
 
-    model_name, api_endpoint = select_model(message, input_type=input_type)
+    model_name, api_endpoint = select_model(message, input_type=input_type, model_choice=model_choice)
     chat_history = []
     for h in history:
         if isinstance(h, dict):
@@ -534,7 +398,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
                 "type": "function",
                 "function": {
                     "name": "code_generation",
-                    "description": "Generate or modify code for various frameworks (React, Django, Flask, etc.)",
+                    "description": "Generate or modify code for various frameworks",
                     "parameters": {
                         "type": "object",
                         "properties": {
@@ -612,6 +476,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
         input_type=input_type,
         audio_data=audio_data,
         image_data=image_data,
+        output_type=output_type
     )
 
     for chunk in stream:
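The simplified `select_model` now has two paths: an explicit `model_choice` passed down from the UI or API, and keyword/regex-based routing for audio, TTS, and image queries, with everything else going to the primary model. An illustrative sketch of the expected routing under the default environment variables (no `MODEL_NAME`/`ASR_MODEL` overrides set); importing `utils.generation` pulls in the heavy model dependencies, so this is for orientation rather than a lightweight test:

```python
from utils.generation import select_model

# Audio input routes to the Whisper ASR model on the fallback endpoint.
print(select_model("transcribe audio", input_type="audio"))
# expected: ("openai/whisper-large-v3-turbo", "https://api-inference.huggingface.co/v1")

# An explicit user selection wins; chat models go through the router endpoint.
print(select_model("anything", model_choice="openai/gpt-oss-20b:together"))
# expected: ("openai/gpt-oss-20b:together", "https://router.huggingface.co/v1")
```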
utils/web_search.py CHANGED
@@ -11,23 +11,23 @@ def web_search(query: str) -> str:
     google_cse_id = os.getenv("GOOGLE_CSE_ID")
     if not google_api_key or not google_cse_id:
         return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
-    url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}+site:https://hager-zon.vercel.app/"
-    response = requests.get(url, timeout=10)
+    url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
+    response = requests.get(url, timeout=5)
     response.raise_for_status()
     results = response.json().get("items", [])
     if not results:
         return "No web results found."
     search_results = []
-    for i, item in enumerate(results[:5]):
+    for i, item in enumerate(results[:3]):  # fewer results to speed up the search
         title = item.get("title", "")
         snippet = item.get("snippet", "")
         link = item.get("link", "")
         try:
-            page_response = requests.get(link, timeout=5)
+            page_response = requests.get(link, timeout=3)
             page_response.raise_for_status()
             soup = BeautifulSoup(page_response.text, "html.parser")
             paragraphs = soup.find_all("p")
-            page_content = " ".join([p.get_text() for p in paragraphs][:1000])
+            page_content = " ".join([p.get_text() for p in paragraphs][:500])
         except Exception as e:
             logger.warning(f"Failed to fetch page content for {link}: {e}")
             page_content = snippet
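The web_search changes drop the hard-coded site restriction and tighten the timeouts and result count. A minimal sketch of calling it directly; the placeholder credentials are assumptions and must be replaced with real Google Custom Search values:

```python
import os

from utils.web_search import web_search

# web_search returns a plain string: formatted results, "No web results found.",
# or an explanatory message when the credentials below are missing.
os.environ.setdefault("GOOGLE_API_KEY", "<your-google-api-key>")
os.environ.setdefault("GOOGLE_CSE_ID", "<your-cse-id>")
print(web_search("MGZon chatbot"))
```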