ibrahimlasfar committed
Commit bb3c951 · 1 Parent(s): 2ee9112

Update app with audio/image buttons, model fixes, and UI enhancements

Files changed (8)
  1. Dockerfile +2 -1
  2. README.md +1 -1
  3. api/endpoints.py +39 -12
  4. api/models.py +3 -2
  5. main.py +137 -63
  6. requirements.txt +3 -3
  7. utils/generation.py +190 -42
  8. utils/web_search.py +13 -4
Dockerfile CHANGED
@@ -3,12 +3,13 @@ FROM python:3.10-slim
 # Set working directory
 WORKDIR /app

-# Install chromium-driver and build dependencies
+# Install system dependencies
 RUN apt-get update && apt-get install -y \
     chromium-driver \
     git \
     gcc \
     libc-dev \
+    ffmpeg \
     && apt-get clean && rm -rf /var/lib/apt/lists/*

 # Update pip
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: MGZON Chat
+title: MGZon Chatbot
 emoji: "🤖"
 colorFrom: "blue"
 colorTo: "green"
api/endpoints.py CHANGED
@@ -5,6 +5,7 @@ import io
 from openai import OpenAI
 from api.models import QueryRequest
 from utils.generation import request_generation, select_model
+from utils.web_search import web_search

 router = APIRouter()

@@ -12,13 +13,15 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
 MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:together")
+SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B:featherless-ai")
+TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "openai/gpt-oss-120b:cerebras")

 @router.get("/api/model-info")
 def model_info():
     return {
         "model_name": MODEL_NAME,
-        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B:featherless-ai"),
-        "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "openai/gpt-oss-120b:cerebras"),
+        "secondary_model": SECONDARY_MODEL_NAME,
+        "tertiary_model": TERTIARY_MODEL_NAME,
         "clip_base_model": os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32"),
         "clip_large_model": os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14"),
         "api_base": API_ENDPOINT,
@@ -46,7 +49,11 @@ async def chat_endpoint(req: QueryRequest):
         temperature=req.temperature,
         max_new_tokens=req.max_new_tokens,
         deep_search=req.enable_browsing,
+        output_format=req.output_format
     )
+    if req.output_format == "audio":
+        audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
+        return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
     response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"response": response}

@@ -54,7 +61,7 @@ async def chat_endpoint(req: QueryRequest):
 async def audio_transcription_endpoint(file: UploadFile = File(...)):
     model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
     audio_data = await file.read()
-    response = "".join([chunk for chunk in request_generation(
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message="Transcribe audio",
@@ -64,14 +71,15 @@ async def audio_transcription_endpoint(file: UploadFile = File(...)):
         max_new_tokens=128000,
         input_type="audio",
         audio_data=audio_data,
-    ) if isinstance(chunk, str)])
+        output_format="text"
+    )))
     return {"transcription": response}

 @router.post("/api/text-to-speech")
 async def text_to_speech_endpoint(req: dict):
     text = req.get("text", "")
     model_name, api_endpoint = select_model("text to speech", input_type="text")
-    response = request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=text,
@@ -80,8 +88,9 @@ async def text_to_speech_endpoint(req: dict):
         temperature=0.7,
         max_new_tokens=128000,
         input_type="text",
+        output_format="audio"
     )
-    audio_data = b"".join([chunk for chunk in response if isinstance(chunk, bytes)])
+    audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
     return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")

 @router.post("/api/code")
@@ -89,9 +98,10 @@ async def code_endpoint(req: dict):
     framework = req.get("framework")
     task = req.get("task")
     code = req.get("code", "")
+    output_format = req.get("output_format", "text")
     prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
     model_name, api_endpoint = select_model(prompt)
-    response = "".join([chunk for chunk in request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=prompt,
@@ -99,14 +109,20 @@ async def code_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    ) if isinstance(chunk, str)])
+        output_format=output_format
+    )
+    if output_format == "audio":
+        audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
+        return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"generated_code": response}

 @router.post("/api/analysis")
 async def analysis_endpoint(req: dict):
     message = req.get("text", "")
+    output_format = req.get("output_format", "text")
     model_name, api_endpoint = select_model(message)
-    response = "".join([chunk for chunk in request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=message,
@@ -114,14 +130,20 @@ async def analysis_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    ) if isinstance(chunk, str)])
+        output_format=output_format
+    )
+    if output_format == "audio":
+        audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
+        return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"analysis": response}

 @router.post("/api/image-analysis")
 async def image_analysis_endpoint(file: UploadFile = File(...)):
+    output_format = "text"  # could be extended to support audio output
     model_name, api_endpoint = select_model("analyze image", input_type="image")
     image_data = await file.read()
-    response = "".join([chunk for chunk in request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message="Analyze this image",
@@ -131,7 +153,12 @@ async def image_analysis_endpoint(file: UploadFile = File(...)):
         max_new_tokens=128000,
         input_type="image",
         image_data=image_data,
-    ) if isinstance(chunk, str)])
+        output_format=output_format
+    )
+    if output_format == "audio":
+        audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
+        return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"image_analysis": response}

 @router.get("/api/test-model")
api/models.py CHANGED
@@ -3,8 +3,9 @@ from typing import List, Optional
 
 class QueryRequest(BaseModel):
     message: str
-    system_prompt: str = "You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image inputs. For audio, transcribe using Whisper. For text-to-speech, use Parler-TTS. For images, analyze using CLIP. Respond with voice output when requested. Continue until the query is fully addressed."
+    system_prompt: str = "You are an expert assistant providing detailed, comprehensive, and well-structured responses. For code, include comments, examples, and complete implementations. For image-related queries, provide detailed analysis or descriptions. For general queries, provide in-depth explanations with examples and additional context where applicable. Respond in the requested output format (text or audio)."
     history: Optional[List[dict]] = None
     temperature: float = 0.7
     max_new_tokens: int = 128000
-    enable_browsing: bool = True
+    enable_browsing: bool = False
+    output_format: str = "text"  # new: output format selection
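For illustration only (not part of the commit): a request payload matching the updated QueryRequest schema. Field names and defaults come from the model above; the values are made up.

payload = {
    "message": "Generate a React login form component",
    "history": [],                 # Optional[List[dict]]
    "temperature": 0.7,
    "max_new_tokens": 128000,
    "enable_browsing": False,      # new default in this commit
    "output_format": "text",       # or "audio" for a WAV response
    # system_prompt is omitted here, so the default defined on the model applies
}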
main.py CHANGED
@@ -32,42 +32,88 @@ CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
 # CSS setup
 css = """
 .gradio-container { max-width: 1200px; margin: auto; font-family: Arial, sans-serif; }
-.chatbot { border: 1px solid #ccc; border-radius: 12px; padding: 20px; background-color: #f5f5f5; }
-.input-textbox { font-size: 16px; padding: 12px; border-radius: 8px; }
-.upload-button, .capture-button, .record-button {
-    background-color: #4CAF50; color: white; padding: 10px 20px; border-radius: 8px; font-size: 16px; cursor: pointer;
+.chatbot {
+    border: 1px solid #ccc;
+    border-radius: 15px;
+    padding: 20px;
+    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
+}
+.input-textbox {
+    font-size: 18px;
+    padding: 12px;
+    border-radius: 8px;
+    border: 1px solid #aaa;
+}
+.upload-button, .audio-input-button, .audio-record-button {
+    background: #4CAF50;
+    color: white;
+    border-radius: 8px;
+    padding: 10px 20px;
+    font-size: 16px;
+    cursor: pointer;
+}
+.upload-button:hover, .audio-input-button:hover, .audio-record-button:hover {
+    background: #45a049;
+}
+.upload-button::before {
+    content: '📷 ';
+    font-size: 20px;
+}
+.audio-input-button::before {
+    content: '🎤 ';
+    font-size: 20px;
+}
+.audio-record-button::before {
+    content: '🔊 ';
+    font-size: 20px;
 }
-.upload-button:hover, .capture-button:hover, .record-button:hover { background-color: #45a049; }
-.upload-button::before { content: '📷 '; font-size: 20px; }
-.capture-button::before { content: '🎥 '; font-size: 20px; }
-.record-button::before { content: '🎤 '; font-size: 20px; }
-.audio-output::before { content: '🔊 '; font-size: 20px; }
 .loading::after {
-    content: ''; display: inline-block; width: 18px; height: 18px; border: 3px solid #333;
-    border-top-color: transparent; border-radius: 50%; animation: spin 1s linear infinite; margin-left: 10px;
+    content: '';
+    display: inline-block;
+    width: 18px;
+    height: 18px;
+    border: 3px solid #333;
+    border-top-color: transparent;
+    border-radius: 50%;
+    animation: spin 1s linear infinite;
+    margin-left: 10px;
+}
+@keyframes spin {
+    to { transform: rotate(360deg); }
 }
-@keyframes spin { to { transform: rotate(360deg); } }
 .output-container {
-    margin-top: 20px; padding: 15px; border: 1px solid #ddd; border-radius: 10px; background-color: #fff;
+    margin-top: 25px;
+    padding: 15px;
+    border: 1px solid #ddd;
+    border-radius: 10px;
+    background: #fff;
 }
 .audio-output-container {
-    display: flex; align-items: center; gap: 12px; margin-top: 15px;
+    display: flex;
+    align-items: center;
+    gap: 15px;
+    margin-top: 15px;
+}
+.output-format-radio {
+    margin-top: 10px;
 }
 """

 # Input processing function
-def process_input(message, audio_input=None, image_input=None, history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000):
+def process_input(message, audio_input=None, image_input=None, history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000, output_format="text"):
     input_type = "text"
     audio_data = None
     image_data = None
     if audio_input:
         input_type = "audio"
-        audio_data = audio_input
+        with open(audio_input, "rb") as f:
+            audio_data = f.read()
         message = "Transcribe this audio"
     elif image_input:
         input_type = "image"
-        image_data = image_input
-        message = "Analyze this image"
+        with open(image_input, "rb") as f:
+            image_data = f.read()
+        message = f"Analyze this image"

     response_text = ""
     audio_response = None
@@ -81,7 +127,8 @@ def process_input(message, audio_input=None, image_input=None, history=None, sys
         max_new_tokens=max_new_tokens,
         input_type=input_type,
         audio_data=audio_data,
-        image_data=image_data
+        image_data=image_data,
+        output_format=output_format
     ):
         if isinstance(chunk, bytes):
             audio_response = io.BytesIO(chunk)
@@ -90,56 +137,78 @@ def process_input(message, audio_input=None, image_input=None, history=None, sys
             response_text += chunk
         yield response_text, audio_response

-# Function to enable audio recording
-def start_recording():
-    return gr.update(visible=True)
+# Handler for the audio submit button
+def submit_audio(audio_input, output_format):
+    if not audio_input:
+        return "Please upload or record an audio file.", None
+    return process_input(message="", audio_input=audio_input, output_format=output_format)

-# Function to enable image capture
-def start_image_capture():
-    return gr.update(visible=True)
+# Handler for the image submit button
+def submit_image(image_input, output_format):
+    if not image_input:
+        return "Please upload an image.", None
+    return process_input(message="", image_input=image_input, output_format=output_format)

 # Gradio interface setup
-chatbot_ui = gr.Interface(
-    fn=process_input,
-    inputs=[
-        gr.Textbox(label="Message", placeholder="Type your message or use buttons below...", elem_classes="input-textbox"),
-        gr.Audio(label="Record Audio", sources=["microphone"], type="numpy", streaming=True, visible=False, elem_classes="record-button"),
-        gr.Image(label="Capture/Upload Image", sources=["webcam", "upload"], type="numpy", visible=False, elem_classes="capture-button"),
-        gr.State(value=[]),  # History
-        gr.Textbox(
-            label="System Prompt",
-            value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image inputs. For audio, transcribe using Whisper. For text-to-speech, use Parler-TTS. For images, analyze using CLIP. Respond with voice output when requested. Continue until the query is fully addressed.",
-            lines=4
-        ),
-        gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7),
-        gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
-        gr.Checkbox(label="Enable DeepSearch", value=True),
-        gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000),
-    ],
-    outputs=[
-        gr.Markdown(label="Response", elem_classes="output-container"),
-        gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output", autoplay=True)
-    ],
-    additional_inputs=[
-        gr.Button("Record Audio", elem_classes="record-button", onclick=start_recording),
-        gr.Button("Capture/Upload Image", elem_classes="capture-button", onclick=start_image_capture),
-    ],
-    examples=[
-        ["Explain the history of AI in detail."],
-        ["Generate a React component for a login form."],
-        ["Transcribe this audio: [record audio]."],
-        ["Convert this text to speech: Hello, welcome to MGZon!"],
-        ["Analyze this image: [capture/upload image]."],
-    ],
-    title="MGZon Chatbot",
-    description="A versatile chatbot powered by advanced AI models. Supports text, audio, and image inputs with voice responses. Licensed under Apache 2.0.",
-    theme="gradio/soft",
-    css=css,
-)
+with gr.Blocks(css=css, theme="gradio/soft") as chatbot_ui:
+    gr.Markdown(
+        """
+        # MGZon Chatbot 🤖
+        A versatile chatbot powered by DeepSeek, GPT-OSS, CLIP, Whisper, and Parler-TTS. Supports text, audio, and image inputs with text or voice outputs. Upload files, record audio, or type your query and choose your output format!
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=3):
+            chatbot = gr.Chatbot(label="Chat", height=500, latex_delimiters=LATEX_DELIMS)
+        with gr.Column(scale=1):
+            with gr.Accordion("⚙️ Settings", open=True):
+                system_prompt = gr.Textbox(
+                    label="System Prompt",
+                    value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image, and file inputs. For audio, transcribe using Whisper. For text-to-speech, use Parler-TTS. For images, analyze content appropriately. Respond in the requested output format (text or audio).",
+                    lines=4
+                )
+                temperature = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7)
+                reasoning_effort = gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium")
+                enable_browsing = gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True)
+                max_new_tokens = gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000)
+                output_format = gr.Radio(
+                    label="Output Format",
+                    choices=["text", "audio"],
+                    value="text",
+                    elem_classes="output-format-radio"
+                )
+    with gr.Row():
+        message = gr.Textbox(label="Type your message", placeholder="Enter your query or describe your request...", lines=2, elem_classes="input-textbox")
+        submit_btn = gr.Button("Send", variant="primary")
+    with gr.Row():
+        with gr.Column(scale=1):
+            audio_input = gr.Audio(label="Record or Upload Audio", type="filepath", elem_classes="audio-input")
+            audio_submit_btn = gr.Button("Send Audio", elem_classes="audio-input-button")
+        with gr.Column(scale=1):
+            image_input = gr.File(label="Upload Image", file_types=["image"], elem_classes="upload-button")
+            image_submit_btn = gr.Button("Send Image", elem_classes="upload-button")
+    output_text = gr.Textbox(label="Response", lines=10, elem_classes="output-container")
+    output_audio = gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output-container", autoplay=True)
+
+    # Wire up the buttons
+    submit_btn.click(
+        fn=process_input,
+        inputs=[message, audio_input, image_input, chatbot, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, output_format],
+        outputs=[output_text, output_audio]
+    )
+    audio_submit_btn.click(
+        fn=submit_audio,
+        inputs=[audio_input, output_format],
+        outputs=[output_text, output_audio]
+    )
+    image_submit_btn.click(
+        fn=submit_image,
+        inputs=[image_input, output_format],
+        outputs=[output_text, output_audio]
+    )

 # FastAPI setup
 app = FastAPI(title="MGZon Chatbot API")
-app.include_router(api_router)

 # Mount Gradio on FastAPI
 app = gr.mount_gradio_app(app, chatbot_ui, path="/gradio")
@@ -163,22 +232,27 @@ class NotFoundMiddleware(BaseHTTPMiddleware):

 app.add_middleware(NotFoundMiddleware)

+# Root endpoint
 @app.get("/", response_class=HTMLResponse)
 async def root(request: Request):
     return templates.TemplateResponse("index.html", {"request": request})

+# Docs endpoint
 @app.get("/docs", response_class=HTMLResponse)
 async def docs(request: Request):
     return templates.TemplateResponse("docs.html", {"request": request})

+# Swagger UI endpoint
 @app.get("/swagger", response_class=HTMLResponse)
 async def swagger_ui():
     return get_swagger_ui_html(openapi_url="/openapi.json", title="MGZon API Documentation")

+# Redirect to /gradio
 @app.get("/launch-chatbot", response_class=RedirectResponse)
 async def launch_chatbot():
     return RedirectResponse(url="/gradio", status_code=302)

+# Run the server
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
requirements.txt CHANGED
@@ -1,11 +1,11 @@
 fastapi==0.115.2
 uvicorn==0.30.6
-gradio>=4.44.1
+gradio==4.48.0
 openai==1.42.0
 httpx==0.27.0
 python-dotenv==1.0.1
 pydocstyle==6.3.0
-requests==2.32.5
+requests==2.32.3
 beautifulsoup4==4.12.3
 tenacity==8.5.0
 selenium==4.25.0
@@ -18,7 +18,7 @@ numpy==1.26.4
 parler-tts @ git+https://github.com/huggingface/parler-tts.git@5d0aca9753ab74ded179732f5bd797f7a8c6f8ee
 torch==2.4.1
 torchaudio==2.4.1
-transformers==4.43.3
+transformers==4.45.1
 webrtcvad==2.0.10
 Pillow==10.4.0
 urllib3==2.0.7
utils/generation.py CHANGED
@@ -15,7 +15,7 @@ import torchaudio
 from PIL import Image
 from transformers import CLIPModel, CLIPProcessor, AutoProcessor
 from parler_tts import ParlerTTSForConditionalGeneration
-from utils.web_search import web_search  # direct import
+from utils.web_search import web_search  # moved the import to the top

 logger = logging.getLogger(__name__)

@@ -66,19 +66,35 @@ def check_model_availability(model_name: str, api_base: str, api_key: str) -> tu

 def select_model(query: str, input_type: str = "text") -> tuple[str, str]:
     query_lower = query.lower()
+    # Audio support
     if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
         logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
         return ASR_MODEL, FALLBACK_API_ENDPOINT
+    # Text-to-speech support
     if any(keyword in query_lower for keyword in ["text-to-speech", "tts", "تحويل نص إلى صوت"]):
         logger.info(f"Selected {TTS_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for text-to-speech")
         return TTS_MODEL, FALLBACK_API_ENDPOINT
-    if input_type == "image" or any(pattern in query_lower for pattern in [
+    # CLIP models for images
+    image_patterns = [
         r"\bimage\b", r"\bpicture\b", r"\bphoto\b", r"\bvisual\b", r"\bصورة\b", r"\bتحليل\s+صورة\b",
         r"\bimage\s+analysis\b", r"\bimage\s+classification\b", r"\bimage\s+description\b"
-    ]):
-        logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
-        return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
-    logger.info(f"Selected {MODEL_NAME} with endpoint {API_ENDPOINT} for general query: {query}")
+    ]
+    for pattern in image_patterns:
+        if re.search(pattern, query_lower, re.IGNORECASE):
+            logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
+            return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
+    # Pick a model based on availability
+    available_models = [
+        (MODEL_NAME, API_ENDPOINT),
+        (SECONDARY_MODEL_NAME, FALLBACK_API_ENDPOINT),
+        (TERTIARY_MODEL_NAME, FALLBACK_API_ENDPOINT)
+    ]
+    for model_name, api_endpoint in available_models:
+        is_available, _ = check_model_availability(model_name, api_endpoint, HF_TOKEN)
+        if is_available:
+            logger.info(f"Selected {model_name} with endpoint {api_endpoint} for query: {query}")
+            return model_name, api_endpoint
+    logger.error("No models available. Falling back to default.")
     return MODEL_NAME, API_ENDPOINT

 @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=2, min=4, max=60))
@@ -98,6 +114,7 @@ def request_generation(
     input_type: str = "text",
     audio_data: Optional[bytes] = None,
     image_data: Optional[bytes] = None,
+    output_format: str = "text"  # new: output type (text or audio)
 ) -> Generator[bytes | str, None, None]:
     is_available, selected_api_key = check_model_availability(model_name, api_base, api_key)
     if not is_available:
@@ -110,7 +127,8 @@
         "model_name": model_name,
         "chat_history": chat_history,
         "temperature": temperature,
-        "max_new_tokens": max_new_tokens
+        "max_new_tokens": max_new_tokens,
+        "output_format": output_format
     }, sort_keys=True).encode()).hexdigest()

     if cache_key in cache:
@@ -123,7 +141,8 @@
     task_type = "general"
     enhanced_system_prompt = system_prompt

-    if model_name == ASR_MODEL and audio_data is not None:
+    # Audio handling (ASR)
+    if model_name == ASR_MODEL and audio_data:
         task_type = "audio_transcription"
         try:
             audio_file = io.BytesIO(audio_data)
@@ -145,11 +164,12 @@
             yield f"Error: Audio transcription failed: {e}"
             return

-    if model_name == TTS_MODEL:
+    # Text-to-speech handling (TTS)
+    if model_name == TTS_MODEL or output_format == "audio":
         task_type = "text_to_speech"
         try:
-            model = ParlerTTSForConditionalGeneration.from_pretrained(model_name, token=selected_api_key)
-            processor = AutoProcessor.from_pretrained(model_name, token=selected_api_key)
+            model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
+            processor = AutoProcessor.from_pretrained(TTS_MODEL)
             inputs = processor(text=message, return_tensors="pt")
             audio = model.generate(**inputs)
             audio_file = io.BytesIO()
@@ -163,44 +183,58 @@
             yield f"Error: Text-to-speech failed: {e}"
             return

-    if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data is not None:
+    # Image handling
+    if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
         task_type = "image_analysis"
         try:
-            model = CLIPModel.from_pretrained(model_name, token=selected_api_key)
-            processor = CLIPProcessor.from_pretrained(model_name, token=selected_api_key)
+            model = CLIPModel.from_pretrained(model_name)
+            processor = CLIPProcessor.from_pretrained(model_name)
             image = Image.open(io.BytesIO(image_data)).convert("RGB")
             inputs = processor(text=message, images=image, return_tensors="pt", padding=True)
             outputs = model(**inputs)
             logits_per_image = outputs.logits_per_image
             probs = logits_per_image.softmax(dim=1)
-            yield f"Image analysis result: {probs.tolist()}"
-            cache[cache_key] = [f"Image analysis result: {probs.tolist()}"]
+            result = f"Image analysis result: {probs.tolist()}"
+            if output_format == "audio":
+                # Convert the result to speech
+                model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
+                processor = AutoProcessor.from_pretrained(TTS_MODEL)
+                inputs = processor(text=result, return_tensors="pt")
+                audio = model.generate(**inputs)
+                audio_file = io.BytesIO()
+                torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                audio_file.seek(0)
+                yield audio_file.read()
+            else:
+                yield result
+            cache[cache_key] = [result]
             return
         except Exception as e:
             logger.error(f"Image analysis failed: {e}")
             yield f"Error: Image analysis failed: {e}"
             return

+    # Tailor the system prompt to the task type
     if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
         task_type = "image"
-        enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query."
+        enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query. Continue until the query is fully addressed."
     elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
         task_type = "code"
-        enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations."
+        enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations. Support frameworks like React, Django, Flask, and others. Format code with triple backticks (```) and specify the language. Continue until the task is fully addressed."
     elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
         task_type = "analysis"
-        enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights."
+        enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights. Continue until all aspects of the query are thoroughly covered."
     elif any(keyword in message.lower() for keyword in ["review", "مراجعة"]):
         task_type = "review"
-        enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations."
+        enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations. Ensure the response is complete and detailed."
     elif any(keyword in message.lower() for keyword in ["publish", "نشر"]):
         task_type = "publish"
-        enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices."
+        enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices. Provide a complete and detailed response."
     else:
-        enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable."
+        enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable. Continue generating content until the query is fully answered, leveraging the full capacity of the model."

     if len(message.split()) < 5:
-        enhanced_system_prompt += "\nEven for short queries, provide a detailed, in-depth response with examples and context."
+        enhanced_system_prompt += "\nEven for short or general queries, provide a detailed, in-depth response with examples, explanations, and additional context to ensure completeness."

     logger.info(f"Task type detected: {task_type}")
     input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
@@ -289,7 +323,7 @@
                     reasoning_closed = True

                 if not saw_visible_output:
-                    msg = "I attempted to call a tool, but tools aren't executed in this environment."
+                    msg = "I attempted to call a tool, but tools aren't executed in this environment, so no final answer was produced."
                     if last_tool_name:
                         try:
                             args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
@@ -303,14 +337,30 @@
                     cached_chunks.append(f"Error: Unknown error")
                     yield f"Error: Unknown error"
                 elif chunk.choices[0].finish_reason == "length":
-                    cached_chunks.append("Response truncated due to token limit. Please refine your query.")
-                    yield "Response truncated due to token limit. Please refine your query."
+                    cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
+                    yield "Response truncated due to token limit. Please refine your query or request continuation."
                 break

         if buffer:
             cached_chunks.append(buffer)
             yield buffer

+        # If audio output was requested
+        if output_format == "audio" and buffer:
+            try:
+                model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
+                processor = AutoProcessor.from_pretrained(TTS_MODEL)
+                inputs = processor(text=buffer, return_tensors="pt")
+                audio = model.generate(**inputs)
+                audio_file = io.BytesIO()
+                torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                audio_file.seek(0)
+                cached_chunks.append(audio_file.read())
+                yield audio_file.read()
+            except Exception as e:
+                logger.error(f"Text-to-speech conversion failed: {e}")
+                yield f"Error: Text-to-speech conversion failed: {e}"
+
         cache[cache_key] = cached_chunks

     except Exception as e:
@@ -333,16 +383,20 @@
                 input_type=input_type,
                 audio_data=audio_data,
                 image_data=image_data,
+                output_format=output_format,
             ):
                 yield chunk
             return
-        for fallback_model in [SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME]:
-            logger.info(f"Retrying with fallback model: {fallback_model}")
+        if model_name == MODEL_NAME:
+            fallback_model = SECONDARY_MODEL_NAME
+            fallback_endpoint = FALLBACK_API_ENDPOINT
+            logger.info(f"Retrying with fallback model: {fallback_model} on {fallback_endpoint}")
             try:
-                is_available, selected_api_key = check_model_availability(fallback_model, FALLBACK_API_ENDPOINT, selected_api_key)
+                is_available, selected_api_key = check_model_availability(fallback_model, fallback_endpoint, selected_api_key)
                 if not is_available:
-                    continue
-                client = OpenAI(api_key=selected_api_key, base_url=FALLBACK_API_ENDPOINT, timeout=120.0)
+                    yield f"Error: Fallback model {fallback_model} is not available."
+                    return
+                client = OpenAI(api_key=selected_api_key, base_url=fallback_endpoint, timeout=120.0)
                 stream = client.chat.completions.create(
                     model=fallback_model,
                     messages=input_messages,
@@ -355,18 +409,39 @@
                 for chunk in stream:
                     if chunk.choices[0].delta.content:
                         content = chunk.choices[0].delta.content
+                        if content == "<|channel|>analysis<|message|>":
+                            if not reasoning_started:
+                                cached_chunks.append("analysis")
+                                yield "analysis"
+                                reasoning_started = True
+                            continue
+                        if content == "<|channel|>final<|message|>":
+                            if reasoning_started and not reasoning_closed:
+                                cached_chunks.append("assistantfinal")
+                                yield "assistantfinal"
+                                reasoning_closed = True
+                            continue
+
                         saw_visible_output = True
                         buffer += content
+
                         if "\n" in buffer or len(buffer) > 5000:
                             cached_chunks.append(buffer)
                             yield buffer
                             buffer = ""
                         continue
+
                     if chunk.choices[0].finish_reason in ("stop", "error", "length"):
                         if buffer:
                             cached_chunks.append(buffer)
                             yield buffer
                             buffer = ""
+
+                        if reasoning_started and not reasoning_closed:
+                            cached_chunks.append("assistantfinal")
+                            yield "assistantfinal"
+                            reasoning_closed = True
+
                         if not saw_visible_output:
                             cached_chunks.append("No visible output produced.")
                             yield "No visible output produced."
@@ -374,19 +449,91 @@
                             cached_chunks.append(f"Error: Unknown error with fallback model {fallback_model}")
                             yield f"Error: Unknown error with fallback model {fallback_model}"
                         elif chunk.choices[0].finish_reason == "length":
-                            cached_chunks.append("Response truncated due to token limit.")
-                            yield "Response truncated due to token limit."
+                            cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
+                            yield "Response truncated due to token limit. Please refine your query or request continuation."
                         break
-                if buffer:
-                    cached_chunks.append(buffer)
-                    yield buffer
+
+                if buffer and output_format == "audio":
+                    try:
+                        model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
+                        processor = AutoProcessor.from_pretrained(TTS_MODEL)
+                        inputs = processor(text=buffer, return_tensors="pt")
+                        audio = model.generate(**inputs)
+                        audio_file = io.BytesIO()
+                        torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                        audio_file.seek(0)
+                        cached_chunks.append(audio_file.read())
+                        yield audio_file.read()
+                    except Exception as e:
+                        logger.error(f"Text-to-speech conversion failed: {e}")
+                        yield f"Error: Text-to-speech conversion failed: {e}"
+
                 cache[cache_key] = cached_chunks
-                return
+
             except Exception as e2:
                 logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
-                continue
-        yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({SECONDARY_MODEL_NAME}), Tertiary ({TERTIARY_MODEL_NAME})."
-        return
+                try:
+                    is_available, selected_api_key = check_model_availability(TERTIARY_MODEL_NAME, FALLBACK_API_ENDPOINT, selected_api_key)
+                    if not is_available:
+                        yield f"Error: Tertiary model {TERTIARY_MODEL_NAME} is not available."
+                        return
+                    client = OpenAI(api_key=selected_api_key, base_url=FALLBACK_API_ENDPOINT, timeout=120.0)
+                    stream = client.chat.completions.create(
+                        model=TERTIARY_MODEL_NAME,
+                        messages=input_messages,
+                        temperature=temperature,
+                        max_tokens=max_new_tokens,
+                        stream=True,
+                        tools=[],
+                        tool_choice="none",
+                    )
+                    for chunk in stream:
+                        if chunk.choices[0].delta.content:
+                            content = chunk.choices[0].delta.content
+                            saw_visible_output = True
+                            buffer += content
+                            if "\n" in buffer or len(buffer) > 5000:
+                                cached_chunks.append(buffer)
+                                yield buffer
+                                buffer = ""
+                            continue
+                        if chunk.choices[0].finish_reason in ("stop", "error", "length"):
+                            if buffer:
+                                cached_chunks.append(buffer)
+                                yield buffer
+                                buffer = ""
+                            if not saw_visible_output:
+                                cached_chunks.append("No visible output produced.")
+                                yield "No visible output produced."
+                            if chunk.choices[0].finish_reason == "error":
+                                cached_chunks.append(f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}")
+                                yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
+                            elif chunk.choices[0].finish_reason == "length":
+                                cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
+                                yield "Response truncated due to token limit. Please refine your query or request continuation."
+                            break
+                    if buffer and output_format == "audio":
+                        try:
+                            model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
+                            processor = AutoProcessor.from_pretrained(TTS_MODEL)
+                            inputs = processor(text=buffer, return_tensors="pt")
+                            audio = model.generate(**inputs)
+                            audio_file = io.BytesIO()
+                            torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                            audio_file.seek(0)
+                            cached_chunks.append(audio_file.read())
+                            yield audio_file.read()
+                        except Exception as e:
+                            logger.error(f"Text-to-speech conversion failed: {e}")
+                            yield f"Error: Text-to-speech conversion failed: {e}"
+                    cache[cache_key] = cached_chunks
+                except Exception as e3:
+                    logger.exception(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
+                    yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({fallback_model}), Tertiary ({TERTIARY_MODEL_NAME}). Please check your model configurations."
+                    return
+        else:
+            yield f"Error: Failed to load model {model_name}: {e}"
+            return

 def format_final(analysis_text: str, visible_text: str) -> str:
     reasoning_safe = html.escape((analysis_text or "").strip())
@@ -402,7 +549,7 @@ def format_final(analysis_text: str, visible_text: str) -> str:
         f"{response}" if response else "No final response available."
     )

-def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None):
+def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None, output_format="text"):
     if not message.strip() and not audio_data and not image_data:
         yield "Please enter a prompt or upload a file."
         return
@@ -436,7 +583,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
             "type": "function",
             "function": {
                 "name": "code_generation",
-                "description": "Generate or modify code for various frameworks",
+                "description": "Generate or modify code for various frameworks (React, Django, Flask, etc.)",
                 "parameters": {
                     "type": "object",
                     "properties": {
@@ -514,6 +661,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
         input_type=input_type,
         audio_data=audio_data,
         image_data=image_data,
+        output_format=output_format,
     )

     for chunk in stream:
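A minimal consumer sketch for the updated generator (illustrative, not part of the commit). It assumes HF_TOKEN is set and mirrors the keyword arguments visible in the endpoint calls above; the full signature in utils/generation.py may require further arguments (for example a system prompt or chat history). The generator yields str chunks for text and bytes chunks for WAV audio.

import os
from utils.generation import request_generation, select_model

model_name, api_endpoint = select_model("Summarize the history of AI")
text_parts, audio_bytes = [], b""
for chunk in request_generation(
    api_key=os.getenv("HF_TOKEN"),
    api_base=api_endpoint,
    message="Summarize the history of AI",
    model_name=model_name,
    temperature=0.7,
    max_new_tokens=1024,
    output_format="text",
):
    if isinstance(chunk, bytes):
        audio_bytes += chunk          # only produced when output_format="audio"
    else:
        text_parts.append(chunk)
print("".join(text_parts))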
utils/web_search.py CHANGED
@@ -12,18 +12,27 @@ def web_search(query: str) -> str:
         if not google_api_key or not google_cse_id:
             return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
         url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
-        response = requests.get(url, timeout=5)
+        response = requests.get(url, timeout=10)
         response.raise_for_status()
         results = response.json().get("items", [])
         if not results:
             return "No web results found."
         search_results = []
-        for i, item in enumerate(results[:3]):  # fewer results to speed up the search
+        for i, item in enumerate(results[:5]):
             title = item.get("title", "")
             snippet = item.get("snippet", "")
             link = item.get("link", "")
-            search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {snippet}\n")
+            try:
+                page_response = requests.get(link, timeout=5)
+                page_response.raise_for_status()
+                soup = BeautifulSoup(page_response.text, "html.parser")
+                paragraphs = soup.find_all("p")
+                page_content = " ".join([p.get_text() for p in paragraphs][:1000])
+            except Exception as e:
+                logger.warning(f"Failed to fetch page content for {link}: {e}")
+                page_content = snippet
+            search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n")
         return "\n".join(search_results)
     except Exception as e:
-        logger.exception(f"Web search failed: {e}")
+        logger.exception("Web search failed")
        return f"Web search error: {e}"