Commit b8d38d2
Parent(s): 7250ede

Update chatbot with real-time audio/image input and model selection

Files changed:
- README.md +9 -2
- api/endpoints.py +33 -22
- main.py +74 -45
- utils/generation.py +61 -196
- utils/web_search.py +5 -5
README.md CHANGED
@@ -1,9 +1,9 @@
 ---
-title:
+title: MGZon Chatbot
 emoji: "🤖"
 colorFrom: "blue"
 colorTo: "green"
-sdk:
+sdk: gradio
 app_file: main.py
 pinned: false
 ---
@@ -37,6 +37,13 @@ This model is a fine-tuned version of [MGZON/Veltrix](https://huggingface.co/MGZ
 It achieves the following results on the evaluation set:
 - Loss: nan
 
+## Features
+- Real-time voice input/output with Whisper and Parler-TTS.
+- Image capture and analysis with CLIP.
+- Web search integration with Google API.
+- Model selection for flexible query handling.
+- Enhanced UI with custom icons and responsive design.
+
 ## Model description
 
 More information needed
api/endpoints.py CHANGED
@@ -1,21 +1,24 @@
 import os
 from fastapi import APIRouter, HTTPException, UploadFile, File
+from fastapi.responses import StreamingResponse
 from openai import OpenAI
 from api.models import QueryRequest
 from utils.generation import request_generation, select_model
 from utils.web_search import web_search
+import io
 
 router = APIRouter()
 
 HF_TOKEN = os.getenv("HF_TOKEN")
+BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
 
 @router.get("/api/model-info")
 def model_info():
     return {
         "model_name": MODEL_NAME,
-        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "
+        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "openai/gpt-oss-20b:together"),
         "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1"),
         "clip_base_model": os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32"),
         "clip_large_model": os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14"),
@@ -33,7 +36,7 @@ async def performance_stats():
 
 @router.post("/api/chat")
 async def chat_endpoint(req: QueryRequest):
-    model_name, api_endpoint = select_model(req.message)
+    model_name, api_endpoint = select_model(req.message, model_choice=req.model_choice if hasattr(req, 'model_choice') else None)
     stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
@@ -44,17 +47,16 @@ async def chat_endpoint(req: QueryRequest):
         temperature=req.temperature,
         max_new_tokens=req.max_new_tokens,
         deep_search=req.enable_browsing,
+        output_type="text"
     )
-    response = "".join(
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"response": response}
 
-
-# in api/endpoints.py
 @router.post("/api/audio-transcription")
 async def audio_transcription_endpoint(file: UploadFile = File(...)):
     model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
     audio_data = await file.read()
-
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message="Transcribe audio",
@@ -64,14 +66,16 @@ async def audio_transcription_endpoint(file: UploadFile = File(...)):
         max_new_tokens=128000,
         input_type="audio",
         audio_data=audio_data,
-
+        output_type="text"
+    )
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"transcription": response}
 
 @router.post("/api/text-to-speech")
 async def text_to_speech_endpoint(req: dict):
     text = req.get("text", "")
     model_name, api_endpoint = select_model("text to speech", input_type="text")
-
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=text,
@@ -80,8 +84,9 @@ async def text_to_speech_endpoint(req: dict):
         temperature=0.7,
         max_new_tokens=128000,
         input_type="text",
+        output_type="speech"
     )
-    audio_data = b"".join(
+    audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
     return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
 
 @router.post("/api/code")
@@ -91,7 +96,7 @@ async def code_endpoint(req: dict):
     code = req.get("code", "")
     prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
     model_name, api_endpoint = select_model(prompt)
-
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=prompt,
@@ -99,14 +104,16 @@ async def code_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-
+        output_type="text"
+    )
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"generated_code": response}
 
 @router.post("/api/analysis")
 async def analysis_endpoint(req: dict):
     message = req.get("text", "")
     model_name, api_endpoint = select_model(message)
-
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=message,
@@ -114,24 +121,28 @@ async def analysis_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-
+        output_type="text"
+    )
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"analysis": response}
 
 @router.post("/api/image-analysis")
-async def image_analysis_endpoint(
-
-
-
-    model_name, api_endpoint = select_model(prompt)
-    response = "".join(list(request_generation(
+async def image_analysis_endpoint(file: UploadFile = File(...)):
+    model_name, api_endpoint = select_model("image analysis", input_type="image")
+    image_data = await file.read()
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
-        message=
+        message="Analyze this image",
         system_prompt="You are an expert in image analysis. Provide detailed descriptions or classifications based on the query.",
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-
+        input_type="image",
+        image_data=image_data,
+        output_type="text"
+    )
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"image_analysis": response}
 
 @router.get("/api/test-model")
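Note: a minimal client-side sketch of how the updated endpoints can be exercised. The request field names (message, temperature, max_new_tokens, enable_browsing, model_choice) are assumptions inferred from how QueryRequest is used above, since api/models.py is not part of this commit, and the base URL is a placeholder.

import requests

BASE_URL = "http://localhost:7860"  # placeholder; replace with the deployed Space URL

# Chat with an explicit model choice (field names assumed from QueryRequest usage above).
resp = requests.post(f"{BASE_URL}/api/chat", json={
    "message": "Explain the history of AI in detail.",
    "temperature": 0.7,
    "max_new_tokens": 1024,
    "enable_browsing": False,
    "model_choice": "openai/gpt-oss-20b:together",
})
print(resp.json()["response"])

# Text-to-speech streams back a WAV payload.
tts = requests.post(f"{BASE_URL}/api/text-to-speech", json={"text": "Welcome to MGZon!"})
with open("welcome.wav", "wb") as f:
    f.write(tts.content)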
main.py CHANGED
@@ -20,7 +20,7 @@ logger.info("Files in /app/: %s", os.listdir("/app"))
 
 # Set up the client for the Hugging Face Inference API
 HF_TOKEN = os.getenv("HF_TOKEN")
-BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
+BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 if not HF_TOKEN:
     logger.error("HF_TOKEN is not set in environment variables.")
     raise ValueError("HF_TOKEN is required for Inference API.")
@@ -31,71 +31,84 @@ CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
 
 # CSS setup
 css = """
-.gradio-container { max-width: 1200px; margin: auto; }
-.chatbot { border: 1px solid #ccc; border-radius:
-.input-textbox { font-size:
+.gradio-container { max-width: 1200px; margin: auto; font-family: Arial, sans-serif; }
+.chatbot { border: 1px solid #ccc; border-radius: 12px; padding: 20px; background-color: #f0f4f8; }
+.input-textbox { font-size: 18px; padding: 12px; border-radius: 8px; }
 .upload-button::before {
-    content: '
-    margin-right:
-    font-size:
+    content: '📸';
+    margin-right: 10px;
+    font-size: 24px;
 }
 .audio-input::before {
-    content: '
-    margin-right:
-    font-size:
+    content: '🎙️';
+    margin-right: 10px;
+    font-size: 24px;
 }
 .audio-output::before {
     content: '🔊';
-    margin-right:
-    font-size:
+    margin-right: 10px;
+    font-size: 24px;
+}
+.send-button {
+    background-color: #007bff;
+    color: white;
+    padding: 10px 20px;
+    border-radius: 8px;
+    cursor: pointer;
+    font-size: 16px;
+    transition: background-color 0.3s;
+}
+.send-button:hover {
+    background-color: #0056b3;
 }
 .loading::after {
     content: '';
     display: inline-block;
-    width:
-    height:
-    border:
+    width: 18px;
+    height: 18px;
+    border: 3px solid #007bff;
     border-top-color: transparent;
     border-radius: 50%;
     animation: spin 1s linear infinite;
-    margin-left:
+    margin-left: 10px;
 }
 @keyframes spin {
     to { transform: rotate(360deg); }
 }
 .output-container {
     margin-top: 20px;
-    padding:
+    padding: 15px;
     border: 1px solid #ddd;
-    border-radius:
+    border-radius: 10px;
+    background-color: #fff;
 }
 .audio-output-container {
     display: flex;
     align-items: center;
-    gap:
-    margin-top:
+    gap: 12px;
+    margin-top: 15px;
+}
+.model-selector {
+    border-radius: 8px;
+    padding: 10px;
+    font-size: 16px;
 }
 """
 
 # Function to process input (text, audio, images, files)
-def process_input(message, audio_input=None,
+def process_input(message, audio_input=None, image_input=None, model_choice="openai/gpt-oss-120b:cerebras", history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000, output_type="text"):
     input_type = "text"
     audio_data = None
     image_data = None
+
     if audio_input:
         input_type = "audio"
-
-
-
-
-
-
-        input_type = "image"
-        with open(file_input, "rb") as f:
-            image_data = f.read()
-            message = f"Analyze image: {file_input}"
-    else:
-        message = f"Analyze file: {file_input}"
+        audio_data = audio_input
+        message = "Transcribe this audio and respond accordingly"
+    elif image_input:
+        input_type = "image"
+        image_data = image_input
+        message = f"Analyze this image: {message or 'Describe the image'}"
 
     response_text = ""
     audio_response = None
@@ -109,7 +122,9 @@ def process_input(message, audio_input=None, file_input=None, history=None, syst
         max_new_tokens=max_new_tokens,
         input_type=input_type,
         audio_data=audio_data,
-        image_data=image_data
+        image_data=image_data,
+        model_choice=model_choice,
+        output_type=output_type
     ):
         if isinstance(chunk, bytes):
             audio_response = io.BytesIO(chunk)
@@ -122,7 +137,7 @@ def process_input(message, audio_input=None, file_input=None, history=None, syst
 chatbot_ui = gr.ChatInterface(
     fn=process_input,
     chatbot=gr.Chatbot(
-        label="MGZon Chatbot",
+        label="MGZon Chatbot",
         height=800,
         latex_delimiters=LATEX_DELIMS,
     ),
@@ -130,28 +145,42 @@ chatbot_ui = gr.ChatInterface(
     additional_inputs=[
         gr.Textbox(
             label="System Prompt",
-            value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image, and file inputs. For audio, transcribe using Whisper
+            value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image, and file inputs. For audio, transcribe using Whisper and respond with text or speech. For images, analyze using CLIP and provide detailed descriptions. For general queries, use the selected model to provide in-depth answers.",
            lines=4
         ),
         gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7),
         gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
         gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True),
         gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000),
-        gr.
-
+        gr.Dropdown(
+            label="Model Choice",
+            choices=[
+                "openai/gpt-oss-120b:cerebras",
+                "openai/gpt-oss-20b:together",
+                "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
+                "mistralai/Mixtral-8x7B-Instruct-v0.1",
+                "openai/clip-vit-base-patch32",
+                "openai/whisper-large-v3-turbo",
+                "parler-tts/parler-tts-mini-v1"
+            ],
+            value="openai/gpt-oss-120b:cerebras",
+            elem_classes="model-selector"
+        ),
+        gr.Audio(label="Record & Send Voice", type="numpy", streaming=True, elem_classes="audio-input"),
+        gr.Image(label="Capture & Send Image", type="numpy", source="webcam", elem_classes="upload-button"),
+        gr.Radio(label="Output Type", choices=["text", "speech"], value="text")
     ],
     additional_outputs=[gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output", autoplay=True)],
     stop_btn="Stop",
     examples=[
-        ["Explain the
-        ["Generate a
-        ["Describe this image:
-        ["Transcribe this audio: [
-        ["Convert this text to speech:
-        ["Analyze this file: [upload PDF or text file]."],
+        ["Explain the history of AI in detail."],
+        ["Generate a React login component with validation."],
+        ["Describe this image: [capture image]."],
+        ["Transcribe and respond to this audio: [record audio]."],
+        ["Convert this text to speech: Welcome to MGZon!"],
     ],
     title="MGZon Chatbot",
-    description="A versatile chatbot powered by
+    description="A versatile chatbot powered by multiple models for text, image, and audio queries. Supports real-time voice and image input, model selection, and web search. Licensed under Apache 2.0.",
     theme="gradio/soft",
     css=css,
 )
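Note: process_input consumes a mixed stream of text and audio chunks from request_generation and routes them with isinstance() checks, as the hunk above shows. A minimal sketch of that separation logic; the helper name is illustrative and not part of the repository.

import io
from typing import Optional, Tuple

def split_stream(stream) -> Tuple[str, Optional[io.BytesIO]]:
    # Mirrors the isinstance() checks in process_input above: text chunks are
    # concatenated for the chat window, a bytes chunk becomes the audio output.
    response_text = ""
    audio_response: Optional[io.BytesIO] = None
    for chunk in stream:
        if isinstance(chunk, bytes):
            audio_response = io.BytesIO(chunk)
        else:
            response_text += chunk
    return response_text, audio_response

# Quick check with a stand-in for request_generation(...):
text, audio = split_stream(iter(["Hello ", "world", b"fake-wav-bytes"]))
print(text)               # Hello world
print(audio is not None)  # True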
utils/generation.py CHANGED
@@ -15,11 +15,12 @@ import torchaudio
 from PIL import Image
 from transformers import CLIPModel, CLIPProcessor, AutoProcessor
 from parler_tts import ParlerTTSForConditionalGeneration
+from utils.web_search import web_search  # moved the import out of the function
 
 logger = logging.getLogger(__name__)
 
 # Cache setup
-cache = TTLCache(maxsize=100, ttl=600)
+cache = TTLCache(maxsize=100, ttl=600)
 
 # Define LATEX_DELIMS
 LATEX_DELIMS = [
@@ -31,11 +32,11 @@ LATEX_DELIMS = [
 
 # Set up the client for the Hugging Face Inference API
 HF_TOKEN = os.getenv("HF_TOKEN")
-BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
+BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
 FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co/v1"
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-
-SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
+SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "openai/gpt-oss-20b:together")
 TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
 CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32")
 CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
@@ -43,7 +44,6 @@ ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3-turbo")
 TTS_MODEL = os.getenv("TTS_MODEL", "parler-tts/parler-tts-mini-v1")
 
 def check_model_availability(model_name: str, api_base: str, api_key: str) -> tuple[bool, str]:
-    """Check model availability via the API, with backup-token support."""
     try:
         response = requests.get(
             f"{api_base}/models/{model_name}",
@@ -64,17 +64,18 @@ def check_model_availability(model_name: str, api_base: str, api_key: str) -> tu
         return check_model_availability(model_name, api_base, BACKUP_HF_TOKEN)
     return False, api_key
 
-def select_model(query: str, input_type: str = "text") -> tuple[str, str]:
+def select_model(query: str, input_type: str = "text", model_choice: Optional[str] = None) -> tuple[str, str]:
+    if model_choice:
+        logger.info(f"User-selected model: {model_choice}")
+        return model_choice, API_ENDPOINT if model_choice in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME] else FALLBACK_API_ENDPOINT
+
     query_lower = query.lower()
-    # Audio support
     if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
         logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
         return ASR_MODEL, FALLBACK_API_ENDPOINT
-    # Text-to-speech support
     if any(keyword in query_lower for keyword in ["text-to-speech", "tts", "تحويل نص إلى صوت"]):
         logger.info(f"Selected {TTS_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for text-to-speech")
         return TTS_MODEL, FALLBACK_API_ENDPOINT
-    # CLIP models for image-related queries
     image_patterns = [
         r"\bimage\b", r"\bpicture\b", r"\bphoto\b", r"\bvisual\b", r"\bصورة\b", r"\bتحليل\s+صورة\b",
         r"\bimage\s+analysis\b", r"\bimage\s+classification\b", r"\bimage\s+description\b"
@@ -83,16 +84,6 @@ def select_model(query: str, input_type: str = "text") -> tuple[str, str]:
         if re.search(pattern, query_lower, re.IGNORECASE):
             logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
             return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
-    # DeepSeek model for MGZon-related queries
-    mgzon_patterns = [
-        r"\bmgzon\b", r"\bmgzon\s+(products|services|platform|features|mission|technology|solutions|oauth)\b",
-        r"\bميزات\s+mgzon\b", r"\bخدمات\s+mgzon\b", r"\boauth\b"
-    ]
-    for pattern in mgzon_patterns:
-        if re.search(pattern, query_lower, re.IGNORECASE):
-            logger.info(f"Selected {SECONDARY_MODEL_NAME} with endpoint {FALLBACK_API_ENDPOINT} for MGZon-related query: {query}")
-            return SECONDARY_MODEL_NAME, FALLBACK_API_ENDPOINT
-    # Default model for general queries
     logger.info(f"Selected {MODEL_NAME} with endpoint {API_ENDPOINT} for general query: {query}")
     return MODEL_NAME, API_ENDPOINT
 
@@ -113,16 +104,13 @@ def request_generation(
     input_type: str = "text",
     audio_data: Optional[bytes] = None,
     image_data: Optional[bytes] = None,
+    output_type: str = "text"
 ) -> Generator[bytes | str, None, None]:
-    from utils.web_search import web_search  # deferred import
-
-    # Check model availability with backup-token support
     is_available, selected_api_key = check_model_availability(model_name, api_base, api_key)
     if not is_available:
         yield f"Error: Model {model_name} is not available. Please check the model endpoint or token."
         return
 
-    # Build the cache key
     cache_key = hashlib.md5(json.dumps({
         "message": message,
         "system_prompt": system_prompt,
@@ -143,7 +131,7 @@ def request_generation(
     enhanced_system_prompt = system_prompt
 
     # Audio handling (ASR)
-    if model_name == ASR_MODEL and audio_data:
+    if model_name == ASR_MODEL and audio_data is not None:
         task_type = "audio_transcription"
         try:
             audio_file = io.BytesIO(audio_data)
@@ -158,6 +146,15 @@ def request_generation(
                 response_format="text"
             )
             yield transcription
+            if output_type == "speech":
+                tts_model = TTS_MODEL
+                tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=transcription, return_tensors="pt")
+                tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
+                audio = tts_model_instance.generate(**tts_inputs)
+                audio_file = io.BytesIO()
+                torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                audio_file.seek(0)
+                yield audio_file.read()
             cache[cache_key] = [transcription]
             return
         except Exception as e:
@@ -166,11 +163,11 @@ def request_generation(
             return
 
     # Text-to-speech handling (TTS)
-    if model_name == TTS_MODEL:
+    if model_name == TTS_MODEL or output_type == "speech":
         task_type = "text_to_speech"
         try:
-            model = ParlerTTSForConditionalGeneration.from_pretrained(
-            processor = AutoProcessor.from_pretrained(
+            model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
+            processor = AutoProcessor.from_pretrained(TTS_MODEL)
             inputs = processor(text=message, return_tensors="pt")
             audio = model.generate(**inputs)
             audio_file = io.BytesIO()
@@ -185,7 +182,7 @@ def request_generation(
             return
 
     # Image handling
-    if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
+    if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data is not None:
         task_type = "image_analysis"
         try:
             model = CLIPModel.from_pretrained(model_name)
@@ -195,8 +192,18 @@ def request_generation(
             outputs = model(**inputs)
             logits_per_image = outputs.logits_per_image
             probs = logits_per_image.softmax(dim=1)
-
-
+            analysis = f"Image analysis result: {probs.tolist()}"
+            yield analysis
+            if output_type == "speech":
+                tts_model = TTS_MODEL
+                tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=analysis, return_tensors="pt")
+                tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
+                audio = tts_model_instance.generate(**tts_inputs)
+                audio_file = io.BytesIO()
+                torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                audio_file.seek(0)
+                yield audio_file.read()
+            cache[cache_key] = [analysis]
             return
         except Exception as e:
             logger.error(f"Image analysis failed: {e}")
@@ -206,27 +213,16 @@ def request_generation(
     # Refine system_prompt based on the task type
     if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
         task_type = "image"
-        enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query.
+        enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query."
     elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
         task_type = "code"
-        enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations.
+        enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations."
     elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
         task_type = "analysis"
-        enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights.
-    elif any(keyword in message.lower() for keyword in ["review", "مراجعة"]):
-        task_type = "review"
-        enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations. Ensure the response is complete and detailed."
-    elif any(keyword in message.lower() for keyword in ["publish", "نشر"]):
-        task_type = "publish"
-        enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices. Provide a complete and detailed response."
+        enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights."
     else:
-        enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable.
-
-    # If the query is short, encourage a detailed answer
-    if len(message.split()) < 5:
-        enhanced_system_prompt += "\nEven for short or general queries, provide a detailed, in-depth response with examples, explanations, and additional context to ensure completeness."
+        enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable."
 
-    logger.info(f"Task type detected: {task_type}")
     input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
     if chat_history:
         for msg in chat_history:
@@ -262,8 +258,6 @@ def request_generation(
         reasoning_started = False
         reasoning_closed = False
         saw_visible_output = False
-        last_tool_name = None
-        last_tool_args = None
         buffer = ""
 
         for chunk in stream:
@@ -291,16 +285,6 @@ def request_generation(
                     buffer = ""
                     continue
 
-                if chunk.choices[0].delta.tool_calls and model_name in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME]:
-                    tool_call = chunk.choices[0].delta.tool_calls[0]
-                    name = getattr(tool_call, "function", {}).get("name", None)
-                    args = getattr(tool_call, "function", {}).get("arguments", None)
-                    if name:
-                        last_tool_name = name
-                    if args:
-                        last_tool_args = args
-                    continue
-
                 if chunk.choices[0].finish_reason in ("stop", "tool_calls", "error", "length"):
                     if buffer:
                         cached_chunks.append(buffer)
@@ -313,16 +297,8 @@ def request_generation(
                         reasoning_closed = True
 
                     if not saw_visible_output:
-
-
-                        try:
-                            args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
-                        except Exception:
-                            args_text = str(last_tool_args)
-                        msg += f"\n\n• Tool requested: **{last_tool_name}**\n• Arguments: `{args_text}`"
-                        cached_chunks.append(msg)
-                        yield msg
-
+                        cached_chunks.append("No visible output produced.")
+                        yield "No visible output produced."
                     if chunk.choices[0].finish_reason == "error":
                         cached_chunks.append(f"Error: Unknown error")
                         yield f"Error: Unknown error"
@@ -335,6 +311,16 @@ def request_generation(
             cached_chunks.append(buffer)
             yield buffer
 
+        if output_type == "speech":
+            tts_model = TTS_MODEL
+            tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=buffer, return_tensors="pt")
+            tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
+            audio = tts_model_instance.generate(**tts_inputs)
+            audio_file = io.BytesIO()
+            torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+            audio_file.seek(0)
+            yield audio_file.read()
+
         cache[cache_key] = cached_chunks
 
     except Exception as e:
@@ -357,134 +343,12 @@ def request_generation(
                 input_type=input_type,
                 audio_data=audio_data,
                 image_data=image_data,
+                output_type=output_type
             ):
                 yield chunk
             return
-
-
-            fallback_endpoint = FALLBACK_API_ENDPOINT
-            logger.info(f"Retrying with fallback model: {fallback_model} on {fallback_endpoint}")
-            try:
-                is_available, selected_api_key = check_model_availability(fallback_model, fallback_endpoint, selected_api_key)
-                if not is_available:
-                    yield f"Error: Fallback model {fallback_model} is not available."
-                    return
-                client = OpenAI(api_key=selected_api_key, base_url=fallback_endpoint, timeout=120.0)
-                stream = client.chat.completions.create(
-                    model=fallback_model,
-                    messages=input_messages,
-                    temperature=temperature,
-                    max_tokens=max_new_tokens,
-                    stream=True,
-                    tools=[],
-                    tool_choice="none",
-                )
-                for chunk in stream:
-                    if chunk.choices[0].delta.content:
-                        content = chunk.choices[0].delta.content
-                        if content == "<|channel|>analysis<|message|>":
-                            if not reasoning_started:
-                                cached_chunks.append("analysis")
-                                yield "analysis"
-                                reasoning_started = True
-                            continue
-                        if content == "<|channel|>final<|message|>":
-                            if reasoning_started and not reasoning_closed:
-                                cached_chunks.append("assistantfinal")
-                                yield "assistantfinal"
-                                reasoning_closed = True
-                            continue
-
-                        saw_visible_output = True
-                        buffer += content
-
-                        if "\n" in buffer or len(buffer) > 5000:
-                            cached_chunks.append(buffer)
-                            yield buffer
-                            buffer = ""
-                        continue
-
-                    if chunk.choices[0].finish_reason in ("stop", "error", "length"):
-                        if buffer:
-                            cached_chunks.append(buffer)
-                            yield buffer
-                            buffer = ""
-
-                        if reasoning_started and not reasoning_closed:
-                            cached_chunks.append("assistantfinal")
-                            yield "assistantfinal"
-                            reasoning_closed = True
-
-                        if not saw_visible_output:
-                            cached_chunks.append("No visible output produced.")
-                            yield "No visible output produced."
-                        if chunk.choices[0].finish_reason == "error":
-                            cached_chunks.append(f"Error: Unknown error with fallback model {fallback_model}")
-                            yield f"Error: Unknown error with fallback model {fallback_model}"
-                        elif chunk.choices[0].finish_reason == "length":
-                            cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
-                            yield "Response truncated due to token limit. Please refine your query or request continuation."
-                        break
-
-                if buffer:
-                    cached_chunks.append(buffer)
-                    yield buffer
-
-                cache[cache_key] = cached_chunks
-
-            except Exception as e2:
-                logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
-                try:
-                    is_available, selected_api_key = check_model_availability(TERTIARY_MODEL_NAME, FALLBACK_API_ENDPOINT, selected_api_key)
-                    if not is_available:
-                        yield f"Error: Tertiary model {TERTIARY_MODEL_NAME} is not available."
-                        return
-                    client = OpenAI(api_key=selected_api_key, base_url=FALLBACK_API_ENDPOINT, timeout=120.0)
-                    stream = client.chat.completions.create(
-                        model=TERTIARY_MODEL_NAME,
-                        messages=input_messages,
-                        temperature=temperature,
-                        max_tokens=max_new_tokens,
-                        stream=True,
-                        tools=[],
-                        tool_choice="none",
-                    )
-                    for chunk in stream:
-                        if chunk.choices[0].delta.content:
-                            content = chunk.choices[0].delta.content
-                            saw_visible_output = True
-                            buffer += content
-                            if "\n" in buffer or len(buffer) > 5000:
-                                cached_chunks.append(buffer)
-                                yield buffer
-                                buffer = ""
-                            continue
-                        if chunk.choices[0].finish_reason in ("stop", "error", "length"):
-                            if buffer:
-                                cached_chunks.append(buffer)
-                                yield buffer
-                                buffer = ""
-                            if not saw_visible_output:
-                                cached_chunks.append("No visible output produced.")
-                                yield "No visible output produced."
-                            if chunk.choices[0].finish_reason == "error":
-                                cached_chunks.append(f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}")
-                                yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
-                            elif chunk.choices[0].finish_reason == "length":
-                                cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
-                                yield "Response truncated due to token limit. Please refine your query or request continuation."
-                            break
-                    if buffer:
-                        cached_chunks.append(buffer)
-                        yield buffer
-                    cache[cache_key] = cached_chunks
-                except Exception as e3:
-                    logger.exception(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
-                    yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({fallback_model}), Tertiary ({TERTIARY_MODEL_NAME}). Please check your model configurations."
-                    return
-        else:
-            yield f"Error: Failed to load model {model_name}: {e}"
-            return
+            yield f"Error: Failed to load model {model_name}: {e}"
+            return
 
 def format_final(analysis_text: str, visible_text: str) -> str:
     reasoning_safe = html.escape((analysis_text or "").strip())
@@ -500,12 +364,12 @@ def format_final(analysis_text: str, visible_text: str) -> str:
         f"{response}" if response else "No final response available."
     )
 
-def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None):
+def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None, model_choice=None, output_type="text"):
     if not message.strip() and not audio_data and not image_data:
-        yield "Please enter a prompt or
+        yield "Please enter a prompt, record audio, or capture an image."
         return
 
-    model_name, api_endpoint = select_model(message, input_type=input_type)
+    model_name, api_endpoint = select_model(message, input_type=input_type, model_choice=model_choice)
     chat_history = []
     for h in history:
         if isinstance(h, dict):
@@ -534,7 +398,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
                 "type": "function",
                 "function": {
                     "name": "code_generation",
-                    "description": "Generate or modify code for various frameworks
+                    "description": "Generate or modify code for various frameworks",
                     "parameters": {
                         "type": "object",
                         "properties": {
@@ -612,6 +476,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
         input_type=input_type,
         audio_data=audio_data,
         image_data=image_data,
+        output_type=output_type
     )
 
     for chunk in stream:
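Note: a small sketch of how the new select_model() routing behaves, assuming the package and its dependencies are importable and that no environment variables override the defaults shown in this diff.

from utils.generation import select_model

# Keyword and input-type routing; model IDs are the defaults from this diff.
assert select_model("transcribe audio", input_type="audio")[0] == "openai/whisper-large-v3-turbo"
assert select_model("read this with tts")[0] == "parler-tts/parler-tts-mini-v1"
assert select_model("describe this picture")[0] == "openai/clip-vit-base-patch32"
assert select_model("tell me a story")[0] == "openai/gpt-oss-120b:cerebras"
# An explicit model_choice bypasses the keyword routing entirely:
assert select_model("anything", model_choice="mistralai/Mixtral-8x7B-Instruct-v0.1")[0] == "mistralai/Mixtral-8x7B-Instruct-v0.1"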
utils/web_search.py CHANGED
@@ -11,23 +11,23 @@ def web_search(query: str) -> str:
     google_cse_id = os.getenv("GOOGLE_CSE_ID")
     if not google_api_key or not google_cse_id:
         return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
-    url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}
-    response = requests.get(url, timeout=
+    url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
+    response = requests.get(url, timeout=5)
     response.raise_for_status()
     results = response.json().get("items", [])
     if not results:
         return "No web results found."
     search_results = []
-    for i, item in enumerate(results[:
+    for i, item in enumerate(results[:3]):  # reduced the result count to speed up search
         title = item.get("title", "")
         snippet = item.get("snippet", "")
         link = item.get("link", "")
         try:
-            page_response = requests.get(link, timeout=
+            page_response = requests.get(link, timeout=3)
             page_response.raise_for_status()
             soup = BeautifulSoup(page_response.text, "html.parser")
             paragraphs = soup.find_all("p")
-            page_content = " ".join([p.get_text() for p in paragraphs][:
+            page_content = " ".join([p.get_text() for p in paragraphs][:500])
         except Exception as e:
             logger.warning(f"Failed to fetch page content for {link}: {e}")
             page_content = snippet
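Note: a minimal usage sketch for web_search(); the credentials below are placeholders, and the function itself returns an explanatory string if they are missing, as shown above.

import os
from utils.web_search import web_search

# Placeholders, not real credentials.
os.environ.setdefault("GOOGLE_API_KEY", "<your-key>")
os.environ.setdefault("GOOGLE_CSE_ID", "<your-cse-id>")

print(web_search("MGZon chatbot features"))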