Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# main.py (
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
@@ -7,7 +7,7 @@ import json
|
|
7 |
import html
|
8 |
import contextlib
|
9 |
import traceback
|
10 |
-
from typing import Optional, Dict, Any
|
11 |
|
12 |
# --- Frameworks ---
|
13 |
from starlette.applications import Starlette
|
@@ -152,43 +152,110 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
|
|
152 |
elif response.status_code == 404: logger.warning(f"[Supadata] Not found (404) for {video_id}."); return None
|
153 |
else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None
|
154 |
except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None
|
155 |
-
except httpx.RequestError as e:
|
|
|
|
|
|
|
156 |
except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
|
157 |
|
158 |
async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
|
|
|
159 |
global APIFY_ACTOR_ID
|
160 |
-
if not video_url: logger.error("[Apify] No video_url provided"); return None
|
161 |
-
if not api_token: logger.error("[Apify] API token missing."); return None
|
162 |
-
logger.info(f"[Apify] Attempting fetch
|
163 |
-
|
164 |
-
|
165 |
-
|
|
|
|
|
|
|
|
|
|
|
166 |
headers = {"Content-Type": "application/json"}
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
try:
|
168 |
-
async with httpx.AsyncClient(timeout=
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
try:
|
174 |
-
|
175 |
-
if
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
if
|
180 |
-
|
181 |
-
else:
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
|
191 |
async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
|
|
|
192 |
global SUPADATA_API_KEY, APIFY_API_TOKEN
|
193 |
if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
|
194 |
logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
|
@@ -212,16 +279,17 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
|
|
212 |
else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
|
213 |
else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
|
214 |
if transcript_text is None:
|
215 |
-
logger.info("[Fallback YT 2] Trying Apify REST API...")
|
216 |
if APIFY_API_TOKEN:
|
217 |
-
transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
|
218 |
-
if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify REST for {video_url}"); return transcript_text
|
219 |
-
else: logger.warning(f"[Fallback YT 2] Apify REST failed or no content for {video_url}.")
|
220 |
else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
|
221 |
if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
|
222 |
-
return transcript_text
|
223 |
|
224 |
async def get_website_content(url: str) -> Optional[str]:
|
|
|
225 |
if not url: logger.error("get_website_content: No URL"); return None
|
226 |
logger.info(f"[Primary Web] Fetching website content for: {url}")
|
227 |
html_content = await fetch_url_content_for_scrape(url)
|
@@ -243,6 +311,7 @@ async def get_website_content(url: str) -> Optional[str]:
|
|
243 |
except Exception as e: logger.error(f"[Primary Web] Error scraping/parsing {url}: {e}", exc_info=True); return None
|
244 |
|
245 |
async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
|
|
|
246 |
if not url: logger.error("[Fallback Web API] No URL"); return None
|
247 |
if not api_key: logger.error("[Fallback Web API] urltotext.com API key missing."); return None
|
248 |
logger.info(f"[Fallback Web API] Attempting fetch for: {url} using urltotext.com API")
|
@@ -270,6 +339,7 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
|
|
270 |
except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
|
271 |
|
272 |
# --- Summarization Function ---
|
|
|
273 |
async def generate_summary(text: str, summary_type: str) -> str:
|
274 |
global OPENROUTER_API_KEY, OPENROUTER_MODEL
|
275 |
logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
|
@@ -280,29 +350,16 @@ async def generate_summary(text: str, summary_type: str) -> str:
|
|
280 |
if len(text) > MAX_INPUT_LENGTH: logger.warning(f"Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
|
281 |
full_prompt = f"{prompt}\n\n{text}"
|
282 |
headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json" }; payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}] }; openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
|
283 |
-
|
284 |
-
# *** FIX: Increase timeout and add logging ***
|
285 |
-
# Set a longer timeout (e.g., 180 seconds = 3 minutes)
|
286 |
-
api_timeout = 180.0
|
287 |
-
response = None # Initialize response variable
|
288 |
-
|
289 |
try:
|
290 |
async with httpx.AsyncClient(timeout=api_timeout) as client:
|
291 |
logger.info(f"Sending request to OpenRouter ({OPENROUTER_MODEL}) with timeout {api_timeout}s...")
|
292 |
response = await client.post(openrouter_api_endpoint, headers=headers, json=payload)
|
293 |
-
|
294 |
-
|
295 |
-
logger.info(f"Received response from OpenRouter. Status code: {response.status_code}")
|
296 |
-
else:
|
297 |
-
# This case should technically not happen if await returns, but good for debugging
|
298 |
-
logger.error("No response received from OpenRouter after await completed (unexpected).")
|
299 |
-
return "Sorry, communication with the AI service failed unexpectedly."
|
300 |
-
|
301 |
-
# Process the response (status code check and JSON parsing)
|
302 |
if response.status_code == 200:
|
303 |
try:
|
304 |
data = response.json()
|
305 |
-
# ... (rest of the success processing logic remains the same)
|
306 |
if data.get("choices") and isinstance(data["choices"], list) and len(data["choices"]) > 0:
|
307 |
message = data["choices"][0].get("message")
|
308 |
if message and isinstance(message, dict):
|
@@ -313,38 +370,26 @@ async def generate_summary(text: str, summary_type: str) -> str:
|
|
313 |
else: logger.error(f"Unexpected choices structure in OpenRouter resp: {data.get('choices')}. Full: {data}"); return "Sorry, could not parse AI response (choices)."
|
314 |
except json.JSONDecodeError: logger.error(f"Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return "Sorry, failed to understand AI response."
|
315 |
except Exception as e: logger.error(f"Error processing OpenRouter success response: {e}", exc_info=True); return "Sorry, error processing AI response."
|
316 |
-
# ... (rest of the error status code handling remains the same)
|
317 |
elif response.status_code == 401: logger.error("OpenRouter API key invalid (401)."); return "Error: AI model configuration key is invalid."
|
318 |
elif response.status_code == 402: logger.error("OpenRouter Payment Required (402)."); return "Sorry, AI service limits/payment issue."
|
319 |
elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Exceeded (429)."); return "Sorry, AI model is busy. Try again."
|
320 |
elif response.status_code == 500: logger.error(f"OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, AI service internal error."
|
321 |
else: logger.error(f"Unexpected status {response.status_code} from OpenRouter. Resp:{response.text[:500]}"); return f"Sorry, AI service returned unexpected status ({response.status_code})."
|
322 |
-
|
323 |
-
except httpx.
|
324 |
-
logger.error(f"Timeout error ({api_timeout}s) connecting to OpenRouter API.")
|
325 |
-
return f"Sorry, the request to the AI model timed out after {api_timeout} seconds. The content might be too long or the service busy. Please try again later or with shorter content."
|
326 |
-
except httpx.RequestError as e:
|
327 |
-
logger.error(f"Request error connecting to OpenRouter API: {e}")
|
328 |
-
return "Sorry, there was an error connecting to the AI model service."
|
329 |
except Exception as e:
|
330 |
-
# Catch any other unexpected errors during the request/response cycle
|
331 |
logger.error(f"Unexpected error in generate_summary (OpenRouter request phase): {e}", exc_info=True)
|
332 |
-
# Log response status if available
|
333 |
if response: logger.error(f"--> Last response status before error: {response.status_code}")
|
334 |
return "Sorry, an unexpected error occurred while trying to generate the summary."
|
335 |
|
336 |
# --- Background Task Processing ---
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
) -> None:
|
341 |
-
task_id = f"{user_id}-{message_id_to_edit or 'new'}"
|
342 |
-
logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
|
343 |
background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
|
344 |
try: background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 ); bot = Bot(token=bot_token, request=background_request)
|
345 |
except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return
|
346 |
-
content = None; user_feedback_message = None; success = False
|
347 |
-
status_message_id = message_id_to_edit; message_to_delete_later_id : Optional[int] = None
|
348 |
try:
|
349 |
processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
|
350 |
if status_message_id:
|
@@ -405,6 +450,7 @@ async def process_summary_task(
|
|
405 |
logger.info(f"[Task {task_id}] Task completed. Success: {success}")
|
406 |
|
407 |
# --- Telegram Bot Handlers ---
|
|
|
408 |
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
409 |
user = update.effective_user; mention = user.mention_html()
|
410 |
if not user or not update.message: return
|
@@ -468,7 +514,9 @@ async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> N
|
|
468 |
if isinstance(context.error, ignore_errors) and "object has no attribute" in str(context.error): logger.debug(f"Ignoring known/handled error in error_handler: {context.error}"); return
|
469 |
logger.error("Exception while handling an update:", exc_info=context.error)
|
470 |
|
|
|
471 |
# --- Bot Setup ---
|
|
|
472 |
async def setup_bot_config() -> Application:
|
473 |
logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN
|
474 |
if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
|
@@ -479,6 +527,7 @@ async def setup_bot_config() -> Application:
|
|
479 |
application.add_error_handler(error_handler); logger.info("Telegram application handlers configured."); return application
|
480 |
|
481 |
# --- ASGI Lifespan ---
|
|
|
482 |
@contextlib.asynccontextmanager
|
483 |
async def lifespan(app: Starlette):
|
484 |
global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN
|
@@ -495,7 +544,7 @@ async def lifespan(app: Starlette):
|
|
495 |
except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1)
|
496 |
space_host = os.environ.get("SPACE_HOST"); webhook_path = "/webhook"; full_webhook_url = None
|
497 |
if space_host:
|
498 |
-
protocol = "https"; host = space_host.split('://')[-1]; full_webhook_url = f"{protocol}://{host.rstrip('/')}{webhook_path}"
|
499 |
if full_webhook_url:
|
500 |
logger.info(f"Setting webhook: {full_webhook_url}"); set_webhook_args = { "url": full_webhook_url, "allowed_updates": Update.ALL_TYPES, "drop_pending_updates": True }
|
501 |
if WEBHOOK_SECRET: set_webhook_args["secret_token"] = WEBHOOK_SECRET; logger.info("Using webhook secret.")
|
@@ -524,6 +573,7 @@ async def lifespan(app: Starlette):
|
|
524 |
logger.info("ASGI Lifespan: Shutdown complete.")
|
525 |
|
526 |
# --- Starlette Route Handlers ---
|
|
|
527 |
async def health_check(request: Request) -> PlainTextResponse:
|
528 |
global OPENROUTER_MODEL, APIFY_ACTOR_ID, _apify_token_exists; bot_status = "Not Initialized"
|
529 |
if ptb_app and ptb_app.bot:
|
@@ -547,10 +597,12 @@ async def telegram_webhook(request: Request) -> Response:
|
|
547 |
except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200) # OK despite error
|
548 |
|
549 |
# --- Create Starlette ASGI Application ---
|
|
|
550 |
app = Starlette( debug=False, lifespan=lifespan, routes=[ Route("/", endpoint=health_check, methods=["GET"]), Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), ] )
|
551 |
logger.info("Starlette ASGI application created with native routes.")
|
552 |
|
553 |
# --- Development Server Block ---
|
|
|
554 |
if __name__ == '__main__':
|
555 |
import uvicorn
|
556 |
logger.warning("Running in development mode using Uvicorn directly")
|
|
|
1 |
+
# main.py (Implementing Async Apify Flow)
|
2 |
import os
|
3 |
import re
|
4 |
import logging
|
|
|
7 |
import html
|
8 |
import contextlib
|
9 |
import traceback
|
10 |
+
from typing import Optional, Dict, Any, Tuple
|
11 |
|
12 |
# --- Frameworks ---
|
13 |
from starlette.applications import Starlette
|
|
|
152 |
elif response.status_code == 404: logger.warning(f"[Supadata] Not found (404) for {video_id}."); return None
|
153 |
else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None
|
154 |
except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None
|
155 |
+
except httpx.RequestError as e:
|
156 |
+
if "CERTIFICATE_VERIFY_FAILED" in str(e): logger.error(f"[Supadata] SSL Cert Verify Failed for {video_id}: {e}")
|
157 |
+
else: logger.error(f"[Supadata] Request error for {video_id}: {e}")
|
158 |
+
return None
|
159 |
except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
|
160 |
|
161 |
async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
    """Fetch a YouTube transcript through the Apify REST API.

    The call is a three-step flow against ``api.apify.com``:

    1. start a run of the actor named by the module-level ``APIFY_ACTOR_ID``;
    2. poll the run until it reaches a terminal status or the wait budget
       (``max_wait_seconds``) is used up;
    3. read the first item of the run's default dataset and pull the
       transcript text out of it.

    Args:
        video_url: URL of the video, passed to the actor in ``urls``.
        api_token: Apify API token.

    Returns:
        The stripped transcript text, or ``None`` on any failure (missing
        arguments, run not started, run not finished in time, run finished
        unsuccessfully, empty dataset, no transcript field, HTTP or network
        error). Failures are logged, never raised.
    """
    if not video_url:
        logger.error("[Apify Async] No video_url provided")
        return None
    if not api_token:
        logger.error("[Apify Async] API token missing.")
        return None
    logger.info(f"[Apify Async] Attempting fetch for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")

    start_run_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/runs"
    payload = {
        "urls": [video_url], "outputFormat": "singleStringText", "maxRetries": 5,
        "channelHandleBoolean": False, "channelNameBoolean": False,
        "datePublishedBoolean": False, "relativeDateTextBoolean": False,
    }
    # The token travels in the Authorization header rather than a `?token=`
    # query parameter so it never becomes part of a request URL (URLs tend to
    # end up in client/proxy logs).
    auth_headers = {"Authorization": f"Bearer {api_token}"}
    post_headers = {"Content-Type": "application/json", **auth_headers}

    max_wait_seconds = 120  # Total wall-clock budget for the run to finish
    poll_interval = 5       # Seconds between status checks
    error_backoff = poll_interval * 3  # Gap before the next poll after a failed one
    terminal_statuses = ("SUCCEEDED", "FAILED", "ABORTED", "TIMED-OUT")

    try:
        # Shorter timeout for the individual API calls; the overall wait is
        # governed by the polling deadline below.
        async with httpx.AsyncClient(timeout=30.0) as client:
            # 1. Start the run
            logger.debug(f"[Apify Async] Starting actor run for {video_url}")
            response_start = await client.post(start_run_endpoint, headers=post_headers, json=payload)
            logger.debug(f"[Apify Async] Start run status: {response_start.status_code}")

            if response_start.status_code != 201:  # 201 is the expected status for starting a run
                logger.error(f"[Apify Async] Failed to start run. Status: {response_start.status_code}. Response: {response_start.text[:200]}")
                return None
            try:
                run_data = response_start.json().get("data") or {}
                run_id = run_data.get("id")
                dataset_id = run_data.get("defaultDatasetId")
                if not run_id or not dataset_id:
                    logger.error(f"[Apify Async] Started run but missing runId or datasetId. Data: {run_data}")
                    return None
                logger.info(f"[Apify Async] Run started. Run ID: {run_id}, Dataset ID: {dataset_id}")
            except Exception as e:
                logger.error(f"[Apify Async] Error parsing start run response: {e}. Response: {response_start.text[:200]}", exc_info=True)
                return None

            # 2. Poll for run completion.
            # A monotonic deadline is used instead of a hand-maintained
            # counter so that request latency and error back-off sleeps all
            # count against max_wait_seconds.
            run_status_endpoint = f"https://api.apify.com/v2/actor-runs/{run_id}"
            clock = asyncio.get_running_loop().time
            started_at = clock()
            deadline = started_at + max_wait_seconds
            final_status = None
            next_delay = poll_interval
            while True:
                remaining = deadline - clock()
                if remaining <= 0:
                    break
                await asyncio.sleep(min(next_delay, remaining))
                next_delay = poll_interval
                logger.debug(f"[Apify Async] Polling status for run {run_id} ({clock() - started_at:.0f}s elapsed)")
                try:
                    response_status = await client.get(run_status_endpoint, headers=auth_headers)
                    if response_status.status_code == 200:
                        status_data = response_status.json().get("data") or {}
                        final_status = status_data.get("status")
                        logger.debug(f"[Apify Async] Run status: {final_status}")
                        if final_status in terminal_statuses:
                            break  # Exit polling loop on terminal status
                    else:
                        logger.warning(f"[Apify Async] Non-200 status ({response_status.status_code}) polling run {run_id}.")
                        next_delay = error_backoff  # Wait longer on error
                except Exception as poll_err:
                    # Best effort: a single failed poll must not abort the whole fetch.
                    logger.error(f"[Apify Async] Error polling run status {run_id}: {poll_err}")
                    next_delay = error_backoff  # Wait longer on error

            if final_status not in terminal_statuses:
                # Distinguish "we stopped waiting" from "the run itself failed".
                logger.warning(f"[Apify Async] Run {run_id} did not finish within {max_wait_seconds}s. Last status: {final_status}")
                return None
            if final_status != "SUCCEEDED":
                logger.warning(f"[Apify Async] Run {run_id} did not succeed. Final status: {final_status}")
                return None

            # 3. Fetch dataset items
            logger.info(f"[Apify Async] Run {run_id} succeeded. Fetching items from dataset {dataset_id}")
            dataset_endpoint = f"https://api.apify.com/v2/datasets/{dataset_id}/items"
            params_dataset = {"format": "json", "limit": 5}
            response_dataset = await client.get(dataset_endpoint, headers=auth_headers, params=params_dataset)
            logger.debug(f"[Apify Async] Dataset fetch status: {response_dataset.status_code}")
            response_dataset.raise_for_status()  # Non-2xx is handled by the HTTPStatusError branch below

            results = response_dataset.json()
            if not (isinstance(results, list) and results):
                logger.warning(f"[Apify Async] Dataset {dataset_id} was empty for {video_url}. Response: {results}")
                return None

            item = results[0]
            if not isinstance(item, dict):
                logger.warning(f"[Apify Async] Dataset item found but transcript empty/not found for {video_url}. Item: {item}")
                return None

            # The transcript may sit under several keys; try them in order and
            # fall back to joining the individual caption entries.
            content = item.get("text") or item.get("transcript") or item.get("captions_concatenated")
            captions = item.get("captions")
            if not content and isinstance(captions, list):
                content = " ".join(
                    cap["text"]
                    for cap in captions
                    if isinstance(cap, dict) and isinstance(cap.get("text"), str) and cap["text"]
                )
            if content and isinstance(content, str):
                logger.info(f"[Apify Async] Success via ASYNC REST for {video_url}. Length: {len(content)}")
                return content.strip()
            logger.warning(f"[Apify Async] Dataset item found but transcript empty/not found for {video_url}. Item: {item}")
            return None

    except httpx.TimeoutException as e:
        logger.error(f"[Apify Async] Timeout during API interaction for {video_url}: {e}")
        return None
    except httpx.HTTPStatusError as e:
        logger.error(f"[Apify Async] HTTP Status Error during API interaction for {video_url}: {e}")
        return None
    except httpx.RequestError as e:
        logger.error(f"[Apify Async] Request error during API interaction for {video_url}: {e}")
        return None
    except Exception as e:
        logger.error(f"[Apify Async] Unexpected error during Apify Async REST call for {video_url}: {e}", exc_info=True)
        return None
|
256 |
|
257 |
async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
|
258 |
+
# This function remains the same, calling the updated helper functions
|
259 |
global SUPADATA_API_KEY, APIFY_API_TOKEN
|
260 |
if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
|
261 |
logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
|
|
|
279 |
else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
|
280 |
else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
|
281 |
if transcript_text is None:
|
282 |
+
logger.info("[Fallback YT 2] Trying Apify REST API (Async)...") # Updated log message
|
283 |
if APIFY_API_TOKEN:
|
284 |
+
transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN) # Calls new async version
|
285 |
+
if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify ASYNC REST for {video_url}"); return transcript_text
|
286 |
+
else: logger.warning(f"[Fallback YT 2] Apify ASYNC REST failed or no content for {video_url}.")
|
287 |
else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
|
288 |
if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
|
289 |
+
return transcript_text # Will be None if all failed
|
290 |
|
291 |
async def get_website_content(url: str) -> Optional[str]:
|
292 |
+
# This function remains the same
|
293 |
if not url: logger.error("get_website_content: No URL"); return None
|
294 |
logger.info(f"[Primary Web] Fetching website content for: {url}")
|
295 |
html_content = await fetch_url_content_for_scrape(url)
|
|
|
311 |
except Exception as e: logger.error(f"[Primary Web] Error scraping/parsing {url}: {e}", exc_info=True); return None
|
312 |
|
313 |
async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
|
314 |
+
# This function remains the same
|
315 |
if not url: logger.error("[Fallback Web API] No URL"); return None
|
316 |
if not api_key: logger.error("[Fallback Web API] urltotext.com API key missing."); return None
|
317 |
logger.info(f"[Fallback Web API] Attempting fetch for: {url} using urltotext.com API")
|
|
|
339 |
except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
|
340 |
|
341 |
# --- Summarization Function ---
|
342 |
+
# (generate_summary remains the same)
|
343 |
async def generate_summary(text: str, summary_type: str) -> str:
|
344 |
global OPENROUTER_API_KEY, OPENROUTER_MODEL
|
345 |
logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
|
|
|
350 |
if len(text) > MAX_INPUT_LENGTH: logger.warning(f"Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
|
351 |
full_prompt = f"{prompt}\n\n{text}"
|
352 |
headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json" }; payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}] }; openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
|
353 |
+
api_timeout = 180.0; response = None
|
|
|
|
|
|
|
|
|
|
|
354 |
try:
|
355 |
async with httpx.AsyncClient(timeout=api_timeout) as client:
|
356 |
logger.info(f"Sending request to OpenRouter ({OPENROUTER_MODEL}) with timeout {api_timeout}s...")
|
357 |
response = await client.post(openrouter_api_endpoint, headers=headers, json=payload)
|
358 |
+
if response: logger.info(f"Received response from OpenRouter. Status code: {response.status_code}")
|
359 |
+
else: logger.error("No response received from OpenRouter after await completed (unexpected)."); return "Sorry, communication with the AI service failed unexpectedly."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
360 |
if response.status_code == 200:
|
361 |
try:
|
362 |
data = response.json()
|
|
|
363 |
if data.get("choices") and isinstance(data["choices"], list) and len(data["choices"]) > 0:
|
364 |
message = data["choices"][0].get("message")
|
365 |
if message and isinstance(message, dict):
|
|
|
370 |
else: logger.error(f"Unexpected choices structure in OpenRouter resp: {data.get('choices')}. Full: {data}"); return "Sorry, could not parse AI response (choices)."
|
371 |
except json.JSONDecodeError: logger.error(f"Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return "Sorry, failed to understand AI response."
|
372 |
except Exception as e: logger.error(f"Error processing OpenRouter success response: {e}", exc_info=True); return "Sorry, error processing AI response."
|
|
|
373 |
elif response.status_code == 401: logger.error("OpenRouter API key invalid (401)."); return "Error: AI model configuration key is invalid."
|
374 |
elif response.status_code == 402: logger.error("OpenRouter Payment Required (402)."); return "Sorry, AI service limits/payment issue."
|
375 |
elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Exceeded (429)."); return "Sorry, AI model is busy. Try again."
|
376 |
elif response.status_code == 500: logger.error(f"OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, AI service internal error."
|
377 |
else: logger.error(f"Unexpected status {response.status_code} from OpenRouter. Resp:{response.text[:500]}"); return f"Sorry, AI service returned unexpected status ({response.status_code})."
|
378 |
+
except httpx.TimeoutException: logger.error(f"Timeout error ({api_timeout}s) connecting to OpenRouter API."); return f"Sorry, the request to the AI model timed out after {api_timeout} seconds. The content might be too long or the service busy. Please try again later or with shorter content."
|
379 |
+
except httpx.RequestError as e: logger.error(f"Request error connecting to OpenRouter API: {e}"); return "Sorry, there was an error connecting to the AI model service."
|
|
|
|
|
|
|
|
|
|
|
380 |
except Exception as e:
|
|
|
381 |
logger.error(f"Unexpected error in generate_summary (OpenRouter request phase): {e}", exc_info=True)
|
|
|
382 |
if response: logger.error(f"--> Last response status before error: {response.status_code}")
|
383 |
return "Sorry, an unexpected error occurred while trying to generate the summary."
|
384 |
|
385 |
# --- Background Task Processing ---
|
386 |
+
# (process_summary_task remains the same)
|
387 |
+
async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
|
388 |
+
task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
|
|
|
|
|
|
|
389 |
background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
|
390 |
try: background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 ); bot = Bot(token=bot_token, request=background_request)
|
391 |
except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return
|
392 |
+
content = None; user_feedback_message = None; success = False; status_message_id = message_id_to_edit; message_to_delete_later_id : Optional[int] = None
|
|
|
393 |
try:
|
394 |
processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
|
395 |
if status_message_id:
|
|
|
450 |
logger.info(f"[Task {task_id}] Task completed. Success: {success}")
|
451 |
|
452 |
# --- Telegram Bot Handlers ---
|
453 |
+
# (start, help_command, handle_potential_url, handle_summary_type_callback, error_handler remain the same)
|
454 |
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
455 |
user = update.effective_user; mention = user.mention_html()
|
456 |
if not user or not update.message: return
|
|
|
514 |
if isinstance(context.error, ignore_errors) and "object has no attribute" in str(context.error): logger.debug(f"Ignoring known/handled error in error_handler: {context.error}"); return
|
515 |
logger.error("Exception while handling an update:", exc_info=context.error)
|
516 |
|
517 |
+
|
518 |
# --- Bot Setup ---
|
519 |
+
# (setup_bot_config remains the same)
|
520 |
async def setup_bot_config() -> Application:
|
521 |
logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN
|
522 |
if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
|
|
|
527 |
application.add_error_handler(error_handler); logger.info("Telegram application handlers configured."); return application
|
528 |
|
529 |
# --- ASGI Lifespan ---
|
530 |
+
# (lifespan remains the same)
|
531 |
@contextlib.asynccontextmanager
|
532 |
async def lifespan(app: Starlette):
|
533 |
global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN
|
|
|
544 |
except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1)
|
545 |
space_host = os.environ.get("SPACE_HOST"); webhook_path = "/webhook"; full_webhook_url = None
|
546 |
if space_host:
|
547 |
+
protocol = "https"; host = space_host.split('://')[-1]; full_webhook_url = f"{protocol}://{host.rstrip('/')}{webhook_path}"
|
548 |
if full_webhook_url:
|
549 |
logger.info(f"Setting webhook: {full_webhook_url}"); set_webhook_args = { "url": full_webhook_url, "allowed_updates": Update.ALL_TYPES, "drop_pending_updates": True }
|
550 |
if WEBHOOK_SECRET: set_webhook_args["secret_token"] = WEBHOOK_SECRET; logger.info("Using webhook secret.")
|
|
|
573 |
logger.info("ASGI Lifespan: Shutdown complete.")
|
574 |
|
575 |
# --- Starlette Route Handlers ---
|
576 |
+
# (health_check and telegram_webhook remain the same)
|
577 |
async def health_check(request: Request) -> PlainTextResponse:
|
578 |
global OPENROUTER_MODEL, APIFY_ACTOR_ID, _apify_token_exists; bot_status = "Not Initialized"
|
579 |
if ptb_app and ptb_app.bot:
|
|
|
597 |
except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200) # OK despite error
|
598 |
|
599 |
# --- Create Starlette ASGI Application ---
|
600 |
+
# (app definition remains the same)
|
601 |
app = Starlette( debug=False, lifespan=lifespan, routes=[ Route("/", endpoint=health_check, methods=["GET"]), Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), ] )
|
602 |
logger.info("Starlette ASGI application created with native routes.")
|
603 |
|
604 |
# --- Development Server Block ---
|
605 |
+
# (remains the same)
|
606 |
if __name__ == '__main__':
|
607 |
import uvicorn
|
608 |
logger.warning("Running in development mode using Uvicorn directly")
|