fmab777 committed on
Commit
ecd9eb2
·
verified ·
1 Parent(s): 9412ba0

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +122 -70
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # main.py (Increasing timeout and adding logging in generate_summary)
2
  import os
3
  import re
4
  import logging
@@ -7,7 +7,7 @@ import json
7
  import html
8
  import contextlib
9
  import traceback
10
- from typing import Optional, Dict, Any
11
 
12
  # --- Frameworks ---
13
  from starlette.applications import Starlette
@@ -152,43 +152,110 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
152
  elif response.status_code == 404: logger.warning(f"[Supadata] Not found (404) for {video_id}."); return None
153
  else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None
154
  except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None
155
- except httpx.RequestError as e: logger.error(f"[Supadata] Request error for {video_id}: {e}"); return None
 
 
 
156
  except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
157
 
158
  async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
 
159
  global APIFY_ACTOR_ID
160
- if not video_url: logger.error("[Apify] No video_url provided"); return None
161
- if not api_token: logger.error("[Apify] API token missing."); return None
162
- logger.info(f"[Apify] Attempting fetch via REST for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
163
- api_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/run-sync-get-dataset-items"
164
- params = {"token": api_token}
165
- payload = { "urls": [video_url], "outputFormat": "singleStringText", "maxRetries": 5, "channelHandleBoolean": False, "channelNameBoolean": False, "datePublishedBoolean": False, "relativeDateTextBoolean": False, }
 
 
 
 
 
166
  headers = {"Content-Type": "application/json"}
 
 
 
 
 
 
167
  try:
168
- async with httpx.AsyncClient(timeout=90.0) as client:
169
- logger.debug(f"[Apify] Sending request to run actor {APIFY_ACTOR_ID} synchronously for {video_url}")
170
- response = await client.post(api_endpoint, headers=headers, params=params, json=payload)
171
- logger.debug(f"[Apify] Received status code {response.status_code} for {video_url}")
172
- if response.status_code == 200:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  try:
174
- results = response.json()
175
- if isinstance(results, list) and len(results) > 0:
176
- item = results[0]
177
- content = item.get("text") or item.get("transcript") or item.get("captions_concatenated")
178
- if not content and item.get("captions") and isinstance(item["captions"], list): logger.info("[Apify] Processing 'captions' format."); content = " ".join(cap.get("text", "") for cap in item["captions"] if cap.get("text"))
179
- if content and isinstance(content, str): logger.info(f"[Apify] Success via REST for {video_url}. Length: {len(content)}"); return content.strip()
180
- else: logger.warning(f"[Apify] Actor success but transcript empty/not found for {video_url}. Item: {item}"); return None
181
- else: logger.warning(f"[Apify] Actor success but dataset empty for {video_url}. Response: {results}"); return None
182
- except json.JSONDecodeError: logger.error(f"[Apify] Failed JSON decode for {video_url}. Status:{response.status_code}. Resp:{response.text[:200]}"); return None
183
- except Exception as e: logger.error(f"[Apify] Error processing success response for {video_url}: {e}", exc_info=True); return None
184
- elif response.status_code == 400: logger.error(f"[Apify] Bad Request (400) for {video_url}. Check payload. Resp:{response.text[:200]}"); return None
185
- elif response.status_code == 401: logger.error("[Apify] Auth error (401). Check token."); return None
186
- else: logger.error(f"[Apify] Unexpected status {response.status_code} for {video_url}. Resp:{response.text[:200]}"); return None
187
- except httpx.TimeoutException: logger.error(f"[Apify] Timeout running actor for {video_url}"); return None
188
- except httpx.RequestError as e: logger.error(f"[Apify] Request error running actor for {video_url}: {e}"); return None
189
- except Exception as e: logger.error(f"[Apify] Unexpected error during Apify REST call for {video_url}: {e}", exc_info=True); return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
 
192
  global SUPADATA_API_KEY, APIFY_API_TOKEN
193
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
194
  logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
@@ -212,16 +279,17 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
212
  else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
213
  else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
214
  if transcript_text is None:
215
- logger.info("[Fallback YT 2] Trying Apify REST API...")
216
  if APIFY_API_TOKEN:
217
- transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
218
- if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify REST for {video_url}"); return transcript_text
219
- else: logger.warning(f"[Fallback YT 2] Apify REST failed or no content for {video_url}.")
220
  else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
221
  if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
222
- return transcript_text
223
 
224
  async def get_website_content(url: str) -> Optional[str]:
 
225
  if not url: logger.error("get_website_content: No URL"); return None
226
  logger.info(f"[Primary Web] Fetching website content for: {url}")
227
  html_content = await fetch_url_content_for_scrape(url)
@@ -243,6 +311,7 @@ async def get_website_content(url: str) -> Optional[str]:
243
  except Exception as e: logger.error(f"[Primary Web] Error scraping/parsing {url}: {e}", exc_info=True); return None
244
 
245
  async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
 
246
  if not url: logger.error("[Fallback Web API] No URL"); return None
247
  if not api_key: logger.error("[Fallback Web API] urltotext.com API key missing."); return None
248
  logger.info(f"[Fallback Web API] Attempting fetch for: {url} using urltotext.com API")
@@ -270,6 +339,7 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
270
  except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
271
 
272
  # --- Summarization Function ---
 
273
  async def generate_summary(text: str, summary_type: str) -> str:
274
  global OPENROUTER_API_KEY, OPENROUTER_MODEL
275
  logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
@@ -280,29 +350,16 @@ async def generate_summary(text: str, summary_type: str) -> str:
280
  if len(text) > MAX_INPUT_LENGTH: logger.warning(f"Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
281
  full_prompt = f"{prompt}\n\n{text}"
282
  headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json" }; payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}] }; openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
283
-
284
- # *** FIX: Increase timeout and add logging ***
285
- # Set a longer timeout (e.g., 180 seconds = 3 minutes)
286
- api_timeout = 180.0
287
- response = None # Initialize response variable
288
-
289
  try:
290
  async with httpx.AsyncClient(timeout=api_timeout) as client:
291
  logger.info(f"Sending request to OpenRouter ({OPENROUTER_MODEL}) with timeout {api_timeout}s...")
292
  response = await client.post(openrouter_api_endpoint, headers=headers, json=payload)
293
- # Check response immediately after await returns
294
- if response:
295
- logger.info(f"Received response from OpenRouter. Status code: {response.status_code}")
296
- else:
297
- # This case should technically not happen if await returns, but good for debugging
298
- logger.error("No response received from OpenRouter after await completed (unexpected).")
299
- return "Sorry, communication with the AI service failed unexpectedly."
300
-
301
- # Process the response (status code check and JSON parsing)
302
  if response.status_code == 200:
303
  try:
304
  data = response.json()
305
- # ... (rest of the success processing logic remains the same)
306
  if data.get("choices") and isinstance(data["choices"], list) and len(data["choices"]) > 0:
307
  message = data["choices"][0].get("message")
308
  if message and isinstance(message, dict):
@@ -313,38 +370,26 @@ async def generate_summary(text: str, summary_type: str) -> str:
313
  else: logger.error(f"Unexpected choices structure in OpenRouter resp: {data.get('choices')}. Full: {data}"); return "Sorry, could not parse AI response (choices)."
314
  except json.JSONDecodeError: logger.error(f"Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return "Sorry, failed to understand AI response."
315
  except Exception as e: logger.error(f"Error processing OpenRouter success response: {e}", exc_info=True); return "Sorry, error processing AI response."
316
- # ... (rest of the error status code handling remains the same)
317
  elif response.status_code == 401: logger.error("OpenRouter API key invalid (401)."); return "Error: AI model configuration key is invalid."
318
  elif response.status_code == 402: logger.error("OpenRouter Payment Required (402)."); return "Sorry, AI service limits/payment issue."
319
  elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Exceeded (429)."); return "Sorry, AI model is busy. Try again."
320
  elif response.status_code == 500: logger.error(f"OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, AI service internal error."
321
  else: logger.error(f"Unexpected status {response.status_code} from OpenRouter. Resp:{response.text[:500]}"); return f"Sorry, AI service returned unexpected status ({response.status_code})."
322
-
323
- except httpx.TimeoutException:
324
- logger.error(f"Timeout error ({api_timeout}s) connecting to OpenRouter API.")
325
- return f"Sorry, the request to the AI model timed out after {api_timeout} seconds. The content might be too long or the service busy. Please try again later or with shorter content."
326
- except httpx.RequestError as e:
327
- logger.error(f"Request error connecting to OpenRouter API: {e}")
328
- return "Sorry, there was an error connecting to the AI model service."
329
  except Exception as e:
330
- # Catch any other unexpected errors during the request/response cycle
331
  logger.error(f"Unexpected error in generate_summary (OpenRouter request phase): {e}", exc_info=True)
332
- # Log response status if available
333
  if response: logger.error(f"--> Last response status before error: {response.status_code}")
334
  return "Sorry, an unexpected error occurred while trying to generate the summary."
335
 
336
  # --- Background Task Processing ---
337
- async def process_summary_task(
338
- user_id: int, chat_id: int, message_id_to_edit: Optional[int],
339
- url: str, summary_type: str, bot_token: str
340
- ) -> None:
341
- task_id = f"{user_id}-{message_id_to_edit or 'new'}"
342
- logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
343
  background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
344
  try: background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 ); bot = Bot(token=bot_token, request=background_request)
345
  except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return
346
- content = None; user_feedback_message = None; success = False
347
- status_message_id = message_id_to_edit; message_to_delete_later_id : Optional[int] = None
348
  try:
349
  processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
350
  if status_message_id:
@@ -405,6 +450,7 @@ async def process_summary_task(
405
  logger.info(f"[Task {task_id}] Task completed. Success: {success}")
406
 
407
  # --- Telegram Bot Handlers ---
 
408
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
409
  user = update.effective_user; mention = user.mention_html()
410
  if not user or not update.message: return
@@ -468,7 +514,9 @@ async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> N
468
  if isinstance(context.error, ignore_errors) and "object has no attribute" in str(context.error): logger.debug(f"Ignoring known/handled error in error_handler: {context.error}"); return
469
  logger.error("Exception while handling an update:", exc_info=context.error)
470
 
 
471
  # --- Bot Setup ---
 
472
  async def setup_bot_config() -> Application:
473
  logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN
474
  if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
@@ -479,6 +527,7 @@ async def setup_bot_config() -> Application:
479
  application.add_error_handler(error_handler); logger.info("Telegram application handlers configured."); return application
480
 
481
  # --- ASGI Lifespan ---
 
482
  @contextlib.asynccontextmanager
483
  async def lifespan(app: Starlette):
484
  global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN
@@ -495,7 +544,7 @@ async def lifespan(app: Starlette):
495
  except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1)
496
  space_host = os.environ.get("SPACE_HOST"); webhook_path = "/webhook"; full_webhook_url = None
497
  if space_host:
498
- protocol = "https"; host = space_host.split('://')[-1]; full_webhook_url = f"{protocol}://{host.rstrip('/')}{webhook_path}" # Corrected URL construction
499
  if full_webhook_url:
500
  logger.info(f"Setting webhook: {full_webhook_url}"); set_webhook_args = { "url": full_webhook_url, "allowed_updates": Update.ALL_TYPES, "drop_pending_updates": True }
501
  if WEBHOOK_SECRET: set_webhook_args["secret_token"] = WEBHOOK_SECRET; logger.info("Using webhook secret.")
@@ -524,6 +573,7 @@ async def lifespan(app: Starlette):
524
  logger.info("ASGI Lifespan: Shutdown complete.")
525
 
526
  # --- Starlette Route Handlers ---
 
527
  async def health_check(request: Request) -> PlainTextResponse:
528
  global OPENROUTER_MODEL, APIFY_ACTOR_ID, _apify_token_exists; bot_status = "Not Initialized"
529
  if ptb_app and ptb_app.bot:
@@ -547,10 +597,12 @@ async def telegram_webhook(request: Request) -> Response:
547
  except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200) # OK despite error
548
 
549
  # --- Create Starlette ASGI Application ---
 
550
  app = Starlette( debug=False, lifespan=lifespan, routes=[ Route("/", endpoint=health_check, methods=["GET"]), Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), ] )
551
  logger.info("Starlette ASGI application created with native routes.")
552
 
553
  # --- Development Server Block ---
 
554
  if __name__ == '__main__':
555
  import uvicorn
556
  logger.warning("Running in development mode using Uvicorn directly")
 
1
+ # main.py (Implementing Async Apify Flow)
2
  import os
3
  import re
4
  import logging
 
7
  import html
8
  import contextlib
9
  import traceback
10
+ from typing import Optional, Dict, Any, Tuple
11
 
12
  # --- Frameworks ---
13
  from starlette.applications import Starlette
 
152
  elif response.status_code == 404: logger.warning(f"[Supadata] Not found (404) for {video_id}."); return None
153
  else: logger.error(f"[Supadata] Unexpected status {response.status_code} for {video_id}. Resp: {response.text[:200]}"); return None
154
  except httpx.TimeoutException: logger.error(f"[Supadata] Timeout connecting for {video_id}"); return None
155
+ except httpx.RequestError as e:
156
+ if "CERTIFICATE_VERIFY_FAILED" in str(e): logger.error(f"[Supadata] SSL Cert Verify Failed for {video_id}: {e}")
157
+ else: logger.error(f"[Supadata] Request error for {video_id}: {e}")
158
+ return None
159
  except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
160
 
161
  async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
162
+ """Fetches YouTube transcript using Apify REST API (async start + poll + dataset fetch)."""
163
  global APIFY_ACTOR_ID
164
+ if not video_url: logger.error("[Apify Async] No video_url provided"); return None
165
+ if not api_token: logger.error("[Apify Async] API token missing."); return None
166
+ logger.info(f"[Apify Async] Attempting fetch for URL: {video_url} (Actor: {APIFY_ACTOR_ID})")
167
+
168
+ start_run_endpoint = f"https://api.apify.com/v2/acts/{APIFY_ACTOR_ID}/runs"
169
+ params_base = {"token": api_token}
170
+ payload = {
171
+ "urls": [video_url], "outputFormat": "singleStringText", "maxRetries": 5,
172
+ "channelHandleBoolean": False, "channelNameBoolean": False,
173
+ "datePublishedBoolean": False, "relativeDateTextBoolean": False,
174
+ }
175
  headers = {"Content-Type": "application/json"}
176
+
177
+ run_id: Optional[str] = None
178
+ dataset_id: Optional[str] = None
179
+ max_wait_seconds = 120 # Total time to wait for the run to finish
180
+ poll_interval = 5 # Seconds between status checks
181
+
182
  try:
183
+ async with httpx.AsyncClient(timeout=30.0) as client: # Shorter timeout for individual API calls
184
+ # 1. Start the run
185
+ logger.debug(f"[Apify Async] Starting actor run for {video_url}")
186
+ response_start = await client.post(start_run_endpoint, headers=headers, params=params_base, json=payload)
187
+ logger.debug(f"[Apify Async] Start run status: {response_start.status_code}")
188
+
189
+ if response_start.status_code == 201: # Expected status for starting a run
190
+ try:
191
+ run_data = response_start.json().get("data", {})
192
+ run_id = run_data.get("id")
193
+ dataset_id = run_data.get("defaultDatasetId")
194
+ if not run_id or not dataset_id:
195
+ logger.error(f"[Apify Async] Started run but missing runId or datasetId. Data: {run_data}")
196
+ return None
197
+ logger.info(f"[Apify Async] Run started. Run ID: {run_id}, Dataset ID: {dataset_id}")
198
+ except Exception as e:
199
+ logger.error(f"[Apify Async] Error parsing start run response: {e}. Response: {response_start.text[:200]}", exc_info=True)
200
+ return None
201
+ else:
202
+ logger.error(f"[Apify Async] Failed to start run. Status: {response_start.status_code}. Response: {response_start.text[:200]}")
203
+ return None
204
+
205
+ # 2. Poll for run completion
206
+ run_status_endpoint = f"https://api.apify.com/v2/actor-runs/{run_id}"
207
+ elapsed_time = 0
208
+ final_status = None
209
+ while elapsed_time < max_wait_seconds:
210
+ await asyncio.sleep(poll_interval)
211
+ elapsed_time += poll_interval
212
+ logger.debug(f"[Apify Async] Polling status for run {run_id} ({elapsed_time}s elapsed)")
213
  try:
214
+ response_status = await client.get(run_status_endpoint, params=params_base)
215
+ if response_status.status_code == 200:
216
+ status_data = response_status.json().get("data", {})
217
+ final_status = status_data.get("status")
218
+ logger.debug(f"[Apify Async] Run status: {final_status}")
219
+ if final_status in ["SUCCEEDED", "FAILED", "ABORTED", "TIMED-OUT"]:
220
+ break # Exit polling loop on terminal status
221
+ else:
222
+ logger.warning(f"[Apify Async] Non-200 status ({response_status.status_code}) polling run {run_id}.")
223
+ await asyncio.sleep(poll_interval * 2) # Wait longer on error
224
+ except Exception as poll_err:
225
+ logger.error(f"[Apify Async] Error polling run status {run_id}: {poll_err}")
226
+ await asyncio.sleep(poll_interval * 2) # Wait longer on error
227
+
228
+ if final_status != "SUCCEEDED":
229
+ logger.warning(f"[Apify Async] Run {run_id} did not succeed. Final status: {final_status}")
230
+ return None
231
+
232
+ # 3. Fetch dataset items
233
+ logger.info(f"[Apify Async] Run {run_id} succeeded. Fetching items from dataset {dataset_id}")
234
+ dataset_endpoint = f"https://api.apify.com/v2/datasets/{dataset_id}/items"
235
+ params_dataset = {"token": api_token, "format": "json", "limit": 5}
236
+ response_dataset = await client.get(dataset_endpoint, params=params_dataset)
237
+ logger.debug(f"[Apify Async] Dataset fetch status: {response_dataset.status_code}")
238
+ response_dataset.raise_for_status() # Raise exception for non-2xx status
239
+
240
+ results = response_dataset.json()
241
+ if isinstance(results, list) and len(results) > 0:
242
+ item = results[0]
243
+ content = item.get("text") or item.get("transcript") or item.get("captions_concatenated")
244
+ if not content and item.get("captions") and isinstance(item["captions"], list):
245
+ content = " ".join(cap.get("text", "") for cap in item["captions"] if cap.get("text"))
246
+ if content and isinstance(content, str):
247
+ logger.info(f"[Apify Async] Success via ASYNC REST for {video_url}. Length: {len(content)}")
248
+ return content.strip()
249
+ else: logger.warning(f"[Apify Async] Dataset item found but transcript empty/not found for {video_url}. Item: {item}"); return None
250
+ else: logger.warning(f"[Apify Async] Dataset {dataset_id} was empty for {video_url}. Response: {results}"); return None
251
+
252
+ except httpx.TimeoutException as e: logger.error(f"[Apify Async] Timeout during API interaction for {video_url}: {e}"); return None
253
+ except httpx.HTTPStatusError as e: logger.error(f"[Apify Async] HTTP Status Error during API interaction for {video_url}: {e}"); return None
254
+ except httpx.RequestError as e: logger.error(f"[Apify Async] Request error during API interaction for {video_url}: {e}"); return None
255
+ except Exception as e: logger.error(f"[Apify Async] Unexpected error during Apify Async REST call for {video_url}: {e}", exc_info=True); return None
256
 
257
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
258
+ # This function remains the same, calling the updated helper functions
259
  global SUPADATA_API_KEY, APIFY_API_TOKEN
260
  if not video_id: logger.error("get_youtube_transcript: No video_id"); return None
261
  logger.info(f"Fetching transcript for video ID: {video_id} (URL: {video_url})")
 
279
  else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
280
  else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
281
  if transcript_text is None:
282
+ logger.info("[Fallback YT 2] Trying Apify REST API (Async)...") # Updated log message
283
  if APIFY_API_TOKEN:
284
+ transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN) # Calls new async version
285
+ if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify ASYNC REST for {video_url}"); return transcript_text
286
+ else: logger.warning(f"[Fallback YT 2] Apify ASYNC REST failed or no content for {video_url}.")
287
  else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
288
  if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
289
+ return transcript_text # Will be None if all failed
290
 
291
  async def get_website_content(url: str) -> Optional[str]:
292
+ # This function remains the same
293
  if not url: logger.error("get_website_content: No URL"); return None
294
  logger.info(f"[Primary Web] Fetching website content for: {url}")
295
  html_content = await fetch_url_content_for_scrape(url)
 
311
  except Exception as e: logger.error(f"[Primary Web] Error scraping/parsing {url}: {e}", exc_info=True); return None
312
 
313
  async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
314
+ # This function remains the same
315
  if not url: logger.error("[Fallback Web API] No URL"); return None
316
  if not api_key: logger.error("[Fallback Web API] urltotext.com API key missing."); return None
317
  logger.info(f"[Fallback Web API] Attempting fetch for: {url} using urltotext.com API")
 
339
  except Exception as e: logger.error(f"[Fallback Web API] Unexpected error during urltotext.com API call for {url}: {e}", exc_info=True); return None
340
 
341
  # --- Summarization Function ---
342
+ # (generate_summary remains the same)
343
  async def generate_summary(text: str, summary_type: str) -> str:
344
  global OPENROUTER_API_KEY, OPENROUTER_MODEL
345
  logger.info(f"Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
 
350
  if len(text) > MAX_INPUT_LENGTH: logger.warning(f"Input length ({len(text)}) exceeds limit ({MAX_INPUT_LENGTH}). Truncating."); text = text[:MAX_INPUT_LENGTH] + "... (Content truncated)"
351
  full_prompt = f"{prompt}\n\n{text}"
352
  headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json" }; payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "user", "content": full_prompt}] }; openrouter_api_endpoint = "https://openrouter.ai/api/v1/chat/completions"
353
+ api_timeout = 180.0; response = None
 
 
 
 
 
354
  try:
355
  async with httpx.AsyncClient(timeout=api_timeout) as client:
356
  logger.info(f"Sending request to OpenRouter ({OPENROUTER_MODEL}) with timeout {api_timeout}s...")
357
  response = await client.post(openrouter_api_endpoint, headers=headers, json=payload)
358
+ if response: logger.info(f"Received response from OpenRouter. Status code: {response.status_code}")
359
+ else: logger.error("No response received from OpenRouter after await completed (unexpected)."); return "Sorry, communication with the AI service failed unexpectedly."
 
 
 
 
 
 
 
360
  if response.status_code == 200:
361
  try:
362
  data = response.json()
 
363
  if data.get("choices") and isinstance(data["choices"], list) and len(data["choices"]) > 0:
364
  message = data["choices"][0].get("message")
365
  if message and isinstance(message, dict):
 
370
  else: logger.error(f"Unexpected choices structure in OpenRouter resp: {data.get('choices')}. Full: {data}"); return "Sorry, could not parse AI response (choices)."
371
  except json.JSONDecodeError: logger.error(f"Failed JSON decode OpenRouter. Status:{response.status_code}. Resp:{response.text[:500]}"); return "Sorry, failed to understand AI response."
372
  except Exception as e: logger.error(f"Error processing OpenRouter success response: {e}", exc_info=True); return "Sorry, error processing AI response."
 
373
  elif response.status_code == 401: logger.error("OpenRouter API key invalid (401)."); return "Error: AI model configuration key is invalid."
374
  elif response.status_code == 402: logger.error("OpenRouter Payment Required (402)."); return "Sorry, AI service limits/payment issue."
375
  elif response.status_code == 429: logger.warning("OpenRouter Rate Limit Exceeded (429)."); return "Sorry, AI model is busy. Try again."
376
  elif response.status_code == 500: logger.error(f"OpenRouter Internal Server Error (500). Resp:{response.text[:500]}"); return "Sorry, AI service internal error."
377
  else: logger.error(f"Unexpected status {response.status_code} from OpenRouter. Resp:{response.text[:500]}"); return f"Sorry, AI service returned unexpected status ({response.status_code})."
378
+ except httpx.TimeoutException: logger.error(f"Timeout error ({api_timeout}s) connecting to OpenRouter API."); return f"Sorry, the request to the AI model timed out after {api_timeout} seconds. The content might be too long or the service busy. Please try again later or with shorter content."
379
+ except httpx.RequestError as e: logger.error(f"Request error connecting to OpenRouter API: {e}"); return "Sorry, there was an error connecting to the AI model service."
 
 
 
 
 
380
  except Exception as e:
 
381
  logger.error(f"Unexpected error in generate_summary (OpenRouter request phase): {e}", exc_info=True)
 
382
  if response: logger.error(f"--> Last response status before error: {response.status_code}")
383
  return "Sorry, an unexpected error occurred while trying to generate the summary."
384
 
385
  # --- Background Task Processing ---
386
+ # (process_summary_task remains the same)
387
+ async def process_summary_task( user_id: int, chat_id: int, message_id_to_edit: Optional[int], url: str, summary_type: str, bot_token: str ) -> None:
388
+ task_id = f"{user_id}-{message_id_to_edit or 'new'}"; logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
 
 
 
389
  background_request: Optional[BaseRequest] = None; bot: Optional[Bot] = None
390
  try: background_request = HTTPXRequest( connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0 ); bot = Bot(token=bot_token, request=background_request)
391
  except Exception as e: logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return
392
+ content = None; user_feedback_message = None; success = False; status_message_id = message_id_to_edit; message_to_delete_later_id : Optional[int] = None
 
393
  try:
394
  processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n`{url}`\n\nThis might take a moment..."
395
  if status_message_id:
 
450
  logger.info(f"[Task {task_id}] Task completed. Success: {success}")
451
 
452
  # --- Telegram Bot Handlers ---
453
+ # (start, help_command, handle_potential_url, handle_summary_type_callback, error_handler remain the same)
454
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
455
  user = update.effective_user; mention = user.mention_html()
456
  if not user or not update.message: return
 
514
  if isinstance(context.error, ignore_errors) and "object has no attribute" in str(context.error): logger.debug(f"Ignoring known/handled error in error_handler: {context.error}"); return
515
  logger.error("Exception while handling an update:", exc_info=context.error)
516
 
517
+
518
  # --- Bot Setup ---
519
+ # (setup_bot_config remains the same)
520
  async def setup_bot_config() -> Application:
521
  logger.info("Configuring Telegram Application..."); global TELEGRAM_TOKEN
522
  if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
 
527
  application.add_error_handler(error_handler); logger.info("Telegram application handlers configured."); return application
528
 
529
  # --- ASGI Lifespan ---
530
+ # (lifespan remains the same)
531
  @contextlib.asynccontextmanager
532
  async def lifespan(app: Starlette):
533
  global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN
 
544
  except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1)
545
  space_host = os.environ.get("SPACE_HOST"); webhook_path = "/webhook"; full_webhook_url = None
546
  if space_host:
547
+ protocol = "https"; host = space_host.split('://')[-1]; full_webhook_url = f"{protocol}://{host.rstrip('/')}{webhook_path}"
548
  if full_webhook_url:
549
  logger.info(f"Setting webhook: {full_webhook_url}"); set_webhook_args = { "url": full_webhook_url, "allowed_updates": Update.ALL_TYPES, "drop_pending_updates": True }
550
  if WEBHOOK_SECRET: set_webhook_args["secret_token"] = WEBHOOK_SECRET; logger.info("Using webhook secret.")
 
573
  logger.info("ASGI Lifespan: Shutdown complete.")
574
 
575
  # --- Starlette Route Handlers ---
576
+ # (health_check and telegram_webhook remain the same)
577
  async def health_check(request: Request) -> PlainTextResponse:
578
  global OPENROUTER_MODEL, APIFY_ACTOR_ID, _apify_token_exists; bot_status = "Not Initialized"
579
  if ptb_app and ptb_app.bot:
 
597
  except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200) # OK despite error
598
 
599
  # --- Create Starlette ASGI Application ---
600
+ # (app definition remains the same)
601
  app = Starlette( debug=False, lifespan=lifespan, routes=[ Route("/", endpoint=health_check, methods=["GET"]), Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), ] )
602
  logger.info("Starlette ASGI application created with native routes.")
603
 
604
  # --- Development Server Block ---
605
+ # (remains the same)
606
  if __name__ == '__main__':
607
  import uvicorn
608
  logger.warning("Running in development mode using Uvicorn directly")