MB-IDK committed on
Commit
e3d8357
·
verified ·
1 Parent(s): 858d8f6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +462 -0
app.py ADDED
@@ -0,0 +1,462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Header
2
+ from fastapi.responses import StreamingResponse
3
+ from pydantic import BaseModel
4
+ import json, uuid, time, asyncio
5
+ from typing import Optional, List
6
+ from datetime import datetime
7
+
8
# Optional HTTP backends: curl_cffi is tried first by get_session()
# (browser TLS impersonation), cloudscraper is the fallback.
# The HAS_* flags record which backends are importable here.
try:
    from curl_cffi.requests import Session as CurlSession
    HAS_CURL_CFFI = True
except ImportError:
    HAS_CURL_CFFI = False

try:
    import cloudscraper
    HAS_CLOUDSCRAPER = True
except ImportError:
    HAS_CLOUDSCRAPER = False

app = FastAPI(title="Perplexity OpenAI-Compatible API")

# Upstream endpoint and stream-filtering constants.
BASE_URL = "https://www.perplexity.ai"
ASK_URL = f"{BASE_URL}/rest/sse/perplexity_ask"
# Only SSE blocks whose intended_usage matches this carry the answer markdown.
TARGET_USAGE = "ask_text_0_markdown"
MAX_RETRIES = 3
RETRY_DELAY = 2  # seconds to sleep between failed upstream attempts

# Browser-like headers sent with every upstream request.
HEADERS = {
    "Accept": "text/event-stream",
    "Accept-Language": "fr,fr-FR;q=0.9,en-US;q=0.8,en;q=0.7",
    "Referer": f"{BASE_URL}/",
    "Origin": BASE_URL,
    "content-type": "application/json",
    "X-Perplexity-Request-Reason": "perplexity-query-state-provider",
    "DNT": "1",
    "Sec-GPC": "1",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "Cache-Control": "no-cache",
    "Pragma": "no-cache",
}
43
+
44
# ---------------------------------------------------------------------------
# Session management (module-level singleton)
# ---------------------------------------------------------------------------

_session = None
_backend = None

def get_session():
    """Return ``(session, backend_name)``, building and caching it on first use.

    Candidate backends are probed in priority order — curl_cffi first,
    then cloudscraper — and each one is validated with a GET against the
    site root before being cached.

    Raises:
        RuntimeError: no backend could be imported or validated.
    """
    global _session, _backend

    # Fast path: reuse the already-validated session.
    if _session is not None:
        return _session, _backend

    candidates = []
    if HAS_CURL_CFFI:
        candidates.append(
            ("curl_cffi", lambda: CurlSession(impersonate="chrome120"))
        )
    if HAS_CLOUDSCRAPER:
        candidates.append((
            "cloudscraper",
            lambda: cloudscraper.create_scraper(
                browser={"browser": "chrome", "platform": "windows", "mobile": False}
            ),
        ))

    for backend_name, make_session in candidates:
        try:
            candidate = make_session()
            probe = candidate.get(BASE_URL, timeout=20)
            probe.raise_for_status()
        except Exception:
            # This backend failed its probe; fall through to the next one.
            continue
        _session, _backend = candidate, backend_name
        return _session, _backend

    raise RuntimeError("Could not initialize any scraping session")
80
+
81
def reset_session():
    """Drop the cached session so the next get_session() builds a fresh one."""
    global _session, _backend
    _session = _backend = None
85
+
86
+ # ---------------------------------------------------------------------------
87
+ # Perplexity core logic
88
+ # ---------------------------------------------------------------------------
89
+
90
def build_payload(query: str) -> dict:
    """Build the JSON body for Perplexity's SSE ask endpoint around *query*.

    Fresh UUIDs are generated per call for the frontend identifiers.
    """
    request_params = {
        "attachments": [],
        "language": "fr-FR",
        "timezone": "Europe/Paris",
        "search_focus": "internet",
        "sources": ["web"],
        "frontend_uuid": str(uuid.uuid4()),
        "mode": "copilot",
        "model_preference": "turbo",
        "is_related_query": False,
        "is_sponsored": False,
        "frontend_context_uuid": str(uuid.uuid4()),
        "prompt_source": "user",
        "query_source": "home",
        "is_incognito": False,
        "use_schematized_api": True,
        "send_back_text_in_streaming_api": False,
        "supported_block_use_cases": [
            "answer_modes", "media_items", "knowledge_cards",
            "inline_entity_cards", "place_widgets", "finance_widgets",
            "news_widgets", "search_result_widgets", "inline_images",
            "diff_blocks", "answer_tabs", "in_context_suggestions",
        ],
        "skip_search_enabled": True,
        "source": "default",
        "version": "2.18",
    }
    return {"params": request_params, "query_str": query}
121
+
122
def collect_web_results(block: dict) -> list:
    """Gather every web-result entry found anywhere inside one SSE block.

    Looks in the direct web_result_block, the sources_mode_block, and
    each plan step's web_results_content, in that order.
    """
    hits = []
    hits.extend(block.get("web_result_block", {}).get("web_results", []))
    hits.extend(block.get("sources_mode_block", {}).get("web_results", []))
    for step in block.get("plan_block", {}).get("steps", []):
        hits.extend(step.get("web_results_content", {}).get("web_results", []))
    return hits
132
+
133
def extract_chunks(patch: dict) -> list:
    """Return the text chunks carried by a single JSON-patch entry."""
    op = patch.get("op")
    path = patch.get("path", "")
    # A root-level replace carries the whole chunk list in its value.
    if op == "replace" and path == "":
        return patch.get("value", {}).get("chunks", [])
    # An add targeting .../chunks/<i> carries exactly one chunk.
    if op == "add" and "/chunks/" in path:
        return [patch.get("value", "")]
    return []
141
+
142
def parse_stream(resp) -> tuple:
    """Consume the whole SSE response and return ``(answer_markdown, sources)``.

    Sources are harvested from every block (deduplicated by URL, in
    encounter order); answer text is accumulated only from blocks whose
    intended_usage matches TARGET_USAGE. When the final SSE message
    carries a complete markdown answer, it replaces the accumulated text.
    """
    answer = ""
    sources = []
    known_urls = set()

    for line in resp.iter_lines():
        if isinstance(line, bytes):
            line = line.decode("utf-8", errors="replace")
        if not line or not line.startswith("data:"):
            continue
        body = line[len("data:"):].strip()
        if not body or body == "{}":
            continue
        try:
            event = json.loads(body)
        except json.JSONDecodeError:
            # Skip malformed SSE payloads rather than aborting the stream.
            continue

        final = event.get("final_sse_message") or event.get("final")

        for blk in event.get("blocks", []):
            # Collect citation sources regardless of the block's usage.
            for wr in collect_web_results(blk):
                url = wr.get("url", "")
                if url and url not in known_urls:
                    known_urls.add(url)
                    sources.append({
                        "name": wr.get("name", ""),
                        "url": url,
                        "snippet": wr.get("snippet", ""),
                    })

            if blk.get("intended_usage", "") != TARGET_USAGE:
                continue

            diff = blk.get("diff_block", {})
            if diff.get("field") == "markdown_block":
                for patch in diff.get("patches", []):
                    for piece in extract_chunks(patch):
                        if piece:
                            answer += piece

            if final:
                # The final message may restate the authoritative full answer.
                md = blk.get("markdown_block", {})
                if md.get("answer"):
                    answer = md["answer"]

    return answer, sources
191
+
192
def parse_stream_generator(resp):
    """Yield markdown chunks from the SSE stream as soon as they arrive.

    Only blocks whose intended_usage matches TARGET_USAGE contribute
    text. The final SSE message repeats the full answer, but the chunks
    streamed before it already cover that content, so nothing extra is
    emitted for it.
    """
    for line in resp.iter_lines():
        text = line.decode("utf-8", errors="replace") if isinstance(line, bytes) else line
        if not text or not text.startswith("data:"):
            continue
        body = text[len("data:"):].strip()
        if not body or body == "{}":
            continue
        try:
            event = json.loads(body)
        except json.JSONDecodeError:
            # Malformed payload: skip it and keep streaming.
            continue

        for blk in event.get("blocks", []):
            if blk.get("intended_usage", "") != TARGET_USAGE:
                continue
            diff = blk.get("diff_block", {})
            if diff.get("field") != "markdown_block":
                continue
            for patch in diff.get("patches", []):
                for piece in extract_chunks(patch):
                    if piece:
                        yield piece
227
+
228
def do_perplexity_request(query: str):
    """POST *query* to the Perplexity SSE endpoint and return the live response.

    Retries up to MAX_RETRIES times, sleeping RETRY_DELAY seconds between
    attempts. On block pages (HTTP 403/503) or transport errors the cached
    session is reset so a fresh one is built for the next attempt.

    Raises:
        RuntimeError: every attempt failed (wraps the last exception).
    """
    payload = build_payload(query)
    headers = {**HEADERS, "X-Request-ID": str(uuid.uuid4())}
    last_exc = None

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            # BUGFIX: fetch the session inside the loop. The original
            # captured it once before retrying, so after reset_session()
            # in the except handler the next attempt still posted on the
            # stale, already-blocked session object.
            session, _ = get_session()
            resp = session.post(
                ASK_URL,
                headers=headers,
                json=payload,
                stream=True,
                timeout=60,
            )
            if resp.status_code in (403, 503):
                # Anti-bot wall: drop the session so a retry rebuilds it.
                reset_session()
                raise RuntimeError(f"Blocked (HTTP {resp.status_code})")
            resp.raise_for_status()
            return resp
        except Exception as e:
            last_exc = e
            if attempt < MAX_RETRIES:
                time.sleep(RETRY_DELAY)
                # Best-effort session refresh before the next attempt.
                try:
                    reset_session()
                    get_session()
                except Exception:
                    pass

    raise RuntimeError(f"All retries failed: {last_exc}")
260
+
261
+ # ---------------------------------------------------------------------------
262
+ # OpenAI-compatible Pydantic models
263
+ # ---------------------------------------------------------------------------
264
+
265
class Message(BaseModel):
    """One chat turn in the OpenAI request format."""
    # Chat role, e.g. "system" / "user" / "assistant".
    role: str
    # Plain-text content of the turn.
    content: str
268
+
269
class ChatCompletionRequest(BaseModel):
    """Body of POST /v1/chat/completions (OpenAI chat-completions shape)."""
    model: str = "perplexity"  # echoed back in the response
    messages: List[Message]
    stream: Optional[bool] = False  # True -> SSE chunk stream
    # Accepted for OpenAI-client compatibility; not forwarded upstream.
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
275
+
276
+ # ---------------------------------------------------------------------------
277
+ # Helper: build query string from messages
278
+ # ---------------------------------------------------------------------------
279
+
280
def messages_to_query(messages: List[Message]) -> str:
    """
    Flatten an OpenAI-style message list into one Perplexity query string.

    The last user turn becomes the query, with any system prompts joined
    and prepended. When there is no user turn at all, the very last
    message is used verbatim, whatever its role.
    """
    system_text = " ".join(m.content for m in messages if m.role == "system")
    user_texts = [m.content for m in messages if m.role == "user"]

    if user_texts:
        prefix = system_text + "\n\n" if system_text else ""
        query = prefix + user_texts[-1]
    else:
        # Fallback: last message regardless of role (system prefix dropped,
        # matching the original behavior after the trailing strip).
        query = messages[-1].content

    return query.strip()
299
+
300
+ # ---------------------------------------------------------------------------
301
+ # OpenAI-compatible endpoints
302
+ # ---------------------------------------------------------------------------
303
+
304
@app.get("/")
def root():
    """Liveness banner for the service root."""
    banner = {"status": "ok", "message": "Perplexity OpenAI-compatible API"}
    return banner
307
+
308
@app.get("/health")
def health():
    """Plain health probe endpoint."""
    return dict(status="ok")
311
+
312
@app.get("/v1/models")
def list_models():
    """OpenAI-compatible model listing (a single pseudo-model entry)."""
    model_entry = {
        "id": "perplexity",
        "object": "model",
        "created": int(datetime.now().timestamp()),
        "owned_by": "perplexity",
    }
    return {"object": "list", "data": [model_entry]}
325
+
326
@app.post("/v1/chat/completions")
def chat_completions(
    request: ChatCompletionRequest,
    authorization: Optional[str] = Header(default=None),
):
    """
    OpenAI-compatible chat completion endpoint backed by Perplexity.

    When ``request.stream`` is true, emits OpenAI-style
    ``chat.completion.chunk`` SSE events; otherwise returns a single
    ``chat.completion`` object with any collected sources appended to
    the answer as markdown footnotes.

    The ``authorization`` header is accepted for client compatibility
    but not validated.

    Raises:
        HTTPException 400: no usable query found in the messages.
        HTTPException 502: upstream failure, or an empty answer
            (non-streaming path only).
    """
    query = messages_to_query(request.messages)
    if not query:
        raise HTTPException(status_code=400, detail="No query found in messages")

    completion_id = f"chatcmpl-{uuid.uuid4().hex}"
    created_ts = int(time.time())
    model_name = request.model or "perplexity"

    def _chunk(delta: dict, finish_reason) -> dict:
        # One OpenAI streaming-chunk envelope (shared by all stream yields).
        return {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created_ts,
            "model": model_name,
            "choices": [{
                "index": 0,
                "delta": delta,
                "finish_reason": finish_reason,
            }],
        }

    # ── Streaming response ──────────────────────────────────────────────────
    if request.stream:
        def stream_generator():
            try:
                resp = do_perplexity_request(query)
            except Exception as e:
                # Surface the error in-band as a final chunk, then stop.
                err_chunk = _chunk({"content": f"[ERROR] {e}"}, "stop")
                yield f"data: {json.dumps(err_chunk)}\n\ndata: [DONE]\n\n"
                return

            # First chunk carries the assistant role, per OpenAI convention.
            yield f"data: {json.dumps(_chunk({'role': 'assistant'}, None))}\n\n"

            for chunk_text in parse_stream_generator(resp):
                yield f"data: {json.dumps(_chunk({'content': chunk_text}, None))}\n\n"

            # Final stop chunk, then the end-of-stream sentinel.
            yield f"data: {json.dumps(_chunk({}, 'stop'))}\n\n"
            yield "data: [DONE]\n\n"

        return StreamingResponse(
            stream_generator(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "X-Accel-Buffering": "no",  # disable proxy (nginx) buffering
            },
        )

    # ── Non-streaming response ──────────────────────────────────────────────
    try:
        resp = do_perplexity_request(query)
        answer, sources = parse_stream(resp)
    except Exception as e:
        raise HTTPException(status_code=502, detail=str(e)) from e

    if not answer:
        raise HTTPException(status_code=502, detail="Empty response from Perplexity")

    # Append sources as markdown footnotes, if any were collected.
    if sources:
        footnotes = "\n\n---\n**Sources:**\n"
        for i, src in enumerate(sources, 1):
            # BUGFIX: fall back to the URL when "name" is present but empty.
            # dict.get's default only applies to a *missing* key, so the
            # original rendered broken "[](url)" links for unnamed sources.
            label = src.get("name") or src["url"]
            footnotes += f"{i}. [{label}]({src['url']})\n"
        answer += footnotes

    # Rough whitespace-token counts; Perplexity exposes no real usage data.
    prompt_tokens = len(query.split())
    completion_tokens = len(answer.split())

    return {
        "id": completion_id,
        "object": "chat.completion",
        "created": created_ts,
        "model": model_name,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": answer,
            },
            "finish_reason": "stop",
        }],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    }
451
+
452
+ # ---------------------------------------------------------------------------
453
+ # Startup: pre-warm session
454
+ # ---------------------------------------------------------------------------
455
+
456
# NOTE(review): @app.on_event is deprecated in recent FastAPI releases in
# favor of lifespan handlers — fine as long as the installed version
# still supports it; confirm against the pinned FastAPI version.
@app.on_event("startup")
def startup_event():
    """Pre-warm the scraping session so the first request is fast.

    Failure here is deliberately non-fatal: get_session() is called
    again lazily on the first incoming request.
    """
    try:
        get_session()
        print("[startup] Session initialized successfully")
    except Exception as e:
        print(f"[startup] Session init failed (will retry on first request): {e}")