Spaces:

MB-IDK
/

PerplexityAPI

Running

App Files Files Community

MB-IDK committed 3 days ago

Commit

e3d8357

verified ·

1 Parent(s): 858d8f6

Create app.py

Browse files

Files changed (1) hide show

app.py +462 -0

app.py ADDED Viewed

	@@ -0,0 +1,462 @@

+from fastapi import FastAPI, HTTPException, Header
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+import json, uuid, time, asyncio
+from typing import Optional, List
+from datetime import datetime
+try:
+    from curl_cffi.requests import Session as CurlSession
+    HAS_CURL_CFFI = True
+except ImportError:
+    HAS_CURL_CFFI = False
+try:
+    import cloudscraper
+    HAS_CLOUDSCRAPER = True
+except ImportError:
+    HAS_CLOUDSCRAPER = False
+# FastAPI application object; the routes below are registered on it.
+app = FastAPI(title="Perplexity OpenAI-Compatible API")
+# Upstream site root; also used for the Referer/Origin headers and for session warm-up.
+BASE_URL = "https://www.perplexity.ai"
+# Server-sent-events endpoint that queries are POSTed to.
+ASK_URL = f"{BASE_URL}/rest/sse/perplexity_ask"
+# Only stream blocks whose "intended_usage" equals this value contribute answer text.
+TARGET_USAGE = "ask_text_0_markdown"
+# Number of POST attempts made per query before giving up.
+MAX_RETRIES = 3
+# Pause between attempts, in seconds (handed to time.sleep).
+RETRY_DELAY = 2
+# Base headers merged into every upstream POST; the request function adds a
+# per-request X-Request-ID on top of these.
+HEADERS = {
+    "Accept": "text/event-stream",
+    "Accept-Language": "fr,fr-FR;q=0.9,en-US;q=0.8,en;q=0.7",
+    "Referer": f"{BASE_URL}/",
+    "Origin": BASE_URL,
+    "content-type": "application/json",
+    "X-Perplexity-Request-Reason": "perplexity-query-state-provider",
+    "DNT": "1",
+    "Sec-GPC": "1",
+    "Sec-Fetch-Dest": "empty",
+    "Sec-Fetch-Mode": "cors",
+    "Sec-Fetch-Site": "same-origin",
+    "Cache-Control": "no-cache",
+    "Pragma": "no-cache",
+}
+# ---------------------------------------------------------------------------
+# Session management (module-level singleton)
+# ---------------------------------------------------------------------------
+_session = None
+_backend = None
+def get_session():
+    global _session, _backend
+    if _session is not None:
+        return _session, _backend
+    if HAS_CURL_CFFI:
+        try:
+            s = CurlSession(impersonate="chrome120")
+            r = s.get(BASE_URL, timeout=20)
+            r.raise_for_status()
+            _session, _backend = s, "curl_cffi"
+            return _session, _backend
+        except Exception:
+            pass
+    if HAS_CLOUDSCRAPER:
+        try:
+            s = cloudscraper.create_scraper(
+                browser={"browser": "chrome", "platform": "windows", "mobile": False}
+            )
+            r = s.get(BASE_URL, timeout=20)
+            r.raise_for_status()
+            _session, _backend = s, "cloudscraper"
+            return _session, _backend
+        except Exception:
+            pass
+    raise RuntimeError("Could not initialize any scraping session")
+def reset_session():
+    global _session, _backend
+    _session = None
+    _backend = None
+# ---------------------------------------------------------------------------
+# Perplexity core logic
+# ---------------------------------------------------------------------------
+def build_payload(query: str) -> dict:
+    return {
+        "params": {
+            "attachments": [],
+            "language": "fr-FR",
+            "timezone": "Europe/Paris",
+            "search_focus": "internet",
+            "sources": ["web"],
+            "frontend_uuid": str(uuid.uuid4()),
+            "mode": "copilot",
+            "model_preference": "turbo",
+            "is_related_query": False,
+            "is_sponsored": False,
+            "frontend_context_uuid": str(uuid.uuid4()),
+            "prompt_source": "user",
+            "query_source": "home",
+            "is_incognito": False,
+            "use_schematized_api": True,
+            "send_back_text_in_streaming_api": False,
+            "supported_block_use_cases": [
+                "answer_modes", "media_items", "knowledge_cards",
+                "inline_entity_cards", "place_widgets", "finance_widgets",
+                "news_widgets", "search_result_widgets", "inline_images",
+                "diff_blocks", "answer_tabs", "in_context_suggestions",
+            ],
+            "skip_search_enabled": True,
+            "source": "default",
+            "version": "2.18",
+        },
+        "query_str": query,
+    }
+def collect_web_results(block: dict) -> list:
+    results = []
+    for wr in block.get("web_result_block", {}).get("web_results", []):
+        results.append(wr)
+    for wr in block.get("sources_mode_block", {}).get("web_results", []):
+        results.append(wr)
+    for step in block.get("plan_block", {}).get("steps", []):
+        for wr in step.get("web_results_content", {}).get("web_results", []):
+            results.append(wr)
+    return results
+def extract_chunks(patch: dict) -> list:
+    op = patch.get("op")
+    path = patch.get("path", "")
+    if op == "replace" and path == "":
+        return patch.get("value", {}).get("chunks", [])
+    if op == "add" and "/chunks/" in path:
+        return [patch.get("value", "")]
+    return []
+def parse_stream(resp) -> tuple:
+    full_answer = ""
+    sources = []
+    seen_urls = set()
+    for raw_line in resp.iter_lines():
+        if isinstance(raw_line, bytes):
+            raw_line = raw_line.decode("utf-8", errors="replace")
+        if not raw_line or not raw_line.startswith("data:"):
+            continue
+        json_str = raw_line[len("data:"):].strip()
+        if not json_str or json_str == "{}":
+            continue
+        try:
+            event = json.loads(json_str)
+        except json.JSONDecodeError:
+            continue
+        is_final = event.get("final_sse_message") or event.get("final")
+        for block in event.get("blocks", []):
+            usage = block.get("intended_usage", "")
+            for wr in collect_web_results(block):
+                url = wr.get("url", "")
+                if url and url not in seen_urls:
+                    seen_urls.add(url)
+                    sources.append({
+                        "name": wr.get("name", ""),
+                        "url": url,
+                        "snippet": wr.get("snippet", ""),
+                    })
+            if usage != TARGET_USAGE:
+                continue
+            diff = block.get("diff_block", {})
+            if diff.get("field") == "markdown_block":
+                for patch in diff.get("patches", []):
+                    for chunk in extract_chunks(patch):
+                        if chunk:
+                            full_answer += chunk
+            if is_final:
+                md = block.get("markdown_block", {})
+                if md.get("answer"):
+                    full_answer = md["answer"]
+    return full_answer, sources
+def parse_stream_generator(resp):
+    """Yields text chunks as they arrive from the SSE stream."""
+    for raw_line in resp.iter_lines():
+        if isinstance(raw_line, bytes):
+            raw_line = raw_line.decode("utf-8", errors="replace")
+        if not raw_line or not raw_line.startswith("data:"):
+            continue
+        json_str = raw_line[len("data:"):].strip()
+        if not json_str or json_str == "{}":
+            continue
+        try:
+            event = json.loads(json_str)
+        except json.JSONDecodeError:
+            continue
+        is_final = event.get("final_sse_message") or event.get("final")
+        for block in event.get("blocks", []):
+            usage = block.get("intended_usage", "")
+            if usage != TARGET_USAGE:
+                continue
+            diff = block.get("diff_block", {})
+            if diff.get("field") == "markdown_block":
+                for patch in diff.get("patches", []):
+                    for chunk in extract_chunks(patch):
+                        if chunk:
+                            yield chunk
+            if is_final:
+                md = block.get("markdown_block", {})
+                if md.get("answer"):
+                    # final complete answer — we already streamed chunks,
+                    # nothing extra needed here
+                    pass
+def do_perplexity_request(query: str):
+    session, _ = get_session()
+    payload = build_payload(query)
+    headers = {**HEADERS, "X-Request-ID": str(uuid.uuid4())}
+    last_exc = None
+    for attempt in range(1, MAX_RETRIES + 1):
+        try:
+            resp = session.post(
+                ASK_URL,
+                headers=headers,
+                json=payload,
+                stream=True,
+                timeout=60,
+            )
+            if resp.status_code in (403, 503):
+                reset_session()
+                raise RuntimeError(f"Blocked (HTTP {resp.status_code})")
+            resp.raise_for_status()
+            return resp
+        except Exception as e:
+            last_exc = e
+            if attempt < MAX_RETRIES:
+                time.sleep(RETRY_DELAY)
+                # Try refreshing session on failure
+                try:
+                    reset_session()
+                    get_session()
+                except Exception:
+                    pass
+    raise RuntimeError(f"All retries failed: {last_exc}")
+# ---------------------------------------------------------------------------
+# OpenAI-compatible Pydantic models
+# ---------------------------------------------------------------------------
+class Message(BaseModel):
+    # One chat turn in an OpenAI-style message list.
+    # Speaker tag; the query builder in this file looks for "system" and "user".
+    role: str
+    # Text body of the turn.
+    content: str
+class ChatCompletionRequest(BaseModel):
+    # Request body accepted by POST /v1/chat/completions.
+    # Echoed back in the response's "model" field; the handler in this file
+    # does not use it to choose an upstream model.
+    model: str = "perplexity"
+    # Conversation turns; reduced to a single query string before sending upstream.
+    messages: List[Message]
+    # When true the reply is sent as server-sent events instead of one JSON body.
+    stream: Optional[bool] = False
+    # The two fields below are accepted for client compatibility but are not
+    # read by the handler in this file.
+    temperature: Optional[float] = None
+    max_tokens: Optional[int] = None
+# ---------------------------------------------------------------------------
+# Helper: build query string from messages
+# ---------------------------------------------------------------------------
+def messages_to_query(messages: List[Message]) -> str:
+    """
+    Converts OpenAI message list to a single query string.
+    Uses the last user message as the main query,
+    prepending any system prompt if present.
+    """
+    system_parts = [m.content for m in messages if m.role == "system"]
+    user_parts   = [m.content for m in messages if m.role == "user"]
+    query = ""
+    if system_parts:
+        query += " ".join(system_parts) + "\n\n"
+    if user_parts:
+        query += user_parts[-1]  # last user turn
+    else:
+        # fallback: last message regardless of role
+        query = messages[-1].content
+    return query.strip()
+# ---------------------------------------------------------------------------
+# OpenAI-compatible endpoints
+# ---------------------------------------------------------------------------
+@app.get("/")
+def root():
+    return {"status": "ok", "message": "Perplexity OpenAI-compatible API"}
+@app.get("/health")
+def health():
+    return {"status": "ok"}
+@app.get("/v1/models")
+def list_models():
+    return {
+        "object": "list",
+        "data": [
+            {
+                "id": "perplexity",
+                "object": "model",
+                "created": int(datetime.now().timestamp()),
+                "owned_by": "perplexity",
+            }
+        ],
+    }
+@app.post("/v1/chat/completions")
+def chat_completions(
+    request: ChatCompletionRequest,
+    authorization: Optional[str] = Header(default=None),
+):
+    query = messages_to_query(request.messages)
+    if not query:
+        raise HTTPException(status_code=400, detail="No query found in messages")
+    completion_id = f"chatcmpl-{uuid.uuid4().hex}"
+    created_ts = int(time.time())
+    model_name = request.model or "perplexity"
+    # ── Streaming response ──────────────────────────────────────────────────
+    if request.stream:
+        def stream_generator():
+            try:
+                resp = do_perplexity_request(query)
+            except Exception as e:
+                # Send error as a data chunk then stop
+                err_chunk = {
+                    "id": completion_id,
+                    "object": "chat.completion.chunk",
+                    "created": created_ts,
+                    "model": model_name,
+                    "choices": [{
+                        "index": 0,
+                        "delta": {"content": f"[ERROR] {e}"},
+                        "finish_reason": "stop",
+                    }],
+                }
+                yield f"data: {json.dumps(err_chunk)}\n\ndata: [DONE]\n\n"
+                return
+            # First chunk with role
+            first = {
+                "id": completion_id,
+                "object": "chat.completion.chunk",
+                "created": created_ts,
+                "model": model_name,
+                "choices": [{
+                    "index": 0,
+                    "delta": {"role": "assistant"},
+                    "finish_reason": None,
+                }],
+            }
+            yield f"data: {json.dumps(first)}\n\n"
+            for chunk_text in parse_stream_generator(resp):
+                chunk = {
+                    "id": completion_id,
+                    "object": "chat.completion.chunk",
+                    "created": created_ts,
+                    "model": model_name,
+                    "choices": [{
+                        "index": 0,
+                        "delta": {"content": chunk_text},
+                        "finish_reason": None,
+                    }],
+                }
+                yield f"data: {json.dumps(chunk)}\n\n"
+            # Final stop chunk
+            stop_chunk = {
+                "id": completion_id,
+                "object": "chat.completion.chunk",
+                "created": created_ts,
+                "model": model_name,
+                "choices": [{
+                    "index": 0,
+                    "delta": {},
+                    "finish_reason": "stop",
+                }],
+            }
+            yield f"data: {json.dumps(stop_chunk)}\n\n"
+            yield "data: [DONE]\n\n"
+        return StreamingResponse(
+            stream_generator(),
+            media_type="text/event-stream",
+            headers={
+                "Cache-Control": "no-cache",
+                "X-Accel-Buffering": "no",
+            },
+        )
+    # ── Non-streaming response ──────────────────────────────────────────────
+    try:
+        resp = do_perplexity_request(query)
+        answer, sources = parse_stream(resp)
+    except Exception as e:
+        raise HTTPException(status_code=502, detail=str(e))
+    if not answer:
+        raise HTTPException(status_code=502, detail="Empty response from Perplexity")
+    # Append sources as footnotes if any
+    if sources:
+        footnotes = "\n\n---\n**Sources:**\n"
+        for i, src in enumerate(sources, 1):
+            footnotes += f"{i}. [{src.get('name', src['url'])}]({src['url']})\n"
+        answer += footnotes
+    prompt_tokens   = len(query.split())
+    completion_tokens = len(answer.split())
+    return {
+        "id": completion_id,
+        "object": "chat.completion",
+        "created": created_ts,
+        "model": model_name,
+        "choices": [{
+            "index": 0,
+            "message": {
+                "role": "assistant",
+                "content": answer,
+            },
+            "finish_reason": "stop",
+        }],
+        "usage": {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": prompt_tokens + completion_tokens,
+        },
+    }
+# ---------------------------------------------------------------------------
+# Startup: pre-warm session
+# ---------------------------------------------------------------------------
+@app.on_event("startup")
+def startup_event():
+    try:
+        get_session()
+        print("[startup] Session initialized successfully")
+    except Exception as e:
+        print(f"[startup] Session init failed (will retry on first request): {e}")