from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import httpx
import os
import json
import time
import uuid
import asyncio
from typing import List, Dict, Any, Optional, AsyncGenerator

# --- Configuration ---
INFERENCE_API_KEY = os.environ.get("INFERENCE_API_KEY", "")  # Read from the environment; never hardcode secrets.
INFERENCE_API_URL = "https://api.inference.net/v1/chat/completions"
SEARCH_API_URL = "https://searchapi.snapzion.com/search"
NEWS_API_URL = "https://searchapi.snapzion.com/news"
IMAGE_API_URL = "https://searchapi.snapzion.com/images"
MODEL_NAME = "Binglity-Lite"
BACKEND_MODEL = "meta-llama/llama-3.1-8b-instruct/fp-8"

# --- Final Advanced System Prompt ---
SYSTEM_PROMPT = """
You are "Binglity-Lite", a highly advanced AI search assistant. Your purpose is to provide users with accurate, comprehensive, and trustworthy answers by synthesizing information from a given set of web, news, and image search results.

**Core Directives:**
1.  **Answer Directly**: Immediately address the user's question. **Do not** use introductory phrases like "Based on the search results...". Your tone should be confident, objective, and encyclopedic.
2.  **Synthesize, Don't Summarize**: Your primary task is to weave information from multiple sources into a single, cohesive, and well-structured answer. Do not simply describe what each source says one by one.
3.  **Cite with Inline Markdown Links**: This is your most important instruction. When you present a fact or a piece of information from a source, you **must** cite it immediately using an inline Markdown link.
    *   **Format**: The format must be `[phrase or sentence containing the fact](URL)`. The URL must come from the `URL:` field of the provided source.
    *   **Example**: If a source with URL `https://example.com/science` says "The Earth is the third planet from the Sun", your output should be: "The Earth is the [third planet from the Sun](https://example.com/science)."
    *   **Rule**: Every piece of information in your answer must be attributable to a source via these inline links.
4.  **Be Fact-Based**: Your entire response must be based **exclusively** on the information provided in the search results. Do not use any outside knowledge.
5.  **Interpret Image Results**: For image search results, use the title and context to describe the image if it's relevant to the user's query. Cite the source page URL.
6.  **Filter for Relevance**: If a search result is not relevant to the user's query, ignore it completely. Do not mention it in your response.
7.  **Handle Ambiguity**: If the search results are contradictory or insufficient to answer the question fully, state this clearly in your response, citing the conflicting sources.

**Final Output Structure:**
Your final response MUST be structured in two parts:
1.  **The Synthesized Answer**: A well-written response that directly answers the user's query, with facts and statements properly cited using inline Markdown links as described above.
2.  **Sources Section**: After the answer, add a section header `## Sources`. Under this header, provide a bulleted list of the full titles and URLs of every source you used.
    *   **Format**: `- [Title of Source](URL)`
"""


# --- FastAPI App ---
app = FastAPI(
    title="Binglity-Lite API",
    description="A web, news, and image search-powered, streaming-capable chat completions API.",
    version="1.4.0",
)

# --- Pydantic Models for OpenAI Compatibility ---
class ChatMessage(BaseModel):
    role: str
    content: str

class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    max_tokens: Optional[int] = 2048
    temperature: Optional[float] = 0.7
    stream: Optional[bool] = False
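
# Example request body accepted by /v1/chat/completions (mirrors the OpenAI
# chat schema defined by the models above; the question text is illustrative):
# {
#   "model": "Binglity-Lite",
#   "messages": [{"role": "user", "content": "Who was the first person on the Moon?"}],
#   "stream": false
# }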

# --- Search Functions ---
async def perform_search(client: httpx.AsyncClient, url: str, query: str, source_type: str) -> List[Dict[str, Any]]:
    """Generic function to perform a search against a given API."""
    try:
        response = await client.get(url, params={"query": query, "max_results": 10})
        response.raise_for_status()
        results = response.json()
        for result in results:
            result['source_type'] = source_type
        return results
    except httpx.HTTPStatusError as e:
        print(f"Error from {source_type} API: {e.response.text}")
        return []
    except Exception as e:
        print(f"An unexpected error occurred during {source_type} search: {str(e)}")
        return []

def format_search_results_for_prompt(results: List[Dict[str, Any]]) -> str:
    """Formats combined search results for the language model prompt."""
    if not results:
        return "No relevant search results were found. Inform the user that you could not find information on their query."
    
    formatted = "### Search Results ###\n\n"
    for i, result in enumerate(results):
        source_type = result.get('source_type', 'Search')
        formatted += f"Source [{i+1}] ({source_type}):\n"
        formatted += f"Title: {result.get('title', 'N/A')}\n"
        formatted += f"URL: {result.get('url', 'N/A')}\n"
        
        if source_type == 'Image':
            formatted += f"Content: [Image Result] A picture titled '{result.get('title', 'N/A')}'\n"
            formatted += f"Image URL: {result.get('image', 'N/A')}\n\n"
        else:
            formatted += f"Content: {result.get('description', 'N/A')}\n\n"
            
    return formatted
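
# For illustration, a single web result such as
#   {"title": "Mars", "url": "https://example.com/mars",
#    "description": "Mars is the fourth planet.", "source_type": "Web"}
# is rendered for the model as:
#
#   ### Search Results ###
#
#   Source [1] (Web):
#   Title: Mars
#   URL: https://example.com/mars
#   Content: Mars is the fourth planet.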

# --- Streaming Logic ---
async def stream_response_generator(payload: Dict[str, Any]) -> AsyncGenerator[str, None]:
    """Generates server-sent events for streaming responses."""
    headers = {
        "Authorization": f"Bearer {INFERENCE_API_KEY}",
        "Content-Type": "application/json",
        "Accept": "text/event-stream"
    }
    response_id = f"chatcmpl-{uuid.uuid4()}"
    created_time = int(time.time())

    async with httpx.AsyncClient(timeout=300.0) as client:
        async with client.stream("POST", INFERENCE_API_URL, json=payload, headers=headers) as response:
            if response.status_code != 200:
                # The 200 status and headers are already sent once streaming starts,
                # so surface the upstream failure as an SSE error event instead of raising.
                error_content = await response.aread()
                error = {"error": {"message": f"Error from inference API: {error_content.decode()}", "code": response.status_code}}
                yield f"data: {json.dumps(error)}\n\n"
                yield "data: [DONE]\n\n"
                return

            async for line in response.aiter_lines():
                if line.startswith("data:"):
                    line_data = line[len("data:"):].strip()
                    if line_data == "[DONE]":
                        final_chunk = {
                            "id": response_id, "object": "chat.completion.chunk", "created": created_time, "model": MODEL_NAME,
                            "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
                        }
                        yield f"data: {json.dumps(final_chunk)}\n\n"
                        yield "data: [DONE]\n\n"
                        break
                    
                    try:
                        chunk = json.loads(line_data)
                        # Forward only well-formed chunks that carry a non-empty 'choices' list.
                        if chunk.get("choices") and len(chunk["choices"]) > 0:
                            formatted_chunk = {
                                "id": response_id, "object": "chat.completion.chunk", "created": created_time, "model": MODEL_NAME,
                                "choices": [{
                                    "index": 0,
                                    "delta": chunk["choices"][0].get("delta", {}),
                                    "finish_reason": chunk["choices"][0].get("finish_reason")
                                }]
                            }
                            yield f"data: {json.dumps(formatted_chunk)}\n\n"
                    except (json.JSONDecodeError, IndexError):
                        continue
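
# Each forwarded SSE event has the shape (values illustrative):
#   data: {"id": "chatcmpl-<uuid>", "object": "chat.completion.chunk",
#          "created": 1700000000, "model": "Binglity-Lite",
#          "choices": [{"index": 0, "delta": {"content": "Mars"}, "finish_reason": null}]}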

# --- API Endpoint ---
@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest):
    if request.model != MODEL_NAME:
        raise HTTPException(status_code=400, detail=f"Model not supported. Please use '{MODEL_NAME}'.")

    user_query = request.messages[-1].content if request.messages else ""
    if not user_query or request.messages[-1].role.lower() != 'user':
        raise HTTPException(status_code=400, detail="The last message must be from the 'user' and contain content.")

    # Perform all searches concurrently
    async with httpx.AsyncClient() as client:
        search_tasks = [
            perform_search(client, SEARCH_API_URL, user_query, "Web"),
            perform_search(client, NEWS_API_URL, user_query, "News"),
            perform_search(client, IMAGE_API_URL, user_query, "Image"),
        ]
        all_results = await asyncio.gather(*search_tasks)

    # Combine results and remove duplicates by URL
    combined_results = []
    seen_urls = set()
    for result_list in all_results:
        for result in result_list:
            url = result.get('url')
            if url and url not in seen_urls:
                combined_results.append(result)
                seen_urls.add(url)
    
    formatted_results = format_search_results_for_prompt(combined_results)
    
    final_user_prompt = f"User's question: \"{user_query}\"\n\nUse the web, news, and image search results below to answer the user's question. Follow all rules in your system prompt exactly.\n\n{formatted_results}"
    
    payload = {
        "model": BACKEND_MODEL,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": final_user_prompt},
        ],
        "max_tokens": request.max_tokens,
        "temperature": request.temperature,
        "stream": request.stream,
    }

    if request.stream:
        return StreamingResponse(stream_response_generator(payload), media_type="text/event-stream")
    else:
        headers = {"Authorization": f"Bearer {INFERENCE_API_KEY}"}
        async with httpx.AsyncClient(timeout=120.0) as client:
            try:
                response = await client.post(INFERENCE_API_URL, json=payload, headers=headers)
                response.raise_for_status()
                model_response = response.json()
                
                # Ensure the response structure is valid before returning
                if not model_response.get("choices"):
                    raise HTTPException(status_code=500, detail="Invalid response from inference API: 'choices' field is missing or empty.")

                return {
                    "id": model_response.get("id", f"chatcmpl-{uuid.uuid4()}"),
                    "object": "chat.completion",
                    "created": model_response.get("created", int(time.time())),
                    "model": MODEL_NAME,
                    "choices": [{
                        "index": 0,
                        "message": {"role": "assistant", "content": model_response["choices"][0]["message"]["content"]},
                        "finish_reason": model_response["choices"][0].get("finish_reason", "stop"),
                    }],
                    "usage": model_response.get("usage", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}),
                }
            except httpx.HTTPStatusError as e:
                raise HTTPException(status_code=e.response.status_code, detail=f"Error from inference API: {e.response.text}")
            except httpx.RequestError as e:
                raise HTTPException(status_code=502, detail=f"Could not reach inference API: {e}")

@app.get("/")
def read_root():
    return {"message": "Welcome to the Binglity-Lite API. Use the /v1/chat/completions endpoint."}
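
# --- Local development ---
# A minimal sketch for running the app locally; assumes `uvicorn` is installed
# and this file is saved as `main.py` (both assumptions, not part of the API itself):
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# Example non-streaming request with curl (query text illustrative):
#
#   curl -X POST http://localhost:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "Binglity-Lite", "messages": [{"role": "user", "content": "Latest news about Mars rovers?"}]}'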