main.py
CHANGED
@@ -3,6 +3,7 @@ from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
 from huggingface_hub import InferenceClient
 import uvicorn
+import json  # needed to serialize each streamed chunk
 
 
 app = FastAPI()
@@ -26,6 +27,7 @@ def format_prompt(message, history):
     prompt += f"[INST] {message} [/INST]"
     return prompt
 
+
 def generate(item: Item):
     temperature = float(item.temperature)
     if temperature < 1e-2:
@@ -43,11 +45,26 @@ def generate(item: Item):
 
     formatted_prompt = format_prompt(f"{item.system_prompt}, {item.prompt}", item.history)
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-
-
-
+
+    # Since we're yielding JSON, each chunk must be a complete JSON object.
+    # Buffer one token so the final chunk can be flagged as complete.
+    previous_text = None
+    for response in stream:
+        if previous_text is not None:
+            chunk_data = {
+                "text": previous_text,
+                "complete": False
+            }
+            yield json.dumps(chunk_data) + "\n"
+        previous_text = response.token.text
+
+    # Emit the buffered final token, marking the stream as complete
+    if previous_text is not None:
+        yield json.dumps({"text": previous_text, "complete": True}) + "\n"
 
 @app.post("/generate/")
 async def generate_text(item: Item):
-
+    # media_type "application/x-ndjson" tells clients to expect newline-delimited JSON
+    return StreamingResponse(generate(item), media_type="application/x-ndjson")
+
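To consume this endpoint, a client reads the response body line by line and parses each line as a standalone JSON object. Below is a minimal sketch using the requests library, assuming the app runs locally on port 8000 and that the Item model accepts the fields visible in the diff (prompt, system_prompt, history, temperature); the URL and payload values are illustrative, not confirmed by the source.

import json
import requests

payload = {
    "prompt": "Tell me a joke.",  # assumed Item fields; check the real model
    "system_prompt": "You are a helpful assistant.",
    "history": [],
    "temperature": 0.7,
}

# stream=True keeps the connection open so lines arrive as they are generated
with requests.post("http://localhost:8000/generate/", json=payload, stream=True) as resp:
    for line in resp.iter_lines():
        if not line:
            continue  # skip keep-alive blank lines
        chunk = json.loads(line)
        print(chunk["text"], end="", flush=True)
        if chunk["complete"]:
            break

Because every chunk is a complete JSON object terminated by a newline, the client never has to reassemble partial JSON, which is the point of switching the media type to application/x-ndjson.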