# Spaces: Sleeping (page-status text captured together with this file; not part of the program)
import os
import requests
from ctransformers import AutoModelForCausalLM
from fastapi import FastAPI
from pydantic import BaseModel
# Public URL of the GGUF weights file to serve.
MODEL_URL = "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q5_K_S.gguf"
# Local cache file, named after the last path segment of MODEL_URL so the name
# on disk always identifies the build that was actually downloaded.  A
# hard-coded name that disagrees with the URL (e.g. "Q4_K_S" for a "Q5_K_S"
# download) lets a stale file under that name be loaded without any re-download.
MODEL_PATH = MODEL_URL.rsplit("/", 1)[-1]
# Download the model file if not already present
def download_model(model_url, model_path):
    """Fetch ``model_url`` into ``model_path`` unless that file already exists.

    The body is streamed to ``<model_path>.part`` and renamed into place only
    after the whole response has been written, so an interrupted download is
    never mistaken for a complete model on the next start.

    Args:
        model_url: HTTP(S) URL of the file to download.
        model_path: Destination path on the local filesystem.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Connection failure or timeout.
        OSError: The destination could not be written or renamed.
    """
    if os.path.exists(model_path):
        print("Model already exists locally.")
        return

    print(f"Downloading model from {model_url}...")
    partial_path = f"{model_path}.part"
    try:
        # (connect, read) timeouts: without them a stalled connection would
        # block start-up forever.
        with requests.get(model_url, stream=True, timeout=(10, 120)) as response:
            # Fail loudly instead of saving an error page as the model file.
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial_path, model_path)
    finally:
        # Only present here if the download failed part-way; after a
        # successful rename there is nothing left to remove.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Model download complete.")
# Fetch the weights before loading so the loader always finds a local file.
download_model(MODEL_URL, MODEL_PATH)

# Loader settings, kept together so they can be read (and tuned) at a glance.
_llm_options = {
    "model_type": "mistral",
    "max_new_tokens": 1096,
    "threads": 3,
}

# Module-level model instance; the request handler calls it directly.
llm = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **_llm_options)
# Request-body schema for the completion handler.
# (Documented with comments rather than a class docstring: pydantic would
# publish a docstring as the model's description in the generated schema.)
class Validation(BaseModel):
    # User text to complete; the handler strips surrounding whitespace
    # before inserting it into the chat prompt.
    prompt: str
# ASGI application object for this service.
app = FastAPI()
# Zephyr completion endpoint
# NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on this
# handler, so as shown it is not reachable over HTTP -- confirm where (or
# whether) it is registered on ``app``.
async def stream(item: Validation):
    """Run one Zephyr chat completion for the submitted prompt.

    The prompt follows the Zephyr chat layout: a ``<|system|>`` turn, a
    ``<|user|>`` turn and an open ``<|assistant|>`` turn, with ``</s>`` closing
    each completed turn.

    Args:
        item: Validated request body; ``item.prompt`` is the user's message and
            is stripped of surrounding whitespace before use.

    Returns:
        Whatever the module-level ``llm`` callable returns for the assembled
        prompt.
    """
    system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
    end_of_turn = "</s>"
    system, user, assistant = "<|system|>", "<|user|>", "<|assistant|>"
    # The system message needs its own role tag; without it the text sits
    # outside any turn of the chat template.
    prompt = (
        f"{system}\n{system_prompt}{end_of_turn}\n"
        f"{user}\n{item.prompt.strip()}{end_of_turn}\n"
        f"{assistant}\n"
    )
    # NOTE(review): this call is synchronous inside an ``async def``; nothing
    # is awaited here, so the handler does not yield while it runs.
    return llm(prompt)