Spaces:

abhaysastha
/

llm

Sleeping

llm / main.py

ee988d4 verified 16 days ago

1.61 kB

	import os
	import requests
	from ctransformers import AutoModelForCausalLM
	from fastapi import FastAPI
	from pydantic import BaseModel

	# Define the public URL for the model file
	MODEL_URL = "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q5_K_S.gguf"
	MODEL_PATH = "zephyr-7b-beta.Q4_K_S.gguf"

	# Download the model file if not already present
	def download_model(model_url, model_path):
	if not os.path.exists(model_path):
	print(f"Downloading model from {model_url}...")
	response = requests.get(model_url, stream=True)
	with open(model_path, "wb") as f:
	for chunk in response.iter_content(chunk_size=8192):
	f.write(chunk)
	print("Model download complete.")
	else:
	print("Model already exists locally.")

	# Ensure the model file is downloaded
	download_model(MODEL_URL, MODEL_PATH)

	# Load the model
	llm = AutoModelForCausalLM.from_pretrained(
	MODEL_PATH,
	model_type="mistral",
	max_new_tokens=1096,
	threads=3,
	)

	# Pydantic object for request validation
	class Validation(BaseModel):
	prompt: str

	# Initialize FastAPI app
	app = FastAPI()

	# Zephyr completion endpoint
	@app.post("/llm_on_cpu")
	async def stream(item: Validation):
	system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
	E_INST = "</s>"
	user, assistant = "<\|user\|>", "<\|assistant\|>"
	prompt = f"{system_prompt}{E_INST}\n{user}\n{item.prompt.strip()}{E_INST}\n{assistant}\n"
	return llm(prompt)