Spaces:

AlsuGibadullina
/

TestRefactoringModels

Sleeping

App Files Files Community

TestRefactoringModels / src /backends.py

AlsuGibadullina

Update src/backends.py

16ecc2a verified about 1 month ago

raw

history blame contribute delete

2.81 kB

	import os
	from dataclasses import dataclass
	from typing import Optional, Dict, Any, Protocol

	from huggingface_hub import InferenceClient

	try:
	from PIL import Image
	except Exception:
	Image = None


	class LLMBackend(Protocol):
	def generate(self, prompt: str, *, system: Optional[str], params: Dict[str, Any]) -> str:
	...


	@dataclass
	class HFInferenceAPIBackend:
	"""
	Uses HF Inference API via huggingface_hub.InferenceClient.
	Works well on Spaces if you provide HF_TOKEN in Secrets.
	"""
	model_id: str
	token: Optional[str] = None
	timeout_s: int = 180

	def __post_init__(self):
	self.token = self.token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
	self.client = InferenceClient(model=self.model_id, token=self.token, timeout=self.timeout_s)

	def generate(self, prompt: str, *, system: Optional[str], params: Dict[str, Any]) -> str:
	temperature = float(params.get("temperature", 0.2))
	max_new_tokens = int(params.get("max_new_tokens", 600))
	top_p = float(params.get("top_p", 0.95))
	repetition_penalty = float(params.get("repetition_penalty", 1.05))

	# Prefer chat when supported
	try:
	messages = []
	if system:
	messages.append({"role": "system", "content": system})
	messages.append({"role": "user", "content": prompt})

	resp = self.client.chat.completions.create(
	model=self.model_id,
	messages=messages,
	temperature=temperature,
	max_tokens=max_new_tokens,
	top_p=top_p,
	)
	return resp.choices[0].message.content
	except Exception:
	# Fallback: text generation
	out = self.client.text_generation(
	prompt=(f"{system}\n\n{prompt}" if system else prompt),
	temperature=temperature,
	max_new_tokens=max_new_tokens,
	top_p=top_p,
	repetition_penalty=repetition_penalty,
	do_sample=True,
	return_full_text=False,
	)
	return out

	def image_to_text(self, image: "Image.Image") -> str:
	"""
	HF task 'image-to-text' (captioning / OCR-like depending on model).
	"""
	if Image is None:
	raise RuntimeError("Pillow not installed")
	res = self.client.image_to_text(image)
	# huggingface_hub returns an object with generated_text
	return getattr(res, "generated_text", str(res))


	def make_backend(backend_type: str, model_id: str) -> LLMBackend:
	if backend_type == "hf_inference_api":
	return HFInferenceAPIBackend(model_id=model_id)
	raise ValueError(f"Unknown backend: {backend_type}")