Spaces:

vislupus
/

Bulgarian-Joke-Master

Sleeping

Update app.py

153fb4c verified 29 days ago

1.54 kB

	import gradio as gr
	import os
	from llama_cpp import Llama
	from huggingface_hub import hf_hub_download

	os.environ["LLAMA_CPP_USE_CUDA"] = "0"

	title = "SmolLM 2 - Bulgarian Joke Master - GGUF"
	description = """
	🔎 [SmolLM 2](https://huggingface.co/unsloth/SmolLM2-135M-Instruct-bnb-4bit) fine-tuned for Bulgarian jokes, running on CPU in GGUF format.\n
	This model is fine-tuned for generating humorous content in Bulgarian, utilizing the [Llama.cpp library](https://github.com/ggerganov/llama.cpp).\n
	Running on CPU, it can still produce impressive results, although larger models may require more processing power.
	"""

	model_dir = "models"
	model_name = "unsloth.Q4_K_M.gguf"
	model_path = os.path.join(model_dir, model_name)

	hf_hub_download(
	repo_id="vislupus/bulgarian-joke-master-SmolLM2-135M-Instruct-bnb-4bit-gguf",
	filename=model_name,
	local_dir=model_dir
	)

	if not os.path.exists(model_path):
	raise FileNotFoundError(f"Model file not found at {model_path}")

	llm = Llama(model_path=model_path)

	def generate_response(message, history, temperature=0.7, top_p=1.0, max_tokens=1280):
	try:
	response = llm(message, max_tokens=max_tokens, temperature=temperature, top_p=top_p)
	return response["choices"][0]["text"].strip()

	except Exception as e:
	return f"Error generating response: {str(e)}"

	if __name__ == "__main__":
	gguf_demo = gr.ChatInterface(
	generate_response,
	title=title,
	description=description,
	)

	gguf_demo.launch(share=True)