import gradio as gr
import os
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
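
# Keep inference on the CPU: this environment flag asks llama.cpp not to use CUDA.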
os.environ["LLAMA_CPP_USE_CUDA"] = "0"
title = "SmolLM 2 - Bulgarian Joke Master - GGUF"
description = """
🔎 [SmolLM 2](https://huggingface.co/unsloth/SmolLM2-135M-Instruct-bnb-4bit) fine-tuned for Bulgarian jokes, running on CPU in GGUF format.\n
This model is fine-tuned for generating humorous content in Bulgarian, utilizing the [Llama.cpp library](https://github.com/ggerganov/llama.cpp).\n
Running on CPU, it can still produce impressive results, although larger models may require more processing power.
"""
model_dir = "models"
model_name = "unsloth.Q4_K_M.gguf"
model_path = os.path.join(model_dir, model_name)
hf_hub_download(
    repo_id="vislupus/bulgarian-joke-master-SmolLM2-135M-Instruct-bnb-4bit-gguf",
    filename=model_name,
    local_dir=model_dir,
)
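
# Guard against a failed or misplaced download before loading the model.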
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found at {model_path}")
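
# Load the GGUF model with llama-cpp-python's defaults; passing n_ctx here
# is worth considering if chats run long.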
llm = Llama(model_path=model_path)
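
# Chat callback for gr.ChatInterface: Gradio passes the new message plus the
# running history. The raw completion call below uses only the message, so
# each turn is answered independently.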
def generate_response(message, history, temperature=0.7, top_p=1.0, max_tokens=1280):
    try:
        response = llm(message, max_tokens=max_tokens, temperature=temperature, top_p=top_p)
        return response["choices"][0]["text"].strip()
    except Exception as e:
        return f"Error generating response: {e}"
if __name__ == "__main__":
    gguf_demo = gr.ChatInterface(
        generate_response,
        title=title,
        description=description,
    )
    # share=True creates a temporary public link when run locally;
    # a hosted Hugging Face Space does not need it.
    gguf_demo.launch(share=True)