import gradio as gr
import os
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

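# Hint intended to keep inference on CPU. Note: CUDA support in
# llama-cpp-python is decided at build/install time, so treat this
# environment variable as advisory rather than guaranteed.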
os.environ["LLAMA_CPP_USE_CUDA"] = "0"

title = "SmolLM 2 - Bulgarian Joke Master - GGUF"
description = """
🔎 [SmolLM 2](https://huggingface.co/unsloth/SmolLM2-135M-Instruct-bnb-4bit) fine-tuned for Bulgarian jokes, running on CPU in GGUF format.\n
The model generates humorous content in Bulgarian and is served with the [Llama.cpp library](https://github.com/ggerganov/llama.cpp).\n
Even on CPU it produces solid results, though larger models would need more processing power.
"""

model_dir = "models"
model_name = "unsloth.Q4_K_M.gguf"

# Fetch the GGUF weights from the Hub; hf_hub_download returns the local file path.
model_path = hf_hub_download(
    repo_id="vislupus/bulgarian-joke-master-SmolLM2-135M-Instruct-bnb-4bit-gguf",
    filename=model_name,
    local_dir=model_dir,
)

if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found at {model_path}")

# Load the GGUF model for CPU inference (llama-cpp-python defaults apply,
# including a small context window).
llm = Llama(model_path=model_path)
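# A possible CPU-tuned load instead. The parameter values below are
# illustrative assumptions; n_ctx, n_threads, and verbose are stock
# llama-cpp-python constructor arguments:
# llm = Llama(model_path=model_path, n_ctx=2048, n_threads=os.cpu_count(), verbose=False)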

def generate_response(message, history, temperature=0.7, top_p=1.0, max_tokens=1280):
    """Generate a completion for the user message.

    `history` is supplied by gr.ChatInterface but is unused here, since the
    model is called in plain text-completion mode.
    """
    try:
        response = llm(message, max_tokens=max_tokens, temperature=temperature, top_p=top_p)
        return response["choices"][0]["text"].strip()
    except Exception as e:
        return f"Error generating response: {e}"
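# SmolLM2 is an instruct model, so routing the conversation through the
# model's chat template may give better answers than raw text completion.
# A minimal sketch, assuming the [user, assistant] pair-style history that
# older gr.ChatInterface versions pass; create_chat_completion is the stock
# llama-cpp-python chat API:
def generate_chat_response(message, history, temperature=0.7, top_p=1.0, max_tokens=1280):
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    result = llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    return result["choices"][0]["message"]["content"].strip()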

if __name__ == "__main__":
    gguf_demo = gr.ChatInterface(
        generate_response,
        title=title,
        description=description,
    )

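    # share=True asks Gradio for a public link when the app runs locally;
    # hosted Spaces ignore this flag.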
    gguf_demo.launch(share=True)