|
import os |
|
import gradio as gr |
|
from huggingface_hub import hf_hub_download |
|
from llama_cpp import Llama |
|
|
|
# Quantised GGUF build of KafkaLM-70B (a German instruction-tuned model)
# published by TheBloke on the Hugging Face Hub.
model_id = "TheBloke/KafkaLM-70B-German-V0.1-GGUF"

# Q5_K_M quantisation variant — NOTE(review): a 70B Q5_K_M file is very large;
# confirm local disk space before deploying.
model_filename = "kafkalm-70b-german-v0.1.Q5_K_M.gguf"

# Download the weights (or reuse a cached copy) into the current directory.
model_path = hf_hub_download(repo_id=model_id, filename=model_filename, cache_dir="./")


# Load the model once at import time so every Gradio request reuses the
# same in-memory instance.
llm = Llama(

    model_path=model_path,

    n_ctx=4096,      # context window size in tokens

    n_threads=8,     # CPU threads used for generation

    n_gpu_layers=35  # layers offloaded to the GPU; tune to available VRAM (0 = CPU-only)

)
|
|
|
def generate_text(user_input, system_prompt):
    """Generate a completion for *user_input* steered by *system_prompt*.

    Parameters
    ----------
    user_input : str
        The user's question or prompt.
    system_prompt : str
        Instructions controlling the assistant's behaviour.

    Returns
    -------
    str
        The model's generated answer (prompt not echoed back).
    """
    # KafkaLM uses a Zephyr-style chat template with <|system|>, <|user|> and
    # <|assistant|> role markers, each turn terminated by </s> (see the model
    # card). The previous template had the role tokens stripped out — only the
    # bare </s> markers remained — so the model received a malformed prompt.
    prompt = (
        f"<|system|>\n{system_prompt.strip()}</s>\n"
        f"<|user|>\n{user_input.strip()}</s>\n"
        f"<|assistant|>\n"
    )

    # echo=False so the result contains only the completion, matching the
    # "Generated Text" output box (echo=True would prepend the full prompt).
    output = llm(prompt, max_tokens=512, stop=["</s>"], echo=False)

    # llama-cpp-python returns an OpenAI-style completion dict:
    #   {"choices": [{"text": ...}, ...], ...}
    # The keys used before ('completions' / 'completion') do not exist and
    # raised KeyError on every call.
    return output['choices'][0]['text'].strip()
|
|
|
|
|
# Gradio UI: two text inputs (user prompt + system prompt) mapped onto
# generate_text, with a single text output box.
iface = gr.Interface(

    fn=generate_text,

    inputs=[

        gr.Textbox(lines=2, label="User Prompt", value="Wer ist Kafka?"),

        # Default system prompt (German): "You are a friendly and helpful AI
        # assistant. You answer questions factually and precisely, without
        # omitting relevant facts."
        gr.Textbox(lines=5, label="System Prompt", value="Du bist ein freundlicher und hilfsbereiter KI-Assistent. Du beantwortest Fragen faktenorientiert und präzise, ohne dabei relevante Fakten auszulassen.")

    ],

    outputs=gr.Textbox(label="Generated Text"),

    title="Text Generation with KafkaLM",

    description="Enter a user prompt and a system prompt to generate text using the KafkaLM model."

)
|
|
|
|
|
if __name__ == "__main__":

    # Start the Gradio web server (blocks until interrupted).
    iface.launch()
|
|