Richard Neuschulz committed

Commit • 0939023 • 1 Parent(s): 3bb4043

check 70B model

Browse files:
- app.py +31 -23
- requirements.txt +3 -1
app.py
CHANGED
@@ -1,35 +1,43 @@
+import os
 import gradio as gr
-from …
-…
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
 
-…
+model_id = "TheBloke/KafkaLM-70B-German-V0.1-GGUF"
+model_filename = "kafkalm-70b-german-v0.1.Q5_K_M.gguf"
+model_path = hf_hub_download(repo_id=model_id, filename=model_filename, cache_dir="./")
 
-…
-…
-…
+# Initialize the Llama model
+llm = Llama(
+    model_path=model_path,  # Use the downloaded model file
+    n_ctx=4096,  # Adjust based on the model's max sequence length
+    n_threads=8,  # Tailor to your system
+    n_gpu_layers=35  # Set based on your GPU's capability
+)
 
 def generate_text(user_input, system_prompt):
+    # Combine the system and user prompts
+    prompt = f"\n{system_prompt.strip()}</s>\n\n{user_input.strip()}</s>\n"
+
+    # Generate text using the Llama model
+    output = llm(prompt, max_tokens=512, stop=["</s>"], echo=True)
+
+    # Extract the generated text from the output
+    generated_text = output['choices'][0]['text']
 
-    # Combine the system prompt and the user input to form the full prompt
-    full_prompt = f"{system_prompt.strip()}\n\n{user_input.strip()}"
-
-    # Initialize the pipeline for text generation with the model and tokenizer
-    text_generator = pipeline('text-generation', model=model, tokenizer=tokenizer,
-                              return_full_text=True, temperature=0.5,
-                              max_new_tokens=512, top_p=0.95, top_k=50, do_sample=True, device=0)  # Ensure device is set to use GPU
-
-    # Generate text based on the full prompt
-    results = text_generator(full_prompt)
-    generated_text = results[0]['generated_text']
-
     return generated_text
 
 # Setup the Gradio interface
-iface = gr.Interface(
-    …
-    …
-    …
-    …
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(lines=2, label="User Prompt", value="Wer ist Kafka?"),
+        gr.Textbox(lines=5, label="System Prompt", value="Du bist ein freundlicher und hilfsbereiter KI-Assistent. Du beantwortest Fragen faktenorientiert und präzise, ohne dabei relevante Fakten auszulassen.")
+    ],
+    outputs=gr.Textbox(label="Generated Text"),
+    title="Text Generation with KafkaLM",
+    description="Enter a user prompt and a system prompt to generate text using the KafkaLM model."
+)
 
 # Launch the Gradio app
 if __name__ == "__main__":
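For reference, `llama_cpp.Llama.__call__` returns an OpenAI-style completion dict, so the generated text is read from `choices[0]['text']`; with `echo=True` the returned text also includes the prompt, mirroring the old `return_full_text=True` behaviour. A minimal sketch of the call shape, using the role-token prompt template documented on the KafkaLM model card rather than the bare string in the diff (the model path and token counts are illustrative, not part of the commit):

    from llama_cpp import Llama

    # Illustrative values, just to show the API shape.
    llm = Llama(model_path="kafkalm-70b-german-v0.1.Q5_K_M.gguf", n_ctx=4096)

    # Assumption: KafkaLM's documented <|system|>/<|user|>/<|assistant|> template;
    # the app's simpler prompt string omits the role tokens.
    prompt = (
        "<|system|>\nDu bist ein hilfsbereiter KI-Assistent.</s>\n"
        "<|user|>\nWer ist Kafka?</s>\n"
        "<|assistant|>"
    )

    output = llm(prompt, max_tokens=64, stop=["</s>"])

    # The response is an OpenAI-style completion dict:
    # {"choices": [{"text": "...", "finish_reason": "stop", ...}], ...}
    print(output["choices"][0]["text"])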
requirements.txt
CHANGED
@@ -3,4 +3,6 @@ gradio
 torch
 bitsandbytes
 accelerate
-autoawq
+autoawq
+huggingface_hub
+llama-cpp-python
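Note: the prebuilt llama-cpp-python wheel is CPU-only, so `n_gpu_layers=35` has no effect unless the package is compiled with GPU support, e.g. `CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python` (the exact CMake flag name varies across llama-cpp-python releases). The Q5_K_M quantization of a 70B model is also on the order of 50 GB, so the host needs correspondingly large RAM/VRAM to load it.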