Richard Neuschulz committed
Commit 64edef9 • 1 Parent(s): 0bc5ebd
change, test
app.py CHANGED
@@ -7,17 +7,16 @@ model_id = "TheBloke/KafkaLM-70B-German-V0.1-GGUF"
 model_filename = "kafkalm-70b-german-v0.1.Q5_K_M.gguf"
 model_path = hf_hub_download(repo_id=model_id, filename=model_filename)
 
-
-
-# Initialize the Llama model
-llm = Llama(
-    model_path=model_path,  # Use the downloaded model file
-    n_ctx=4096,  # Adjust based on the model's max sequence length
-    n_threads=8,  # Tailor to your system
-    n_gpu_layers=35  # Set based on your GPU's capability
-)
-
-def generate_text(user_input, system_prompt):
+def generate_text(user_input, system_prompt):
+
+    # Initialize the Llama model
+    llm = Llama(
+        model_path=model_path,  # Use the downloaded model file
+        n_ctx=4096,  # Adjust based on the model's max sequence length
+        n_threads=8,  # Tailor to your system
+        n_gpu_layers=35  # Set based on your GPU's capability
+    )
+
     # Combine the system and user prompts
     prompt = f"<|system|>\n{system_prompt.strip()}</s>\n<|user|>\n{user_input.strip()}</s>\n<|assistant|>"
 
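A side effect of this change is worth flagging: with the constructor inside generate_text, the 70B GGUF is loaded from disk on every call rather than once at import time. Below is a minimal sketch of how the instance could be cached instead; this is not part of the commit, and get_llm is a hypothetical helper name.

from functools import lru_cache

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

model_id = "TheBloke/KafkaLM-70B-German-V0.1-GGUF"
model_filename = "kafkalm-70b-german-v0.1.Q5_K_M.gguf"
model_path = hf_hub_download(repo_id=model_id, filename=model_filename)

@lru_cache(maxsize=1)
def get_llm() -> Llama:
    # First call loads the GGUF; later calls return the same cached instance.
    return Llama(
        model_path=model_path,  # the downloaded model file, as in the commit
        n_ctx=4096,             # context window, as in the commit
        n_threads=8,            # CPU threads, as in the commit
        n_gpu_layers=35,        # GPU-offloaded layers, as in the commit
    )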
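The hunk ends right after the prompt is assembled, so the remainder of generate_text is not visible in this diff. A hedged sketch of how the call could continue with llama-cpp-python's completion API; max_tokens and the stop list are assumptions, not values from the commit.

def generate_text(user_input: str, system_prompt: str) -> str:
    llm = get_llm()  # cached instance from the sketch above
    # KafkaLM's Zephyr-style template, as in the diff's context line
    prompt = f"<|system|>\n{system_prompt.strip()}</s>\n<|user|>\n{user_input.strip()}</s>\n<|assistant|>"
    out = llm(
        prompt,
        max_tokens=512,  # assumption: the commit does not show this value
        stop=["</s>"],   # stop at the end-of-turn token
        echo=False,      # return only the completion, not the prompt
    )
    return out["choices"][0]["text"].strip()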