Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,14 @@ hf_hub_download(repo_id=repo_id,
|
|
15 |
filename=model_name,local_dir =".")
|
16 |
|
17 |
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
def format_prompt(message, history):
|
20 |
prompt = "<s>"
|
21 |
for user_prompt, bot_response in history:
|
@@ -36,17 +43,11 @@ def generate(
|
|
36 |
|
37 |
formatted_prompt = format_prompt(prompt, history)
|
38 |
|
39 |
-
|
40 |
-
model_path=model_name,
|
41 |
-
temperature=temperature,
|
42 |
-
max_tokens=2000,
|
43 |
-
top_p=top_p,
|
44 |
-
callback_manager=callback_manager,
|
45 |
-
verbose=True, # Verbose is required to pass to the callback manager
|
46 |
-
)
|
47 |
# stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
48 |
output = ""
|
49 |
output=llm(formatted_prompt)
|
|
|
50 |
# for response in stream:
|
51 |
# output += response.token.text
|
52 |
# yield output
|
|
|
15 |
filename=model_name,local_dir =".")
|
16 |
|
17 |
|
18 |
+
llm = LlamaCpp(
|
19 |
+
model_path=model_name,
|
20 |
+
temperature=temperature,
|
21 |
+
max_tokens=2000,
|
22 |
+
top_p=top_p,
|
23 |
+
callback_manager=callback_manager,
|
24 |
+
verbose=True, # Verbose is required to pass to the callback manager
|
25 |
+
)
|
26 |
def format_prompt(message, history):
|
27 |
prompt = "<s>"
|
28 |
for user_prompt, bot_response in history:
|
|
|
43 |
|
44 |
formatted_prompt = format_prompt(prompt, history)
|
45 |
|
46 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
# stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
48 |
output = ""
|
49 |
output=llm(formatted_prompt)
|
50 |
+
output="ans:"+output
|
51 |
# for response in stream:
|
52 |
# output += response.token.text
|
53 |
# yield output
|