csabakecskemeti committed
Commit a8408b2
Parent(s): 7f36055

Upload 2 files

- app.py +48 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,48 @@
import gradio as gr
from llama_cpp import Llama


def llama_cpp_chat(gguf_model, prompt: str, messages: str = ''):
    # Wrap the new prompt (and any prior turns) in the model's HUMAN/ASSISTANT template
    prompt_templated = f'{messages}\n ### HUMAN:\n{prompt} \n ### ASSISTANT:'
    output = gguf_model(
        prompt_templated,
        max_tokens=512,
        stop=["### HUMAN:\n", " ### ASSISTANT:"],  # Stop generating just before the model would generate a new question
        echo=True  # Echo the prompt back in the output
    )  # Generate a completion; can also call create_completion
    print(output)
    return output['choices'][0]['text']


llm = Llama(
    model_path="llama3_8b_chat_brainstorm.Q2_K.gguf",
    # n_gpu_layers=-1,  # Uncomment to use GPU acceleration
    # seed=1337,  # Uncomment to set a specific seed
    # n_ctx=2048,  # Uncomment to increase the context window
)


def chatty(prompt, messages):
    print(prompt)
    print(f'messages: {messages}')
    # Re-serialize the gradio chat history ([user, assistant] pairs) back into the prompt template
    past_messages = ''
    for idx, message in enumerate(messages):
        print(f'idx: {idx}, message: {message}')
        past_messages += f'\n### HUMAN: {message[0]}'
        past_messages += f'\n### ASSISTANT: {message[1]}'

    print(f'past_messages: {past_messages}')
    response = llama_cpp_chat(llm, prompt, past_messages)
    # The prompt is echoed back, so keep only the text after the last ASSISTANT marker
    return response.split('### ASSISTANT:')[-1]


demo = gr.ChatInterface(
    fn=chatty,
    title="Brainstorm on CPU with llama.cpp",
    description="Please note that CPU prediction will be very slow - but this can run on the Free Tier :)"
)


if __name__ == "__main__":
    demo.launch()
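Note that app.py opens the GGUF by bare filename, so the weights are expected to sit next to the script inside the Space. requirements.txt (below) also pins huggingface_hub, which suggests the file could instead be fetched from the Hub at startup. A minimal sketch of that alternative; the repo id here is a placeholder assumption, not taken from this commit:

# Hypothetical alternative: download the GGUF from the Hub at startup
# rather than committing it to the Space. The repo_id is an assumption.
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="your-username/llama3_8b_chat_brainstorm-GGUF",  # placeholder repo id
    filename="llama3_8b_chat_brainstorm.Q2_K.gguf",
)
# llm = Llama(model_path=model_path)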
requirements.txt
ADDED
@@ -0,0 +1,3 @@
--extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
llama-cpp-python
huggingface_hub==0.22.2
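The --extra-index-url line points pip at prebuilt CPU-only wheels of llama-cpp-python, so the Space installs without compiling llama.cpp from source. A quick post-install sanity check (a sketch; llama_cpp exposes a __version__ attribute):

# Run after `pip install -r requirements.txt` to confirm the CPU wheel imports.
import llama_cpp
print(llama_cpp.__version__)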