csabakecskemeti committed on
Commit
a8408b2
1 Parent(s): 7f36055

Upload 2 files

Files changed (2)
  1. app.py +48 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,48 @@
+ import gradio as gr
+ from llama_cpp import Llama
+
+
+ def llama_cpp_chat(gguf_model, prompt: str, messages: str = '') -> str:
+     # Wrap the new prompt and any prior turns in the HUMAN/ASSISTANT template
+     prompt_templated = f'{messages}\n ### HUMAN:\n{prompt} \n ### ASSISTANT:'
+     output = gguf_model(
+         prompt_templated,  # prompt
+         max_tokens=512,
+         stop=["### HUMAN:\n", " ### ASSISTANT:"],  # stop just before the model would start a new turn
+         echo=True,  # echo the prompt back in the output
+     )  # generate a completion; create_completion can also be called directly
+     print(output)
+     return output['choices'][0]['text']
+
+
+ llm = Llama(
+     model_path="llama3_8b_chat_brainstorm.Q2_K.gguf",
+     # n_gpu_layers=-1,  # uncomment to use GPU acceleration
+     # seed=1337,  # uncomment to set a specific seed
+     # n_ctx=2048,  # uncomment to increase the context window
+ )
+
+
+ def chatty(prompt, history):
+     print(prompt)
+     print(f'history: {history}')
+     # Rebuild the conversation so far in the same HUMAN/ASSISTANT template
+     past_messages = ''
+     for user_turn, assistant_turn in history:
+         past_messages += f'\n### HUMAN: {user_turn}'
+         past_messages += f'\n### ASSISTANT: {assistant_turn}'
+     print(f'past_messages: {past_messages}')
+     response = llama_cpp_chat(llm, prompt, past_messages)
+     # echo=True returns the whole prompt, so keep only the final assistant reply
+     return response.split('### ASSISTANT:')[-1]
+
+
+ demo = gr.ChatInterface(
+     fn=chatty,
+     title="Brainstorm on CPU with llama.cpp",
+     description="Please note that CPU prediction will be very slow - but this can run on the Free Tier :)",
+ )
+
+
+ if __name__ == "__main__":
+     demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
+ llama-cpp-python
+ huggingface_hub==0.22.2
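
A note on the model file: app.py loads llama3_8b_chat_brainstorm.Q2_K.gguf from the working directory, while this commit only uploads app.py and requirements.txt. If the GGUF is not committed alongside them, it could be fetched at startup with huggingface_hub (already pinned in requirements.txt). A minimal sketch, assuming a hypothetical repo id that is not stated in this commit:

from huggingface_hub import hf_hub_download

# Hypothetical: download the quantized GGUF before constructing Llama().
# The repo_id below is an assumption, not taken from this commit.
model_path = hf_hub_download(
    repo_id="csabakecskemeti/llama3_8b_chat_brainstorm-GGUF",  # assumed repo id
    filename="llama3_8b_chat_brainstorm.Q2_K.gguf",
)
# Llama(model_path=model_path) could then replace the hard-coded path in app.py.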