##### `🦙_Alpaca.py`
##### Alpaca Model
##### https://github.com/seemanne/llamacpypy
##### https://github.com/shaunabanana/llama.py
##### Please reach out to ben@benbox.org for any questions
#### Loading needed Python libraries
import streamlit as st
#from llamacpypy import Llama
import llamacpp
from llama_cpp import Llama
import os
import subprocess
#### Streamlit initial setup
st.set_page_config(
    page_title = "🦙 Alpaca",
    page_icon = "images/Logo.png",
    layout = "centered",
    initial_sidebar_state = "expanded"
)
#### Functions of the Python Wrapper
def llama_stream(
    prompt = '',
    skip_prompt = True,
    trim_prompt = 0,
    executable = 'pages/llama.cpp/main',
    model = 'models/7B/ggml-model-q4_0.bin',
    threads = 4,
    temperature = 0.7,
    top_k = 40,
    top_p = 0.5,
    repeat_last_n = 256,
    repeat_penalty = 1.17647,
    n = 4096,
    interactive = False,
    reverse_prompt = "User:"
):
    # Build the llama.cpp command line from the given parameters
    command = [
        executable,
        '-m', model,
        '-t', str(threads),
        '--temp', str(temperature),
        '--top_k', str(top_k),
        '--top_p', str(top_p),
        '--repeat_last_n', str(repeat_last_n),
        '--repeat_penalty', str(repeat_penalty),
        '-n', str(n),
        '-p', prompt
    ]
    if interactive:
        command += ['-i', '-r', reverse_prompt]
    # Run llama.cpp as a subprocess and stream its stdout byte by byte
    process = subprocess.Popen(
        command,
        stdin = subprocess.PIPE,
        stdout = subprocess.PIPE,
        stderr = subprocess.PIPE,
    )
    token = b''
    generated = ''
    while True:
        token += process.stdout.read(1)
        if token: # neither empty string nor None
            try:
                decoded = token.decode('utf-8')
                # Only yield once the echoed prompt has been consumed (unless skip_prompt is False)
                trimmed_prompt = prompt
                if trim_prompt > 0:
                    trimmed_prompt = prompt[:-trim_prompt]
                prompt_finished = generated.startswith(trimmed_prompt)
                reverse_prompt_encountered = generated.endswith(reverse_prompt)
                if not skip_prompt or prompt_finished:
                    yield decoded
                # In interactive mode, forward user input when the reverse prompt appears
                if interactive and prompt_finished and reverse_prompt_encountered:
                    user_input = input()
                    process.stdin.write(user_input.encode('utf-8') + b'\n')
                    process.stdin.flush()
                generated += decoded
                token = b''
            except UnicodeDecodeError:
                continue
        elif process.poll() is not None:
            return
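# Illustrative console usage of llama_stream (commented out); it assumes the llama.cpp
# binary at 'pages/llama.cpp/main' and the quantized 7B model exist at the default paths
# above, and the prompt text is only a placeholder.
#for token in llama_stream(prompt = 'Name three llama facts.', skip_prompt = True):
#    print(token, end = '', flush = True)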
def llama(
    prompt = '',
    stream = False,
    skip_prompt = False,
    trim_prompt = 0,
    executable = 'pages/llama.cpp/main',
    model = 'models/7B/ggml-model-q4_0.bin',
    threads = 4,
    temperature = 0.7,
    top_k = 40,
    top_p = 0.5,
    repeat_last_n = 256,
    repeat_penalty = 1.17647,
    n = 4096,
    interactive = False,
    reverse_prompt = "User:"
):
    # Wrapper around llama_stream(): return the generator itself when streaming,
    # otherwise collect all tokens into a single string
    streamer = llama_stream(
        prompt = prompt,
        skip_prompt = skip_prompt,
        trim_prompt = trim_prompt,
        executable = executable,
        model = model,
        threads = threads,
        temperature = temperature,
        top_k = top_k,
        top_p = top_p,
        repeat_last_n = repeat_last_n,
        repeat_penalty = repeat_penalty,
        n = n,
        interactive = interactive,
        reverse_prompt = reverse_prompt
    )
    if stream:
        return streamer
    else:
        return ''.join(list(streamer))
### Python Wrapper (functions above)
#text = []
#for token in llama(prompt = 'What is your purpose?', repeat_penalty = 1.05, skip_prompt = False, interactive = False):
# print(token, end = '', flush = True)
# text.append(token)
#st.subheader('Debug')
#st.experimental_show(text[0])
#st.experimental_show(text[1])
#st.subheader('Answer')
#st.write(''.join(text))
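### Streaming into Streamlit (commented example)
# Illustrative sketch only: feeds tokens from the wrapper above into an st.empty()
# placeholder as they arrive; the prompt text is a placeholder, not part of the app flow.
#placeholder = st.empty()
#streamed = ''
#for token in llama(prompt = 'What is your purpose?', stream = True, skip_prompt = True):
#    streamed += token
#    placeholder.write(streamed)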
### llamacpypy
#llama = Llama(model_name = 'models/7B/ggml-model-q4_0.bin', warm_start = True)
#llama.load_model()
#var = llama.generate("This is the weather report, we are reporting a clown fiesta happening at backer street. The clowns ")
#st.write(var)
### llamacpp
#model_path = "./models/7B/ggml-model-q4_0.bin"
#params = llamacpp.gpt_params(model_path, 4096, 40, 0.1, 0.7, 2.0)
#model = llamacpp.PyLLAMA(model_path, params)
#text = model.predict("Hello, I'm a llama.", 10)
#st.write(text)
### Llama cpp
llm = Llama(model_path="models/7B/ggml-model-q4_0.bin")
output = llm("Q: Name the planets in the solar system? A: ", max_tokens = 32, stop = ["Q:", "\n"], echo = True)
st.write(output)
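# The call above returns an OpenAI-style completion dict from llama-cpp-python; the
# commented sketch below shows how only the generated text could be displayed instead of
# the raw dictionary (the 'choices' layout is assumed from the library's usual output).
#st.subheader('Answer')
#st.write(output['choices'][0]['text'])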