DrBenjamin committed
Commit 11df34d
1 Parent(s): fc849f1

added files

Files changed (2)
  1. pages/🦙_Alpaca.py +169 -0
  2. requirements.txt +3 -0
pages/🦙_Alpaca.py ADDED
@@ -0,0 +1,169 @@
+ ##### `🦙_Alpaca.py`
+ ##### Alpaca Model
+ ##### https://github.com/seemanne/llamacpypy
+ ##### https://github.com/shaunabanana/llama.py
+ ##### Please reach out to ben@benbox.org for any questions
+ #### Loading needed Python libraries
+ import streamlit as st
+ #from llamacpypy import Llama
+ import llamacpp
+ from llama_cpp import Llama
+ import os
+ import subprocess
+
+
+
+
+ #### Streamlit initial setup
+ st.set_page_config(
+     page_title = "🦙 Alpaca",
+     page_icon = "images/Logo.png",
+     layout = "centered",
+     initial_sidebar_state = "expanded"
+ )
+
+
+
+
+ #### Functions of the Python Wrapper
+ def llama_stream(
+         prompt = '',
+         skip_prompt = True,
+         trim_prompt = 0,
+         executable = 'pages/llama.cpp/main',
+         model = 'models/7B/ggml-model-q4_0.bin',
+         threads = 4,
+         temperature = 0.7,
+         top_k = 40,
+         top_p = 0.5,
+         repeat_last_n = 256,
+         repeat_penalty = 1.17647,
+         n = 4096,
+         interactive = False,
+         reverse_prompt = "User:"
+ ):
+     command = [
+         executable,
+         '-m', model,
+         '-t', str(threads),
+         '--temp', str(temperature),
+         '--top_k', str(top_k),
+         '--top_p', str(top_p),
+         '--repeat_last_n', str(repeat_last_n),
+         '--repeat_penalty', str(repeat_penalty),
+         '-n', str(n),
+         '-p', prompt
+     ]
+     if interactive:
+         command += ['-i', '-r', reverse_prompt]
+
+     process = subprocess.Popen(
+         command,
+         stdin = subprocess.PIPE,
+         stdout = subprocess.PIPE,
+         stderr = subprocess.PIPE,
+     )
+
+     token = b''
+     generated = ''
+     while True:
+         token += process.stdout.read(1)
+         if token:  # neither empty string nor None
+             try:
+                 decoded = token.decode('utf-8')
+
+                 trimmed_prompt = prompt
+                 if trim_prompt > 0:
+                     trimmed_prompt = prompt[:-trim_prompt]
+                 prompt_finished = generated.startswith(trimmed_prompt)
+                 reverse_prompt_encountered = generated.endswith(reverse_prompt)
+                 if not skip_prompt or prompt_finished:
+                     yield decoded
+                 if interactive and prompt_finished and reverse_prompt_encountered:
+                     user_input = input()
+                     process.stdin.write(user_input.encode('utf-8') + b'\n')
+                     process.stdin.flush()
+
+                 generated += decoded
+                 token = b''
+             except UnicodeDecodeError:
+                 continue
+         elif process.poll() is not None:
+             return
+
+
+ def llama(
+         prompt = '',
+         stream = False,
+         skip_prompt = False,
+         trim_prompt = 0,
+         executable = 'pages/llama.cpp/main',
+         model = 'models/7B/ggml-model-q4_0.bin',
+         threads = 4,
+         temperature = 0.7,
+         top_k = 40,
+         top_p = 0.5,
+         repeat_last_n = 256,
+         repeat_penalty = 1.17647,
+         n = 4096,
+         interactive = False,
+         reverse_prompt = "User:"
+ ):
+     streamer = llama_stream(
+         prompt = prompt,
+         skip_prompt = skip_prompt,
+         trim_prompt = trim_prompt,
+         executable = executable,
+         model = model,
+         threads = threads,
+         temperature = temperature,
+         top_k = top_k,
+         top_p = top_p,
+         repeat_last_n = repeat_last_n,
+         repeat_penalty = repeat_penalty,
+         n = n,
+         interactive = interactive,
+         reverse_prompt = reverse_prompt
+     )
+     if stream:
+         return streamer
+     else:
+         return ''.join(list(streamer))
+
+
+
+ ### Python Wrapper (functions above)
+ #text = []
+ #for token in llama(prompt = 'What is your purpose?', repeat_penalty = 1.05, skip_prompt = False, interactive = False):
+ #    print(token, end = '', flush = True)
+ #    text.append(token)
+ #st.subheader('Debug')
+ #st.experimental_show(text[0])
+ #st.experimental_show(text[1])
+ #st.subheader('Answer')
+ #st.write(''.join(text))
+
+
+
+ ### llamacpypy
+ #llama = Llama(model_name = 'models/7B/ggml-model-q4_0.bin', warm_start = True)
+ #llama.load_model()
+ #var = llama.generate("This is the weather report, we are reporting a clown fiesta happening at backer street. The clowns ")
+ #st.write(var)
+
+
+
+ ### llamacpp
+ #model_path = "./models/7B/ggml-model-q4_0.bin"
+ #params = llamacpp.gpt_params(model_path, 4096, 40, 0.1, 0.7, 2.0)
+ #model = llamacpp.PyLLAMA(model_path, params)
+ #text = model.predict("Hello, I'm a llama.", 10)
+ #st.write(text)
+
+
+
+ ### Llama cpp
+ llm = Llama(model_path = "models/7B/ggml-model-q4_0.bin")
+ output = llm("Q: Name the planets in the solar system? A: ", max_tokens = 32, stop = ["Q:", "\n"], echo = True)
+ st.write(output)
+
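
The active code path above calls llama-cpp-python directly and writes the raw completion dictionary to the page. A minimal sketch of rendering only the generated text, assuming llama-cpp-python's OpenAI-style response format (the `choices`/`text` keys come from that library's return value, not from this commit):

    # Sketch only, not part of this commit: render just the completion text.
    # Assumes the OpenAI-style dict returned by llama-cpp-python.
    answer = output["choices"][0]["text"]
    st.subheader("Answer")
    st.write(answer)
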
requirements.txt CHANGED
@@ -15,3 +15,6 @@ text_generation
  datasets
  audio2numpy
  pydub
+ llamacpp
+ llamacpypy
+ llama-cpp-python
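
The requirements now pull in three alternative bindings (llamacpp, llamacpypy and llama-cpp-python), but only llama-cpp-python is used by the active code path; the subprocess wrapper `llama()` defined in `pages/🦙_Alpaca.py` is exercised only by the commented-out block. A minimal sketch of streaming that wrapper's tokens into the Streamlit page, assuming the llama.cpp binary and model exist at the paths hard-coded in `llama_stream()`:

    # Sketch only, not part of this commit: stream wrapper output into the page.
    # Assumes pages/llama.cpp/main and models/7B/ggml-model-q4_0.bin exist.
    placeholder = st.empty()
    answer = ''
    for token in llama(prompt = 'What is your purpose?', stream = True, skip_prompt = True):
        answer += token
        placeholder.write(answer)
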