DrBenjamin committed on
Commit fa4af98
1 Parent(s): bbad637

removed file

Files changed (1)
  1. pages/🦙_Alpaca.py +0 -201
pages/🦙_Alpaca.py DELETED
@@ -1,201 +0,0 @@
- ##### `🦙_Alpaca.py`
- ##### Alpaca Model
- ##### https://github.com/seemanne/llamacpypy
- ##### https://github.com/shaunabanana/llama.py
- ##### https://github.com/thomasantony/llamacpp-python
- ##### https://github.com/abetlen/llama-cpp-python
- ##### Please reach out to ben@benbox.org for any questions
- #### Loading needed Python libraries
- import streamlit as st
- #from llamacpypy import Llama
- #import llamacpp
- from llama_cpp import Llama
- import os
- import subprocess
- import sys
-
-
-
-
- #### Streamlit initial setup
- st.set_page_config(
-     page_title = "🦙 Alpaca",
-     page_icon = "images/Logo.png",
-     layout = "centered",
-     initial_sidebar_state = "expanded"
- )
-
-
-
-
- #### Functions of the Python Wrapper
- def llama_stream(
-     prompt = '',
-     skip_prompt = True,
-     trim_prompt = 0,
-     executable = 'pages/llama.cpp/main',
-     model = 'models/7B/ggml-model-q4_0.bin',
-     threads = 4,
-     temperature = 0.7,
-     top_k = 40,
-     top_p = 0.5,
-     repeat_last_n = 256,
-     repeat_penalty = 1.17647,
-     n = 4096,
-     interactive = False,
-     reverse_prompt = "User:"
- ):
-     command = [
-         executable,
-         '-m', model,
-         '-t', str(threads),
-         '--temp', str(temperature),
-         '--top_k', str(top_k),
-         '--top_p', str(top_p),
-         '--repeat_last_n', str(repeat_last_n),
-         '--repeat_penalty', str(repeat_penalty),
-         '-n', str(n),
-         '-p', prompt
-     ]
-     if interactive:
-         command += ['-i', '-r', reverse_prompt]
-
-     process = subprocess.Popen(
-         command,
-         stdin = subprocess.PIPE,
-         stdout = subprocess.PIPE,
-         stderr = subprocess.PIPE,
-     )
-
-     token = b''
-     generated = ''
-     while True:
-         token += process.stdout.read(1)
-         if token: # neither empty string nor None
-             try:
-                 decoded = token.decode('utf-8')
-
-                 trimmed_prompt = prompt
-                 if trim_prompt > 0:
-                     trimmed_prompt = prompt[:-trim_prompt]
-                 prompt_finished = generated.startswith(trimmed_prompt)
-                 reverse_prompt_encountered = generated.endswith(reverse_prompt)
-                 if not skip_prompt or prompt_finished:
-                     yield decoded
-                 if interactive and prompt_finished and reverse_prompt_encountered:
-                     user_input = input()
-                     process.stdin.write(user_input.encode('utf-8') + b'\n')
-                     process.stdin.flush()
-
-                 generated += decoded
-                 token = b''
-             except UnicodeDecodeError:
-                 continue
-         elif process.poll() is not None:
-             return
-
-
- def llama(
-     prompt = '',
-     stream = False,
-     skip_prompt = False,
-     trim_prompt = 0,
-     executable = 'pages/llama.cpp/main',
-     model = 'models/7B/ggml-model-q4_0.bin',
-     threads = 4,
-     temperature = 0.7,
-     top_k = 40,
-     top_p = 0.5,
-     repeat_last_n = 256,
-     repeat_penalty = 1.17647,
-     n = 4096,
-     interactive = False,
-     reverse_prompt = "User:"
- ):
-     streamer = llama_stream(
-         prompt = prompt,
-         skip_prompt = skip_prompt,
-         trim_prompt = trim_prompt,
-         executable = executable,
-         model = model,
-         threads = threads,
-         temperature = temperature,
-         top_k = top_k,
-         top_p = top_p,
-         repeat_last_n = repeat_last_n,
-         repeat_penalty = repeat_penalty,
-         n = n,
-         interactive = interactive,
-         reverse_prompt = reverse_prompt
-     )
-     if stream:
-         return streamer
-     else:
-         return ''.join(list(streamer))
-
-
-
- ### Python Wrapper (functions above)
- #text = []
- #for token in llama(prompt = 'What is your purpose?', repeat_penalty = 1.05, skip_prompt = False, interactive = False):
- #    print(token, end = '', flush = True)
- #    text.append(token)
- #st.subheader('Debug')
- #st.experimental_show(text[0])
- #st.experimental_show(text[1])
- #st.subheader('Answer')
- #st.write(''.join(text))
-
-
-
- ### llamacpypy
- #llama = Llama(model_name = 'models/7B/ggml-model-q4_0.bin', warm_start = True)
- #llama.load_model()
- #var = llama.generate("This is the weather report, we are reporting a clown fiesta happening at backer street. The clowns ")
- #st.write(var)
-
-
-
- ### llamacpp
- #model_path = "./models/7B/ggml-model-q4_0.bin"
- #params = llamacpp.gpt_params(model_path, 4096, 40, 0.1, 0.7, 2.0)
- #model = llamacpp.PyLLAMA(model_path, params)
- #text = model.predict("Hello, I'm a llama.", 10)
- #st.write(text)
- #params = llamacpp.gpt_params('./models/7B/ggml-model-q4_0.bin', # model
- #    512, # ctx_size
- #    100, # n_predict
- #    40, # top_k
- #    0.95, # top_p
- #    0.85, # temp
- #    1.30, # repeat_penalty
- #    -1, # seed
- #    8, # threads
- #    64, # repeat_last_n
- #    8, # batch_size
- #)
- #model = llamacpp.PyLLAMA(params)
- #model.add_bos() # Adds "beginning of string" token
- #model.update_input("A llama is a")
- #model.print_startup_stats()
- #model.prepare_context()
-
- #model.ingest_all_pending_input(True)
- #while not model.is_finished():
- #    text, is_finished = model.infer_text()
- #    print(text, end="")
-
- #    if is_finished:
- #        break
-
- # Flush stdout
- #sys.stdout.flush()
- #model.print_end_stats()
-
-
-
- ### Llama cpp
- llm = Llama(model_path = "xfh/alpaca.cpp_65b_ggml")
- output = llm("Q: Name the planets in the solar system? A: ", max_tokens = 32, stop = ["Q:", "\n"], echo = True)
- st.write(output)
-