lamhieu committed on
Commit
068cd80
1 Parent(s): 37a740d

chore: update something

Browse files
Files changed (5) hide show
  1. LICENSE.txt +1 -0
  2. README.md +21 -6
  3. app.py +192 -0
  4. requirements.txt +8 -0
  5. style.css +24 -0
LICENSE.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ~
README.md CHANGED
@@ -1,13 +1,28 @@
1
  ---
2
- title: Ghost 8b Beta 8k
3
- emoji: 🐠
4
- colorFrom: gray
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.37.2
8
  app_file: app.py
9
  pinned: false
 
 
 
 
 
 
 
 
 
 
 
10
  license: other
 
 
 
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Ghost 8B Beta
3
+ emoji: 👻
4
+ colorFrom: indigo
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.36.1
8
  app_file: app.py
9
  pinned: false
10
+ suggested_hardware: a10g-small
11
+ language:
12
+ - en
13
+ - vi
14
+ - es
15
+ - pt
16
+ - de
17
+ - it
18
+ - fr
19
+ - ko
20
+ - zh
21
  license: other
22
+ license_name: ghost-llms
23
+ license_link: https://ghost-x.org/ghost-llms-license
24
+ tags:
25
+ - ghost
26
  ---
27
 
28
+ # ~
app.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from threading import Thread
3
+ from typing import Iterator
4
+
5
+ import gradio as gr
6
+ import spaces
7
+ import torch
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
9
+
10
# Bounds and default for the "Max new tokens" control.
MAX_MAX_NEW_TOKENS: int = 2048
DEFAULT_MAX_NEW_TOKENS: int = 1024
# Prompt-length cap in tokens; the MAX_INPUT_TOKEN_LENGTH environment
# variable overrides the built-in default of 8192.
MAX_INPUT_TOKEN_LENGTH: int = int(os.environ.get("MAX_INPUT_TOKEN_LENGTH", "8192"))

# Markdown header describing the model and its supported languages.
DESCRIPTION = """\
# Playground with Ghost 8B Beta (p)

**Ghost 8B Beta** is a large language model developed with goals that include excellent multilingual support, superior knowledge capabilities, and cost-effectiveness. The model comes in two context length versions, 8k and 128k, along with multilingual function tools support by default.

The languages supported are 🇺🇸 English, 🇫🇷 French, 🇮🇹 Italian, 🇪🇸 Spanish, 🇵🇹 Portuguese, 🇩🇪 German, 🇻🇳 Vietnamese, 🇰🇷 Korean and 🇨🇳 Chinese.

📋 Note: current model version is "disl-0x5-8k" (10 Jul 2024), context length 8k and current status is "moderating / previewing". For detailed information about the model, see [here](https://ghost-x.org/docs/models/ghost-8b-beta/). Try to experience it the way you want!
"""


# HTML snippet intended as the empty-chat placeholder.
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
<h1 style="font-size: 26px; margin-bottom: 2px; opacity: 0.20;">👻 Ghost 8B Beta</h1>
<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.10;">Ask and share whatever you want ~</p>
</div>
"""

# Markdown footer: accuracy disclaimer plus attribution links.
LICENSE = """
<p/>

---
Ghost 8B Beta may give inaccurate information, including information about people, so please verify Ghost 8B Beta's answers. [Ghost 8B Beta](https://ghost-x.org/docs/models/ghost-8b-beta/) by [Ghost X](https://ghost-x.org).
"""
38
+
39
# Sample prompts, one per row; each row is a single-element list holding
# only the message text.
EXAMPLES = [
    [prompt]
    for prompt in (
        "Explain the concept of quantum entanglement and its implications for quantum computing.",
        "Comment le mouvement des Lumières a-t-il influencé la Révolution française ?",
        "Quale fu l'impatto del Rinascimento italiano sull'arte e la cultura europea?",
        "Spiega il funzionamento e le applicazioni della spettroscopia Raman in chimica analitica.",
        "Explique el teorema de incompletitud de Gödel y sus implicaciones en la lógica matemática.",
        "Descreva o processo de meiose celular e sua importância na variabilidade genética.",
        "Giải thích nguyên lý hoạt động của máy học sâu (deep learning) trong trí tuệ nhân tạo và ứng dụng của nó trong xử lý ngôn ngữ tự nhiên.",
        "조선 시대의 신분제도가 한국 사회에 미친 영향을 분석하시오.",
        "分析丝绸之路对中国古代文化交流和经济发展的影响。",
    )
]
60
+
61
# The model and tokenizer are loaded only when CUDA is available; without
# it the page description gains a notice and `model` / `tokenizer` stay
# undefined.
if torch.cuda.is_available():
    model_id = "lamhieu/ghost-8b-beta-disl-0x5-8k"
    # Optional Hugging Face access token; None when HF_TOKEN is unset.
    model_tk = os.environ.get("HF_TOKEN")
    # NOTE(review): no torch_dtype is passed, so the weights load in the
    # library's default precision — confirm this fits the target GPU.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=model_tk,
        trust_remote_code=True,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_id,
        token=model_tk,
        trust_remote_code=True,
    )
else:
    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
79
+
80
+
81
@spaces.GPU(duration=60)
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_prompt: str,
    max_new_tokens: int = 1024,
    temperature: float = 0.4,
    top_p: float = 0.95,
    top_k: int = 50,
    repetition_penalty: float = 1.0,
) -> Iterator[str]:
    """Stream the model's reply to ``message`` as it is generated.

    The conversation is assembled as an optional system message, the prior
    (user, assistant) turns from ``chat_history`` and finally ``message``.
    It is tokenized with the tokenizer's chat template and passed to
    ``model.generate`` on a background thread, while this generator relays
    the text produced so far.

    Args:
        message: The new user message.
        chat_history: Earlier turns as (user, assistant) pairs.
        system_prompt: Optional system instruction; skipped when empty.
        max_new_tokens: Forwarded to ``model.generate``.
        temperature: Forwarded to ``model.generate``.
        top_p: Forwarded to ``model.generate``.
        top_k: Forwarded to ``model.generate``.
        repetition_penalty: Forwarded to ``model.generate``.

    Yields:
        The reply text accumulated so far; each yield extends the last one.
    """
    conversation = []
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})
    for user, assistant in chat_history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    )
    input_ids = input_ids.to(model.device)
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep only the most recent tokens so the newest turns survive.
        # NOTE(review): the cap covers the prompt only; prompt plus
        # max_new_tokens may exceed the model's context — confirm.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(
            f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens."
        )

    # skip_prompt / skip_special_tokens: relay only newly generated text.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
    )
    # Generation runs on a worker thread so chunks can be consumed here.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)

    # Wait for the worker once the stream has ended so the thread is
    # finished before this generator returns.
    worker.join()
134
+
135
+
136
# Chat display; PLACEHOLDER is passed as its placeholder content.
chatbot = gr.Chatbot(label="Ghost 8B Beta", placeholder=PLACEHOLDER, height=400)

# Chat UI wired to `generate`. The additional inputs line up, in order,
# with generate's parameters after (message, chat_history): system prompt,
# max new tokens, temperature, top-p, top-k, repetition penalty.
chat_interface = gr.ChatInterface(
    fn=generate,
    chatbot=chatbot,
    fill_height=True,
    additional_inputs=[
        gr.Textbox(label="System prompt", lines=6),
        *(
            gr.Slider(
                label=label,
                minimum=minimum,
                maximum=maximum,
                step=step,
                value=value,
            )
            # (label, minimum, maximum, step, value)
            for label, minimum, maximum, step, value in (
                ("Max new tokens", 1, MAX_MAX_NEW_TOKENS, 1, DEFAULT_MAX_NEW_TOKENS),
                ("Temperature", 0.1, 2.0, 0.1, 0.4),
                ("Top-p (nucleus sampling)", 0.05, 1.0, 0.05, 0.95),
                ("Top-k", 1, 100, 1, 50),
                ("Repetition penalty", 1.0, 2.0, 0.05, 1.0),
            )
        ),
    ],
    stop_btn=None,
    cache_examples=False,
    examples=EXAMPLES,
)

# Page layout: description on top, the chat interface, then the footer.
with gr.Blocks(fill_height=True, css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    chat_interface.render()
    gr.Markdown(LICENSE)

if __name__ == "__main__":
    # Launched without an explicit queue configuration; an earlier variant
    # called demo.queue(max_size=20) before launch().
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ accelerate==0.30.1
2
+ bitsandbytes==0.43.1
3
+ gradio==4.37.2
4
+ scipy==1.13.0
5
+ sentencepiece==0.2.0
6
+ spaces==0.28.3
7
+ torch==2.0.0
8
+ transformers==4.41.0
style.css ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ h1 {
2
+ text-align: center;
3
+ display: block;
4
+ }
5
+
6
+ #duplicate-button {
7
+ margin: auto;
8
+ color: white;
9
+ background: #1565c0;
10
+ border-radius: 100vh;
11
+ }
12
+
13
+ .contain {
14
+ max-width: 900px;
15
+ margin: auto;
16
+ padding-top: 1.5rem;
17
+ }
18
+
19
+ .s-pad {
20
+ display: block;
21
+ padding-top: 2rem;
22
+ height: 1px;
23
+ width: 100%;
24
+ }