likewendy committed on
Commit
7f7f074
·
1 Parent(s): 3a7ae36
Files changed (3) hide show
  1. =0.26.0 +39 -0
  2. app.py +48 -35
  3. requirements.txt +3 -1
=0.26.0 ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Collecting accelerate
2
+ Downloading accelerate-1.2.0-py3-none-any.whl (336 kB)
3
+ โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ” 336.3/336.3 kB 12.1 MB/s eta 0:00:00
4
+ Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.10/site-packages (from accelerate) (0.4.5)
5
+ Requirement already satisfied: numpy<3.0.0,>=1.17 in /usr/local/lib/python3.10/site-packages (from accelerate) (2.1.1)
6
+ Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/site-packages (from accelerate) (6.0.2)
7
+ Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/site-packages (from accelerate) (2.4.0)
8
+ Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/site-packages (from accelerate) (24.1)
9
+ Requirement already satisfied: psutil in /usr/local/lib/python3.10/site-packages (from accelerate) (5.9.8)
10
+ Requirement already satisfied: huggingface-hub>=0.21.0 in /usr/local/lib/python3.10/site-packages (from accelerate) (0.25.2)
11
+ Requirement already satisfied: requests in /usr/local/lib/python3.10/site-packages (from huggingface-hub>=0.21.0->accelerate) (2.32.3)
12
+ Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/site-packages (from huggingface-hub>=0.21.0->accelerate) (4.12.2)
13
+ Requirement already satisfied: filelock in /usr/local/lib/python3.10/site-packages (from huggingface-hub>=0.21.0->accelerate) (3.16.1)
14
+ Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/site-packages (from huggingface-hub>=0.21.0->accelerate) (4.66.5)
15
+ Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/site-packages (from huggingface-hub>=0.21.0->accelerate) (2024.6.1)
16
+ Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (12.1.105)
17
+ Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (12.1.105)
18
+ Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (9.1.0.70)
19
+ Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (12.1.0.106)
20
+ Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (11.4.5.107)
21
+ Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (11.0.2.54)
22
+ Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (2.20.5)
23
+ Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (12.1.105)
24
+ Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (12.1.3.1)
25
+ Requirement already satisfied: sympy in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (1.13.3)
26
+ Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (10.3.2.106)
27
+ Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (3.1.4)
28
+ Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (12.1.105)
29
+ Requirement already satisfied: networkx in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (3.3)
30
+ Requirement already satisfied: triton==3.0.0 in /usr/local/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (3.0.0)
31
+ Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.10.0->accelerate) (12.6.68)
32
+ Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/site-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.5)
33
+ Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/site-packages (from requests->huggingface-hub>=0.21.0->accelerate) (3.3.2)
34
+ Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/site-packages (from requests->huggingface-hub>=0.21.0->accelerate) (2024.8.30)
35
+ Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/site-packages (from requests->huggingface-hub>=0.21.0->accelerate) (3.10)
36
+ Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/site-packages (from requests->huggingface-hub>=0.21.0->accelerate) (2.2.3)
37
+ Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/site-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)
38
+ Installing collected packages: accelerate
39
+ Successfully installed accelerate-1.2.0
app.py CHANGED
@@ -1,12 +1,20 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
 
 
 
 
 
 
9
 
 
10
  def respond(
11
  message,
12
  history: list[tuple[str, str]],
@@ -15,50 +23,55 @@ def respond(
15
  temperature,
16
  top_p,
17
  ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
 
26
- messages.append({"role": "user", "content": message})
27
 
28
- response = ""
 
 
 
 
 
 
29
 
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
 
34
  temperature=temperature,
35
  top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
 
39
- response += token
40
- yield response
 
 
 
 
 
41
 
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
  demo = gr.ChatInterface(
47
- respond,
48
  additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
  gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
  ),
59
  ],
 
60
  )
61
 
62
-
63
  if __name__ == "__main__":
64
- demo.launch()
 
1
  import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
+ # 定义系统提示语
6
+ system_prompt = """You are Skywork-o1, a thinking model developed by Skywork AI, specializing in solving complex problems involving mathematics, coding, and logical reasoning through deep thought. When faced with a user's request, you first engage in a lengthy and in-depth thinking process to explore possible solutions to the problem. After completing your thoughts, you then provide a detailed explanation of the solution process in your response."""
 
 
7
 
8
+ # 初始化模型和分词器
9
+ model_name = "Skywork/Skywork-o1-Open-Llama-3.1-8B"
10
+ model = AutoModelForCausalLM.from_pretrained(
11
+ model_name,
12
+ torch_dtype="auto",
13
+ device_map="auto"
14
+ )
15
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
16
 
17
+ # 定义生成回复的函数
18
  def respond(
19
  message,
20
  history: list[tuple[str, str]],
 
23
  temperature,
24
  top_p,
25
  ):
26
+ # 构造对话历史
27
+ conversation = [{"role": "system", "content": system_message}]
28
+ for user_msg, assistant_msg in history:
29
+ if user_msg:
30
+ conversation.append({"role": "user", "content": user_msg})
31
+ if assistant_msg:
32
+ conversation.append({"role": "assistant", "content": assistant_msg})
33
 
34
+ conversation.append({"role": "user", "content": message})
35
 
36
+ # 构造输入
37
+ input_ids = tokenizer.apply_chat_template(
38
+ conversation,
39
+ tokenize=True,
40
+ add_generation_prompt=True,
41
+ return_tensors="pt"
42
+ ).to(model.device)
43
 
44
+ # 模型生成
45
+ generation = model.generate(
46
+ input_ids=input_ids,
47
+ max_new_tokens=max_tokens,
48
+ do_sample=True,
49
  temperature=temperature,
50
  top_p=top_p,
51
+ pad_token_id=tokenizer.pad_token_id,
52
+ )
53
 
54
+ # 解码生成内容
55
+ completion = tokenizer.decode(
56
+ generation[0][len(input_ids[0]):],
57
+ skip_special_tokens=True,
58
+ clean_up_tokenization_spaces=True
59
+ )
60
+ return completion
61
 
62
+ # ๅฎšไน‰Gradio็•Œ้ข
 
 
 
63
  demo = gr.ChatInterface(
64
+ fn=respond,
65
  additional_inputs=[
66
+ gr.Textbox(value=system_prompt, label="System message"),
67
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
68
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
69
  gr.Slider(
70
+ minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
 
 
 
 
71
  ),
72
  ],
73
+ # chatbot_style="default"
74
  )
75
 
 
76
  if __name__ == "__main__":
77
+ demo.launch(server_port=9114)
requirements.txt CHANGED
@@ -1 +1,3 @@
1
- huggingface_hub==0.25.2
 
 
 
1
+ huggingface_hub==0.25.2
2
+ transformers
3
+ accelerate>=0.26.0