Commit daa0894 by OsakanaTeishoku (parent: 4855037): Update README.md
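
This commit extends the README's usage example. The snippet loads the base model with vLLM, borrows the chat template from weblab-GENIAC/Tanuki-8B-dpo-v1.0, downloads the LoRA adapter OsakanaTeishoku/1204lora, generates answers for the elyza-tasks-100-TV tasks, and writes them to output.jsonl.
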
```python
os.environ["HF_TOKEN"] = "your Hugging Face token"

from vllm.lora.request import LoRARequest
llm = vllm.LLM(
    MODEL_NAME,  # "deepseek-ai/deepseek-math-7b-instruct"
    tensor_parallel_size=1,  # 2, 4
    # ... (further arguments elided in this diff; vLLM needs enable_lora=True
    # here for the LoRARequest passed to generate() below)
)
tokenizer = llm.get_tokenizer()

from transformers import AutoTokenizer
# Borrow the chat template from the SFT model's tokenizer.
sft_tokenizer = AutoTokenizer.from_pretrained(
    "weblab-GENIAC/Tanuki-8B-dpo-v1.0"
)
tokenizer.chat_template = sft_tokenizer.chat_template

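# (Illustrative check, not in the original README: render one dummy message
# to confirm the borrowed template works; tokenize=False returns the
# formatted prompt string instead of token IDs.)
print(tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello"}],
    tokenize=False,
    add_generation_prompt=True,
))
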
from huggingface_hub import snapshot_download
# Download the LoRA adapter weights from the Hub.
lora_path = snapshot_download(repo_id="OsakanaTeishoku/1204lora")
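# (Illustrative, not in the original README: the snapshot directory holds the
# adapter files, e.g. adapter_config.json and the adapter weights.)
print(os.listdir(lora_path))
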
from datasets import load_dataset
# Specify the path to the jsonl file.
data_files = {"test": "elyza-tasks-100-TV_0.jsonl"}  # replace "your_jsonl_file.jsonl" with the actual file name
# Load the data using the load_dataset function.
tasks = load_dataset("json", data_files=data_files, split="test")
# Inspect the dataset.
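print(tasks)  # illustrative line, not in the original README
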
messages_list = [
    [{"role": "user", "content": tasks["input"][i]}] for i in range(len(tasks))
]
prompts = [line[0]["content"] for line in messages_list]
# With the default tokenize=True, apply_chat_template returns token IDs directly.
prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True) for messages in messages_list]
sampling_params = vllm.SamplingParams(
    temperature=1.5,
    max_tokens=1024,
    repetition_penalty=1.05,
    min_p=0.1,
)
outputs = llm.generate(
    prompt_token_ids=prompt_token_ids,
    sampling_params=sampling_params,
    lora_request=LoRARequest("lora", 1, lora_path),  # LoRA adapter
)
# Results come back in the same order as the inputs.
for prompt, response in zip(prompts, outputs):
    print("prompt:", prompt)
    print("output:", response.outputs[0].text.strip())
    print("-" * 80)
import json
data = [{
    "task_id": i,
    #"input": prompts[i],
    "output": outputs[i].outputs[0].text.strip()
} for i in range(len(tasks))]
file_path_with_unicode = 'output.jsonl'
with open(file_path_with_unicode, 'w', encoding='utf-8') as file:
    for entry in data:
        json.dump(entry, file, ensure_ascii=False)  # keep non-ASCII text readable
        file.write('\n')
print(f"Saved json {file_path_with_unicode} !")
```
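A minimal read-back sketch, not part of the original README, assuming the snippet above wrote output.jsonl to the working directory:

```python
import json

# One JSON object per line: {"task_id": ..., "output": ...}
with open("output.jsonl", encoding="utf-8") as f:
    rows = [json.loads(line) for line in f]

print(len(rows), "tasks")
print(rows[0]["task_id"], rows[0]["output"][:80])
```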
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
# Uploaded model
|
132 |
|
133 |
- **Developed by:** OsakanaTeishoku
|