OsakanaTeishoku committed on
Commit
daa0894
1 Parent(s): 4855037

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +48 -8
README.md CHANGED
@@ -62,7 +62,6 @@ import os
62
  os.environ["HF_TOKEN"] = "あなたのHugging Faceトークン"
63
 
64
  from vllm.lora.request import LoRARequest
65
-
66
  llm = vllm.LLM(
67
  MODEL_NAME, # "deepseek-ai/deepseek-math-7b-instruct"
68
  tensor_parallel_size=1, # 2, 4
@@ -79,15 +78,56 @@ llm = vllm.LLM(
79
  )
80
  tokenizer = llm.get_tokenizer()
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  ```
83
 
84
-
85
-
86
-
87
-
88
-
89
-
90
-
91
  # Uploaded model
92
 
93
  - **Developed by:** OsakanaTeishoku
 
62
  os.environ["HF_TOKEN"] = "あなたのHugging Faceトークン"
63
 
64
  from vllm.lora.request import LoRARequest
 
65
  llm = vllm.LLM(
66
  MODEL_NAME, # "deepseek-ai/deepseek-math-7b-instruct"
67
  tensor_parallel_size=1, # 2, 4
 
78
  )
79
  tokenizer = llm.get_tokenizer()
80
 
81
+ from transformers import AutoTokenizer
82
+ sft_tokenizer = AutoTokenizer.from_pretrained(
83
+ "weblab-GENIAC/Tanuki-8B-dpo-v1.0"
84
+ )
85
+ tokenizer.chat_template = sft_tokenizer.chat_template
86
+
87
+ from huggingface_hub import snapshot_download
88
+ lora_path = snapshot_download(repo_id="OsakanaTeishoku/1204lora")
89
+
90
+ from datasets import load_dataset
91
+ # jsonlファイルのパスを指定します。
92
+ data_files = {"test": "elyza-tasks-100-TV_0.jsonl"} # "your_jsonl_file.jsonl" を実際のファイル名に置き換えてください
93
+ # load_dataset関数を使用してデータを読み込みます。
94
+ tasks = load_dataset("json", data_files=data_files, split="test")
95
+ # データセットを確認します。
96
+
97
+ messages_list = [
98
+ [{"role": "user", "content": tasks["input"][i]}] for i in range(len(tasks))
99
+ ]
100
+ prompts = [line[0]["content"] for line in messages_list]
101
+ prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True) for messages in messages_list]
102
+ sampling_params = vllm.SamplingParams(
103
+ temperature=1.5,
104
+ max_tokens=1024,
105
+ repetition_penalty=1.05,
106
+ min_p=0.1,
107
+ )
108
+ outputs = llm.generate(
109
+ prompt_token_ids=prompt_token_ids,
110
+ sampling_params=sampling_params,
111
+ lora_request=LoRARequest("lora", 1, lora_path), # LoRA adapter
112
+ )
113
+ for prompt, response in zip(prompts, outputs):
114
+ print("prompt:", prompt)
115
+ print("output:", response.outputs[0].text.strip())
116
+ print("-"*80)
117
+ import json
118
+ data = [{
119
+ "task_id": i,
120
+ #"input": prompts[i],
121
+ "output": outputs[i].outputs[0].text.strip()
122
+ } for i in range(len(tasks))]
123
+ file_path_with_unicode = 'output.jsonl'
124
+ with open(file_path_with_unicode, 'w', encoding='utf-8') as file:
125
+ for entry in data:
126
+ json.dump(entry, file, ensure_ascii=False)
127
+ file.write('\n')
128
+ print(f"Saved json {file_path_with_unicode} !")
129
  ```
130
 
 
 
 
 
 
 
 
131
  # Uploaded model
132
 
133
  - **Developed by:** OsakanaTeishoku