hiroki-rad
/

google-gemma-2-2b-128-ft-3000

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

hiroki-rad committed on Dec 15, 2024

Commit

4e8ab50

·

verified ·

1 Parent(s): 13e968d

Update README.md

Files changed (1) hide show

README.md +9 -3

README.md CHANGED Viewed

@@ -48,7 +48,12 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 def generate_text(data):
-    prompt = f"## 指示:以下の日本語の質問に対して、必ず日本語で回答してください。\n質問:{data.input}\n回答:"
     # 推論の実行
     input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
     # Remove token_type_ids from the input_ids
@@ -58,11 +63,12 @@ def generate_text(data):
         max_new_tokens=2048,
         do_sample=True,
         top_p=0.95,
-        temperature=0.2,
         repetition_penalty=1.1,
     )
-    return tokenizer.decode(outputs[0])
 results = []
 for d in tqdm(data.itertuples(), position=0):

 )
 def generate_text(data):
+    prompt = f"""## 指示:あなたは優秀な日本人の問題解決のエキスパートです。以下のステップで質問に取り組んでください：\n\n1. 質問の種類を特定する（事実確認/推論/創造的回答/計算など）\n2. 重要な情報や制約条件を抽出する\n3. 解決に必要なステップを明確にする\n4. 回答を組み立てる
+    質問をよく読んで、冷静に考え、考えをステップバイステップで考えをまとめてましょう。それをもう一度じっくり考えて、思考のプロセスを整理してください。質問に対して適切な回答を簡潔に出力してください。
+    質問:{data.input}\n回答:"""
     # 推論の実行
     input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
     # Remove token_type_ids from the input_ids
         max_new_tokens=2048,
         do_sample=True,
         top_p=0.95,
+        temperature=0.9,
         repetition_penalty=1.1,
     )
+    return tokenizer.decode(outputs[0][len(input_ids['input_ids'][0]):], skip_special_tokens=True)
 results = []
 for d in tqdm(data.itertuples(), position=0):