# CC_and_Distractors/utils/generate_translation.py
import json
import re
from tqdm import tqdm
import os
import asyncio
from openai import AsyncOpenAI
from utils.api_utils import generate_from_openai_chat_completion, generate_from_claude_chat_completion
def construct_translate_prompt_textonly(question: str, options: list, text_only_analysis: str) -> str:
    """Build a prompt asking the model to translate a QA block and its
    text-only analysis into Chinese, with the answer returned as JSON.

    Args:
        question: The question text.
        options: Answer option strings; lettered A, B, C, ... in order.
        text_only_analysis: Analysis text to translate (may be empty).

    Returns:
        The fully formatted prompt string.
    """
    # Prefix each option with a capital letter: "A. ...", "B. ...", etc.
    optionized_list = [f"{chr(65 + i)}. {option}" for i, option in enumerate(options)]
    QA_str = question + "\n" + "\n".join(optionized_list)
    # FIX: the example JSON shown to the model must itself be valid JSON.
    # The original template was missing the colon after "ToA" and carried a
    # trailing comma — contradicting the prompt's own instruction and
    # inviting malformed output that json parsing downstream would reject.
    prompt = f"""
Please translate the following inputs into Chinese, ensuring they maintain a professional tone. If the input is empty, return an empty string.
Output the result in valid JSON format using the structure provided below. Be careful to avoid extra commas or missing quotation marks:
{{
"QA": "The translation of QA str",
"ToA": "The translation of text_only_analysis."
}}
Input:
QA: {QA_str}
text_only_analysis: {text_only_analysis}
"""
    return prompt
def prepare_q_text_input_translation(query, prompt_func=construct_translate_prompt_textonly):
    """Assemble the translation prompt for a single query record.

    Pulls the question, the seven answer options (``option_1`` ..
    ``option_7``), and the text-only example response out of *query*
    and feeds them to *prompt_func*.
    """
    option_values = [query[f"option_{k}"] for k in range(1, 8)]
    return prompt_func(
        question=query['question'],
        options=option_values,
        text_only_analysis=query['text_only_example_response'],
    )
def prepare_q_inputs_translation(queries):
    """Build one single-turn chat message list per query.

    Args:
        queries: Iterable of query dicts understood by
            ``prepare_q_text_input_translation``.

    Returns:
        A list parallel to *queries*; each element is an OpenAI-style
        message list containing a single user message with the prompt.
    """
    # The original loop called enumerate() but never used the index;
    # a comprehension states the 1:1 mapping directly.
    return [
        [{"role": "user", "content": prepare_q_text_input_translation(query)}]
        for query in queries
    ]
def generate_translation(model_name: str,
                         queries: list,
                         n: int = 1,
                         max_tokens: int = 2048):
    """Translate each query's QA string and text-only analysis into Chinese.

    Sends one chat-completion request per query (rate-limited to 30/min)
    and writes the translations back onto the query dicts in place.

    Args:
        model_name: One of the supported OpenAI model names (asserted).
        queries: Query dicts; mutated in place with the keys
            ``QA_translation`` and ``text_only_example_response_translation``.
        n: Number of completions requested per prompt.
        max_tokens: Completion token budget per request.

    Returns:
        The same *queries* list, with the translation keys filled in
        (empty strings when a request yielded no usable response).
    """
    assert model_name in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"], "Invalid model name"
    # NOTE(review): base_url is a hard-coded proxy gateway — confirm it is
    # still the intended endpoint before reusing this module elsewhere.
    client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"),
                         base_url="https://yanlp.zeabur.app/v1")
    messages = prepare_q_inputs_translation(queries)
    responses = asyncio.run(
        generate_from_openai_chat_completion(
            client,
            messages=messages,
            engine_name=model_name,
            n=n,
            max_tokens=max_tokens,
            requests_per_minute=30,
            json_format=True
        )
    )
    for query, response in zip(queries, responses):
        # A falsy response means the request failed or returned nothing.
        # The original tested the same condition twice (once per key);
        # merged here into a single branch with identical behavior.
        if response:
            query["QA_translation"] = response.get("QA", "")
            query["text_only_example_response_translation"] = response.get("ToA", "")
        else:
            query["QA_translation"] = ""
            query["text_only_example_response_translation"] = ""
    return queries