import json
import os
import re

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

from config import MODEL_NAME
# Load environment variables from .env file
load_dotenv()
api_key = os.getenv("HUGGINGFACE_TOKEN")
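# Sanity check (an assumption, not part of the original script): fail fast with a
# clear message if the token is missing, instead of letting InferenceClient fail later.
if not api_key:
    raise RuntimeError("HUGGINGFACE_TOKEN is not set; add it to your .env file.")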
# Prompt configuration
SYSTEM_PROMPT = "你是一位精通英语和中文的教学专家,专门帮助用户,单词量为8000的语言学习者,将中文翻译成地道的英语表达,并提取重要词汇,比如,形容词,名词和动词,提供解释和用法说明。只用输出json格式的数据,确保json完整性和正确性,不要在开头加任何内容,并且注意json格式,control character。"
PROMPT_TEMPLATE = """请将以下中文翻译成英语,并提取重要词汇,比如,形容词,名词和动词,针对每个单词,提供解释和简单的造句说明:
中文文本:
<text>
{text}
</text>
请按照以下JSON格式返回:
{{
    "english": "英语翻译",
    "important_words": [
        {{
            "word_en": "重要英语单词或短语",
            "meaning_ch": "中文含义",
            "usage": "用法说明"
        }}
    ]
}}
"""
USER_SHOT_1 = "我昨天去公园散步,看到很多人在那里锻炼身体"
SHOT_1_RESPONSE = """
{
    "english": "I went for a walk in the park yesterday and saw many people exercising there.",
    "important_words": [
        {
            "word_en": "exercise",
            "meaning_ch": "锻炼身体",
            "usage": "e.g. I go to the gym to exercise every day."
        },
        {
            "word_en": "park",
            "meaning_ch": "公园",
            "usage": "e.g. I took a walk in the park on Sunday."
        },
        {
            "word_en": "stroll",
            "meaning_ch": "散步",
            "usage": "e.g. I like to stroll around the city to explore new places."
        }
    ]
}
"""
USER_SHOT_2 = "我昨天吃坏肚子了,一直在拉肚子。"
SHOT_2_RESPONSE = """{
    "english": "I had a stomachache yesterday and have been experiencing diarrhea ever since.",
    "important_words": [
        {
            "word_en": "stomachache",
            "meaning_ch": "肚子痛",
            "usage": "Example sentence: I woke up with a stomachache and couldn't eat anything."
        },
        {
            "word_en": "diarrhea",
            "meaning_ch": "腹泻",
            "usage": "Example sentence: She had diarrhea after eating spoiled food."
        }
    ]
}
"""

def message_builder(text):
    """Build the chat messages, including the few-shot examples."""
    messages = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT
        },
        {
            "role": "user",
            "content": PROMPT_TEMPLATE.format(text=USER_SHOT_1)
        },
        {
            "role": "assistant",
            "content": SHOT_1_RESPONSE
        },
        {
            "role": "user",
            "content": PROMPT_TEMPLATE.format(text=USER_SHOT_2)
        },
        {
            "role": "assistant",
            "content": SHOT_2_RESPONSE
        },
        {
            "role": "user",
            "content": PROMPT_TEMPLATE.format(text=text)
        }
    ]
    return messages

client = InferenceClient(api_key=api_key)

def get_llm_response(text) -> str:
    """Call the LLM and return the raw response content."""
    print("get_llm_response")
    messages = message_builder(text)
    llm_response = client.chat.completions.create(
        model=MODEL_NAME, messages=messages, max_tokens=1024, temperature=0.3)
    return llm_response.choices[0].message.content
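
def parse_llm_response(raw: str) -> dict:
    """Minimal post-processing sketch (the name and the fallback strategy are
    assumptions, not the project's confirmed implementation): turn the raw model
    output into a dict matching the prompt's JSON schema.

    The system prompt asks for pure JSON, so json.loads is tried first; if the
    model wraps the JSON in extra text, a regex pulls out the outermost {...} block.
    """
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        match = re.search(r"\{.*\}", raw, re.DOTALL)
        if match is None:
            raise ValueError("No JSON object found in the LLM response.")
        return json.loads(match.group(0))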

# main
if __name__ == "__main__":
    response = get_llm_response("我昨天去公园散步,看到很多人在那里锻炼身体")
    print(response)
    # import pprint
    # pprint.pprint(response)
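    # Hypothetical follow-up using the parse_llm_response sketch above:
    # parsed = parse_llm_response(response)
    # print(parsed["english"])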