Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| import re | |
| import tempfile | |
| from statistics import mean | |
| from typing import Any, Dict, List, Optional, Tuple | |
| import requests | |
| from fastapi import FastAPI, File, Form, UploadFile | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
# Version string used for the FastAPI app metadata and echoed by the status payloads.
APP_VERSION = "5.0.0"
app = FastAPI(title="Japanese Role Interview API", version=APP_VERSION)
# CORS is fully open: every origin, method, and header is accepted, with
# credentialed requests disabled.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# -----------------------------
# Config
# -----------------------------
# All settings are read from the environment once, at import time.
# Hugging Face access token; when empty, the hosted ASR path is skipped.
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
# NOTE(review): HF_ROUTER_URL and LLM_TIMEOUT_SECONDS are not referenced by any
# code visible in this view — confirm whether they are still needed.
HF_ROUTER_URL = os.getenv("HF_ROUTER_URL", "https://router.huggingface.co/v1/chat/completions")
# Base URL for hosted inference; the ASR model id is appended to it.
HF_INFERENCE_BASE = os.getenv("HF_INFERENCE_BASE", "https://router.huggingface.co/hf-inference/models")
ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
# Only reported by the health payload in the code visible here.
CHAT_MODEL = os.getenv("CHAT_MODEL", "Qwen/Qwen2.5-7B-Instruct-1M")
# When true, local faster-whisper is tried first and the hosted API is the fallback.
USE_FASTER_WHISPER = os.getenv("USE_FASTER_WHISPER", "true").lower() in {"1", "true", "yes", "on"}
FASTER_WHISPER_MODEL = os.getenv("FASTER_WHISPER_MODEL", "small")
# Global cap applied on top of each role's own max_questions.
MAX_QUESTION_LIMIT = int(os.getenv("MAX_QUESTION_LIMIT", "20"))
# Timeout, in seconds, passed to the hosted ASR HTTP request.
ASR_TIMEOUT_SECONDS = int(os.getenv("ASR_TIMEOUT_SECONDS", "180"))
LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
# Prompts spoken when no transcript was obtained; selected by the running
# no-sound count, clamped to the last entry.
_REPEAT_PROMPTS = [
    "声が小さいです。もう少し大きい声で、もう一度お願いします。",
    "まだ音がはっきり聞こえません。マイクを確認して、もう一度お願いします。",
    "音がうまく入っていません。マイクを近づけて、もう一度お願いします。",
]
# Lazily created faster-whisper model, shared across requests once loaded.
_LOCAL_ASR_MODEL = None
# Question bank keyed by role id. Each role entry holds:
#   english_name / japanese_name : display labels
#   intro_jp                     : greeting spoken before the first question
#   min_questions / max_questions: bounds used when deciding to finish
#   expected_keywords            : words that earn a bonus when scoring answers
#   questions                    : ordered list of question dicts with
#       id     - identifier, unique within the role
#       theme  - topic label recorded in the session memory
#       stage  - "screening", "role", "followup", or "closing"
#       jp     - the Japanese text that is spoken
#       branch - "all", or "yes_exp" / "no_exp" to restrict the question to
#                candidates with / without prior experience
ROLE_BANK: Dict[str, Dict[str, Any]] = {
    "construction": {
        "english_name": "Construction",
        "japanese_name": "建設",
        "intro_jp": "こんにちは。建設の仕事の面接練習を始めます。よろしくお願いします。",
        "min_questions": 3,
        "max_questions": 20,
        "expected_keywords": ["安全", "ヘルメット", "現場", "工具", "体力", "チーム", "ルール"],
        "questions": [
            {"id":"name","theme":"intro","stage":"screening","jp":"お名前を教えてください。","branch":"all"},
            {"id":"country","theme":"intro","stage":"screening","jp":"どこの国から来ましたか。","branch":"all"},
            {"id":"reason","theme":"motivation","stage":"screening","jp":"日本へ行きたい理由は何ですか。","branch":"all"},
            {"id":"japanese","theme":"language","stage":"screening","jp":"日本語はどのくらい勉強しましたか。","branch":"all"},
            {"id":"ready_check","theme":"intro","stage":"screening","jp":"面接の準備はできていますか。","branch":"all"},
            {"id":"experience_gate","theme":"experience","stage":"role","jp":"建設の仕事をしたことがありますか。","branch":"all"},
            {"id":"exp_yes_detail","theme":"experience","stage":"role","jp":"どんな建設の仕事をしましたか。","branch":"yes_exp"},
            {"id":"exp_years","theme":"experience","stage":"role","jp":"その仕事は何年ぐらいしましたか。","branch":"yes_exp"},
            {"id":"exp_no_motivation","theme":"motivation","stage":"role","jp":"経験がなくても、建設の仕事を勉強してがんばれますか。","branch":"no_exp"},
            {"id":"physical","theme":"role_fit","stage":"role","jp":"体力が必要な仕事ですが、大丈夫ですか。","branch":"all"},
            {"id":"safety","theme":"safety","stage":"role","jp":"危ない場所で働くとき、何に気をつけますか。","branch":"all"},
            {"id":"teamwork","theme":"teamwork","stage":"role","jp":"チームで仕事をするとき、大切なことは何ですか。","branch":"all"},
            {"id":"tools","theme":"role_fit","stage":"followup","jp":"工具を使う仕事に興味はありますか。","branch":"all"},
            {"id":"morning","theme":"schedule","stage":"followup","jp":"朝早い仕事や外の仕事はできますか。","branch":"all"},
            {"id":"report","theme":"teamwork","stage":"followup","jp":"分からないとき、先輩にすぐ相談できますか。","branch":"all"},
            {"id":"mistake","theme":"reliability","stage":"followup","jp":"仕事でミスをしたら、どうしますか。","branch":"all"},
            {"id":"strength","theme":"personality","stage":"followup","jp":"建設の仕事に向いている自分の長所を一つ話してください。","branch":"all"},
            {"id":"closing","theme":"closing","stage":"closing","jp":"本日の建設の面接練習はここまでです。ご参加ありがとうございました。","branch":"all"},
        ],
    },
    "restaurant_konbini": {
        "english_name": "Restaurant / Konbini",
        "japanese_name": "外食・コンビニ",
        "intro_jp": "こんにちは。外食・コンビニの仕事の面接練習を始めます。よろしくお願いします。",
        "min_questions": 3,
        "max_questions": 20,
        "expected_keywords": ["接客", "レジ", "お客様", "笑顔", "ていねい", "品出し", "掃除"],
        "questions": [
            {"id":"name","theme":"intro","stage":"screening","jp":"お名前を教えてください。","branch":"all"},
            {"id":"country","theme":"intro","stage":"screening","jp":"どこの国から来ましたか。","branch":"all"},
            {"id":"reason","theme":"motivation","stage":"screening","jp":"日本へ行きたい理由は何ですか。","branch":"all"},
            {"id":"japanese","theme":"language","stage":"screening","jp":"日本語はどのくらい勉強しましたか。","branch":"all"},
            {"id":"ready_check","theme":"intro","stage":"screening","jp":"面接の準備はできていますか。","branch":"all"},
            {"id":"experience_gate","theme":"experience","stage":"role","jp":"レストランやコンビニで働いた経験はありますか。","branch":"all"},
            {"id":"exp_yes_detail","theme":"experience","stage":"role","jp":"どんな仕事をしましたか。レジ、接客、品出しなどを話してください。","branch":"yes_exp"},
            {"id":"exp_no_motivation","theme":"motivation","stage":"role","jp":"経験がなくても、接客を勉強する気持ちはありますか。","branch":"no_exp"},
            {"id":"customer","theme":"service","stage":"role","jp":"お客様には、どんな話し方をしたいですか。","branch":"all"},
            {"id":"busy","theme":"role_fit","stage":"role","jp":"忙しい時間でも落ち着いて働けますか。","branch":"all"},
            {"id":"cleanliness","theme":"service","stage":"role","jp":"お店の清潔さは大切ですか。なぜですか。","branch":"all"},
            {"id":"shift","theme":"schedule","stage":"followup","jp":"立ち仕事やシフト勤務は大丈夫ですか。","branch":"all"},
            {"id":"mistake","theme":"reliability","stage":"followup","jp":"注文やレジで間違えたら、どうしますか。","branch":"all"},
            {"id":"teamwork","theme":"teamwork","stage":"followup","jp":"ほかのスタッフと協力できますか。","branch":"all"},
            {"id":"strength","theme":"personality","stage":"followup","jp":"この仕事に向いている自分の長所を一つ話してください。","branch":"all"},
            {"id":"closing","theme":"closing","stage":"closing","jp":"本日の外食・コンビニの面接練習はここまでです。ご参加ありがとうございました。","branch":"all"},
        ],
    },
    "nursing_care": {
        "english_name": "Nursing Care",
        "japanese_name": "介護",
        "intro_jp": "こんにちは。介護の仕事の面接練習を始めます。よろしくお願いします。",
        "min_questions": 3,
        "max_questions": 20,
        "expected_keywords": ["介護", "やさしい", "利用者", "お年寄り", "清潔", "手伝う", "責任"],
        "questions": [
            {"id":"name","theme":"intro","stage":"screening","jp":"お名前を教えてください。","branch":"all"},
            {"id":"country","theme":"intro","stage":"screening","jp":"どこの国から来ましたか。","branch":"all"},
            {"id":"reason","theme":"motivation","stage":"screening","jp":"日本へ行きたい理由は何ですか。","branch":"all"},
            {"id":"japanese","theme":"language","stage":"screening","jp":"日本語はどのくらい勉強しましたか。","branch":"all"},
            {"id":"ready_check","theme":"intro","stage":"screening","jp":"面接の準備はできていますか。","branch":"all"},
            {"id":"experience_gate","theme":"experience","stage":"role","jp":"介護の仕事をしたことがありますか。","branch":"all"},
            {"id":"exp_yes_detail","theme":"experience","stage":"role","jp":"どんな介護の仕事をしましたか。","branch":"yes_exp"},
            {"id":"exp_no_motivation","theme":"motivation","stage":"role","jp":"経験がなくても、介護の勉強をしてがんばれますか。","branch":"no_exp"},
            {"id":"kindness","theme":"service","stage":"role","jp":"お年寄りや利用者さんに、やさしく話せますか。","branch":"all"},
            {"id":"cleanliness","theme":"safety","stage":"role","jp":"介護の仕事で清潔さは大切ですか。なぜですか。","branch":"all"},
            {"id":"communication","theme":"service","stage":"role","jp":"利用者さんが困っていたら、まず何をしますか。","branch":"all"},
            {"id":"hard_work","theme":"role_fit","stage":"followup","jp":"大変な仕事でも、落ち着いて続けられますか。","branch":"all"},
            {"id":"teamwork","theme":"teamwork","stage":"followup","jp":"スタッフと協力できますか。","branch":"all"},
            {"id":"report","theme":"teamwork","stage":"followup","jp":"報告・連絡・相談はできますか。","branch":"all"},
            {"id":"strength","theme":"personality","stage":"followup","jp":"介護の仕事に向いている自分の長所を一つ話してください。","branch":"all"},
            {"id":"closing","theme":"closing","stage":"closing","jp":"本日の介護の面接練習はここまでです。ご参加ありがとうございました。","branch":"all"},
        ],
    },
    "hotel_accommodation": {
        "english_name": "Hotel / Accommodation",
        "japanese_name": "宿泊",
        "intro_jp": "こんにちは。宿泊・ホテルの仕事の面接練習を始めます。よろしくお願いします。",
        "min_questions": 3,
        "max_questions": 20,
        "expected_keywords": ["ホテル", "お客様", "ていねい", "笑顔", "掃除", "フロント", "案内"],
        "questions": [
            {"id":"name","theme":"intro","stage":"screening","jp":"お名前を教えてください。","branch":"all"},
            {"id":"country","theme":"intro","stage":"screening","jp":"どこの国から来ましたか。","branch":"all"},
            {"id":"reason","theme":"motivation","stage":"screening","jp":"日本へ行きたい理由は何ですか。","branch":"all"},
            {"id":"japanese","theme":"language","stage":"screening","jp":"日本語はどのくらい勉強しましたか。","branch":"all"},
            {"id":"ready_check","theme":"intro","stage":"screening","jp":"面接の準備はできていますか。","branch":"all"},
            {"id":"experience_gate","theme":"experience","stage":"role","jp":"ホテルや宿泊の仕事をしたことがありますか。","branch":"all"},
            {"id":"exp_yes_detail","theme":"experience","stage":"role","jp":"どんな仕事をしましたか。フロント、掃除、ベッドメイクなどを話してください。","branch":"yes_exp"},
            {"id":"exp_no_motivation","theme":"motivation","stage":"role","jp":"経験がなくても、ホテルの仕事を勉強してがんばれますか。","branch":"no_exp"},
            {"id":"customer","theme":"service","stage":"role","jp":"お客様に話すとき、どんなことを大切にしますか。","branch":"all"},
            {"id":"cleanliness","theme":"service","stage":"role","jp":"部屋の掃除や整理整頓は好きですか。","branch":"all"},
            {"id":"busy","theme":"role_fit","stage":"role","jp":"忙しい時間でも落ち着いて働けますか。","branch":"all"},
            {"id":"teamwork","theme":"teamwork","stage":"followup","jp":"スタッフと協力して働くことはできますか。","branch":"all"},
            {"id":"shift","theme":"schedule","stage":"followup","jp":"夜や朝のシフトは大丈夫ですか。","branch":"all"},
            {"id":"strength","theme":"personality","stage":"followup","jp":"ホテルの仕事に向いている自分の長所を一つ話してください。","branch":"all"},
            {"id":"closing","theme":"closing","stage":"closing","jp":"本日の宿泊の面接練習はここまでです。ご参加ありがとうございました。","branch":"all"},
        ],
    },
    "agriculture": {
        "english_name": "Agriculture",
        "japanese_name": "農業",
        "intro_jp": "こんにちは。農業の仕事の面接練習を始めます。よろしくお願いします。",
        "min_questions": 3,
        "max_questions": 20,
        "expected_keywords": ["農業", "畑", "体力", "朝", "収穫", "外", "時間"],
        "questions": [
            {"id":"name","theme":"intro","stage":"screening","jp":"お名前を教えてください。","branch":"all"},
            {"id":"country","theme":"intro","stage":"screening","jp":"どこの国から来ましたか。","branch":"all"},
            {"id":"reason","theme":"motivation","stage":"screening","jp":"日本へ行きたい理由は何ですか。","branch":"all"},
            {"id":"japanese","theme":"language","stage":"screening","jp":"日本語はどのくらい勉強しましたか。","branch":"all"},
            {"id":"ready_check","theme":"intro","stage":"screening","jp":"面接の準備はできていますか。","branch":"all"},
            {"id":"experience_gate","theme":"experience","stage":"role","jp":"農業の仕事をしたことがありますか。","branch":"all"},
            {"id":"exp_yes_detail","theme":"experience","stage":"role","jp":"どんな農業の仕事をしましたか。","branch":"yes_exp"},
            {"id":"exp_no_motivation","theme":"motivation","stage":"role","jp":"経験がなくても、農業を勉強してがんばれますか。","branch":"no_exp"},
            {"id":"outside_work","theme":"role_fit","stage":"role","jp":"外で長い時間働くことは大丈夫ですか。","branch":"all"},
            {"id":"early_morning","theme":"schedule","stage":"role","jp":"朝早い仕事でも時間を守れますか。","branch":"all"},
            {"id":"physical","theme":"role_fit","stage":"role","jp":"体力に自信はありますか。","branch":"all"},
            {"id":"weather","theme":"role_fit","stage":"followup","jp":"暑い日や寒い日でも、まじめに働けますか。","branch":"all"},
            {"id":"teamwork","theme":"teamwork","stage":"followup","jp":"ほかの人と一緒に働けますか。","branch":"all"},
            {"id":"strength","theme":"personality","stage":"followup","jp":"農業の仕事に向いている自分の長所を一つ話してください。","branch":"all"},
            {"id":"closing","theme":"closing","stage":"closing","jp":"本日の農業の面接練習はここまでです。ご参加ありがとうございました。","branch":"all"},
        ],
    },
    "manufacturing": {
        "english_name": "Manufacturing",
        "japanese_name": "製造業",
        "intro_jp": "こんにちは。製造業の仕事の面接練習を始めます。よろしくお願いします。",
        "min_questions": 3,
        "max_questions": 20,
        "expected_keywords": ["工場", "安全", "正確", "確認", "時間", "ルール", "集中"],
        "questions": [
            {"id":"name","theme":"intro","stage":"screening","jp":"お名前を教えてください。","branch":"all"},
            {"id":"country","theme":"intro","stage":"screening","jp":"どこの国から来ましたか。","branch":"all"},
            {"id":"reason","theme":"motivation","stage":"screening","jp":"日本へ行きたい理由は何ですか。","branch":"all"},
            {"id":"japanese","theme":"language","stage":"screening","jp":"日本語はどのくらい勉強しましたか。","branch":"all"},
            {"id":"ready_check","theme":"intro","stage":"screening","jp":"面接の準備はできていますか。","branch":"all"},
            {"id":"experience_gate","theme":"experience","stage":"role","jp":"工場や製造の仕事をしたことがありますか。","branch":"all"},
            {"id":"exp_yes_detail","theme":"experience","stage":"role","jp":"どんな製造の仕事をしましたか。","branch":"yes_exp"},
            {"id":"exp_no_motivation","theme":"motivation","stage":"role","jp":"経験がなくても、製造の仕事を勉強してがんばれますか。","branch":"no_exp"},
            {"id":"accuracy","theme":"role_fit","stage":"role","jp":"ミスを少なくするために、何を大切にしますか。","branch":"all"},
            {"id":"safety","theme":"safety","stage":"role","jp":"機械を使うとき、安全のために何をしますか。","branch":"all"},
            {"id":"time","theme":"schedule","stage":"role","jp":"時間を守って、同じ作業を続けることはできますか。","branch":"all"},
            {"id":"quality","theme":"reliability","stage":"followup","jp":"品質を守ることは大切ですか。なぜですか。","branch":"all"},
            {"id":"teamwork","theme":"teamwork","stage":"followup","jp":"チームで協力できますか。","branch":"all"},
            {"id":"strength","theme":"personality","stage":"followup","jp":"製造業の仕事に向いている自分の長所を一つ話してください。","branch":"all"},
            {"id":"closing","theme":"closing","stage":"closing","jp":"本日の製造業の面接練習はここまでです。ご参加ありがとうございました。","branch":"all"},
        ],
    },
}
class StartRequest(BaseModel):
    """Request body used to begin an interview session."""

    # Client-chosen session identifier; echoed back unchanged in the response.
    session_uuid: str
    # Requested role key or alias; unrecognised values fall back to "construction".
    job_role: str = "construction"
def root() -> Dict[str, Any]:
    """Return a small service descriptor: name, version, and advertised paths.

    NOTE(review): no route decorator is visible on this function in this
    view — confirm how it is registered with ``app``.
    """
    descriptor: Dict[str, Any] = {"ok": True}
    descriptor["service"] = "jp-role-interview"
    descriptor["version"] = APP_VERSION
    descriptor["routes"] = ["/health", "/roles", "/start", "/answer"]
    return descriptor
def health() -> Dict[str, Any]:
    """Report service status and the effective ASR / chat configuration.

    NOTE(review): no route decorator is visible on this function in this
    view — confirm how it is registered with ``app``.
    """
    # The label names the order in which the ASR backends are attempted.
    backend_label = "faster_whisper_then_hf_api" if USE_FASTER_WHISPER else "hf_api"
    status: Dict[str, Any] = {
        "ok": True,
        "service": "jp-role-interview",
        "version": APP_VERSION,
    }
    status["hf_token_set"] = bool(HF_TOKEN)
    status["asr_backend"] = backend_label
    status["chat_model"] = CHAT_MODEL
    status["role_count"] = len(ROLE_BANK)
    return status
def roles() -> Dict[str, Any]:
    """List every configured role with its labels and question bounds.

    NOTE(review): no route decorator is visible on this function in this
    view — confirm how it is registered with ``app``.
    """
    summary_fields = ("english_name", "japanese_name", "min_questions", "max_questions")
    catalog: List[Dict[str, Any]] = []
    for role_key, role_cfg in ROLE_BANK.items():
        entry: Dict[str, Any] = {"key": role_key}
        for field in summary_fields:
            entry[field] = role_cfg[field]
        catalog.append(entry)
    return {"ok": True, "roles": catalog}
def start_interview(payload: StartRequest) -> Dict[str, Any]:
    """Open an interview session and return the first question.

    The requested role is resolved (unknown roles fall back to the default),
    a fresh ``memory`` dict is built, and the role greeting followed by the
    "name" question is returned as the text to speak.

    NOTE(review): no route decorator is visible on this function in this
    view — confirm how it is registered with ``app``.
    """
    role_key = normalize_role_key(payload.job_role)
    role_cfg = ROLE_BANK[role_key]
    english = role_cfg["english_name"]
    japanese = role_cfg["japanese_name"]
    first_q = get_question(role_cfg, "name")

    # Session state; the /answer handler reads it back from its memory_json field.
    memory: Dict[str, Any] = {
        "job_role": role_key,
        "job_role_en": english,
        "job_role_jp": japanese,
    }
    # Profile slots start empty and are filled in from later answers.
    memory.update(dict.fromkeys((
        "candidate_name",
        "country_name",
        "age",
        "reason_for_japan",
        "occupation",
        "japanese_level",
    )))
    memory.update({
        "experience_state": "unknown",
        "answers_so_far": [],
        "asked_question_ids": [first_q["id"]],
        "asked_themes": [first_q["theme"]],
        "low_score_streak": 0,
        "no_sound_count": 0,
        "min_questions": role_cfg["min_questions"],
        # The role's own ceiling never exceeds the global limit.
        "max_questions": min(role_cfg["max_questions"], MAX_QUESTION_LIMIT),
        "auto_question_mode": True,
    })

    response: Dict[str, Any] = {
        "ok": True,
        "session_uuid": payload.session_uuid,
        "job_role": role_key,
        "job_role_label": f"{english} / {japanese}",
        "question_no": 1,
        "question_id": first_q["id"],
        "question_jp": first_q["jp"],
        "speech_text_jp": f"{role_cfg['intro_jp']} {first_q['jp']}",
        "memory": memory,
        "is_finished": False,
        "speak_now": True,
    }
    return response
async def answer_interview(
    session_uuid: str = Form(...),
    question_no: int = Form(...),
    question_id: str = Form(...),
    question_jp: str = Form(...),
    memory_json: str = Form("{}"),
    audio: UploadFile = File(...),
) -> Dict[str, Any]:
    """Transcribe and score one spoken answer, then pick the next step.

    Args:
        session_uuid: Session identifier, echoed back in the response.
        question_no: 1-based number of the question being answered.
        question_id: Identifier of the question being answered.
        question_jp: Japanese text of the question being answered.
        memory_json: JSON-encoded session memory; invalid or non-object JSON
            is treated as an empty memory.
        audio: Uploaded recording of the answer.

    Returns:
        One of three payload shapes: a repeat request (``needs_repeat``) when
        nothing was transcribed, a final payload (``is_finished`` true, with
        ``result``) when the interview ends, or the scored answer together
        with the next question.

    NOTE(review): no route decorator is visible on this function in this
    view — confirm how it is registered with ``app``.
    """
    memory = safe_json_loads(memory_json)
    role_key = normalize_role_key(memory.get("job_role"))
    role_cfg = ROLE_BANK[role_key]

    # memory_json defaults to "{}" and is client-supplied, so the bookkeeping
    # lists may be absent or malformed; normalise them instead of failing
    # later with a KeyError.
    for list_key in ("asked_question_ids", "asked_themes"):
        current = memory.get(list_key)
        memory[list_key] = list(current) if isinstance(current, list) else []
    # The question being answered must count as asked so it is not selected again.
    if question_id not in memory["asked_question_ids"]:
        memory["asked_question_ids"].append(question_id)

    transcript, asr_backend, asr_error = await transcribe_upload(audio)

    if not transcript.strip():
        memory["no_sound_count"] = int(memory.get("no_sound_count") or 0) + 1
        name = memory.get("candidate_name")
        spoken = build_repeat_prompt(name, memory["no_sound_count"])
        # Give up after two silent attempts in a row, but only once the
        # minimum number of questions has been reached.
        should_finish = memory["no_sound_count"] >= 2 and question_no >= role_cfg["min_questions"]
        if should_finish:
            result = build_final_result(role_cfg, memory, force_fail=True, summary_jp="音声が聞こえないため、面接を終了しました。")
            return {
                "ok": True,
                "is_finished": True,
                "session_uuid": session_uuid,
                "job_role": role_key,
                "transcript_jp": "",
                "answer_score": 0,
                "feedback_jp": spoken,
                "speech_text_jp": spoken,
                "memory": memory,
                "asr_backend": asr_backend,
                "asr_error": asr_error,
                "result": result,
            }
        # Otherwise ask the same question again.
        return {
            "ok": True,
            "is_finished": False,
            "needs_repeat": True,
            "session_uuid": session_uuid,
            "job_role": role_key,
            "question_no": question_no,
            "question_id": question_id,
            "question_jp": question_jp,
            "speech_text_jp": spoken,
            "transcript_jp": "",
            "answer_score": 0,
            "feedback_jp": spoken,
            "memory": memory,
            "next_question_no": question_no,
            "next_question_id": question_id,
            "next_question_jp": question_jp,
            "asr_backend": asr_backend,
            "asr_error": asr_error,
            "speak_now": True,
        }

    memory["no_sound_count"] = 0
    profile_update = maybe_extract_basic_profile(memory, transcript, question_id)
    memory = merge_memory(memory, profile_update)

    score = score_answer(role_cfg, question_id, transcript)
    feedback = build_feedback(score)
    answers = list(memory.get("answers_so_far") or [])
    answers.append({
        "question_no": question_no,
        "question_id": question_id,
        "question_jp": question_jp,
        "answer_text_jp": transcript,
        "answer_score": score,
        "feedback_jp": feedback,
    })
    memory["answers_so_far"] = answers

    # Track consecutive weak answers; decide_finish reads this streak.
    if score <= 3:
        memory["low_score_streak"] = int(memory.get("low_score_streak") or 0) + 1
    else:
        memory["low_score_streak"] = 0

    should_finish = decide_finish(role_cfg, memory, question_no, score)
    next_q: Optional[Dict[str, Any]] = None
    if not should_finish:
        next_q = select_next_question(role_cfg, memory)
        # select_next_question falls back to the closing entry once nothing is
        # left to ask. That entry is a farewell statement, not a question, so
        # reaching it ends the interview instead of being served as a prompt.
        should_finish = next_q.get("stage") == "closing"

    if should_finish or next_q is None:
        result = build_final_result(role_cfg, memory)
        return {
            "ok": True,
            "is_finished": True,
            "session_uuid": session_uuid,
            "job_role": role_key,
            "question_no": question_no,
            "transcript_jp": transcript,
            "answer_score": score,
            "feedback_jp": feedback,
            "speech_text_jp": result["closing_message_jp"],
            "memory": memory,
            "asr_backend": asr_backend,
            "asr_error": asr_error,
            "result": result,
        }

    next_no = question_no + 1
    if next_q["id"] not in memory["asked_question_ids"]:
        memory["asked_question_ids"].append(next_q["id"])
    if next_q["theme"] not in memory["asked_themes"]:
        memory["asked_themes"].append(next_q["theme"])

    spoken_next = next_q["jp"]
    # Address the candidate by name once, right after the name is known.
    if next_q["id"] == "ready_check" and memory.get("candidate_name"):
        spoken_next = f"{memory['candidate_name']}さん、ありがとうございます。{next_q['jp']}"

    return {
        "ok": True,
        "is_finished": False,
        "session_uuid": session_uuid,
        "job_role": role_key,
        "question_no": question_no,
        "transcript_jp": transcript,
        "answer_score": score,
        "feedback_jp": feedback,
        "speech_text_jp": spoken_next,
        "memory": memory,
        "asr_backend": asr_backend,
        "asr_error": asr_error,
        "next_question_no": next_no,
        "next_question_id": next_q["id"],
        "next_question_jp": next_q["jp"],
        "speak_now": True,
    }
def normalize_role_key(value: Any) -> str:
    """Map a user-supplied role name or alias onto a key of ``ROLE_BANK``.

    Empty values and unrecognised names resolve to ``"construction"``.
    """
    requested = str(value or "construction").strip().lower()
    # Short aliases grouped under the canonical key they stand for.
    alias_groups = {
        "restaurant_konbini": ("restaurant", "konbini"),
        "nursing_care": ("nursing", "care"),
        "hotel_accommodation": ("hotel", "accommodation"),
    }
    for canonical, short_names in alias_groups.items():
        if requested in short_names:
            requested = canonical
            break
    if requested in ROLE_BANK:
        return requested
    return "construction"
def get_question(role_cfg: Dict[str, Any], qid: str) -> Dict[str, Any]:
    """Return the role's question whose id equals ``qid``.

    When no question matches, the role's first question is returned instead,
    so callers always receive a question dict.
    """
    candidates = role_cfg["questions"]
    matches = (entry for entry in candidates if entry["id"] == qid)
    return next(matches, candidates[0])
def build_repeat_prompt(name: Optional[str], count: int) -> str:
    """Pick the "please repeat" prompt for the ``count``-th silent attempt.

    ``count`` is 1-based and clamped to the available prompts; when ``name``
    is set, the prompt is prefixed with the candidate's name.
    """
    last_index = len(_REPEAT_PROMPTS) - 1
    position = count - 1
    if position > last_index:
        position = last_index
    if position < 0:
        position = 0
    prompt = _REPEAT_PROMPTS[position]
    return f"{name}さん、{prompt}" if name else prompt
async def transcribe_upload(audio: UploadFile) -> Tuple[str, str, Optional[str]]:
    """Transcribe an uploaded answer with the configured ASR backends.

    Local faster-whisper is tried first when enabled; the hosted API is used
    as a fallback (or as the only backend) when a token is configured.

    Returns:
        ``(text, backend_label, error)``. ``text`` is whitespace-normalised
        and empty on failure. ``error`` is ``None`` on a clean run; when the
        hosted fallback succeeds it still carries the local backend's error.
    """
    import asyncio  # function-scope import keeps this block self-contained

    content = await audio.read()
    filename = audio.filename or "answer.webm"
    loop = asyncio.get_running_loop()

    async def _transcribe_off_loop(transcriber: Any) -> str:
        # Both transcribers block (local inference / a synchronous HTTP call
        # with a long timeout). Running them in the default executor keeps
        # the event loop free to serve other requests in the meantime.
        raw = await loop.run_in_executor(None, transcriber, content, filename)
        return normalize_text(raw)

    if USE_FASTER_WHISPER:
        try:
            text = await _transcribe_off_loop(transcribe_with_faster_whisper)
            return text, "faster_whisper", None
        except Exception as exc:
            local_error = str(exc)
            if not HF_TOKEN:
                return "", "faster_whisper_failed", local_error
            try:
                text = await _transcribe_off_loop(transcribe_with_hf_api)
                return text, "hf_api_fallback", local_error
            except Exception as exc2:
                return "", "hf_api_fallback_failed", f"{local_error} | {exc2}"

    if HF_TOKEN:
        try:
            text = await _transcribe_off_loop(transcribe_with_hf_api)
            return text, "hf_api", None
        except Exception as exc:
            return "", "hf_api_failed", str(exc)

    return "", "no_asr_backend", "Neither faster-whisper nor HF API is available."
def transcribe_with_faster_whisper(content: bytes, filename: str) -> str:
    """Transcribe audio bytes as Japanese with a local faster-whisper model.

    The bytes are written to a temporary file (keeping the upload's
    extension, default ``.webm``) that is removed afterwards. The model is
    created on first use and kept in ``_LOCAL_ASR_MODEL``.
    """
    global _LOCAL_ASR_MODEL
    from faster_whisper import WhisperModel  # lazy import

    extension = os.path.splitext(filename)[1] or ".webm"
    with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as scratch:
        scratch.write(content)
        scratch_path = scratch.name

    try:
        if _LOCAL_ASR_MODEL is None:
            _LOCAL_ASR_MODEL = WhisperModel(FASTER_WHISPER_MODEL, device="cpu", compute_type="int8")
        segments, _info = _LOCAL_ASR_MODEL.transcribe(scratch_path, language="ja", vad_filter=True)
        pieces = [segment.text.strip() for segment in segments]
        return " ".join(pieces).strip()
    finally:
        # Best-effort cleanup; a failed delete must not mask the result.
        try:
            os.remove(scratch_path)
        except OSError:
            pass
def transcribe_with_hf_api(content: bytes, filename: str) -> str:
    """Send audio bytes to the hosted ASR model and return the transcript.

    Raises whatever ``requests`` raises on transport errors or a non-2xx
    status. Returns ``""`` when the JSON response has no usable text.
    """
    endpoint = f"{HF_INFERENCE_BASE}/{ASR_MODEL}"
    request_headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": guess_mime_type(filename),
    }
    response = requests.post(endpoint, headers=request_headers, data=content, timeout=ASR_TIMEOUT_SECONDS)
    response.raise_for_status()
    payload = response.json()

    # Object form: prefer "text", then "generated_text".
    if isinstance(payload, dict):
        return str(payload.get("text") or payload.get("generated_text") or "")

    # List form: only the first element's "text" is considered.
    first_item = payload[0] if isinstance(payload, list) and payload else None
    if isinstance(first_item, dict):
        return str(first_item.get("text") or "")
    return ""
def guess_mime_type(filename: str) -> str:
    """Return the audio MIME type implied by the filename's extension.

    Matching is case-insensitive; anything unrecognised is ``audio/webm``.
    """
    by_suffix = (
        (".wav", "audio/wav"),
        (".mp3", "audio/mpeg"),
        (".m4a", "audio/mp4"),
        (".ogg", "audio/ogg"),
    )
    lowered = (filename or "").lower()
    for suffix, mime in by_suffix:
        if lowered.endswith(suffix):
            return mime
    return "audio/webm"
def maybe_extract_basic_profile(memory: Dict[str, Any], transcript: str, question_id: str) -> Dict[str, Any]:
    """Derive profile fields from one answer, keyed by the question asked.

    Returns only the fields to add; ``memory`` is not modified. Fields that
    are already set are left alone, except ``experience_state``, which is
    re-evaluated on every answer to the experience gate. An age is looked
    for in every answer until one is stored.
    """
    text = normalize_text(transcript)
    found: Dict[str, Any] = {}
    long_enough = len(text) >= 4

    if question_id == "name":
        if not memory.get("candidate_name"):
            candidate = extract_name(text)
            if candidate:
                found["candidate_name"] = candidate
    elif question_id == "country":
        if not memory.get("country_name"):
            origin = extract_country(text)
            if origin:
                found["country_name"] = origin
    elif question_id == "reason":
        if not memory.get("reason_for_japan") and long_enough:
            found["reason_for_japan"] = text[:120]
    elif question_id == "japanese":
        if not memory.get("japanese_level") and long_enough:
            found["japanese_level"] = text[:120]
    elif question_id == "experience_gate":
        previous = memory.get("experience_state", "unknown")
        found["experience_state"] = detect_experience_state(text, previous)

    years = extract_age(text)
    if years and not memory.get("age"):
        found["age"] = years
    return found
def detect_experience_state(text: str, current: str) -> str:
    """Classify an answer to the experience question as "yes" or "no".

    Args:
        text: The transcribed answer.
        current: The previously known state, kept when the answer is unclear.

    Returns:
        ``"yes"``, ``"no"``, or — when no marker is found — ``current`` if it
        is already ``"yes"``/``"no"``, otherwise ``"unknown"``.
    """
    text = text or ""
    no_markers = ["ありません", "ないです", "経験がありません", "したことがありません"]
    yes_markers = ["あります", "しました", "経験があります", "働いたことがあります"]
    # Negatives are tested first: a "no" answer often continues with a polite
    # positive clause ("経験はありませんが、興味があります"), which would
    # otherwise trip the generic positive markers.
    if any(marker in text for marker in no_markers):
        return "no"
    if any(marker in text for marker in yes_markers):
        return "yes"
    return current if current in {"yes", "no"} else "unknown"
def select_next_question(role_cfg: Dict[str, Any], memory: Dict[str, Any]) -> Dict[str, Any]:
    """Choose the next question to ask for this session.

    Order of preference:
      1. the fixed screening sequence,
      2. the follow-ups for the detected experience branch,
      3. a short list of simpler questions while the running average is low,
      4. the remaining questions in bank order, skipping branch-restricted
         questions that do not apply.

    The role's closing entry is returned when nothing is left to ask.

    Args:
        role_cfg: One role entry (uses its ``questions`` list).
        memory: Session memory (uses ``asked_question_ids``,
            ``experience_state`` and ``answers_so_far``).
    """
    questions = role_cfg["questions"]
    # First occurrence wins when an id is repeated.
    by_id: Dict[str, Dict[str, Any]] = {}
    for question in questions:
        by_id.setdefault(question["id"], question)

    asked_ids = set(memory.get("asked_question_ids") or [])
    experience_state = memory.get("experience_state", "unknown")
    answers = memory.get("answers_so_far") or []
    avg = mean([a.get("answer_score", 0) for a in answers]) if answers else 0

    def first_unasked(candidate_ids: List[str]) -> Optional[Dict[str, Any]]:
        # Ids this role does not define are skipped outright. Resolving them
        # to a default question would make a missing id stand in for the
        # first ("name") question.
        for qid in candidate_ids:
            question = by_id.get(qid)
            if question is not None and qid not in asked_ids:
                return question
        return None

    # fixed screening order
    picked = first_unasked(["country", "reason", "japanese", "ready_check", "experience_gate"])
    if picked is not None:
        return picked

    # branch by experience
    if experience_state == "yes":
        branch_order = ["exp_yes_detail", "exp_years"]
    elif experience_state == "no":
        branch_order = ["exp_no_motivation"]
    else:
        branch_order = []
    picked = first_unasked(branch_order)
    if picked is not None:
        return picked

    # weaker user gets simpler role questions first
    if avg < 4.5:
        simple_ids = ["physical", "busy", "kindness", "cleanliness", "outside_work", "time", "customer", "safety", "teamwork"]
        picked = first_unasked(simple_ids)
        if picked is not None:
            return picked

    # normal role/followup path
    for question in questions:
        if question["id"] in asked_ids or question["stage"] == "closing":
            continue
        if question["branch"] == "yes_exp" and experience_state != "yes":
            continue
        if question["branch"] == "no_exp" and experience_state != "no":
            continue
        return question

    # Nothing left: hand back the closing entry (or the first question if the
    # role defines no closing entry).
    return by_id.get("closing", questions[0])
def score_answer(role_cfg: Dict[str, Any], question_id: str, transcript: str) -> int:
    """Score one transcribed answer on a 0-10 scale using simple heuristics.

    An empty answer scores 0. Otherwise the score starts at 3 and gains:
    one point per length threshold reached (4, 10, 20 characters), one for a
    polite ending, up to two for role keywords, and one when the answer
    yields the item the question asks for (name, country, experience).
    """
    text = normalize_text(transcript)
    if not text:
        return 0

    length = len(text)
    points = 3 + sum(1 for threshold in (4, 10, 20) if length >= threshold)

    if any(ending in text for ending in ("です", "ます")):
        points += 1

    matched_keywords = [kw for kw in role_cfg["expected_keywords"] if kw in text]
    points += min(2, len(matched_keywords))

    # Question-specific bonus: the answer contains what was asked for.
    if question_id == "name":
        on_target = bool(extract_name(text))
    elif question_id == "country":
        on_target = bool(extract_country(text))
    elif question_id == "experience_gate":
        on_target = detect_experience_state(text, "unknown") != "unknown"
    else:
        on_target = False
    if on_target:
        points += 1

    return max(0, min(points, 10))
def build_feedback(score: int) -> str:
    """Return the Japanese feedback sentence for a 0-10 answer score."""
    # (minimum score, message) pairs, checked from the highest tier down.
    tiers = (
        (8, "とても良いです。自然に答えられています。"),
        (6, "良いです。もう少し長く、ていねいに話すともっと良くなります。"),
        (4, "意味は伝わりますが、短いです。完全な文で答えてみましょう。"),
    )
    for floor, message in tiers:
        if score >= floor:
            return message
    return "短すぎるか、内容が分かりにくいです。もう少し詳しく話してください。"
def decide_finish(role_cfg: Dict[str, Any], memory: Dict[str, Any], question_no: int, score: int) -> bool:
    """Decide whether the interview should end after the current answer.

    Ends when the question ceiling is reached; when, past the minimum, the
    candidate has two low scores in a row or a very low average over at
    least three answers; or from question 8 onward when the average is
    below 5.5. ``score`` is accepted but not consulted.
    """
    history = memory.get("answers_so_far", [])
    average = mean([item.get("answer_score", 0) for item in history]) if history else 0
    # Per-session bounds stored in memory take precedence over the role's own.
    floor_q = int(memory.get("min_questions", role_cfg["min_questions"]))
    ceiling_q = int(memory.get("max_questions", role_cfg["max_questions"]))

    if question_no >= ceiling_q:
        return True

    if question_no >= floor_q:
        if memory.get("low_score_streak", 0) >= 2:
            return True
        if len(history) >= 3 and average < 3.5:
            return True

    # good candidate can continue; otherwise finish around middle
    if question_no >= 10 and average >= 6:
        return False
    return question_no >= 8 and average < 5.5
def build_final_result(role_cfg: Dict[str, Any], memory: Dict[str, Any], force_fail: bool = False, summary_jp: str = "") -> Dict[str, Any]:
    """Assemble the end-of-interview report from the session memory.

    Args:
        role_cfg: The role entry the interview was run against.
        memory: Session memory holding the answers and the profile fields.
        force_fail: Cap the overall score at 39 and force a FAIL verdict.
        summary_jp: Summary text; a default completion sentence is used when
            empty.

    Returns:
        A dict with the profile fields, an overall score (0-100), per-aspect
        scores (1-10, all derived from the average answer score), the
        PASS/FAIL verdict, strengths, weaknesses, tips, and the raw answers.
    """
    answers = list(memory.get("answers_so_far", []))
    scores = [int(item.get("answer_score", 0)) for item in answers]
    if not scores:
        scores = [0]
    avg = mean(scores)

    overall_score = int(round(avg * 10))
    overall_score = max(0, min(100, overall_score))
    if force_fail:
        overall_score = min(overall_score, 39)
    passed = overall_score >= 60 and not force_fail

    strengths: List[str] = []
    weaknesses: List[str] = []

    if memory.get("candidate_name"):
        strengths.append("Self introduction was understood.")
    else:
        weaknesses.append("Name was not clearly understood.")

    experience_state = memory.get("experience_state")
    if experience_state == "yes":
        strengths.append("Role experience was communicated.")
    elif experience_state == "no":
        weaknesses.append("No direct role experience was explained clearly.")

    if overall_score >= 70:
        strengths.append("Answers were mostly clear and relevant.")
    else:
        weaknesses.append("Several answers were too short or unclear.")

    tips: List[str] = [
        "Use one or two extra sentences in each answer.",
        "Use polite endings like です and ます.",
        "Speak a little louder and more clearly.",
    ]

    # Every aspect score is the average answer score shifted by a fixed offset.
    aspect_offsets = (
        ("fluency", 0),
        ("grammar", -1),
        ("confidence", 0),
        ("relevance", 1),
        ("role_fit", 0),
    )
    aspect_scores = {
        aspect: clamp_int(round(avg + offset), 1, 10)
        for aspect, offset in aspect_offsets
    }

    default_summary = f"{role_cfg['japanese_name']}の面接練習が完了しました。"
    report: Dict[str, Any] = {
        "candidate_name": memory.get("candidate_name"),
        "country_name": memory.get("country_name"),
        "age": memory.get("age"),
        "job_role": memory.get("job_role"),
        "job_role_en": role_cfg["english_name"],
        "job_role_jp": role_cfg["japanese_name"],
        "summary_jp": summary_jp or default_summary,
        "closing_message_jp": get_question(role_cfg, "closing")["jp"],
        "total_questions": len(answers),
        "overall_score": overall_score,
        "scores": aspect_scores,
        "pass_fail": "PASS" if passed else "FAIL",
        "strengths": strengths[:4],
        "weaknesses": weaknesses[:4],
        "tips": tips[:5],
        "answers": answers,
    }
    return report
def merge_memory(memory: Dict[str, Any], update: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``memory`` overlaid with the non-empty values of ``update``.

    Values equal to ``None``, ``""``, ``[]`` or ``{}`` are ignored, so an
    empty extraction never erases an existing field. Neither input is
    modified.
    """
    blanks = (None, "", [], {})
    combined = dict(memory) if memory else {}
    combined.update({
        key: value
        for key, value in (update or {}).items()
        if value not in blanks
    })
    return combined
def normalize_text(text: str) -> str:
    """Collapse every whitespace run to one space and trim both ends.

    Falsy input (``None`` or ``""``) yields ``""``.
    """
    if not text:
        return ""
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
def safe_json_loads(value: str) -> Dict[str, Any]:
    """Parse ``value`` as a JSON object, returning ``{}`` for anything else.

    Empty input, invalid JSON, and valid JSON that is not an object all
    produce an empty dict; this function never raises.
    """
    try:
        parsed = json.loads(value or "{}")
    except Exception:
        return {}
    if isinstance(parsed, dict):
        return parsed
    return {}
def extract_name(text: str) -> Optional[str]:
    """Pull a candidate name out of a Japanese self-introduction.

    Introductory phrases ("私の名前は", "私は", ...) and closing phrases
    ("です", "と申します", "といいます") are removed, then surrounding
    spaces and punctuation are trimmed.

    Returns:
        The remaining text, or ``None`` when nothing is left or the remainder
        is longer than 30 characters (too long to be a name).
    """
    value = text or ""
    # Longer lead-ins come first so that "名前は" does not leave a stray
    # "私の" behind when the speaker says "私の名前は…".
    lead_ins = (
        "私の名前は",
        "わたしの名前は",
        "わたしのなまえは",
        "名前は",
        "私は",
        "わたしは",
        "ぼくは",
    )
    for phrase in lead_ins:
        value = value.replace(phrase, "")
    for phrase in ("と申します", "といいます", "です"):
        value = value.replace(phrase, "")
    # ASR output often puts "、" after the subject and "。" at the end.
    value = value.strip(" \u3000、,。.")
    if not value or len(value) > 30:
        return None
    return value
def extract_country(text: str) -> Optional[str]:
    """Find the country a candidate says they come from.

    A fixed list of country names is checked first; otherwise whatever
    precedes "から来ました" is used, with a leading subject ("私は", ...)
    removed.

    Returns:
        The country name, or ``None`` when none can be identified.
    """
    text = text or ""
    # "インドネシア" must be tested before "インド": the shorter name is a
    # prefix of the longer one and would otherwise always win.
    known = ["ネパール", "日本", "インドネシア", "インド", "バングラデシュ", "スリランカ", "ベトナム", "中国", "ミャンマー", "フィリピン"]
    for country in known:
        if country in text:
            return country
    m = re.search(r"(.+?)から来ました", text)
    if m:
        candidate = re.sub(r"^(?:私は|わたしは|ぼくは|僕は)[、,]?\s*", "", m.group(1))
        candidate = candidate.strip(" 。")
        return candidate or None
    return None
def extract_age(text: str) -> Optional[int]:
    """Return the age stated in ``text``, or ``None`` when no age is given.

    Only a one- or two-digit number directly followed by the age counter
    (歳 or 才) counts. A bare number is not accepted, because answers
    routinely contain other quantities ("3年勉強しました") that are not ages.
    """
    # (?<!\d) stops the pattern from matching the tail of a longer number.
    m = re.search(r"(?<!\d)(\d{1,2})\s*(?:歳|才)", text or "")
    return int(m.group(1)) if m else None
def clamp_int(value: Any, low: int, high: int) -> int:
    """Round ``value`` to an integer and confine it to ``[low, high]``.

    Anything that cannot be converted to a number yields ``low``.
    """
    try:
        rounded = int(round(float(value)))
    except Exception:
        return low
    if rounded > high:
        rounded = high
    if rounded < low:
        rounded = low
    return rounded