# bipinbudhathoki's picture
# Update app/main.py
# 5aafae3 verified
import json
import os
import re
import tempfile
from statistics import mean
from typing import Any, Dict, List, Optional, Tuple
import requests
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
# Service version string; reported by the "/" and "/health" endpoints and
# passed to FastAPI as the application version.
APP_VERSION = "5.0.0"
# FastAPI application object that the route decorators below attach to.
app = FastAPI(title="Japanese Role Interview API", version=APP_VERSION)
# CORS: any origin, method and header is accepted; credentials are disabled.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=False,
allow_methods=["*"],
allow_headers=["*"],
)
# -----------------------------
# Config
# -----------------------------
# All settings are read from environment variables once, at import time.
# Hugging Face token, sent as a Bearer token by transcribe_with_hf_api.
# When empty, the Hugging Face ASR path is skipped entirely.
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
# Chat-completions endpoint (not referenced elsewhere in the visible code).
HF_ROUTER_URL = os.getenv("HF_ROUTER_URL", "https://router.huggingface.co/v1/chat/completions")
# Base URL the ASR model name is appended to for hosted transcription.
HF_INFERENCE_BASE = os.getenv("HF_INFERENCE_BASE", "https://router.huggingface.co/hf-inference/models")
# Hosted speech-recognition model used by transcribe_with_hf_api.
ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
# Chat model name; in the visible code it is only reported by /health.
CHAT_MODEL = os.getenv("CHAT_MODEL", "Qwen/Qwen2.5-7B-Instruct-1M")
# True when the variable is one of 1/true/yes/on (case-insensitive).
USE_FASTER_WHISPER = os.getenv("USE_FASTER_WHISPER", "true").lower() in {"1", "true", "yes", "on"}
# Model size/name handed to faster-whisper's WhisperModel.
FASTER_WHISPER_MODEL = os.getenv("FASTER_WHISPER_MODEL", "small")
# Global cap applied on top of each role's own max_questions in /start.
# NOTE: int() raises ValueError at import if the variable is not numeric.
MAX_QUESTION_LIMIT = int(os.getenv("MAX_QUESTION_LIMIT", "20"))
# Timeout (seconds) for the hosted ASR HTTP request.
ASR_TIMEOUT_SECONDS = int(os.getenv("ASR_TIMEOUT_SECONDS", "180"))
# Timeout (seconds) for LLM calls (not referenced elsewhere in the visible code).
LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
# Japanese "please repeat" prompts; build_repeat_prompt picks one by the
# number of consecutive silent answers, reusing the last one past the end.
_REPEAT_PROMPTS = [
"声が小さいです。もう少し大きい声で、もう一度お願いします。",
"まだ音がはっきり聞こえません。マイクを確認して、もう一度お願いします。",
"音がうまく入っていません。マイクを近づけて、もう一度お願いします。",
]
# Lazily created faster-whisper model, set by transcribe_with_faster_whisper.
_LOCAL_ASR_MODEL = None
# Static question bank, keyed by role key.
#
# Each role entry holds:
#   english_name / japanese_name - display labels
#   intro_jp                     - greeting spoken before the first question
#   min_questions / max_questions - bounds used when deciding to finish
#   expected_keywords            - words that add to an answer's score
#   questions                    - ordered list of question dicts
#
# Each question dict holds:
#   id     - identifier, unique within the role
#   theme  - topic label recorded in memory["asked_themes"]
#   stage  - one of "screening", "role", "followup", "closing"
#   jp     - the Japanese text that is spoken
#   branch - "all", or "yes_exp" / "no_exp" for questions that depend on the
#            answer to the "experience_gate" question
ROLE_BANK: Dict[str, Dict[str, Any]] = {
# --- Construction ---
"construction": {
"english_name": "Construction",
"japanese_name": "建設",
"intro_jp": "こんにちは。建設の仕事の面接練習を始めます。よろしくお願いします。",
"min_questions": 3,
"max_questions": 20,
"expected_keywords": ["安全", "ヘルメット", "現場", "工具", "体力", "チーム", "ルール"],
"questions": [
{"id":"name","theme":"intro","stage":"screening","jp":"お名前を教えてください。","branch":"all"},
{"id":"country","theme":"intro","stage":"screening","jp":"どこの国から来ましたか。","branch":"all"},
{"id":"reason","theme":"motivation","stage":"screening","jp":"日本へ行きたい理由は何ですか。","branch":"all"},
{"id":"japanese","theme":"language","stage":"screening","jp":"日本語はどのくらい勉強しましたか。","branch":"all"},
{"id":"ready_check","theme":"intro","stage":"screening","jp":"面接の準備はできていますか。","branch":"all"},
{"id":"experience_gate","theme":"experience","stage":"role","jp":"建設の仕事をしたことがありますか。","branch":"all"},
{"id":"exp_yes_detail","theme":"experience","stage":"role","jp":"どんな建設の仕事をしましたか。","branch":"yes_exp"},
{"id":"exp_years","theme":"experience","stage":"role","jp":"その仕事は何年ぐらいしましたか。","branch":"yes_exp"},
{"id":"exp_no_motivation","theme":"motivation","stage":"role","jp":"経験がなくても、建設の仕事を勉強してがんばれますか。","branch":"no_exp"},
{"id":"physical","theme":"role_fit","stage":"role","jp":"体力が必要な仕事ですが、大丈夫ですか。","branch":"all"},
{"id":"safety","theme":"safety","stage":"role","jp":"危ない場所で働くとき、何に気をつけますか。","branch":"all"},
{"id":"teamwork","theme":"teamwork","stage":"role","jp":"チームで仕事をするとき、大切なことは何ですか。","branch":"all"},
{"id":"tools","theme":"role_fit","stage":"followup","jp":"工具を使う仕事に興味はありますか。","branch":"all"},
{"id":"morning","theme":"schedule","stage":"followup","jp":"朝早い仕事や外の仕事はできますか。","branch":"all"},
{"id":"report","theme":"teamwork","stage":"followup","jp":"分からないとき、先輩にすぐ相談できますか。","branch":"all"},
{"id":"mistake","theme":"reliability","stage":"followup","jp":"仕事でミスをしたら、どうしますか。","branch":"all"},
{"id":"strength","theme":"personality","stage":"followup","jp":"建設の仕事に向いている自分の長所を一つ話してください。","branch":"all"},
{"id":"closing","theme":"closing","stage":"closing","jp":"本日の建設の面接練習はここまでです。ご参加ありがとうございました。","branch":"all"},
],
},
# --- Restaurant / convenience store ---
"restaurant_konbini": {
"english_name": "Restaurant / Konbini",
"japanese_name": "外食・コンビニ",
"intro_jp": "こんにちは。外食・コンビニの仕事の面接練習を始めます。よろしくお願いします。",
"min_questions": 3,
"max_questions": 20,
"expected_keywords": ["接客", "レジ", "お客様", "笑顔", "ていねい", "品出し", "掃除"],
"questions": [
{"id":"name","theme":"intro","stage":"screening","jp":"お名前を教えてください。","branch":"all"},
{"id":"country","theme":"intro","stage":"screening","jp":"どこの国から来ましたか。","branch":"all"},
{"id":"reason","theme":"motivation","stage":"screening","jp":"日本へ行きたい理由は何ですか。","branch":"all"},
{"id":"japanese","theme":"language","stage":"screening","jp":"日本語はどのくらい勉強しましたか。","branch":"all"},
{"id":"ready_check","theme":"intro","stage":"screening","jp":"面接の準備はできていますか。","branch":"all"},
{"id":"experience_gate","theme":"experience","stage":"role","jp":"レストランやコンビニで働いた経験はありますか。","branch":"all"},
{"id":"exp_yes_detail","theme":"experience","stage":"role","jp":"どんな仕事をしましたか。レジ、接客、品出しなどを話してください。","branch":"yes_exp"},
{"id":"exp_no_motivation","theme":"motivation","stage":"role","jp":"経験がなくても、接客を勉強する気持ちはありますか。","branch":"no_exp"},
{"id":"customer","theme":"service","stage":"role","jp":"お客様には、どんな話し方をしたいですか。","branch":"all"},
{"id":"busy","theme":"role_fit","stage":"role","jp":"忙しい時間でも落ち着いて働けますか。","branch":"all"},
{"id":"cleanliness","theme":"service","stage":"role","jp":"お店の清潔さは大切ですか。なぜですか。","branch":"all"},
{"id":"shift","theme":"schedule","stage":"followup","jp":"立ち仕事やシフト勤務は大丈夫ですか。","branch":"all"},
{"id":"mistake","theme":"reliability","stage":"followup","jp":"注文やレジで間違えたら、どうしますか。","branch":"all"},
{"id":"teamwork","theme":"teamwork","stage":"followup","jp":"ほかのスタッフと協力できますか。","branch":"all"},
{"id":"strength","theme":"personality","stage":"followup","jp":"この仕事に向いている自分の長所を一つ話してください。","branch":"all"},
{"id":"closing","theme":"closing","stage":"closing","jp":"本日の外食・コンビニの面接練習はここまでです。ご参加ありがとうございました。","branch":"all"},
],
},
# --- Nursing care ---
"nursing_care": {
"english_name": "Nursing Care",
"japanese_name": "介護",
"intro_jp": "こんにちは。介護の仕事の面接練習を始めます。よろしくお願いします。",
"min_questions": 3,
"max_questions": 20,
"expected_keywords": ["介護", "やさしい", "利用者", "お年寄り", "清潔", "手伝う", "責任"],
"questions": [
{"id":"name","theme":"intro","stage":"screening","jp":"お名前を教えてください。","branch":"all"},
{"id":"country","theme":"intro","stage":"screening","jp":"どこの国から来ましたか。","branch":"all"},
{"id":"reason","theme":"motivation","stage":"screening","jp":"日本へ行きたい理由は何ですか。","branch":"all"},
{"id":"japanese","theme":"language","stage":"screening","jp":"日本語はどのくらい勉強しましたか。","branch":"all"},
{"id":"ready_check","theme":"intro","stage":"screening","jp":"面接の準備はできていますか。","branch":"all"},
{"id":"experience_gate","theme":"experience","stage":"role","jp":"介護の仕事をしたことがありますか。","branch":"all"},
{"id":"exp_yes_detail","theme":"experience","stage":"role","jp":"どんな介護の仕事をしましたか。","branch":"yes_exp"},
{"id":"exp_no_motivation","theme":"motivation","stage":"role","jp":"経験がなくても、介護の勉強をしてがんばれますか。","branch":"no_exp"},
{"id":"kindness","theme":"service","stage":"role","jp":"お年寄りや利用者さんに、やさしく話せますか。","branch":"all"},
{"id":"cleanliness","theme":"safety","stage":"role","jp":"介護の仕事で清潔さは大切ですか。なぜですか。","branch":"all"},
{"id":"communication","theme":"service","stage":"role","jp":"利用者さんが困っていたら、まず何をしますか。","branch":"all"},
{"id":"hard_work","theme":"role_fit","stage":"followup","jp":"大変な仕事でも、落ち着いて続けられますか。","branch":"all"},
{"id":"teamwork","theme":"teamwork","stage":"followup","jp":"スタッフと協力できますか。","branch":"all"},
{"id":"report","theme":"teamwork","stage":"followup","jp":"報告・連絡・相談はできますか。","branch":"all"},
{"id":"strength","theme":"personality","stage":"followup","jp":"介護の仕事に向いている自分の長所を一つ話してください。","branch":"all"},
{"id":"closing","theme":"closing","stage":"closing","jp":"本日の介護の面接練習はここまでです。ご参加ありがとうございました。","branch":"all"},
],
},
# --- Hotel / accommodation ---
"hotel_accommodation": {
"english_name": "Hotel / Accommodation",
"japanese_name": "宿泊",
"intro_jp": "こんにちは。宿泊・ホテルの仕事の面接練習を始めます。よろしくお願いします。",
"min_questions": 3,
"max_questions": 20,
"expected_keywords": ["ホテル", "お客様", "ていねい", "笑顔", "掃除", "フロント", "案内"],
"questions": [
{"id":"name","theme":"intro","stage":"screening","jp":"お名前を教えてください。","branch":"all"},
{"id":"country","theme":"intro","stage":"screening","jp":"どこの国から来ましたか。","branch":"all"},
{"id":"reason","theme":"motivation","stage":"screening","jp":"日本へ行きたい理由は何ですか。","branch":"all"},
{"id":"japanese","theme":"language","stage":"screening","jp":"日本語はどのくらい勉強しましたか。","branch":"all"},
{"id":"ready_check","theme":"intro","stage":"screening","jp":"面接の準備はできていますか。","branch":"all"},
{"id":"experience_gate","theme":"experience","stage":"role","jp":"ホテルや宿泊の仕事をしたことがありますか。","branch":"all"},
{"id":"exp_yes_detail","theme":"experience","stage":"role","jp":"どんな仕事をしましたか。フロント、掃除、ベッドメイクなどを話してください。","branch":"yes_exp"},
{"id":"exp_no_motivation","theme":"motivation","stage":"role","jp":"経験がなくても、ホテルの仕事を勉強してがんばれますか。","branch":"no_exp"},
{"id":"customer","theme":"service","stage":"role","jp":"お客様に話すとき、どんなことを大切にしますか。","branch":"all"},
{"id":"cleanliness","theme":"service","stage":"role","jp":"部屋の掃除や整理整頓は好きですか。","branch":"all"},
{"id":"busy","theme":"role_fit","stage":"role","jp":"忙しい時間でも落ち着いて働けますか。","branch":"all"},
{"id":"teamwork","theme":"teamwork","stage":"followup","jp":"スタッフと協力して働くことはできますか。","branch":"all"},
{"id":"shift","theme":"schedule","stage":"followup","jp":"夜や朝のシフトは大丈夫ですか。","branch":"all"},
{"id":"strength","theme":"personality","stage":"followup","jp":"ホテルの仕事に向いている自分の長所を一つ話してください。","branch":"all"},
{"id":"closing","theme":"closing","stage":"closing","jp":"本日の宿泊の面接練習はここまでです。ご参加ありがとうございました。","branch":"all"},
],
},
# --- Agriculture ---
"agriculture": {
"english_name": "Agriculture",
"japanese_name": "農業",
"intro_jp": "こんにちは。農業の仕事の面接練習を始めます。よろしくお願いします。",
"min_questions": 3,
"max_questions": 20,
"expected_keywords": ["農業", "畑", "体力", "朝", "収穫", "外", "時間"],
"questions": [
{"id":"name","theme":"intro","stage":"screening","jp":"お名前を教えてください。","branch":"all"},
{"id":"country","theme":"intro","stage":"screening","jp":"どこの国から来ましたか。","branch":"all"},
{"id":"reason","theme":"motivation","stage":"screening","jp":"日本へ行きたい理由は何ですか。","branch":"all"},
{"id":"japanese","theme":"language","stage":"screening","jp":"日本語はどのくらい勉強しましたか。","branch":"all"},
{"id":"ready_check","theme":"intro","stage":"screening","jp":"面接の準備はできていますか。","branch":"all"},
{"id":"experience_gate","theme":"experience","stage":"role","jp":"農業の仕事をしたことがありますか。","branch":"all"},
{"id":"exp_yes_detail","theme":"experience","stage":"role","jp":"どんな農業の仕事をしましたか。","branch":"yes_exp"},
{"id":"exp_no_motivation","theme":"motivation","stage":"role","jp":"経験がなくても、農業を勉強してがんばれますか。","branch":"no_exp"},
{"id":"outside_work","theme":"role_fit","stage":"role","jp":"外で長い時間働くことは大丈夫ですか。","branch":"all"},
{"id":"early_morning","theme":"schedule","stage":"role","jp":"朝早い仕事でも時間を守れますか。","branch":"all"},
{"id":"physical","theme":"role_fit","stage":"role","jp":"体力に自信はありますか。","branch":"all"},
{"id":"weather","theme":"role_fit","stage":"followup","jp":"暑い日や寒い日でも、まじめに働けますか。","branch":"all"},
{"id":"teamwork","theme":"teamwork","stage":"followup","jp":"ほかの人と一緒に働けますか。","branch":"all"},
{"id":"strength","theme":"personality","stage":"followup","jp":"農業の仕事に向いている自分の長所を一つ話してください。","branch":"all"},
{"id":"closing","theme":"closing","stage":"closing","jp":"本日の農業の面接練習はここまでです。ご参加ありがとうございました。","branch":"all"},
],
},
# --- Manufacturing ---
"manufacturing": {
"english_name": "Manufacturing",
"japanese_name": "製造業",
"intro_jp": "こんにちは。製造業の仕事の面接練習を始めます。よろしくお願いします。",
"min_questions": 3,
"max_questions": 20,
"expected_keywords": ["工場", "安全", "正確", "確認", "時間", "ルール", "集中"],
"questions": [
{"id":"name","theme":"intro","stage":"screening","jp":"お名前を教えてください。","branch":"all"},
{"id":"country","theme":"intro","stage":"screening","jp":"どこの国から来ましたか。","branch":"all"},
{"id":"reason","theme":"motivation","stage":"screening","jp":"日本へ行きたい理由は何ですか。","branch":"all"},
{"id":"japanese","theme":"language","stage":"screening","jp":"日本語はどのくらい勉強しましたか。","branch":"all"},
{"id":"ready_check","theme":"intro","stage":"screening","jp":"面接の準備はできていますか。","branch":"all"},
{"id":"experience_gate","theme":"experience","stage":"role","jp":"工場や製造の仕事をしたことがありますか。","branch":"all"},
{"id":"exp_yes_detail","theme":"experience","stage":"role","jp":"どんな製造の仕事をしましたか。","branch":"yes_exp"},
{"id":"exp_no_motivation","theme":"motivation","stage":"role","jp":"経験がなくても、製造の仕事を勉強してがんばれますか。","branch":"no_exp"},
{"id":"accuracy","theme":"role_fit","stage":"role","jp":"ミスを少なくするために、何を大切にしますか。","branch":"all"},
{"id":"safety","theme":"safety","stage":"role","jp":"機械を使うとき、安全のために何をしますか。","branch":"all"},
{"id":"time","theme":"schedule","stage":"role","jp":"時間を守って、同じ作業を続けることはできますか。","branch":"all"},
{"id":"quality","theme":"reliability","stage":"followup","jp":"品質を守ることは大切ですか。なぜですか。","branch":"all"},
{"id":"teamwork","theme":"teamwork","stage":"followup","jp":"チームで協力できますか。","branch":"all"},
{"id":"strength","theme":"personality","stage":"followup","jp":"製造業の仕事に向いている自分の長所を一つ話してください。","branch":"all"},
{"id":"closing","theme":"closing","stage":"closing","jp":"本日の製造業の面接練習はここまでです。ご参加ありがとうございました。","branch":"all"},
],
},
}
class StartRequest(BaseModel):
    # JSON request body accepted by POST /start.

    # Session identifier chosen by the client; echoed back in the response.
    session_uuid: str
    # Requested role key or alias; resolved through normalize_role_key.
    job_role: str = "construction"
@app.get("/")
def root() -> Dict[str, Any]:
    """Return a small service descriptor: name, version and route list."""
    available_routes = ["/health", "/roles", "/start", "/answer"]
    descriptor: Dict[str, Any] = {"ok": True}
    descriptor["service"] = "jp-role-interview"
    descriptor["version"] = APP_VERSION
    descriptor["routes"] = available_routes
    return descriptor
@app.get("/health")
def health() -> Dict[str, Any]:
    """Report service status and the active configuration.

    The response states whether an HF token is configured, which ASR path
    is tried first, the chat model name and how many roles are defined.
    """
    # Local faster-whisper is tried first when enabled; otherwise only the
    # hosted API is used.
    backend_label = "faster_whisper_then_hf_api" if USE_FASTER_WHISPER else "hf_api"
    status: Dict[str, Any] = {
        "ok": True,
        "service": "jp-role-interview",
        "version": APP_VERSION,
    }
    status["hf_token_set"] = bool(HF_TOKEN)
    status["asr_backend"] = backend_label
    status["chat_model"] = CHAT_MODEL
    status["role_count"] = len(ROLE_BANK)
    return status
@app.get("/roles")
def roles() -> Dict[str, Any]:
    """List every role in ROLE_BANK with its labels and question bounds."""
    exposed_fields = ("english_name", "japanese_name", "min_questions", "max_questions")
    listing: List[Dict[str, Any]] = []
    for role_key, role_cfg in ROLE_BANK.items():
        entry: Dict[str, Any] = {"key": role_key}
        for field_name in exposed_fields:
            entry[field_name] = role_cfg[field_name]
        listing.append(entry)
    return {"ok": True, "roles": listing}
@app.post("/start")
def start_interview(payload: StartRequest) -> Dict[str, Any]:
    """Open an interview session and return the first question.

    The requested role is resolved with normalize_role_key, the "name"
    question is selected as question 1, and a fresh ``memory`` dict is
    returned for the client to send back with each /answer call.
    """
    role_key = normalize_role_key(payload.job_role)
    bank = ROLE_BANK[role_key]
    opener = get_question(bank, "name")
    english_label = bank["english_name"]
    japanese_label = bank["japanese_name"]

    # Greeting and first question are spoken together as one utterance.
    greeting = f"{bank['intro_jp']} {opener['jp']}"

    # Role identity first, then empty profile slots, then progress counters.
    memory: Dict[str, Any] = {
        "job_role": role_key,
        "job_role_en": english_label,
        "job_role_jp": japanese_label,
    }
    for profile_field in (
        "candidate_name",
        "country_name",
        "age",
        "reason_for_japan",
        "occupation",
        "japanese_level",
    ):
        memory[profile_field] = None
    memory["experience_state"] = "unknown"
    memory["answers_so_far"] = []
    memory["asked_question_ids"] = [opener["id"]]
    memory["asked_themes"] = [opener["theme"]]
    memory["low_score_streak"] = 0
    memory["no_sound_count"] = 0
    memory["min_questions"] = bank["min_questions"]
    # The per-role maximum is capped by the global limit.
    memory["max_questions"] = min(bank["max_questions"], MAX_QUESTION_LIMIT)
    memory["auto_question_mode"] = True

    response: Dict[str, Any] = {
        "ok": True,
        "session_uuid": payload.session_uuid,
        "job_role": role_key,
        "job_role_label": f"{english_label} / {japanese_label}",
        "question_no": 1,
        "question_id": opener["id"],
        "question_jp": opener["jp"],
        "speech_text_jp": greeting,
        "memory": memory,
        "is_finished": False,
        "speak_now": True,
    }
    return response
def _memory_int(memory: Dict[str, Any], key: str) -> int:
    """Read an integer counter from client-supplied memory; junk counts as 0."""
    try:
        return int(memory.get(key) or 0)
    except (TypeError, ValueError):
        return 0


def _memory_list(memory: Dict[str, Any], key: str) -> List[Any]:
    """Return a copy of the list stored under ``key``, or [] if it is not a list."""
    value = memory.get(key)
    return list(value) if isinstance(value, list) else []


@app.post("/answer")
async def answer_interview(
    session_uuid: str = Form(...),
    question_no: int = Form(...),
    question_id: str = Form(...),
    question_jp: str = Form(...),
    memory_json: str = Form("{}"),
    audio: UploadFile = File(...),
) -> Dict[str, Any]:
    """Transcribe one spoken answer, score it and choose what happens next.

    Form fields:
        session_uuid: Client session identifier, echoed back.
        question_no: 1-based number of the question being answered.
        question_id: Id of the question being answered.
        question_jp: Text of the question being answered.
        memory_json: JSON-encoded session memory as returned by the previous
            call; anything that is not a JSON object is treated as empty.
        audio: Recorded answer.

    Returns one of three payload shapes:
        * ``needs_repeat`` - no speech was recognised; same question again.
        * ``is_finished`` with ``result`` - the interview is over.
        * otherwise the next question (``next_question_*`` keys).
    """
    memory = safe_json_loads(memory_json)
    role_key = normalize_role_key(memory.get("job_role"))
    role_cfg = ROLE_BANK[role_key]

    # Memory is round-tripped through the client and may be empty (the form
    # default is "{}") or partial, so the bookkeeping lists are rebuilt
    # defensively instead of being indexed directly.
    asked_ids = _memory_list(memory, "asked_question_ids")
    if question_id not in asked_ids:
        # The question being answered has been asked by definition; recording
        # it keeps it from being selected again.
        asked_ids.append(question_id)
    memory["asked_question_ids"] = asked_ids
    memory["asked_themes"] = _memory_list(memory, "asked_themes")

    transcript, asr_backend, asr_error = await transcribe_upload(audio)

    if not transcript.strip():
        silent_count = _memory_int(memory, "no_sound_count") + 1
        memory["no_sound_count"] = silent_count
        spoken = build_repeat_prompt(memory.get("candidate_name"), silent_count)
        # Give up after two silent attempts, but only once the minimum number
        # of questions has been reached.
        if silent_count >= 2 and question_no >= role_cfg["min_questions"]:
            result = build_final_result(
                role_cfg,
                memory,
                force_fail=True,
                summary_jp="音声が聞こえないため、面接を終了しました。",
            )
            return {
                "ok": True,
                "is_finished": True,
                "session_uuid": session_uuid,
                "job_role": role_key,
                "transcript_jp": "",
                "answer_score": 0,
                "feedback_jp": spoken,
                "speech_text_jp": spoken,
                "memory": memory,
                "asr_backend": asr_backend,
                "asr_error": asr_error,
                "result": result,
            }
        return {
            "ok": True,
            "is_finished": False,
            "needs_repeat": True,
            "session_uuid": session_uuid,
            "job_role": role_key,
            "question_no": question_no,
            "question_id": question_id,
            "question_jp": question_jp,
            "speech_text_jp": spoken,
            "transcript_jp": "",
            "answer_score": 0,
            "feedback_jp": spoken,
            "memory": memory,
            "next_question_no": question_no,
            "next_question_id": question_id,
            "next_question_jp": question_jp,
            "asr_backend": asr_backend,
            "asr_error": asr_error,
            "speak_now": True,
        }

    memory["no_sound_count"] = 0
    profile_update = maybe_extract_basic_profile(memory, transcript, question_id)
    memory = merge_memory(memory, profile_update)

    score = score_answer(role_cfg, question_id, transcript)
    feedback = build_feedback(score)

    answers = _memory_list(memory, "answers_so_far")
    answers.append({
        "question_no": question_no,
        "question_id": question_id,
        "question_jp": question_jp,
        "answer_text_jp": transcript,
        "answer_score": score,
        "feedback_jp": feedback,
    })
    memory["answers_so_far"] = answers

    if score <= 3:
        memory["low_score_streak"] = _memory_int(memory, "low_score_streak") + 1
    else:
        memory["low_score_streak"] = 0

    finished = decide_finish(role_cfg, memory, question_no, score)
    next_q: Optional[Dict[str, Any]] = None
    if not finished:
        next_q = select_next_question(role_cfg, memory)
        # The closing entry is a farewell, not a question. When it is all
        # that is left, end the interview instead of asking it (and then
        # re-asking it on every turn until max_questions is reached).
        finished = next_q.get("stage") == "closing"

    if finished or next_q is None:
        result = build_final_result(role_cfg, memory)
        return {
            "ok": True,
            "is_finished": True,
            "session_uuid": session_uuid,
            "job_role": role_key,
            "question_no": question_no,
            "transcript_jp": transcript,
            "answer_score": score,
            "feedback_jp": feedback,
            "speech_text_jp": result["closing_message_jp"],
            "memory": memory,
            "asr_backend": asr_backend,
            "asr_error": asr_error,
            "result": result,
        }

    if next_q["id"] not in memory["asked_question_ids"]:
        memory["asked_question_ids"].append(next_q["id"])
    if next_q["theme"] not in memory["asked_themes"]:
        memory["asked_themes"].append(next_q["theme"])

    spoken_next = next_q["jp"]
    # Thank the candidate by name once, right after the name is known.
    if next_q["id"] == "ready_check" and memory.get("candidate_name"):
        spoken_next = f"{memory['candidate_name']}さん、ありがとうございます。{next_q['jp']}"

    return {
        "ok": True,
        "is_finished": False,
        "session_uuid": session_uuid,
        "job_role": role_key,
        "question_no": question_no,
        "transcript_jp": transcript,
        "answer_score": score,
        "feedback_jp": feedback,
        "speech_text_jp": spoken_next,
        "memory": memory,
        "asr_backend": asr_backend,
        "asr_error": asr_error,
        "next_question_no": question_no + 1,
        "next_question_id": next_q["id"],
        "next_question_jp": next_q["jp"],
        "speak_now": True,
    }
def normalize_role_key(value: Any) -> str:
    """Map a user-supplied role name onto a ROLE_BANK key.

    The value is stringified, trimmed and lower-cased, short aliases are
    expanded, and anything that is still not a known role (including an
    empty or missing value) resolves to "construction".
    """
    alias_to_role = {
        "restaurant": "restaurant_konbini",
        "konbini": "restaurant_konbini",
        "nursing": "nursing_care",
        "care": "nursing_care",
        "hotel": "hotel_accommodation",
        "accommodation": "hotel_accommodation",
    }
    requested = str(value or "construction").strip().lower()
    resolved = alias_to_role.get(requested, requested)
    if resolved not in ROLE_BANK:
        return "construction"
    return resolved
def get_question(role_cfg: Dict[str, Any], qid: str) -> Dict[str, Any]:
    """Return the first question whose id equals ``qid``.

    When no question matches, the first question of the role is returned
    instead of raising.
    """
    bank = role_cfg["questions"]
    found = next((entry for entry in bank if entry["id"] == qid), None)
    if found is None:
        return bank[0]
    return found
def build_repeat_prompt(name: Optional[str], count: int) -> str:
    """Pick the "please repeat" prompt for the ``count``-th silent answer.

    ``count`` is 1-based; values outside the prompt list are clamped to the
    first or last prompt. When ``name`` is truthy the prompt is prefixed
    with "<name>さん、".
    """
    position = count - 1
    last_position = len(_REPEAT_PROMPTS) - 1
    if position > last_position:
        position = last_position
    if position < 0:
        position = 0
    prompt = _REPEAT_PROMPTS[position]
    return f"{name}さん、{prompt}" if name else prompt
async def transcribe_upload(audio: UploadFile) -> Tuple[str, str, Optional[str]]:
    """Transcribe an uploaded audio file, trying local ASR before the hosted API.

    Args:
        audio: The uploaded recording.

    Returns:
        ``(transcript, backend_label, error)``. ``transcript`` is normalized
        text, or "" when every backend failed or none is configured.
        ``backend_label`` names the path that produced the outcome and
        ``error`` carries the failure message(s), if any. Failures are
        reported through the tuple rather than raised.
    """
    # Function-scope import so this block does not depend on the file header.
    import asyncio

    content = await audio.read()
    filename = audio.filename or "answer.webm"
    loop = asyncio.get_running_loop()

    async def run_blocking(transcriber: Any) -> str:
        # Both transcribers block (model inference / an HTTP request with a
        # long timeout). Running them on the default executor keeps the
        # event loop free to serve other requests in the meantime.
        return await loop.run_in_executor(None, transcriber, content, filename)

    if USE_FASTER_WHISPER:
        try:
            text = await run_blocking(transcribe_with_faster_whisper)
            return normalize_text(text), "faster_whisper", None
        except Exception as exc:  # best effort: fall back instead of failing the request
            if HF_TOKEN:
                try:
                    text = await run_blocking(transcribe_with_hf_api)
                    # The local error is still reported alongside the fallback text.
                    return normalize_text(text), "hf_api_fallback", str(exc)
                except Exception as exc2:
                    return "", "hf_api_fallback_failed", f"{exc} | {exc2}"
            return "", "faster_whisper_failed", str(exc)

    if HF_TOKEN:
        try:
            text = await run_blocking(transcribe_with_hf_api)
            return normalize_text(text), "hf_api", None
        except Exception as exc:
            return "", "hf_api_failed", str(exc)

    return "", "no_asr_backend", "Neither faster-whisper nor HF API is available."
def transcribe_with_faster_whisper(content: bytes, filename: str) -> str:
    """Transcribe audio bytes as Japanese with a local faster-whisper model.

    The bytes are written to a temporary file (keeping the original file
    extension, or ".webm" when there is none), transcribed with VAD
    filtering enabled, and the file is removed afterwards. The model is
    created on first use and cached in ``_LOCAL_ASR_MODEL``.
    """
    global _LOCAL_ASR_MODEL
    from faster_whisper import WhisperModel  # lazy import

    _, extension = os.path.splitext(filename)
    scratch = tempfile.NamedTemporaryFile(delete=False, suffix=extension or ".webm")
    with scratch:
        scratch.write(content)
    audio_path = scratch.name

    try:
        model = _LOCAL_ASR_MODEL
        if model is None:
            model = WhisperModel(FASTER_WHISPER_MODEL, device="cpu", compute_type="int8")
            _LOCAL_ASR_MODEL = model
        segments, _details = model.transcribe(audio_path, language="ja", vad_filter=True)
        pieces = [segment.text.strip() for segment in segments]
        return " ".join(pieces).strip()
    finally:
        # Cleanup is best effort; a leftover temp file must not mask the result.
        try:
            os.remove(audio_path)
        except OSError:
            pass
def transcribe_with_hf_api(content: bytes, filename: str) -> str:
    """Send audio bytes to the hosted ASR model and return the recognised text.

    Raises whatever ``requests`` raises for transport errors or a non-2xx
    status. Returns "" when the JSON reply has none of the expected fields.
    """
    endpoint = "{}/{}".format(HF_INFERENCE_BASE, ASR_MODEL)
    request_headers = {
        "Authorization": "Bearer {}".format(HF_TOKEN),
        "Content-Type": guess_mime_type(filename),
    }
    reply = requests.post(
        endpoint,
        headers=request_headers,
        data=content,
        timeout=ASR_TIMEOUT_SECONDS,
    )
    reply.raise_for_status()
    payload = reply.json()

    # The reply is either a single object or a list whose first item is one.
    if isinstance(payload, dict):
        transcript = payload.get("text") or payload.get("generated_text") or ""
    elif isinstance(payload, list) and payload and isinstance(payload[0], dict):
        transcript = payload[0].get("text") or ""
    else:
        return ""
    return str(transcript)
def guess_mime_type(filename: str) -> str:
    """Guess an audio MIME type from the file extension (case-insensitive).

    Unknown or missing extensions are reported as "audio/webm".
    """
    suffix_to_mime = (
        (".wav", "audio/wav"),
        (".mp3", "audio/mpeg"),
        (".m4a", "audio/mp4"),
        (".ogg", "audio/ogg"),
    )
    lowered = (filename or "").lower()
    return next(
        (mime for suffix, mime in suffix_to_mime if lowered.endswith(suffix)),
        "audio/webm",
    )
def maybe_extract_basic_profile(memory: Dict[str, Any], transcript: str, question_id: str) -> Dict[str, Any]:
    """Derive profile fields from one answer, returning only the new values.

    Which field is filled depends on ``question_id``; a field that is
    already set in ``memory`` is left alone, except ``experience_state``,
    which is re-evaluated on every "experience_gate" answer. An age is
    looked for in every answer until one has been stored.
    """
    text = normalize_text(transcript)
    found: Dict[str, Any] = {}
    long_enough = len(text) >= 4

    if question_id == "name":
        if not memory.get("candidate_name"):
            candidate = extract_name(text)
            if candidate:
                found["candidate_name"] = candidate
    elif question_id == "country":
        if not memory.get("country_name"):
            origin = extract_country(text)
            if origin:
                found["country_name"] = origin
    elif question_id == "reason":
        if not memory.get("reason_for_japan") and long_enough:
            found["reason_for_japan"] = text[:120]
    elif question_id == "japanese":
        if not memory.get("japanese_level") and long_enough:
            found["japanese_level"] = text[:120]
    elif question_id == "experience_gate":
        previous = memory.get("experience_state", "unknown")
        found["experience_state"] = detect_experience_state(text, previous)

    # NOTE(review): indentation was lost in the source; the age lookup is
    # assumed to run for every question, not only "experience_gate" - confirm.
    detected_age = extract_age(text)
    if detected_age and not memory.get("age"):
        found["age"] = detected_age
    return found
def detect_experience_state(text: str, current: str) -> str:
    """Classify an answer about prior experience as "yes", "no" or "unknown".

    Affirmative phrases are checked before negative ones. When neither kind
    appears, ``current`` is kept if it is already "yes" or "no"; any other
    value becomes "unknown".
    """
    affirmative = ("あります", "しました", "経験があります", "働いたことがあります")
    negative = ("ありません", "ないです", "経験がありません", "したことがありません")

    for verdict, phrases in (("yes", affirmative), ("no", negative)):
        for phrase in phrases:
            if phrase in text:
                return verdict

    if current in ("yes", "no"):
        return current
    return "unknown"
def select_next_question(role_cfg: Dict[str, Any], memory: Dict[str, Any]) -> Dict[str, Any]:
    """Choose the next question to ask from the role's bank.

    Priority order:
        1. the fixed screening sequence;
        2. the branch that matches ``memory["experience_state"]``;
        3. a set of simpler role questions when the average score so far is
           below 4.5;
        4. any remaining question in bank order, skipping the closing entry
           and branch questions that do not apply.
    When nothing is left, the "closing" entry is returned.

    Ids that a role's bank does not define are skipped. They used to
    resolve to the first question of the bank, which could return an
    unrelated question in their place.

    Args:
        role_cfg: One ROLE_BANK entry; its "questions" list is read.
        memory: Session memory; "asked_question_ids", "experience_state"
            and "answers_so_far" are read and nothing is modified.

    Returns:
        The selected question dict (the same object held by ``role_cfg``).
    """
    questions = role_cfg["questions"]
    # First definition of an id wins, like a front-to-back scan of the bank.
    by_id: Dict[str, Dict[str, Any]] = {}
    for question in questions:
        by_id.setdefault(question["id"], question)

    asked_ids = set(memory.get("asked_question_ids") or [])
    experience_state = memory.get("experience_state", "unknown")
    answers = memory.get("answers_so_far") or []
    avg = mean([a.get("answer_score", 0) for a in answers]) if answers else 0

    # Each stage means "first defined, not yet asked id in this list", so the
    # stages can be concatenated into one priority list.
    priority: List[str] = ["country", "reason", "japanese", "ready_check", "experience_gate"]
    if experience_state == "yes":
        priority += ["exp_yes_detail", "exp_years"]
    elif experience_state == "no":
        priority += ["exp_no_motivation"]
    if avg < 4.5:
        # Weaker candidates get the simpler role questions first.
        priority += [
            "physical", "busy", "kindness", "cleanliness", "outside_work",
            "time", "customer", "safety", "teamwork",
        ]

    for qid in priority:
        if qid in by_id and qid not in asked_ids:
            return by_id[qid]

    # Normal role/follow-up path, in bank order.
    for question in questions:
        if question["id"] in asked_ids or question["stage"] == "closing":
            continue
        if question["branch"] == "yes_exp" and experience_state != "yes":
            continue
        if question["branch"] == "no_exp" and experience_state != "no":
            continue
        return question

    closing = by_id.get("closing")
    return closing if closing is not None else questions[0]
def score_answer(role_cfg: Dict[str, Any], question_id: str, transcript: str) -> int:
    """Score one transcribed answer on a 0-10 scale with simple heuristics.

    An empty answer scores 0. Otherwise the score starts at 3 and gains one
    point at each length threshold (4, 10 and 20 characters), one for a
    polite ending (です / ます), up to two for role keywords, and one when
    the answer fits the question asked (name, country or experience).
    """
    text = normalize_text(transcript)
    if not text:
        return 0

    length = len(text)
    points = 3 + sum(1 for threshold in (4, 10, 20) if length >= threshold)

    if "です" in text or "ます" in text:
        points += 1

    keyword_hits = 0
    for keyword in role_cfg["expected_keywords"]:
        if keyword in text:
            keyword_hits += 1
    points += min(2, keyword_hits)

    # Question-specific bonus: the answer contains what was asked for.
    if question_id == "name":
        on_topic = bool(extract_name(text))
    elif question_id == "country":
        on_topic = bool(extract_country(text))
    elif question_id == "experience_gate":
        on_topic = detect_experience_state(text, "unknown") != "unknown"
    else:
        on_topic = False
    if on_topic:
        points += 1

    return max(0, min(points, 10))
def build_feedback(score: int) -> str:
    """Return the Japanese feedback sentence for a 0-10 answer score."""
    # (minimum score, message), highest tier first.
    tiers = (
        (8, "とても良いです。自然に答えられています。"),
        (6, "良いです。もう少し長く、ていねいに話すともっと良くなります。"),
        (4, "意味は伝わりますが、短いです。完全な文で答えてみましょう。"),
    )
    for floor, message in tiers:
        if score >= floor:
            return message
    return "短すぎるか、内容が分かりにくいです。もう少し詳しく話してください。"
def decide_finish(role_cfg: Dict[str, Any], memory: Dict[str, Any], question_no: int, score: int) -> bool:
    """Decide whether the interview should end after the current answer.

    Ends when the question limit is reached, or - once the minimum number
    of questions has been asked - after two low scores in a row or a very
    low average over at least three answers. From question 8 on it also
    ends when the average is below 5.5. ``score`` is accepted but not used.
    """
    answers = memory.get("answers_so_far", [])
    if answers:
        avg = mean([entry.get("answer_score", 0) for entry in answers])
    else:
        avg = 0
    min_q = int(memory.get("min_questions", role_cfg["min_questions"]))
    max_q = int(memory.get("max_questions", role_cfg["max_questions"]))

    if question_no >= max_q:
        return True

    past_minimum = question_no >= min_q
    if past_minimum and memory.get("low_score_streak", 0) >= 2:
        return True
    if past_minimum and len(answers) >= 3 and avg < 3.5:
        return True

    # Candidates averaging 5.5 or more keep going; weaker ones stop around
    # the middle of the interview.
    return question_no >= 8 and avg < 5.5
def build_final_result(role_cfg: Dict[str, Any], memory: Dict[str, Any], force_fail: bool = False, summary_jp: str = "") -> Dict[str, Any]:
    """Assemble the end-of-interview report from the recorded answers.

    The overall score is the mean answer score scaled to 0-100. With
    ``force_fail`` it is capped at 39 and the verdict is always "FAIL";
    otherwise 60 or more is a "PASS". ``summary_jp`` overrides the default
    summary sentence when it is non-empty.
    """
    answers = list(memory.get("answers_so_far", []))
    per_answer = [int(entry.get("answer_score", 0)) for entry in answers]
    avg = mean(per_answer or [0])

    overall_score = int(round(avg * 10))
    overall_score = max(0, min(100, overall_score))
    if force_fail:
        overall_score = min(overall_score, 39)
    passed = overall_score >= 60 and not force_fail
    pass_fail = "PASS" if passed else "FAIL"

    strengths: List[str] = []
    weaknesses: List[str] = []
    tips: List[str] = []

    if memory.get("candidate_name"):
        strengths.append("Self introduction was understood.")
    else:
        weaknesses.append("Name was not clearly understood.")

    experience = memory.get("experience_state")
    if experience == "yes":
        strengths.append("Role experience was communicated.")
    elif experience == "no":
        weaknesses.append("No direct role experience was explained clearly.")

    if overall_score >= 70:
        strengths.append("Answers were mostly clear and relevant.")
    else:
        weaknesses.append("Several answers were too short or unclear.")

    # NOTE(review): indentation was lost in the source; the tips are assumed
    # to be added for every candidate, not only low scorers - confirm.
    tips.extend([
        "Use one or two extra sentences in each answer.",
        "Use polite endings like です and ます.",
        "Speak a little louder and more clearly.",
    ])

    closing = get_question(role_cfg, "closing")["jp"]

    # Sub-scores are the average shifted by a fixed offset, clamped to 1-10.
    sub_score_offsets = (
        ("fluency", 0),
        ("grammar", -1),
        ("confidence", 0),
        ("relevance", 1),
        ("role_fit", 0),
    )
    sub_scores = {
        label: clamp_int(round(avg + offset), 1, 10)
        for label, offset in sub_score_offsets
    }

    default_summary = f"{role_cfg['japanese_name']}の面接練習が完了しました。"
    report: Dict[str, Any] = {
        "candidate_name": memory.get("candidate_name"),
        "country_name": memory.get("country_name"),
        "age": memory.get("age"),
        "job_role": memory.get("job_role"),
        "job_role_en": role_cfg["english_name"],
        "job_role_jp": role_cfg["japanese_name"],
        "summary_jp": summary_jp or default_summary,
        "closing_message_jp": closing,
        "total_questions": len(answers),
        "overall_score": overall_score,
        "scores": sub_scores,
        "pass_fail": pass_fail,
        "strengths": strengths[:4],
        "weaknesses": weaknesses[:4],
        "tips": tips[:5],
        "answers": answers,
    }
    return report
def merge_memory(memory: Dict[str, Any], update: Dict[str, Any]) -> Dict[str, Any]:
    """Return a shallow copy of ``memory`` overlaid with the meaningful values of ``update``.

    Values equal to None, "", [] or {} are ignored, so an empty extraction
    never erases something already stored. Neither argument is modified;
    either may be None.
    """
    ignorable = (None, "", [], {})
    result = dict(memory or {})
    result.update(
        {key: value for key, value in (update or {}).items() if value not in ignorable}
    )
    return result
def normalize_text(text: str) -> str:
    """Collapse every run of whitespace to one space and trim both ends.

    None and "" both yield "".
    """
    # Splitting on whitespace and re-joining collapses and trims in one step.
    words = (text or "").split()
    return " ".join(words)
def safe_json_loads(value: str) -> Dict[str, Any]:
    """Parse ``value`` as a JSON object, returning {} for anything else.

    Invalid JSON, an empty or None value, and valid JSON that is not an
    object (list, number, string, ...) all yield an empty dict.
    """
    try:
        parsed = json.loads(value or "{}")
    except Exception:
        # Memory comes from the client; malformed input means "no memory".
        return {}
    if isinstance(parsed, dict):
        return parsed
    return {}
def extract_name(text: str) -> Optional[str]:
    """Strip common self-introduction phrases and return what is left as the name.

    Returns None when nothing remains or the remainder is longer than 30
    characters.
    """
    # Removed in this order: leading pronouns first, then sentence endings.
    fillers = ("私は", "わたしは", "ぼくは", "です", "と申します", "といいます")
    candidate = text
    for filler in fillers:
        candidate = candidate.replace(filler, "")
    candidate = candidate.strip(" 。")
    if 0 < len(candidate) <= 30:
        return candidate
    return None
def extract_country(text: str) -> Optional[str]:
    """Find the candidate's home country in an answer.

    Known country names are matched first, in list order. Otherwise the
    text in front of "から来ました" is used, with a leading first-person
    pronoun ("私は" etc.) removed.

    Args:
        text: Normalized answer text.

    Returns:
        The country name, or None when none can be identified.
    """
    # "インドネシア" must precede "インド": the shorter name is a prefix of the
    # longer one and would otherwise claim every Indonesian answer.
    known = [
        "ネパール", "日本", "インドネシア", "インド", "バングラデシュ", "スリランカ",
        "ベトナム", "中国", "ミャンマー", "フィリピン",
    ]
    for country in known:
        if country in text:
            return country

    match = re.search(r"(.+?)から来ました", text)
    if not match:
        return None
    candidate = match.group(1).strip(" 。")
    # Drop a leading pronoun so "私はタイから来ました" yields "タイ".
    candidate = re.sub(r"^(?:私|わたし|ぼく|僕)は", "", candidate).strip(" 。")
    return candidate or None
def extract_age(text: str) -> Optional[int]:
    """Return the age stated in ``text``, or None when no age is stated.

    Only a number directly followed by an age counter (歳 / 才 / さい) is
    accepted. A bare number is not enough: answers such as "3年勉強しました"
    or "2020年に来ました" contain digits that are not an age.

    Args:
        text: Answer text; None is treated as empty.

    Returns:
        The age as an int between 1 and 120, otherwise None.
    """
    # The lookbehind stops the match from starting in the middle of a longer
    # number.
    match = re.search(r"(?<!\d)(\d{1,3})\s*(?:歳|才|さい)", text or "")
    if not match:
        return None
    age = int(match.group(1))
    return age if 1 <= age <= 120 else None
def clamp_int(value: Any, low: int, high: int) -> int:
    """Round ``value`` to the nearest integer and clamp it into [low, high].

    Anything that cannot be converted to a number yields ``low``.
    """
    try:
        rounded = int(round(float(value)))
    except Exception:
        return low
    if rounded > high:
        rounded = high
    if rounded < low:
        rounded = low
    return rounded