| import re |
| from typing import Optional, Dict |
|
|
# Multiplier that converts an amount expressed in each reporting unit into yen.
_UNIT_TABLE: Dict[str, float] = {
    "円": 1.0,
    "千円": 1_000.0,
    "百万円": 1_000_000.0,
    "千万円": 10_000_000.0,
    "億円": 100_000_000.0,
}

# "単位", then any run of separators (ASCII colon, full-width colon U+FF1A,
# whitespace, the particle "は"), then one of the labels from _UNIT_TABLE.
# Labels are listed longest first so that a label which is a prefix of a
# longer one could never shadow it if the table grows.
_UNIT_PATTERN = re.compile(
    r"単位[:\uFF1A\sは]*("
    + "|".join(
        re.escape(label)
        for label in sorted(_UNIT_TABLE, key=len, reverse=True)
    )
    + r")"
)


def detect_unit(text: str) -> Optional[str]:
    """Return the reporting unit declared most often in *text*.

    Recognises declarations such as "単位:千円", "(単位: 百万円)" or
    "単位は億円", where the label is one of the keys of ``_UNIT_TABLE``.
    Surrounding parentheses and trailing text (e.g. "未満切捨て") are
    ignored because only the label itself is matched.

    Args:
        text: Raw text to scan; may be empty or None.

    Returns:
        The most frequently declared unit label. Ties are resolved in
        favour of the label that appears first in the text. None when
        *text* is empty or contains no recognised declaration.
    """
    if not text:
        return None

    # Dicts keep insertion order, so the first-seen label comes first.
    counts: Dict[str, int] = {}
    for label in _UNIT_PATTERN.findall(text):
        counts[label] = counts.get(label, 0) + 1
    if not counts:
        return None

    # max() returns the first maximal key, i.e. the earliest label on a tie.
    return max(counts, key=counts.__getitem__)
|
|
def unit_factor(unit_label: Optional[str]) -> float:
    """Return the yen multiplier registered for *unit_label*.

    Args:
        unit_label: A unit label such as a key of ``_UNIT_TABLE``, or None.

    Returns:
        The multiplier stored in ``_UNIT_TABLE`` for the label, or 1.0
        (no scaling) when the label is None or not in the table.
    """
    return _UNIT_TABLE.get(unit_label, 1.0)
|
|
def _to_yen(value, factor: float) -> Optional[float]:
    """Convert one extracted value to yen; return None if it is not numeric."""
    if value is None:
        return None
    if isinstance(value, str):
        # Drop thousands separators (ASCII and full-width comma) so that
        # "1,234" is parsed instead of being discarded.
        cleaned = value.strip().replace(",", "").replace("\uFF0C", "")
        if cleaned in ("", "null"):
            return None
        # U+25B3 / U+25B2 (white / black triangle) mark negative amounts in
        # Japanese financial statements.
        if cleaned[0] in "\u25B3\u25B2":
            cleaned = "-" + cleaned[1:].lstrip()
        value = cleaned
    try:
        return float(value) * factor
    except (TypeError, ValueError, OverflowError):
        # Best effort: anything that cannot be read as a number becomes None.
        return None


def scale_financials_yen(fin: dict, factor: float) -> dict:
    """Return a copy of *fin* with the statement amounts converted to yen.

    Only the values inside the "balance_sheet", "income_statement" and
    "cash_flows" sections are scaled; every other key is carried over
    unchanged (dict values are shallow-copied). The input is not mutated.

    Args:
        fin: Extracted financial data whose amounts are in the source
            document's unit. Returned as-is when empty or None.
        factor: Multiplier that converts the source unit into yen.

    Returns:
        A new dict where each section value is ``float(value) * factor``.
        Values that are None, "", "null" or not parseable as a number
        become None. Numeric strings may contain thousands separators and
        a leading negative-marker triangle.
    """
    if not fin:
        return fin

    out = {
        key: (value.copy() if isinstance(value, dict) else value)
        for key, value in fin.items()
    }
    for sec in ("balance_sheet", "income_statement", "cash_flows"):
        section = out.get(sec)
        if not isinstance(section, dict):
            continue
        # update() keeps the section's own mapping type and key order.
        section.update(
            {key: _to_yen(value, factor) for key, value in section.items()}
        )
    return out
|
|