| | import pandas as pd |
| | import yaml, datetime, hashlib, json, math |
| | from pathlib import Path |
| | from templating import get_env, render |
| | from models import CompanyMeta, ReportSections, RenderPayload |
| | from render import html_to_pdf, html_to_docx |
| | from charts import line_chart_base64, materiality_base64 |
| | from validators import validate_financials, validate_esg |
| | from typing import Dict, Any, List, Optional |
| |
|
# Japanese display labels keyed by the ESG metric identifier used in the
# input data.
DISPLAY_NAME = dict(
    co2_emissions="CO₂排出量",
    energy_renewable_ratio="再生可能エネルギー比率",
    female_management_ratio="女性管理職比率",
)
| |
|
def _sha256(p: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``p``.

    The file is opened in binary mode and consumed in 8192-byte reads, so
    the whole content is never held in memory at once.
    """
    digest = hashlib.sha256()
    with p.open("rb") as stream:
        while True:
            block = stream.read(8192)
            if block == b"":
                # An empty read marks end of file.
                break
            digest.update(block)
    return digest.hexdigest()
| |
|
def _require_columns(df: pd.DataFrame, required, name: str):
    """Raise ``ValueError`` when ``df`` lacks any column listed in ``required``.

    Args:
        df: Frame whose column labels are checked.
        required: Iterable of column labels that must all be present.
        name: Label for the data source, used only in the error message.

    Raises:
        ValueError: If at least one required column is absent. The message
            lists the absent columns and echoes ``required`` as an example.
    """
    absent = []
    for column in required:
        if column not in df.columns:
            absent.append(column)
    if not absent:
        return
    raise ValueError(f"{name} に必須列がありません: {absent}. 例: {required}")
| |
|
def load_company_meta(path: str) -> CompanyMeta:
    """Build a ``CompanyMeta`` from the YAML file at ``path``.

    The file is read as UTF-8 and parsed with ``yaml.safe_load``; the parsed
    mapping is expanded into keyword arguments for ``CompanyMeta``.
    """
    raw_text = Path(path).read_text(encoding="utf-8")
    fields = yaml.safe_load(raw_text)
    return CompanyMeta(**fields)
| |
|
def load_financials(path: str) -> pd.DataFrame:
    """Load the financials table and normalize its ``quarter`` and ``year`` columns.

    Files whose name ends in ``.xlsx`` (case-insensitive) are read with
    ``pd.read_excel``; anything else is read as CSV. The frame is then passed
    through ``validate_financials`` (project helper; its exact checks are not
    visible here) and its return value is used from then on.

    Normalization:
        * ``quarter`` becomes ``"Q"`` plus the first digit found in the cell,
          or a bare ``"Q"`` when the cell contains no digit.
        * ``year`` is coerced to numeric and stored as nullable ``Int64``;
          unparseable values become ``<NA>``.

    Args:
        path: Location of the ``.xlsx`` or CSV file.

    Returns:
        The validated, normalized DataFrame.
    """
    if str(path).lower().endswith(".xlsx"):
        df = pd.read_excel(path)
    else:
        df = pd.read_csv(path)
    df = validate_financials(df)

    # NFKC folds full-width letters, digits and the ideographic space into
    # their ASCII forms, so inputs such as "Ｑ１" or "q　2" normalize like
    # "Q1" / "Q2". Without it a full-width digit would still match \d but
    # produce a label that is not one of Q1..Q4.
    q = (
        df["quarter"]
        .astype(str)
        .str.normalize("NFKC")
        .str.upper()
        .str.replace(" ", "", regex=False)
    )
    df["quarter"] = "Q" + q.str.extract(r"(\d)", expand=False).fillna("")
    df["year"] = pd.to_numeric(df["year"], errors="coerce").astype("Int64")
    return df
| |
|
def load_esg(path: str) -> pd.DataFrame:
    """Load the ESG table and coerce its ``year`` column to nullable integers.

    A path ending in ``.xlsx`` (case-insensitive) is read with
    ``pd.read_excel``; any other path is read as CSV. The frame returned by
    ``validate_esg`` (project helper) is the one that gets normalized and
    returned.
    """
    is_excel = str(path).lower().endswith(".xlsx")
    reader = pd.read_excel if is_excel else pd.read_csv
    frame = validate_esg(reader(path))
    # Unparseable years become <NA> instead of raising.
    numeric_year = pd.to_numeric(frame["year"], errors="coerce")
    frame["year"] = numeric_year.astype("Int64")
    return frame
| |
|
def compute_kpi(fin_df: pd.DataFrame, fiscal_year: int):
    """Compute headline KPIs from the latest recognized quarter of ``fiscal_year``.

    The "latest" row is the one whose ``quarter`` label ranks highest among
    ``Q1``..``Q4``; rows with any other label are ignored. Revenue growth is
    measured against the latest recognized quarter of ``fiscal_year - 1``.

    Ratios fall back to ``0.0`` whenever they cannot be computed: the
    denominator is zero or missing, an operand is NaN/NA, the
    ``total_equity`` column is absent, or there is no usable prior-year row.
    The raw ``revenue`` / ``ebit`` / ``net_income`` values are returned as
    read (a missing cell is returned as NaN).

    Args:
        fin_df: Financials with ``year``, ``quarter``, ``revenue``, ``ebit``,
            ``net_income`` and optionally ``total_equity`` columns.
        fiscal_year: Year to report on.

    Returns:
        Dict with keys ``revenue``, ``ebit``, ``net_income``, ``ebit_margin``,
        ``roe`` and ``revenue_yoy`` (the last three in percent).

    Raises:
        ValueError: If ``fiscal_year`` has no rows, or none of its rows has a
            quarter label in ``Q1``..``Q4``.
    """
    order = {"Q1": 1, "Q2": 2, "Q3": 3, "Q4": 4}

    def latest_quarter(rows: pd.DataFrame) -> pd.DataFrame:
        """Return a 0- or 1-row frame holding the highest-ranked quarter."""
        if rows.empty:
            return rows
        ranked = rows.copy()
        ranked["q_order"] = ranked["quarter"].map(order)
        return ranked.sort_values("q_order").dropna(subset=["q_order"]).tail(1)

    def as_float(value) -> float:
        """Convert a cell to float, mapping NaN/None/pd.NA to NaN."""
        return float("nan") if pd.isna(value) else float(value)

    def usable(*values: float) -> bool:
        """True when none of the operands is NaN."""
        return not any(math.isnan(v) for v in values)

    years = set(fin_df["year"].dropna().astype(int))
    if fiscal_year not in years:
        raise ValueError(f"financials.csv に年度 {fiscal_year} のデータがありません。year 列を確認してください。")

    fy = fin_df[fin_df["year"] == fiscal_year].copy()
    if fy.empty:
        raise ValueError(f"年度 {fiscal_year} の四半期データが空です。quarter の表記(Q1~Q4)を確認してください。")

    latest = latest_quarter(fy)
    if latest.empty:
        raise ValueError(f"年度 {fiscal_year} の quarter が Q1〜Q4 として認識できません。例: Q4")

    prev = latest_quarter(fin_df[fin_df["year"] == fiscal_year - 1])

    revenue = as_float(latest["revenue"].iloc[0])
    ebit = as_float(latest["ebit"].iloc[0])
    net_income = as_float(latest["net_income"].iloc[0])
    equity = as_float(latest["total_equity"].iloc[0]) if "total_equity" in latest else 0.0

    # NaN is truthy and compares unequal to 0, so it must be rejected
    # explicitly or it would leak into the ratios as NaN.
    ebit_margin = (ebit / revenue * 100) if usable(ebit, revenue) and revenue else 0.0
    roe = (net_income / equity * 100) if usable(net_income, equity) and equity else 0.0

    revenue_yoy = 0.0
    if not prev.empty:
        prev_revenue = as_float(prev["revenue"].iloc[0])
        if usable(revenue, prev_revenue) and prev_revenue != 0:
            revenue_yoy = ((revenue / prev_revenue) - 1) * 100

    return {
        "revenue": revenue, "ebit": ebit, "net_income": net_income,
        "ebit_margin": ebit_margin, "roe": roe, "revenue_yoy": revenue_yoy,
    }
| |
|
def esg_table(df: pd.DataFrame, fiscal_year: int):
    """Return the ESG rows of ``fiscal_year`` as a list of display dicts.

    Each dict has the keys ``display``, ``value``, ``unit`` and ``notes``.
    ``display`` is looked up in ``DISPLAY_NAME`` and falls back to the raw
    metric identifier; ``unit`` and ``notes`` default to an empty string when
    the column does not exist.
    """
    selected = df[df["year"] == fiscal_year].copy()
    return [
        {
            "display": DISPLAY_NAME.get(record["metric"], record["metric"]),
            "value": record["value"],
            "unit": record.get("unit", ""),
            "notes": record.get("notes", ""),
        }
        for _, record in selected.iterrows()
    ]
| |
|
def build_sections(meta: CompanyMeta, kpi: dict, esg_rows: list, llm=None) -> ReportSections:
    """Assemble the narrative sections of the report.

    With a truthy ``llm``, both texts come from its
    ``generate_ceo_message`` and ``generate_risk_opportunity`` methods, called
    in that order with ``(meta, kpi, esg_rows)``. Without one, fixed Japanese
    fallback texts are used; the CEO message embeds ``meta.fiscal_year``.
    """
    if not llm:
        return ReportSections(
            ceo_message=f"{meta.fiscal_year}期は、売上成長と収益性の両立に注力しました。",
            risk_opportunity="主要リスクはマクロ環境と規制動向。機会は生成AI活用と脱炭素需要の拡大です。",
        )

    message = llm.generate_ceo_message(meta, kpi, esg_rows)
    outlook = llm.generate_risk_opportunity(meta, kpi, esg_rows)
    return ReportSections(ceo_message=message, risk_opportunity=outlook)
| |
|
def _s(x):
    """Return ``str(x)``, or an empty string when ``x`` is ``None`` or a float NaN."""
    is_missing = x is None or (isinstance(x, float) and math.isnan(x))
    return "" if is_missing else str(x)
| |
|
def _translate_payload_texts(payload: dict, lang: str, llm, glossary: Optional[Dict[str,str]]):
    """Translate the human-readable texts of ``payload`` in place.

    Nothing happens when ``llm`` is falsy or ``lang`` is ``"ja"``. Otherwise
    the following texts are collected, sent to ``llm.translate_texts`` in a
    single call, and written back in the same order:

        1. ``sections.ceo_message`` and ``sections.risk_opportunity``
        2. ``display`` and ``notes`` of every ``esg_table`` row
        3. ``meta.report_title``
        4. every entry of ``meta.material_topics`` (a missing or ``None``
           value is treated as "no topics")

    Args:
        payload: Render payload dict; mutated in place.
        lang: Target language code.
        llm: Object exposing ``translate_texts(texts, target_lang=, glossary=)``.
        glossary: Optional term mapping forwarded to the translator.

    Returns:
        The same ``payload`` object.

    Raises:
        ValueError: If the translator returns fewer items than were sent.
            The check runs before any write-back, so the payload is never
            left half translated.
    """
    if not llm or lang == "ja":
        return payload

    sections = payload["sections"]
    meta = payload["meta"]
    rows = payload["esg_table"]
    # `or []` also covers an explicit None, which cannot be iterated.
    topics = meta.get("material_topics") or []

    texts = [_s(sections["ceo_message"]), _s(sections["risk_opportunity"])]
    for row in rows:
        texts.append(_s(row.get("display", "")))
        texts.append(_s(row.get("notes", "")))
    texts.append(_s(meta["report_title"]))
    texts.extend(_s(topic) for topic in topics)

    translated = list(llm.translate_texts(texts, target_lang=lang, glossary=glossary or {}))
    if len(translated) < len(texts):
        # Fail before touching the payload; consuming a short iterator would
        # otherwise surface as a bare StopIteration mid-update.
        raise ValueError(
            f"翻訳結果の件数が不足しています: 期待 {len(texts)} 件 / 取得 {len(translated)} 件"
        )
    it = iter(translated)

    sections["ceo_message"] = next(it)
    sections["risk_opportunity"] = next(it)
    for row in rows:
        row["display"] = next(it)
        row["notes"] = next(it)
    meta["report_title"] = next(it)
    for index in range(len(topics)):
        topics[index] = next(it)

    return payload
| |
|
def _load_glossary(glossary_path: Optional[str]) -> Dict[str,str]:
    """Load the optional translation glossary from a YAML file.

    Loading is deliberately best-effort: a missing path, an unreadable file,
    a parse error, or a document that is not a mapping all yield ``{}`` so
    that the caller can proceed without a glossary. Failures are logged as
    warnings instead of being dropped silently.

    Args:
        glossary_path: Path to a UTF-8 YAML file, or a falsy value for "none".

    Returns:
        The parsed mapping, or an empty dict.
    """
    import logging

    if not glossary_path:
        return {}
    log = logging.getLogger(__name__)
    try:
        loaded = yaml.safe_load(Path(glossary_path).read_text(encoding="utf-8"))
    except Exception:
        # Best-effort by design: the glossary is optional.
        log.warning("glossary %r could not be loaded; continuing without it",
                    glossary_path, exc_info=True)
        return {}
    if loaded is None:
        return {}
    if not isinstance(loaded, dict):
        # A list or scalar would break callers that rely on dict methods.
        log.warning("glossary %r is not a YAML mapping; ignoring it", glossary_path)
        return {}
    return loaded
| |
|
def _load_benchmarks(benchmarks_path: Optional[str]) -> Dict[str,Any]:
    """Load the optional industry benchmark values from a YAML file.

    Loading is deliberately best-effort: a missing path, an unreadable file,
    a parse error, or a document that is not a mapping all yield ``{}`` so
    that the caller can proceed without benchmarks. Failures are logged as
    warnings instead of being dropped silently.

    Args:
        benchmarks_path: Path to a UTF-8 YAML file, or a falsy value for "none".

    Returns:
        The parsed mapping, or an empty dict.
    """
    import logging

    if not benchmarks_path:
        return {}
    log = logging.getLogger(__name__)
    try:
        loaded = yaml.safe_load(Path(benchmarks_path).read_text(encoding="utf-8"))
    except Exception:
        # Best-effort by design: benchmarks are optional.
        log.warning("benchmarks %r could not be loaded; continuing without them",
                    benchmarks_path, exc_info=True)
        return {}
    if loaded is None:
        return {}
    if not isinstance(loaded, dict):
        # A list or scalar cannot be looked up by metric name.
        log.warning("benchmarks %r is not a YAML mapping; ignoring it", benchmarks_path)
        return {}
    return loaded
| |
|
def _build_charts(fin: pd.DataFrame, esg: pd.DataFrame, fiscal_year: int) -> Dict[str,str]:
    """Render the three report charts via ``line_chart_base64``.

    Returns a dict with the keys ``revenue`` (quarterly revenue of
    ``fiscal_year``, ordered Q1..Q4), ``renewable`` and ``female`` (values of
    the ``energy_renewable_ratio`` / ``female_management_ratio`` metrics
    across all years, ordered by year).
    """
    quarter_rank = {"Q1":1,"Q2":2,"Q3":3,"Q4":4}

    # Quarterly revenue for the reporting year.
    year_rows = fin[fin["year"]==fiscal_year].copy()
    year_rows["q"] = year_rows["quarter"].map(quarter_rank)
    year_rows = year_rows.sort_values("q")
    quarter_labels = year_rows["quarter"].tolist()
    revenue_values = year_rows["revenue"].tolist()
    revenue_img = line_chart_base64(
        quarter_labels, revenue_values,
        xlabel="Quarter", ylabel="Revenue", title=f"Revenue Trend {fiscal_year}",
    )

    # Year-over-year history of the two ESG ratios; both series are
    # extracted before either chart is drawn.
    histories = []
    for metric_key in ("energy_renewable_ratio", "female_management_ratio"):
        history = esg[esg["metric"]==metric_key].sort_values("year")
        histories.append((history["year"].tolist(), history["value"].tolist()))
    (renewable_x, renewable_y), (female_x, female_y) = histories

    renewable_img = line_chart_base64(
        renewable_x, renewable_y, xlabel="Year", ylabel="%", title="Renewable Energy Ratio",
    )
    female_img = line_chart_base64(
        female_x, female_y, xlabel="Year", ylabel="%", title="Female Management Ratio",
    )

    return {"revenue": revenue_img, "renewable": renewable_img, "female": female_img}
| |
|
# Built-in fallback template, written to disk only when no usable template
# file exists in the templates directory.
_DEFAULT_TEMPLATE = """<!doctype html>
<html lang="{{ lang }}"><head><meta charset="utf-8"><title>{{ meta.report_title }}</title></head>
<body>
<h1>{{ meta.report_title }}({{ meta.fiscal_year }})</h1>
<p>{{ meta.company_name }} / Ticker: {{ meta.ticker }} / {{ meta.currency }}</p>
<h2>CEOメッセージ</h2><p>{{ sections.ceo_message }}</p>
<h2>KPI</h2><ul>
<li>売上: {{ kpi.revenue|round(0)|int }} {{ meta.currency }} / YoY {{ kpi.revenue_yoy|round(1) }}%</li>
<li>EBIT: {{ kpi.ebit|round(0)|int }} / Margin {{ kpi.ebit_margin|round(1) }}%</li>
<li>純利益: {{ kpi.net_income|round(0)|int }} / ROE {{ kpi.roe|round(1) }}%</li>
</ul>
<h2>チャート</h2>
<img src="{{ charts.revenue }}" style="max-width:520px"><br/>
<img src="{{ charts.renewable }}" style="max-width:520px">
<img src="{{ charts.female }}" style="max-width:520px">
<h2>ESGサマリー</h2>
<table border="1" cellspacing="0" cellpadding="6">
<tr><th>指標</th><th>値</th><th>単位</th><th>備考</th></tr>
{% for row in esg_table %}
<tr><td>{{ row.display }}</td><td>{{ row.value }}</td><td>{{ row.unit }}</td><td>{{ row.notes }}</td></tr>
{% endfor %}
</table>
<h2>リスク & 機会</h2><p>{{ sections.risk_opportunity }}</p>
{% if benchmark_summary %}<h2>ベンチマーク比較</h2><p>{{ benchmark_summary }}</p>{% endif %}
<footer>Generated on {{ generated_at }} | Template: {{ template_name }} | Tenant: {{ tenant }}</footer>
</body></html>"""


def _ensure_template(templates_dir, template_name: str) -> str:
    """Return a template name that exists in ``templates_dir``.

    If the requested template is missing, fall back to ``base.html.j2``,
    creating it from the built-in default only when it does not already exist.
    """
    tdir = Path(templates_dir)
    tdir.mkdir(parents=True, exist_ok=True)
    if (tdir / template_name).exists():
        return template_name
    fallback = tdir / "base.html.j2"
    if not fallback.exists():
        # Never overwrite an existing (possibly customized) base template.
        fallback.write_text(_DEFAULT_TEMPLATE, encoding="utf-8")
    return "base.html.j2"


def _benchmark_summary(kpi: dict, esg, benchmarks) -> str:
    """Build the one-line company-vs-industry comparison text.

    Best-effort: a metric whose data is missing or not numeric is skipped
    (and logged) without affecting the other metric. Returns ``""`` when
    there is nothing to report.
    """
    import logging

    if not benchmarks:
        return ""
    log = logging.getLogger(__name__)
    expected_errors = (KeyError, IndexError, TypeError, ValueError)
    msgs = []
    try:
        if "revenue_yoy" in benchmarks:
            msgs.append(f"売上YoY: 当社 {kpi['revenue_yoy']:.1f}% / 業界 {benchmarks['revenue_yoy']:.1f}%")
    except expected_errors:
        log.warning("revenue_yoy benchmark skipped", exc_info=True)
    try:
        if "renewable_energy_ratio" in benchmarks:
            # Most recent reported value of the company's renewable ratio.
            cur = esg[esg["metric"]=="energy_renewable_ratio"].sort_values("year").tail(1)["value"].iloc[0]
            msgs.append(f"再エネ比率: 当社 {cur:.1f}% / 業界 {benchmarks['renewable_energy_ratio']:.1f}%")
    except expected_errors:
        log.warning("renewable_energy_ratio benchmark skipped", exc_info=True)
    return " / ".join(msgs)


def generate_report(
    company_yaml,
    financials_csv,
    esg_csv,
    templates_dir,
    template_name="base.html.j2",
    out_html="output/report.html",
    out_pdf="output/report.pdf",
    out_docx="output/report.docx",
    lang="ja",
    llm=None,
    glossary_path: Optional[str] = None,
    benchmarks_path: Optional[str] = None,
    tenant: Optional[str] = None,
    rag_index_dir: Optional[str] = None,
):
    """Generate the report as HTML, PDF and DOCX from the given input files.

    Steps: make sure the output directories and a usable template exist, load
    and validate the inputs, compute KPIs / ESG rows / narrative sections /
    charts, optionally translate the texts, render the template, and write
    all three output files.

    Args:
        company_yaml: Path to the company metadata YAML.
        financials_csv: Path to the financials file (CSV or ``.xlsx``).
        esg_csv: Path to the ESG file (CSV or ``.xlsx``).
        templates_dir: Directory holding the templates; created if missing.
        template_name: Template to render. If it does not exist,
            ``base.html.j2`` is used instead (created from a built-in default
            when absent).
        out_html: Destination of the rendered HTML.
        out_pdf: Destination passed to ``html_to_pdf``.
        out_docx: Destination passed to ``html_to_docx``.
        lang: Output language; ``"ja"`` skips translation.
        llm: Optional object used for text generation and translation.
        glossary_path: Optional YAML glossary forwarded to the translator.
        benchmarks_path: Optional YAML file with industry benchmark values.
        tenant: Optional tenant label shown in the footer and run metadata.
        rag_index_dir: Accepted for interface compatibility; not used here.

    Returns:
        Tuple ``(out_html, out_pdf, out_docx, meta_json, html)`` where
        ``meta_json`` is a dict describing the inputs (SHA-256 digests),
        outputs, template and LLM usage, and ``html`` is the rendered markup.
    """
    # All three destinations may live in different directories.
    for target in (out_html, out_pdf, out_docx):
        Path(target).parent.mkdir(parents=True, exist_ok=True)

    template_name = _ensure_template(templates_dir, template_name)

    meta = load_company_meta(company_yaml)
    fin = load_financials(financials_csv)
    esg = load_esg(esg_csv)

    kpi = compute_kpi(fin, meta.fiscal_year)
    esg_rows = esg_table(esg, meta.fiscal_year)
    sections = build_sections(meta, kpi, esg_rows, llm=llm)

    charts = _build_charts(fin, esg, meta.fiscal_year)
    glossary = _load_glossary(glossary_path)
    benchmarks = _load_benchmarks(benchmarks_path)

    benchmark_summary = _benchmark_summary(kpi, esg, benchmarks)

    env = get_env(templates_dir)
    payload = RenderPayload(
        meta=meta, esg_table=esg_rows, kpi=kpi, sections=sections,
        generated_at=datetime.datetime.now().strftime("%Y-%m-%d %H:%M"),
        lang=lang
    ).model_dump()

    payload["charts"] = charts
    payload["template_name"] = template_name
    payload["tenant"] = tenant or ""
    # The template's benchmark section is guarded by this variable; without
    # it the computed summary would never be rendered.
    payload["benchmark_summary"] = benchmark_summary

    payload = _translate_payload_texts(payload, lang=lang, llm=llm, glossary=glossary)

    html = render(env, template_name, payload)
    Path(out_html).write_text(html, encoding="utf-8")
    html_to_pdf(html, out_pdf)
    html_to_docx(html, out_docx)

    # Run metadata: input digests make a generated report traceable to the
    # exact files it was built from.
    meta_json = {
        "inputs": {
            "company_yaml_sha": _sha256(Path(company_yaml)),
            "financials_csv_sha": _sha256(Path(financials_csv)),
            "esg_csv_sha": _sha256(Path(esg_csv)),
            "lang": lang,
            "tenant": tenant,
            "glossary_keys": list(glossary.keys()) if glossary else [],
            "benchmarks": benchmarks,
        },
        "outputs": {"html": out_html, "pdf": out_pdf, "docx": out_docx},
        "template": {"dir": templates_dir, "name": template_name},
        "generated_at": datetime.datetime.now().isoformat(timespec="seconds"),
        "usage": getattr(llm, "last_usage", {}) if llm else {},
        "benchmark_summary": benchmark_summary,
    }
    return out_html, out_pdf, out_docx, meta_json, html
| |
|