File size: 2,980 Bytes
bc2b09a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from __future__ import annotations
import pandas as pd
from typing import Optional, List, Dict
from validation import _prepare_generic, _nfkc

# Column specification handed to `_prepare_generic` by `prepare_transactions`.
# Keys are the canonical transaction field names; each value lists other
# header spellings for that field (presumably accepted as aliases when
# matching input columns -- confirm against `_prepare_generic`).
TX_EXPECTED = {
    "transaction_id": [
        "txn_id",
        "transactionid",
        "id",
        "tx_id",
    ],
    "customer_id": [
        "cust_id",
        "user_id",
        "client_id",
    ],
    "amount": [
        "amt",
        "amount_inr",
        "value",
    ],
    "timestamp": [
        "date",
        "event_time",
        "created_at",
        "tx_time",
    ],
    "merchant_category": [
        "mcc",
        "merchant_cat",
        "category",
    ],
}

def prepare_transactions(df: pd.DataFrame):
    """Prepare a raw transactions frame using the transaction column spec.

    Delegates to ``_prepare_generic`` with ``TX_EXPECTED`` and returns
    whatever that helper returns, unchanged.
    """
    prepared = _prepare_generic(df, TX_EXPECTED)
    return prepared

def detect_transactions(
    clean_df: pd.DataFrame,
    colmap: Dict[str, str],
    high_risk_mcc: Optional[List[str]] = None,
) -> tuple[pd.DataFrame, str]:
    """Flag transactions that trip at least one built-in risk rule.

    Rules (each contributes one label to ``risk_reason``):

    * ``large_amount``    -- amount greater than 10000.
    * ``negative_amount`` -- amount below zero.
    * ``mcc_high_risk``   -- merchant category (stripped, upper-cased, spaces
      replaced by ``_``) is in the high-risk set. Only evaluated when
      ``colmap`` contains ``merchant_category``.
    * ``daily_sum>50k``   -- the customer's total amount on the same calendar
      date exceeds 50000. Only evaluated when ``colmap`` contains
      ``timestamp``.

    Args:
        clean_df: Transactions frame. It is never modified.
        colmap: Maps canonical field names (``customer_id``, ``amount``,
            ``timestamp``, ``merchant_category``) to the actual column names
            in ``clean_df``. ``customer_id`` and ``amount`` are mandatory.
        high_risk_mcc: Extra category names to treat as high risk, added to
            the built-in set after normalisation.

    Returns:
        ``(flagged, stats)``. ``flagged`` holds the rows that matched any
        rule plus a ``risk_reason`` column with the matching labels, sorted
        and joined by ``", "``; the column is present even when no row is
        flagged. ``stats`` is a one-line summary. When a mandatory mapping is
        missing, an empty frame and an explanatory message are returned.
    """
    high_risk = {"HIGH_RISK", "GAMBLING", "CRYPTO_EXCHANGE", "ESCORTS", "CASINO"}
    if high_risk_mcc:
        high_risk.update(
            _nfkc(code).strip().upper().replace(" ", "_") for code in high_risk_mcc
        )
    if not all(key in colmap for key in ("customer_id", "amount")):
        return (
            pd.DataFrame(),
            "Required columns missing for detection (need at least customer_id, amount).",
        )

    amount = clean_df[colmap["amount"]]
    # One boolean Series per rule, keyed by the label reported to the caller.
    rule_hits = {
        "large_amount": amount > 10000,
        "negative_amount": amount < 0,
    }

    if "merchant_category" in colmap:
        # Normalise the column the same way the caller-supplied list is
        # normalised (strip -> upper -> spaces to underscores) so both sides
        # of the membership test agree.
        categories = (
            clean_df[colmap["merchant_category"]]
            .astype(str)
            .str.strip()
            .str.upper()
            .str.replace(" ", "_", regex=False)
        )
        rule_hits["mcc_high_risk"] = categories.isin(high_risk)

    if "timestamp" in colmap:
        stamps = clean_df[colmap["timestamp"]]
        if not pd.api.types.is_datetime64_any_dtype(stamps):
            # Tolerate string/object timestamps; unparseable values become
            # NaT and simply never take part in the daily-total rule.
            stamps = pd.to_datetime(stamps, errors="coerce")
        daily_total = amount.groupby(
            [clean_df[colmap["customer_id"]], stamps.dt.date]
        ).transform("sum")
        rule_hits["daily_sum>50k"] = daily_total > 50000

    # Sorted labels reproduce the alphabetical order of the reason string.
    labels = sorted(rule_hits)
    # Plain boolean arrays: positional, so duplicate index labels are safe,
    # and missing comparisons (NaN / NA) count as "rule not hit".
    columns = [
        rule_hits[label].fillna(False).astype(bool).to_numpy() for label in labels
    ]
    any_hit = columns[0]
    for column in columns[1:]:
        any_hit = any_hit | column

    # Every selected row has at least one True, so no reason is ever empty.
    reasons = [
        ", ".join(label for label, hit in zip(labels, row) if hit)
        for row in zip(*(column[any_hit] for column in columns))
    ]
    # Boolean indexing returns a new frame, so the input is left untouched.
    flagged = clean_df[any_hit].assign(risk_reason=reasons)
    stats = f"Transactions flagged: {len(flagged)} of {len(clean_df)}."
    return flagged, stats