Spaces:
Running
Running
| """VynFi × pm4py: Interactive Process Mining Demo""" | |
| import streamlit as st | |
| import pandas as pd | |
| from collections import Counter | |
# Page chrome: browser-tab settings, then the visible title and caption.
# NOTE(review): the "Γ" and "π" characters in the strings below look like
# mis-decoded UTF-8 (probably "×" and an emoji) — confirm against the
# original file before changing them.
page_settings = {
    "page_title": "VynFi Process Mining",
    "page_icon": "π",
    "layout": "wide",
}
st.set_page_config(**page_settings)
st.title("π VynFi Γ pm4py: Process Mining Demo")
st.caption("Synthetic supply-chain event log from [VynFi](https://vynfi.com)")
@st.cache_data
def load_data() -> pd.DataFrame:
    """Load the VynFi supply-chain event log as a cleaned DataFrame.

    Fetches the ``events`` configuration (``train`` split) of the
    ``VynFi/vynfi-supply-chain-ocel`` dataset, parses the ``timestamp``
    column into datetimes (unparseable values become NaT), drops the rows
    whose timestamp is NaT, and renames the three core columns.

    The function is wrapped in ``st.cache_data`` so that the dataset is not
    fetched and re-parsed each time Streamlit reruns the script.

    Returns:
        The event log with ``case_id`` renamed to ``case_id_pm``,
        ``activity_name`` to ``activity`` and ``timestamp`` to ``ts``.
    """
    # Imported inside the function so `datasets` is only imported when the
    # data is actually loaded.
    from datasets import load_dataset

    ds = load_dataset("VynFi/vynfi-supply-chain-ocel", "events", split="train")
    df = ds.to_pandas()
    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
    # Drop rows with NaT timestamps (pm4py can't handle them)
    df = df.dropna(subset=["timestamp"])
    # Rename for pm4py — use safe names without colons for display
    df = df.rename(columns={
        "case_id": "case_id_pm",
        "activity_name": "activity",
        "timestamp": "ts",
    })
    return df
# Event log shared by every view below.
df = load_data()

# Sidebar: headline counts for the loaded log.
sidebar = st.sidebar
sidebar.header("Dataset")
sidebar.metric("Events", f"{len(df):,}")
sidebar.metric("Activities", df["activity"].nunique())
sidebar.metric("Cases", df["case_id_pm"].nunique())

# One tab per view; tab1..tab4 are filled in by the sections that follow.
tab_labels = ["Process Model", "Variants", "Statistics", "Raw Data"]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)
with tab1:
    st.subheader("Directly-Follows Graph")
    # Best-effort rendering: any exception raised below is reported inside
    # the tab as a warning instead of stopping the script.
    try:
        import pm4py

        # Convert to pm4py format
        pm4py_columns = {
            "case_id_pm": "case:concept:name",
            "activity": "concept:name",
            "ts": "time:timestamp",
        }
        pm_df = df.rename(columns=pm4py_columns)
        event_log = pm4py.convert_to_event_log(pm_df)
        dfg, sa, ea = pm4py.discover_dfg(event_log)

        from pm4py.visualization.dfg import visualizer as dfg_vis

        # Frequency-annotated variant of the visualizer and its parameter keys.
        frequency = dfg_vis.Variants.FREQUENCY
        vis_params = frequency.value.Parameters
        gviz = dfg_vis.apply(
            dfg,
            log=event_log,
            variant=frequency,
            parameters={
                vis_params.START_ACTIVITIES: sa,
                vis_params.END_ACTIVITIES: ea,
                vis_params.FORMAT: "svg",
            },
        )
        # The serialized graph is decoded to text before being handed to st.image.
        svg_markup = dfg_vis.serialize(gviz).decode("utf-8")
        st.image(svg_markup, use_container_width=True)
    except Exception as e:
        st.warning(f"Could not render DFG: {e}")
        st.info("Try the Variants or Statistics tabs instead.")
with tab2:
    st.subheader("Process Variants")

    # Separator placed between the activities of a trace.
    # NOTE(review): "β" looks like a mis-decoded UTF-8 arrow (probably "→") —
    # confirm against the original file before changing it.
    trace_sep = " β "

    # A variant is the ordered tuple of activities of one case. The sort has
    # to be stable: the default quicksort may reorder events that share a
    # timestamp, which would make the variant of such a case arbitrary.
    ordered = df.sort_values("ts", kind="stable")
    variants = {
        cid: tuple(activities.tolist())
        for cid, activities in ordered.groupby("case_id_pm")["activity"]
    }
    vc = Counter(variants.values())
    total = len(variants)
    st.metric("Unique Variants", len(vc))

    # Table of the 20 most frequent variants with their share of all cases.
    rows = [
        {
            "Trace": trace_sep.join(trace),
            "Count": count,
            "Frequency": f"{count/total*100:.1f}%",
        }
        for trace, count in vc.most_common(20)
    ]
    # Explicit columns keep the table headers visible when there are no rows.
    variant_table = pd.DataFrame(rows, columns=["Trace", "Count", "Frequency"])
    st.dataframe(variant_table, use_container_width=True, hide_index=True)

    # The most common variant is highlighted; skipped when there are no cases.
    if vc:
        top_trace, top_count = vc.most_common(1)[0]
        st.info(
            f"**Happy path**: {trace_sep.join(top_trace)} "
            f"({top_count} cases, {top_count/total*100:.1f}%)"
        )
with tab3:
    # Bar chart: number of events per activity.
    st.subheader("Activity Frequency")
    activity_counts = df["activity"].value_counts().reset_index()
    ac = activity_counts.set_axis(["Activity", "Count"], axis=1)
    st.bar_chart(ac, x="Activity", y="Count")

    # Line chart: number of events per weekly bin of the "ts" column.
    st.subheader("Events Over Time")
    if "ts" in df.columns:
        by_timestamp = df.set_index("ts")
        events_per_week = by_timestamp.resample("W").size().reset_index()
        weekly = events_per_week.set_axis(["Week", "Events"], axis=1)
        st.line_chart(weekly, x="Week", y="Events")
with tab4:
    st.subheader("Raw Event Data")
    # Only the first 200 rows of the log are displayed.
    preview = df.head(200)
    st.dataframe(preview, use_container_width=True)
# Footer: a divider followed by a caption with the project links.
# NOTE(review): "Β·" looks like a mis-decoded "·" separator — confirm against
# the original file before changing it.
st.divider()
footer_links = (
    "[VynFi](https://vynfi.com)"
    " Β· [pm4py](https://pm4py.fit.fraunhofer.de/)"
    " Β· [Dataset](https://huggingface.co/datasets/VynFi/vynfi-supply-chain-ocel)"
)
st.caption(footer_links)