# datamining-final / gradioSankey.py
# nelson40514's picture
# Upload folder using huggingface_hub
# d737845 verified
from collections import Counter

import pandas as pd
import plotly.graph_objects as go
from PIL import Image
def _count_station_flows(trip_infos):
    """Count consecutive gantry-to-gantry transitions across trips.

    Each trip string is split on ``'; '`` into station records; the gantry ID
    of a record is its second ``'+'``-separated field.

    Args:
        trip_infos: Iterable of trip-information strings.

    Returns:
        A ``Counter`` mapping ``(source, target)`` gantry pairs to the number
        of times that transition occurs, keyed in first-seen order.

    Raises:
        IndexError: If a record of a multi-station trip contains no ``'+'``.
    """
    flow_counts = Counter()
    for trip_info in trip_infos:
        stations = trip_info.split('; ')
        if len(stations) < 2:
            # A single-station trip has no transition to record.
            continue
        # Parse every record once, then pair each gantry with its successor.
        gantries = [station.split('+')[1] for station in stations]
        flow_counts.update(zip(gantries, gantries[1:]))
    return flow_counts


def sankey(input):
    """Build a Sankey diagram of vehicle flows between gantries.

    Reads the trip CSV under ``./sankeyData``, keeps trips whose origin and
    destination gantry IDs both end in ``'S'``, counts transitions between
    consecutive gantries listed in ``TripInformation``, and renders the
    result with Plotly. The figure is written to ``./sankeyData/sankey.png``
    and ``./sankeyData/sankey.html``.

    Args:
        input: Unused; kept so the signature stays compatible with callers.

    Returns:
        The rendered PNG opened with ``PIL.Image.open``.
    """
    file_path = './sankeyData/TDCS_M06A_20240523_090000.csv'
    # NOTE(review): read_csv treats the first row as a header before the
    # names below overwrite it; if the file has no header row, that first
    # record is dropped -- confirm against the data file.
    data = pd.read_csv(file_path)

    # Assign the expected column names.
    data.columns = ['VehicleType', 'DetectionTime_O', 'GantryID_O', 'DetectionTime_D', 'GantryID_D', 'TripLength', 'TripEnd', 'TripInformation']

    # Parse the detection timestamps.
    data['DetectionTime_O'] = pd.to_datetime(data['DetectionTime_O'])
    data['DetectionTime_D'] = pd.to_datetime(data['DetectionTime_D'])

    # Keep only trips whose origin and destination gantry IDs end in 'S'.
    # (A further restriction to origin IDs starting with '01' is currently
    # disabled.) na=False makes rows with a missing gantry ID count as
    # non-matching instead of putting NaN into the boolean mask.
    for column in ('GantryID_O', 'GantryID_D'):
        data = data[data[column].str.endswith('S', na=False)]

    # Remove duplicate records.
    data_cleaned = data.drop_duplicates()

    # Count station-to-station flows; rows without trip information cannot
    # contribute a transition, so they are skipped.
    flow_counts = _count_station_flows(data_cleaned['TripInformation'].dropna())

    # Unpack the counts into the parallel lists the Sankey trace expects.
    sources = [source for source, _ in flow_counts]
    targets = [target for _, target in flow_counts]
    values = list(flow_counts.values())

    # Sort the node labels so the node order is reproducible between runs.
    all_nodes = sorted(set(sources) | set(targets))
    node_indices = {node: idx for idx, node in enumerate(all_nodes)}

    # Create the Sankey figure.
    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=all_nodes,
        ),
        link=dict(
            source=[node_indices[source] for source in sources],
            target=[node_indices[target] for target in targets],
            value=values,
        ),
    )])
    fig.update_layout(title_text="車輛在不同站點之間的流動 Sankey 圖", font_size=10)

    fig.write_image("./sankeyData/sankey.png")
    fig.write_html("./sankeyData/sankey.html")
    return Image.open("./sankeyData/sankey.png")