yokoha committed (verified)

Commit 58e2c34 · 1 Parent(s): c1815a9

Update app.py

Files changed (1)
  app.py  +150 -162
app.py CHANGED
@@ -1,168 +1,156 @@
- import pandas as pd
- import numpy as np
- import datetime as dt
- import warnings
-
- from statsmodels.tsa.holtwinters import ExponentialSmoothing
- import plotly.graph_objects as go
  import gradio as gr
-
- warnings.filterwarnings("ignore")
-
- # -----------------------------
- # CONFIG
- # -----------------------------
- DATA_FILE = "202503-domae.parquet"  # must be in the same directory as this script
- FORECAST_END_YEAR = 2030  # last forecast year (through December)
- SEASONAL_PERIODS = 12  # monthly seasonality
-
- # -----------------------------
- # 1. Data loading & preprocessing
- # -----------------------------
-
- def load_data(path: str) -> pd.DataFrame:
-     """Parquet → monthly pivot table (DateIndex; columns: items, values: prices)."""
-     df = pd.read_parquet(path)
-
-     # Create/normalize the date column (two input formats supported)
-     if "date" in df.columns:
-         df["date"] = pd.to_datetime(df["date"])
-     elif "PRCE_REG_MM" in df.columns:
-         df["date"] = pd.to_datetime(df["PRCE_REG_MM"].astype(str), format="%Y%m")
-     else:
-         raise ValueError("Unsupported date column format.")
-
-     # Standardize column names
-     item_col = "PDLT_NM" if "PDLT_NM" in df.columns else "item"
-     price_col = "AVRG_PRCE" if "AVRG_PRCE" in df.columns else "price"
-
-     monthly = (
-         df.groupby(["date", item_col])[price_col]
-         .mean()
-         .reset_index()
-     )
-     pivot = (
-         monthly
-         .pivot(index="date", columns=item_col, values=price_col)
-         .sort_index()
-     )
-     # Align to month-start (MS) frequency
-     pivot.index = pd.to_datetime(pivot.index).to_period("M").to_timestamp()
-     return pivot
-
- pivot = load_data(DATA_FILE)
- products = pivot.columns.tolist()
-
- # -----------------------------
- # 2. Custom model definition (Holt-Winters + fallback)
- # -----------------------------
-
- def _fit_forecast(series: pd.Series) -> pd.Series:
-     """Monthly series → forecast from 2025-04 through FORECAST_END_YEAR-12."""
-     # Ensure Monthly Start frequency
-     series = series.asfreq("MS")
-
-     # Compute the forecast horizon
-     last_date = series.index[-1]
-     end_date = dt.datetime(FORECAST_END_YEAR, 12, 1)
-     horizon = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
-     if horizon <= 0:
-         return pd.Series(dtype=float)
-
      try:
-         model = ExponentialSmoothing(
-             series,
-             trend="add",
-             seasonal="mul",
-             seasonal_periods=SEASONAL_PERIODS,
-             initialization_method="estimated",
-         )
-         res = model.fit(optimized=True)
-         fc = res.forecast(horizon)
-     except Exception:
-         # If Holt-Winters fitting fails, fall back to a simple CAGR-based projection
-         growth = series.pct_change().fillna(0).mean()
-         fc = pd.Series(
-             [series.iloc[-1] * (1 + growth) ** i for i in range(1, horizon + 1)],
-             index=pd.date_range(
-                 series.index[-1] + pd.DateOffset(months=1),
-                 periods=horizon,
-                 freq="MS",
-             ),
-         )
-     return fc
-
- # Pre-build the full series (history + forecast) for each item → faster app responses
- FULL_SERIES = {}
- FORECASTS = {}
- for item in products:
-     hist = pivot[item].dropna()
-     fc = _fit_forecast(hist)
-     FULL_SERIES[item] = pd.concat([hist, fc])
-     FORECASTS[item] = fc
-
- # -----------------------------
- # 3. Tomorrow's price prediction
- # -----------------------------
-
- today = dt.date.today()
- tomorrow = today + dt.timedelta(days=1)
-
- def build_tomorrow_df() -> pd.DataFrame:
-     """Return a DataFrame of expected prices for tomorrow (daily granularity)."""
-     preds = {}
-     for item, series in FULL_SERIES.items():
-         # Daily linear interpolation
-         daily = series.resample("D").interpolate("linear")
-         preds[item] = round(daily.loc[tomorrow], 2) if tomorrow in daily.index else np.nan
-     return (
-         pd.DataFrame.from_dict(preds, orient="index", columns=[f"Tomorrow ({tomorrow}) expected price (KRW)"])
-         .sort_index()
      )
-
- tomorrow_df = build_tomorrow_df()
-
- # -----------------------------
- # 4. Visualization
- # -----------------------------
-
- def plot_item(item: str):
-     hist = pivot[item].dropna().asfreq("MS")
-     fc = FORECASTS[item]
-
-     fig = go.Figure()
-     fig.add_trace(go.Scatter(x=hist.index, y=hist.values, mode="lines", name="Historical"))
-     fig.add_trace(go.Scatter(x=fc.index, y=fc.values, mode="lines", name="Forecast"))
-     fig.update_layout(
-         title=f"{item} – Monthly Avg Price (1996-2025) & Forecast (2025-04 → 2030-12)",
-         xaxis_title="Date",
-         yaxis_title="Price (KRW)",
-         legend=dict(orientation="h", y=1.02, x=0.01),
-         margin=dict(l=40, r=20, t=60, b=40),
      )
-     return fig
-
- # -----------------------------
- # 5. Gradio UI
- # -----------------------------
- with gr.Blocks(title="Wholesale Price Forecast App") as demo:
-     gr.Markdown("## 📈 Wholesale Price Forecast Dashboard (1996-2030)")
-
-     # Item selection → chart update
-     item_dd = gr.Dropdown(products, value=products[0], label="Select item")
-     chart_out = gr.Plot(label="Price trend")
-
-     # Tomorrow's price table (fixed at app start)
-     gr.Markdown(f"### Expected prices per item for tomorrow ({tomorrow}) (KRW)")
-     tomorrow_table = gr.Dataframe(tomorrow_df, interactive=False, height=400)
-
-     def update_chart(product):
-         return plot_item(product)
-
-     item_dd.change(update_chart, inputs=item_dd, outputs=chart_out, queue=False)

- # -----------------------------
- # 6. Script entry point
- # -----------------------------
  if __name__ == "__main__":
-     demo.launch()
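The horizon arithmetic in the removed `_fit_forecast` counts whole months from the last observed month up to December of `FORECAST_END_YEAR`. A minimal check of that arithmetic, assuming the last observation is 2025-03 (consistent with the `202503-domae.parquet` file name):

```python
import datetime as dt

FORECAST_END_YEAR = 2030
last_date = dt.datetime(2025, 3, 1)               # assumed last month in the data
end_date = dt.datetime(FORECAST_END_YEAR, 12, 1)

# Same month-difference formula as the removed _fit_forecast()
horizon = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
print(horizon)  # 69 monthly steps, i.e. 2025-04 through 2030-12
```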
  import gradio as gr
+ import pandas as pd
+ import os
+ import tempfile
+ import chardet
+
+ def detect_encoding(file_path):
+     """
+     Function to detect file encoding
+     """
+     with open(file_path, 'rb') as f:
+         result = chardet.detect(f.read())
+     return result['encoding']
+
+ def merge_csv_files(files):
+     """
+     Function to merge multiple CSV files into one
+
+     Args:
+         files: List of uploaded CSV files
+
+     Returns:
+         Path to the merged CSV file and status message
+     """
+     if not files or len(files) == 0:
+         return None, "No files were uploaded. Please select CSV files to merge."
+
+     if len(files) > 30:
+         return None, "Maximum 30 files can be merged at once."
+
      try:
+         # Read all files into a DataFrame list
+         dataframes = []
+         for file in files:
+             # Detect file encoding
+             encoding = detect_encoding(file.name)
+             try:
+                 df = pd.read_csv(file.name, encoding=encoding)
+             except UnicodeDecodeError:
+                 # Try other encodings if detected encoding fails
+                 encodings_to_try = ['cp949', 'euc-kr', 'latin1', 'ISO-8859-1']
+                 for enc in encodings_to_try:
+                     try:
+                         df = pd.read_csv(file.name, encoding=enc)
+                         break
+                     except UnicodeDecodeError:
+                         continue
+                 else:
+                     return None, f"Could not determine encoding for '{os.path.basename(file.name)}'."
+
+             dataframes.append(df)
+
+         # Merge all DataFrames
+         if dataframes:
+             merged_df = pd.concat(dataframes, ignore_index=True)
+
+             # Save to a temporary file
+             with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as tmp:
+                 output_path = tmp.name
+
+             # Save merged data in Excel-compatible format (UTF-8 with BOM)
+             merged_df.to_csv(output_path, index=False, encoding='utf-8-sig')
+
+             return output_path, f"Successfully merged {len(files)} files. Please open with UTF-8 encoding in Excel."
+         else:
+             return None, "No data to merge."
+
+     except Exception as e:
+         return None, f"Error occurred: {str(e)}"
+
+ # Create a stylish Gradio interface
+ with gr.Blocks(title="CSVFusion") as app:
+     gr.Markdown(
+         """
+         # 📊 CSVFusion: Intelligent File Merger
+
+         *Seamlessly combine multiple CSV files into one unified dataset*
+
+         ---
+         """
      )
+
+     with gr.Row():
+         with gr.Column(scale=2):
+             gr.Markdown("""
+             ### How to use CSVFusion:
+             1. Upload up to 30 CSV files using the panel on the right
+             2. Click the "Merge Files" button
+             3. Download your consolidated CSV file
+
+             ### Features:
+             - Automatic encoding detection
+             - Handles various CSV formats
+             - Excel-compatible output (UTF-8)
+             - Preserves all data columns
+             """)
+
+         with gr.Column(scale=3):
+             input_files = gr.File(
+                 file_count="multiple",
+                 label="Upload CSV Files (Max 30)",
+                 file_types=[".csv"],
+                 elem_id="file_upload"
+             )
+
+     with gr.Row():
+         merge_button = gr.Button("Merge Files", variant="primary", size="lg")
+
+     with gr.Row():
+         with gr.Column():
+             status = gr.Textbox(label="Status", placeholder="Ready to merge your files...")
+         with gr.Column():
+             output_file = gr.File(label="Download Merged CSV")
+
+     # Add custom CSS for better visual appeal
+     gr.HTML("""
+     <style>
+     .gradio-container {
+         background: linear-gradient(to right, #f9f9f9, #ffffff);
+         border-radius: 12px;
+     }
+     #file_upload {
+         border: 2px dashed #3498db;
+         border-radius: 8px;
+         padding: 20px;
+         transition: all 0.3s;
+     }
+     #file_upload:hover {
+         border-color: #2980b9;
+         box-shadow: 0 0 10px rgba(52, 152, 219, 0.3);
+     }
+     .footer {
+         text-align: center;
+         margin-top: 30px;
+         color: #7f8c8d;
+         font-size: 0.9em;
+     }
+     </style>
+     """)
+
+     # Add footer
+     gr.HTML("""
+     <div class="footer">
+         <p>CSVFusion © 2025 - A powerful tool for data professionals</p>
+     </div>
+     """)
+
+     merge_button.click(
+         fn=merge_csv_files,
+         inputs=[input_files],
+         outputs=[output_file, status]
      )

+ # Run the app
  if __name__ == "__main__":
+     app.launch()
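For a quick check outside the Gradio UI, `merge_csv_files` can be called directly: it only needs objects that expose a `.name` attribute pointing at a CSV on disk (the shape of object the app expects from `gr.File`). A minimal sketch, with hypothetical file names standing in for real uploads:

```python
from types import SimpleNamespace

from app import merge_csv_files  # assumes the script above is saved as app.py

# Hypothetical inputs -- replace with paths to real CSV files on disk
uploads = [SimpleNamespace(name="prices_jan.csv"), SimpleNamespace(name="prices_feb.csv")]

output_path, message = merge_csv_files(uploads)
print(message)
if output_path:
    print("Merged CSV written to:", output_path)
```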