Nucha committed on
Commit
3607f17
·
verified ·
1 Parent(s): 36fa45a

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +13 -32
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import io
3
  import pandas as pd
@@ -8,6 +9,7 @@ import gradio as gr
8
  import matplotlib
9
  matplotlib.use("Agg")
10
  import matplotlib.pyplot as plt
 
11
 
12
  DEFAULT_CSV = "Trend_of_Top_10_Hard_Skills.csv" # Place at repo root
13
 
@@ -26,15 +28,12 @@ def prepare_dataframe(df):
26
  """
27
  if "YearMonth" not in df.columns:
28
  raise gr.Error("CSV must have a 'YearMonth' column.")
29
- # Parse YearMonth to datetime (coerce errors to NaT)
30
  dt = pd.to_datetime(df["YearMonth"], format="%Y-%m", errors="coerce")
31
- # If parsing fails, try general parse
32
  if dt.isna().any():
33
  dt = pd.to_datetime(df["YearMonth"], errors="coerce")
34
  df = df.copy()
35
  df["_dt"] = dt
36
  df = df.sort_values("_dt")
37
- # Coerce numeric columns
38
  for c in df.columns:
39
  if c not in ["YearMonth", "_dt"]:
40
  df[c] = pd.to_numeric(df[c], errors="coerce")
@@ -45,7 +44,7 @@ def list_skill_columns(df):
45
 
46
  def apply_smoothing(series, window):
47
  if window and window > 1:
48
- return series.rolling(window=window, min_periods=1, center=False).mean()
49
  return series
50
 
51
  def normalize_series(series, mode):
@@ -54,27 +53,19 @@ def normalize_series(series, mode):
54
  s = series.copy()
55
  if mode == "min-max (per skill)":
56
  mn, mx = s.min(), s.max()
57
- if mx > mn:
58
- return (s - mn) / (mx - mn)
59
- else:
60
- return s.fillna(0.0)
61
  if mode == "z-score (per skill)":
62
  mu, sd = s.mean(), s.std(ddof=0)
63
- if sd > 0:
64
- return (s - mu) / sd
65
- else:
66
- return s.fillna(0.0)
67
  return series
68
 
69
  def plot_lines(df, selected_skills, smoothing_window, normalize_mode, show_markers, y_label):
70
  if not selected_skills:
71
  raise gr.Error("Please select at least one skill.")
72
 
73
- # Prepare x
74
  x_dt = df["_dt"]
75
  x_labels = df["YearMonth"].astype(str).tolist()
76
 
77
- # Create single chart
78
  fig = plt.figure(figsize=(10, 5.5))
79
  ax = fig.add_subplot(111)
80
 
@@ -95,40 +86,31 @@ def plot_lines(df, selected_skills, smoothing_window, normalize_mode, show_marke
95
  ax.legend(loc="best")
96
  ax.grid(True, which="both", axis="both", alpha=0.35)
97
 
98
- # Format x ticks with Month labels
99
  ax.set_xticks(x_dt)
100
  ax.set_xticklabels(x_labels, rotation=45, ha="right")
101
 
102
  fig.tight_layout()
103
- import io as _io
104
- buf = _io.BytesIO()
105
  fig.savefig(buf, format="png", dpi=160, bbox_inches="tight")
106
  plt.close(fig)
107
  buf.seek(0)
108
- return buf # return BytesIO; Gradio Image can accept bytes-like
 
109
 
110
- def run(
111
- csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label
112
- ):
113
  df = load_dataframe(csv_file)
114
  df = prepare_dataframe(df)
115
  available = list_skill_columns(df)
116
-
117
- # Auto-select if user didn't pick yet
118
  if not selected_skills:
119
  selected_skills = available
120
-
121
- # Keep only existing skills
122
  selected_skills = [s for s in selected_skills if s in available]
123
-
124
- img_buf = plot_lines(df, selected_skills, smoothing_window, normalize_mode, show_markers, y_label)
125
- # Also return a preview table for selected columns
126
  preview_cols = ["YearMonth"] + selected_skills
127
  preview = df[preview_cols].reset_index(drop=True)
128
- return img_buf, gr.update(choices=available, value=selected_skills), preview
129
 
130
  with gr.Blocks(title="Hard Skills Trend Line Chart") as demo:
131
- gr.Markdown("# Hard Skills Trend — Line Chart\nUpload a CSV or place **Trend_of_Top_10_Hard_Skills.csv** in the repo root.")
132
 
133
  with gr.Row():
134
  with gr.Column(scale=1):
@@ -141,10 +123,9 @@ with gr.Blocks(title="Hard Skills Trend Line Chart") as demo:
141
  btn = gr.Button("Plot", variant="primary")
142
 
143
  with gr.Column(scale=1):
144
- out_img = gr.Image(label="Line Chart")
145
  out_table = gr.Dataframe(label="Data preview")
146
 
147
- # On click, return image, refresh skill choices, and table
148
  btn.click(
149
  fn=run,
150
  inputs=[csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label],
 
1
+ \
2
  import os
3
  import io
4
  import pandas as pd
 
9
  import matplotlib
10
  matplotlib.use("Agg")
11
  import matplotlib.pyplot as plt
12
+ from PIL import Image # <-- add PIL for returning PIL.Image
13
 
14
  DEFAULT_CSV = "Trend_of_Top_10_Hard_Skills.csv" # Place at repo root
15
 
 
28
  """
29
  if "YearMonth" not in df.columns:
30
  raise gr.Error("CSV must have a 'YearMonth' column.")
 
31
  dt = pd.to_datetime(df["YearMonth"], format="%Y-%m", errors="coerce")
 
32
  if dt.isna().any():
33
  dt = pd.to_datetime(df["YearMonth"], errors="coerce")
34
  df = df.copy()
35
  df["_dt"] = dt
36
  df = df.sort_values("_dt")
 
37
  for c in df.columns:
38
  if c not in ["YearMonth", "_dt"]:
39
  df[c] = pd.to_numeric(df[c], errors="coerce")
 
44
 
45
  def apply_smoothing(series, window):
46
  if window and window > 1:
47
+ return series.rolling(window=window, min_periods=1).mean()
48
  return series
49
 
50
  def normalize_series(series, mode):
 
53
  s = series.copy()
54
  if mode == "min-max (per skill)":
55
  mn, mx = s.min(), s.max()
56
+ return (s - mn) / (mx - mn) if mx > mn else s.fillna(0.0)
 
 
 
57
  if mode == "z-score (per skill)":
58
  mu, sd = s.mean(), s.std(ddof=0)
59
+ return (s - mu) / sd if sd > 0 else s.fillna(0.0)
 
 
 
60
  return series
61
 
62
  def plot_lines(df, selected_skills, smoothing_window, normalize_mode, show_markers, y_label):
63
  if not selected_skills:
64
  raise gr.Error("Please select at least one skill.")
65
 
 
66
  x_dt = df["_dt"]
67
  x_labels = df["YearMonth"].astype(str).tolist()
68
 
 
69
  fig = plt.figure(figsize=(10, 5.5))
70
  ax = fig.add_subplot(111)
71
 
 
86
  ax.legend(loc="best")
87
  ax.grid(True, which="both", axis="both", alpha=0.35)
88
 
 
89
  ax.set_xticks(x_dt)
90
  ax.set_xticklabels(x_labels, rotation=45, ha="right")
91
 
92
  fig.tight_layout()
93
+ buf = io.BytesIO()
 
94
  fig.savefig(buf, format="png", dpi=160, bbox_inches="tight")
95
  plt.close(fig)
96
  buf.seek(0)
97
+ # Return PIL Image instead of BytesIO to satisfy gr.Image postprocess
98
+ return Image.open(buf)
99
 
100
+ def run(csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label):
 
 
101
  df = load_dataframe(csv_file)
102
  df = prepare_dataframe(df)
103
  available = list_skill_columns(df)
 
 
104
  if not selected_skills:
105
  selected_skills = available
 
 
106
  selected_skills = [s for s in selected_skills if s in available]
107
+ img = plot_lines(df, selected_skills, smoothing_window, normalize_mode, show_markers, y_label)
 
 
108
  preview_cols = ["YearMonth"] + selected_skills
109
  preview = df[preview_cols].reset_index(drop=True)
110
+ return img, gr.update(choices=available, value=selected_skills), preview
111
 
112
  with gr.Blocks(title="Hard Skills Trend Line Chart") as demo:
113
+ gr.Markdown("# Hard Skills Trend — Line Chart\\nUpload a CSV or place **Trend_of_Top_10_Hard_Skills.csv** in the repo root.")
114
 
115
  with gr.Row():
116
  with gr.Column(scale=1):
 
123
  btn = gr.Button("Plot", variant="primary")
124
 
125
  with gr.Column(scale=1):
126
+ out_img = gr.Image(label="Line Chart", type="pil") # explicitly set type=pil
127
  out_table = gr.Dataframe(label="Data preview")
128
 
 
129
  btn.click(
130
  fn=run,
131
  inputs=[csv_file, selected_skills, smoothing_window, normalize_mode, show_markers, y_label],
requirements.txt CHANGED
@@ -2,3 +2,4 @@ gradio>=4.26.0
2
  pandas>=2.0.0
3
  matplotlib>=3.8
4
  numpy
 
 
2
  pandas>=2.0.0
3
  matplotlib>=3.8
4
  numpy
5
+ Pillow