ehsk committed on
Commit
445b175
1 Parent(s): 5f919c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -9
app.py CHANGED
@@ -8,6 +8,7 @@ st.set_page_config(layout="wide")
8
  DATA_FILE = "data/aclanthology2016-23_specter2_base.json"
9
  THEMES = {"cluster": "fall", "year": "mint", "source": "phase"}
10
 
 
11
  def load_df(data_file: os.PathLike):
12
  df = pd.read_json(data_file, orient="records")
13
  df["x"] = df["point2d"].apply(lambda x: x[0])
@@ -26,6 +27,8 @@ def load_dataframe():
26
 
27
 
28
  DF = load_dataframe()
 
 
29
 
30
  with st.sidebar:
31
  venues = st.multiselect(
@@ -35,7 +38,9 @@ with st.sidebar:
35
  )
36
 
37
  start_year, end_year = st.select_slider(
38
- "Publication year", options=("2020", "2021", "2022", "2023"), value=("2020", "2023")
 
 
39
  )
40
  author_names = st.text_input("Author names (separated by comma)")
41
 
@@ -43,32 +48,34 @@ with st.sidebar:
43
 
44
  start_year = int(start_year)
45
  end_year = int(end_year)
46
- df = DF[(DF["year"] >= start_year) & (DF["year"] <= end_year)]
47
  if 0 < len(venues) < 4:
48
  selected_venues = [v.lower() for v in venues]
49
- df = df[df["source"].isin(selected_venues)]
50
  elif not venues:
51
  st.write(":red[Please select a venue]")
52
 
53
  if author_names:
54
  authors = [a.strip() for a in author_names.split(",")]
55
- author_mask = df.authors.apply(
56
  lambda row: all(any(re.match(rf".*{a}.*", x, re.IGNORECASE) for x in row) for a in authors)
57
  )
58
- df = df[author_mask]
59
 
60
  if title:
61
- df = df[df.title.apply(lambda x: title.lower() in x.lower())]
62
 
63
- st.write(f"Number of points: {df.shape[0]}")
 
64
 
65
  color = st.selectbox("Color", ("cluster", "year", "source"))
66
 
67
 
68
  fig = px.scatter(
69
- df,
70
  x="x",
71
  y="y",
 
72
  color=color,
73
  width=1000,
74
  height=800,
@@ -86,4 +93,4 @@ fig.update_layout(
86
  fig.update_xaxes(title="")
87
  fig.update_yaxes(title="")
88
 
89
- st.plotly_chart(fig, use_container_width=True)
 
8
  DATA_FILE = "data/aclanthology2016-23_specter2_base.json"
9
  THEMES = {"cluster": "fall", "year": "mint", "source": "phase"}
10
 
11
+
12
  def load_df(data_file: os.PathLike):
13
  df = pd.read_json(data_file, orient="records")
14
  df["x"] = df["point2d"].apply(lambda x: x[0])
 
27
 
28
 
29
  DF = load_dataframe()
30
+ DF["opacity"] = 0.04
31
+ min_year, max_year = DF["year"].min(), DF["year"].max()
32
 
33
  with st.sidebar:
34
  venues = st.multiselect(
 
38
  )
39
 
40
  start_year, end_year = st.select_slider(
41
+ "Publication year",
42
+ options=[str(y) for y in range(min_year, max_year + 1)],
43
+ value=(str(min_year), str(max_year)),
44
  )
45
  author_names = st.text_input("Author names (separated by comma)")
46
 
 
48
 
49
  start_year = int(start_year)
50
  end_year = int(end_year)
51
+ df_mask = (DF["year"] >= start_year) & (DF["year"] <= end_year)
52
  if 0 < len(venues) < 4:
53
  selected_venues = [v.lower() for v in venues]
54
+ df_mask = df_mask & DF["source"].isin(selected_venues)
55
  elif not venues:
56
  st.write(":red[Please select a venue]")
57
 
58
  if author_names:
59
  authors = [a.strip() for a in author_names.split(",")]
60
+ author_mask = DF.authors.apply(
61
  lambda row: all(any(re.match(rf".*{a}.*", x, re.IGNORECASE) for x in row) for a in authors)
62
  )
63
+ df_mask = df_mask & author_mask
64
 
65
  if title:
66
+ df_mask = df_mask & DF.title.apply(lambda x: title.lower() in x.lower())
67
 
68
+ DF.loc[df_mask, "opacity"] = 1.0
69
+ st.write(f"Number of points: {DF[df_mask].shape[0]}")
70
 
71
  color = st.selectbox("Color", ("cluster", "year", "source"))
72
 
73
 
74
  fig = px.scatter(
75
+ DF,
76
  x="x",
77
  y="y",
78
+ opacity=DF["opacity"],
79
  color=color,
80
  width=1000,
81
  height=800,
 
93
  fig.update_xaxes(title="")
94
  fig.update_yaxes(title="")
95
 
96
+ st.plotly_chart(fig, use_container_width=True)