ehsk committed on
Commit
780a0a5
0 Parent(s):

Duplicate from gwf-uwaterloo/scicatter2d

Browse files
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/anthology-2020-23_specter2_base.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Scicatter2d
3
+ emoji: 📉
4
+ colorFrom: purple
5
+ colorTo: indigo
6
+ sdk: streamlit
7
+ sdk_version: 1.26.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: gwf-uwaterloo/scicatter2d
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import streamlit as st
6
+
7
# Page-level configuration: lay the app out across the full browser width.
st.set_page_config(layout="wide")

# JSON file holding the records that the app plots.
DATA_FILE = "data/anthology-2020-23_specter2_base.json"

# Plotly colour-scale name to use for each column the plot can be coloured by.
THEMES = dict(
    cluster="fall",
    year="mint",
    source="phase",
)
10
+
11
def load_df(data_file: "str | os.PathLike[str]") -> pd.DataFrame:
    """Read the records JSON at *data_file* into a plot-ready DataFrame.

    Each record's ``point2d`` pair is split into ``x`` and ``y`` columns and
    ``publication_type`` is exposed under the shorter name ``type``. Both
    source columns are removed from the result.

    Args:
        data_file: Path of a JSON file in pandas' ``records`` orientation.

    Returns:
        The loaded frame, always carrying ``x``, ``y`` and ``type`` columns.
    """
    df = pd.read_json(data_file, orient="records")

    # pop() removes the column from the frame while handing back its values.
    points = df.pop("point2d")
    df["x"] = points.apply(lambda point: point[0])
    df["y"] = points.apply(lambda point: point[1])

    if "publication_type" in df.columns:
        df["type"] = df.pop("publication_type")
    elif "type" not in df.columns:
        # The scatter plot lists "type" among its hover columns, so provide
        # the column even when the data carries no publication type.
        df["type"] = None
    return df
21
+
22
+
23
@st.cache_data
def load_dataframe():
    """Return the frame built by ``load_df`` from ``DATA_FILE``.

    Wrapped in ``st.cache_data`` so the result can be reused between script
    reruns instead of re-reading the file each time.
    """
    frame = load_df(DATA_FILE)
    return frame
26
+
27
+
28
DF = load_dataframe()

# Venue labels offered in the sidebar. Selections are lower-cased before being
# compared with the "source" column.
VENUES = ["ACL", "EMNLP", "NAACL", "TACL"]

# Columns shown in the hover tooltip, when the data provides them.
HOVER_COLUMNS = ("title", "authors", "year", "source", "type")

with st.sidebar:
    venues = st.multiselect("Venues", VENUES, VENUES)

    start_year, end_year = st.select_slider(
        "Publication year",
        options=("2020", "2021", "2022", "2023"),
        value=("2020", "2023"),
    )
    author_names = st.text_input("Author names (separated by comma)")

    # NOTE(review): the original indentation was lost; this assumes the title
    # box sits in the sidebar next to the other filters -- confirm.
    title = st.text_input("Title")

# The slider works on string labels; the "year" column is compared as ints.
start_year = int(start_year)
end_year = int(end_year)
df = DF[(DF["year"] >= start_year) & (DF["year"] <= end_year)]

if not venues:
    # Nothing selected: warn, and leave the venue filter unapplied.
    st.write(":red[Please select a venue]")
elif len(venues) < len(VENUES):
    # Only filter when a strict subset is chosen; all venues means no filter.
    selected_venues = [v.lower() for v in venues]
    df = df[df["source"].isin(selected_venues)]

# Drop empty fragments (e.g. from a trailing comma) and escape each name so
# characters such as "(" or "." are matched literally instead of being parsed
# as regex syntax, which could raise re.error on ordinary user input.
author_patterns = [
    re.compile(re.escape(name), re.IGNORECASE)
    for name in (part.strip() for part in author_names.split(","))
    if name
]
if author_patterns:
    # A paper matches when every requested name occurs in at least one of its
    # authors.
    author_mask = df["authors"].apply(
        lambda paper_authors: all(
            any(pattern.search(author) for author in paper_authors)
            for pattern in author_patterns
        )
    )
    # apply() on an empty frame yields an object-dtype Series, which df[...]
    # would treat as a list of column labels; force a boolean mask.
    df = df[author_mask.astype(bool)]

if title:
    # Case-insensitive literal substring match; rows without a title are
    # treated as non-matching.
    title_mask = df["title"].str.contains(title, case=False, regex=False, na=False)
    df = df[title_mask.astype(bool)]

st.write(f"Number of points: {df.shape[0]}")

# Offer exactly the columns that have a colour scale configured in THEMES.
color = st.selectbox("Color", tuple(THEMES))

fig = px.scatter(
    df,
    x="x",
    y="y",
    color=color,
    width=1000,
    height=800,
    # Only request hover columns that exist, so a dataset without one of them
    # still renders.
    hover_data=[column for column in HOVER_COLUMNS if column in df.columns],
    color_continuous_scale=THEMES[color],
)
fig.update_layout(
    showlegend=False,
    font=dict(
        family="Times New Roman",
        size=30,
    ),
)
fig.update_xaxes(title="")
fig.update_yaxes(title="")

st.plotly_chart(fig, use_container_width=True)
data/anthology-2020-23_specter2_base.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf528ba2e72c9865d3a332b5d319a573142a2f6813eb78cd071c0781b4e780b3
3
+ size 11791691
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ plotly
2
+ pandas