Spaces:
Sleeping
Sleeping
Commit
•
28f08c2
1
Parent(s):
7117f63
Update app.py (#2)
Browse files
- Update app.py (a3839305306296d4cdeed96580213b96ac2fc19e)
Co-authored-by: Harm de Vries <harmdevries@users.noreply.huggingface.co>
app.py
CHANGED
@@ -17,6 +17,7 @@ for index, row in df.iterrows():
|
|
17 |
all_languages = list(tags.keys())
|
18 |
|
19 |
|
|
|
20 |
@st.cache()
|
21 |
def load_data(language, ext):
|
22 |
ds = load_dataset(
|
@@ -27,61 +28,72 @@ def load_data(language, ext):
|
|
27 |
return ds
|
28 |
|
29 |
|
30 |
-
col1, col2, _ = st.
|
31 |
with col1:
|
32 |
-
chosen_language = st.selectbox(
|
33 |
label="Select a programming language", options=all_languages, index=0
|
34 |
)
|
35 |
with col2:
|
36 |
-
chosen_ext = st.selectbox(
|
37 |
label="Select an extension", options=tags[chosen_language], index=0
|
38 |
)
|
39 |
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
# load the dataset and get indexes of non lexable files
|
42 |
samples = load_data(chosen_language, chosen_ext)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
max_docs = len(samples)
|
44 |
samples = samples.add_column("idx", range(len(samples)))
|
45 |
-
not_lexed = samples.filter(lambda x: not x["lexable"])
|
46 |
-
indexes_not_lexed = not_lexed["idx"]
|
47 |
-
|
48 |
|
49 |
# info about extension
|
50 |
-
st.sidebar.markdown("### Information about the extension:")
|
51 |
-
text = f"Extension {chosen_ext} has {max_docs} files, {df[df['extension'] == chosen_ext]['low_alphanum_count'].values[0]} with very low alphanumeric ratio, \
|
52 |
-
{df[df['extension'] == chosen_ext]['long_lines_count'].values[0]} with very long lines, and {df[df['extension'] == chosen_ext]['non_lexable_count'].values[0]} \
|
53 |
-
are not lexable.\n These files are at indexes:\n {indexes_not_lexed}."
|
54 |
-
st.sidebar.markdown(text)
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
st.markdown("#### File content:")
|
82 |
-
if example["lexable"]:
|
83 |
-
st.code(example["content"], language=chosen_language)
|
84 |
-
else:
|
85 |
-
st.text(f"File can't be lexed so we remove syntax highlighting.\nContent:\n")
|
86 |
-
st.text(str(example['content']))
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
all_languages = list(tags.keys())
|
18 |
|
19 |
|
20 |
+
|
21 |
@st.cache()
|
22 |
def load_data(language, ext):
|
23 |
ds = load_dataset(
|
|
|
28 |
return ds
|
29 |
|
30 |
|
31 |
+
col1, col2, _ = st.columns([1, 1, 4])
|
32 |
with col1:
|
33 |
+
chosen_language = st.sidebar.selectbox(
|
34 |
label="Select a programming language", options=all_languages, index=0
|
35 |
)
|
36 |
with col2:
|
37 |
+
chosen_ext = st.sidebar.selectbox(
|
38 |
label="Select an extension", options=tags[chosen_language], index=0
|
39 |
)
|
40 |
|
41 |
+
st.sidebar.header("Filters")
|
42 |
+
not_lexable = st.sidebar.checkbox("Not lexable?")
|
43 |
+
low_alphanum = st.sidebar.checkbox("Low alphanum count?")
|
44 |
+
long_lines = st.sidebar.checkbox("Long lines?")
|
45 |
+
|
46 |
|
47 |
# load the dataset and get indexes of non lexable files
|
48 |
samples = load_data(chosen_language, chosen_ext)
|
49 |
+
|
50 |
+
if not_lexable:
|
51 |
+
samples = samples.filter(lambda x: not x["lexable"])
|
52 |
+
if low_alphanum:
|
53 |
+
samples = samples.filter(lambda x: x["low_alphanum"])
|
54 |
+
if long_lines:
|
55 |
+
samples = samples.filter(lambda x: x["long_lines"])
|
56 |
+
|
57 |
max_docs = len(samples)
|
58 |
samples = samples.add_column("idx", range(len(samples)))
|
|
|
|
|
|
|
59 |
|
60 |
# info about extension
|
61 |
+
# st.sidebar.markdown("### Information about the extension:")
|
62 |
+
# text = f"Extension {chosen_ext} has {max_docs} files, {df[df['extension'] == chosen_ext]['low_alphanum_count'].values[0]} with very low alphanumeric ratio, \
|
63 |
+
# {df[df['extension'] == chosen_ext]['long_lines_count'].values[0]} with very long lines, and {df[df['extension'] == chosen_ext]['non_lexable_count'].values[0]} \
|
64 |
+
# are not lexable.\n These files are at indexes:\n {indexes_not_lexed}."
|
65 |
+
# st.sidebar.markdown(text)
|
66 |
+
|
67 |
+
if max_docs > 0:
|
68 |
+
col_1, _ = st.columns([3, 3])
|
69 |
+
with col_1:
|
70 |
+
index_example = st.number_input(
|
71 |
+
f"Extension {chosen_ext} has {max_docs} files, choose one to visualize:",
|
72 |
+
min_value=0,
|
73 |
+
max_value=max_docs - 1,
|
74 |
+
value=0,
|
75 |
+
step=1,
|
76 |
+
)
|
77 |
+
|
78 |
+
|
79 |
+
# info about the chosen example
|
80 |
+
example = samples[index_example]
|
81 |
+
|
82 |
+
# st.markdown("#### Information about the chosen example:")
|
83 |
+
# text_alpha = "**has**" if example["long_lines"] else "doesn't have"
|
84 |
+
# text_lines = "**has**" if example["low_alphanum"] else "doesn't have"
|
85 |
+
# text_lexer = "is" if example["lexable"] else "**isn't**"
|
86 |
+
|
87 |
+
# st.markdown(
|
88 |
+
# f"Example {index_example} {text_alpha} a very low alphanumeric ratio, \
|
89 |
+
# {text_lines} very long lines, and {text_lexer} lexable."
|
90 |
+
# )
|
91 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
+
# display file content
|
94 |
+
st.markdown("#### File content:")
|
95 |
+
if not example["lexable"]:
|
96 |
+
st.write(f"File can't be lexed so we remove syntax highlighting.\nContent:\n")
|
97 |
+
st.text(example['content'])
|
98 |
+
else:
|
99 |
+
st.code(example["content"], language=chosen_language)
|