the-stack-inspection

Sleeping

App Files Files Community

loubnabnl HF staff

harmdevries committed on Feb 13, 2023

Commit

28f08c2

•

1 Parent(s): 7117f63

Update app.py (#2)

Browse files

- Update app.py (a3839305306296d4cdeed96580213b96ac2fc19e)

Co-authored-by: Harm de Vries <harmdevries@users.noreply.huggingface.co>

Files changed (1) hide show

app.py +55 -43

app.py CHANGED Viewed

@@ -17,6 +17,7 @@ for index, row in df.iterrows():
 all_languages = list(tags.keys())
 @st.cache()
 def load_data(language, ext):
     ds = load_dataset(
@@ -27,61 +28,72 @@ def load_data(language, ext):
     return ds
-col1, col2, _ = st.sidebar.columns([1, 1, 4])
 with col1:
-    chosen_language = st.selectbox(
         label="Select a programming language", options=all_languages, index=0
     )
 with col2:
-    chosen_ext = st.selectbox(
         label="Select an extension", options=tags[chosen_language], index=0
     )
 # load the dataset and get indexes of non lexable files
 samples = load_data(chosen_language, chosen_ext)
 max_docs = len(samples)
 samples = samples.add_column("idx", range(len(samples)))
-not_lexed = samples.filter(lambda x: not x["lexable"])
-indexes_not_lexed = not_lexed["idx"]
 # info about extension
-st.sidebar.markdown("### Information about the extension:")
-text = f"Extension {chosen_ext} has {max_docs} files, {df[df['extension'] == chosen_ext]['low_alphanum_count'].values[0]} with very low alphanumeric ratio, \
-{df[df['extension'] == chosen_ext]['long_lines_count'].values[0]} with very long lines, and {df[df['extension'] == chosen_ext]['non_lexable_count'].values[0]} \
-are not lexable.\n These files are at indexes:\n {indexes_not_lexed}."
-st.sidebar.markdown(text)
-col_1, _ = st.columns([2, 4])
-with col_1:
-    index_example = st.number_input(
-        f"Extension {chosen_ext} has {max_docs} files, choose one to visualize:",
-        min_value=0,
-        max_value=max_docs - 1,
-        value=0,
-        step=1,
-    )
-# info about the chosen example
-example = samples[index_example]
-st.markdown("#### Information about the chosen example:")
-text_alpha = "**has**" if example["long_lines"] else "doesn't have"
-text_lines = "**has**" if example["low_alphanum"] else "doesn't have"
-text_lexer = "is" if example["lexable"] else "**isn't**"
-st.markdown(
-    f"Example {index_example} {text_alpha} a very low alphanumeric ratio, \
-    {text_lines} very long lines,  and {text_lexer} lexable."
-)
-# display file content
-st.markdown("#### File content:")
-if example["lexable"]:
-    st.code(example["content"], language=chosen_language)
-else:
-    st.text(f"File can't be lexed so we remove syntax highlighting.\nContent:\n")
-    st.text(str(example['content']))

 all_languages = list(tags.keys())
 @st.cache()
 def load_data(language, ext):
     ds = load_dataset(
     return ds
+col1, col2, _ = st.columns([1, 1, 4])
 with col1:
+    chosen_language = st.sidebar.selectbox(
         label="Select a programming language", options=all_languages, index=0
     )
 with col2:
+    chosen_ext = st.sidebar.selectbox(
         label="Select an extension", options=tags[chosen_language], index=0
     )
+st.sidebar.header("Filters")
+not_lexable = st.sidebar.checkbox("Not lexable?")
+low_alphanum = st.sidebar.checkbox("Low alphanum count?")
+long_lines = st.sidebar.checkbox("Long lines?")
 # load the dataset and get indexes of non lexable files
 samples = load_data(chosen_language, chosen_ext)
+if not_lexable:
+    samples = samples.filter(lambda x: not x["lexable"])
+if low_alphanum:
+    samples = samples.filter(lambda x: x["low_alphanum"])
+if long_lines:
+    samples = samples.filter(lambda x: x["long_lines"])
 max_docs = len(samples)
 samples = samples.add_column("idx", range(len(samples)))
 # info about extension
+# st.sidebar.markdown("### Information about the extension:")
+# text = f"Extension {chosen_ext} has {max_docs} files, {df[df['extension'] == chosen_ext]['low_alphanum_count'].values[0]} with very low alphanumeric ratio, \
+# {df[df['extension'] == chosen_ext]['long_lines_count'].values[0]} with very long lines, and {df[df['extension'] == chosen_ext]['non_lexable_count'].values[0]} \
+# are not lexable.\n These files are at indexes:\n {indexes_not_lexed}."
+# st.sidebar.markdown(text)
+if max_docs > 0:
+    col_1, _ = st.columns([3, 3])
+    with col_1:
+        index_example = st.number_input(
+            f"Extension {chosen_ext} has {max_docs} files, choose one to visualize:",
+            min_value=0,
+            max_value=max_docs - 1,
+            value=0,
+            step=1,
+        )
+    # info about the chosen example
+    example = samples[index_example]
+    # st.markdown("#### Information about the chosen example:")
+    # text_alpha = "**has**" if example["long_lines"] else "doesn't have"
+    # text_lines = "**has**" if example["low_alphanum"] else "doesn't have"
+    # text_lexer = "is" if example["lexable"] else "**isn't**"
+    # st.markdown(
+    #     f"Example {index_example} {text_alpha} a very low alphanumeric ratio, \
+    #     {text_lines} very long lines,  and {text_lexer} lexable."
+    # )
+    # display file content
+    st.markdown("#### File content:")
+    if not example["lexable"]:
+        st.write(f"File can't be lexed so we remove syntax highlighting.\nContent:\n")
+        st.text(example['content'])
+    else:
+        st.code(example["content"], language=chosen_language)