loubnabnl HF staff harmdevries committed on
Commit
28f08c2
1 Parent(s): 7117f63

Update app.py (#2)

Browse files

- Update app.py (a3839305306296d4cdeed96580213b96ac2fc19e)


Co-authored-by: Harm de Vries <harmdevries@users.noreply.huggingface.co>

Files changed (1) hide show
  1. app.py +55 -43
app.py CHANGED
@@ -17,6 +17,7 @@ for index, row in df.iterrows():
17
  all_languages = list(tags.keys())
18
 
19
 
 
20
  @st.cache()
21
  def load_data(language, ext):
22
  ds = load_dataset(
@@ -27,61 +28,72 @@ def load_data(language, ext):
27
  return ds
28
 
29
 
30
- col1, col2, _ = st.sidebar.columns([1, 1, 4])
31
  with col1:
32
- chosen_language = st.selectbox(
33
  label="Select a programming language", options=all_languages, index=0
34
  )
35
  with col2:
36
- chosen_ext = st.selectbox(
37
  label="Select an extension", options=tags[chosen_language], index=0
38
  )
39
 
 
 
 
 
 
40
 
41
  # load the dataset and get indexes of non lexable files
42
  samples = load_data(chosen_language, chosen_ext)
 
 
 
 
 
 
 
 
43
  max_docs = len(samples)
44
  samples = samples.add_column("idx", range(len(samples)))
45
- not_lexed = samples.filter(lambda x: not x["lexable"])
46
- indexes_not_lexed = not_lexed["idx"]
47
-
48
 
49
  # info about extension
50
- st.sidebar.markdown("### Information about the extension:")
51
- text = f"Extension {chosen_ext} has {max_docs} files, {df[df['extension'] == chosen_ext]['low_alphanum_count'].values[0]} with very low alphanumeric ratio, \
52
- {df[df['extension'] == chosen_ext]['long_lines_count'].values[0]} with very long lines, and {df[df['extension'] == chosen_ext]['non_lexable_count'].values[0]} \
53
- are not lexable.\n These files are at indexes:\n {indexes_not_lexed}."
54
- st.sidebar.markdown(text)
55
-
56
- col_1, _ = st.columns([2, 4])
57
- with col_1:
58
- index_example = st.number_input(
59
- f"Extension {chosen_ext} has {max_docs} files, choose one to visualize:",
60
- min_value=0,
61
- max_value=max_docs - 1,
62
- value=0,
63
- step=1,
64
- )
65
-
66
-
67
- # info about the chosen example
68
- example = samples[index_example]
69
- st.markdown("#### Information about the chosen example:")
70
- text_alpha = "**has**" if example["long_lines"] else "doesn't have"
71
- text_lines = "**has**" if example["low_alphanum"] else "doesn't have"
72
- text_lexer = "is" if example["lexable"] else "**isn't**"
73
-
74
- st.markdown(
75
- f"Example {index_example} {text_alpha} a very low alphanumeric ratio, \
76
- {text_lines} very long lines, and {text_lexer} lexable."
77
- )
78
-
79
-
80
- # display file content
81
- st.markdown("#### File content:")
82
- if example["lexable"]:
83
- st.code(example["content"], language=chosen_language)
84
- else:
85
- st.text(f"File can't be lexed so we remove syntax highlighting.\nContent:\n")
86
- st.text(str(example['content']))
87
 
 
 
 
 
 
 
 
 
17
  all_languages = list(tags.keys())
18
 
19
 
20
+
21
  @st.cache()
22
  def load_data(language, ext):
23
  ds = load_dataset(
 
28
  return ds
29
 
30
 
31
+ col1, col2, _ = st.columns([1, 1, 4])
32
  with col1:
33
+ chosen_language = st.sidebar.selectbox(
34
  label="Select a programming language", options=all_languages, index=0
35
  )
36
  with col2:
37
+ chosen_ext = st.sidebar.selectbox(
38
  label="Select an extension", options=tags[chosen_language], index=0
39
  )
40
 
41
+ st.sidebar.header("Filters")
42
+ not_lexable = st.sidebar.checkbox("Not lexable?")
43
+ low_alphanum = st.sidebar.checkbox("Low alphanum count?")
44
+ long_lines = st.sidebar.checkbox("Long lines?")
45
+
46
 
47
  # load the dataset and get indexes of non lexable files
48
  samples = load_data(chosen_language, chosen_ext)
49
+
50
+ if not_lexable:
51
+ samples = samples.filter(lambda x: not x["lexable"])
52
+ if low_alphanum:
53
+ samples = samples.filter(lambda x: x["low_alphanum"])
54
+ if long_lines:
55
+ samples = samples.filter(lambda x: x["long_lines"])
56
+
57
  max_docs = len(samples)
58
  samples = samples.add_column("idx", range(len(samples)))
 
 
 
59
 
60
  # info about extension
61
+ # st.sidebar.markdown("### Information about the extension:")
62
+ # text = f"Extension {chosen_ext} has {max_docs} files, {df[df['extension'] == chosen_ext]['low_alphanum_count'].values[0]} with very low alphanumeric ratio, \
63
+ # {df[df['extension'] == chosen_ext]['long_lines_count'].values[0]} with very long lines, and {df[df['extension'] == chosen_ext]['non_lexable_count'].values[0]} \
64
+ # are not lexable.\n These files are at indexes:\n {indexes_not_lexed}."
65
+ # st.sidebar.markdown(text)
66
+
67
+ if max_docs > 0:
68
+ col_1, _ = st.columns([3, 3])
69
+ with col_1:
70
+ index_example = st.number_input(
71
+ f"Extension {chosen_ext} has {max_docs} files, choose one to visualize:",
72
+ min_value=0,
73
+ max_value=max_docs - 1,
74
+ value=0,
75
+ step=1,
76
+ )
77
+
78
+
79
+ # info about the chosen example
80
+ example = samples[index_example]
81
+
82
+ # st.markdown("#### Information about the chosen example:")
83
+ # text_alpha = "**has**" if example["long_lines"] else "doesn't have"
84
+ # text_lines = "**has**" if example["low_alphanum"] else "doesn't have"
85
+ # text_lexer = "is" if example["lexable"] else "**isn't**"
86
+
87
+ # st.markdown(
88
+ # f"Example {index_example} {text_alpha} a very low alphanumeric ratio, \
89
+ # {text_lines} very long lines, and {text_lexer} lexable."
90
+ # )
91
+
 
 
 
 
 
 
92
 
93
+ # display file content
94
+ st.markdown("#### File content:")
95
+ if not example["lexable"]:
96
+ st.write(f"File can't be lexed so we remove syntax highlighting.\nContent:\n")
97
+ st.text(example['content'])
98
+ else:
99
+ st.code(example["content"], language=chosen_language)