anton-l HF staff committed on
Commit
a8d50b5
1 Parent(s): cf6f473

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -9
app.py CHANGED
@@ -7,17 +7,24 @@ df = pd.read_csv("./stackv2_languages_freq.csv")
7
  langs = sorted(list(df["language"].unique()))
8
  exts = list(df["extension"].unique())
9
 
10
-
11
- lang_to_ext = defaultdict(set)
12
-
13
- for lang, ext in zip(df["language"], df["extension"]):
14
- if isinstance(ext, str):
15
- lang_to_ext[lang].update([ext])
16
-
17
  def compute(lang):
18
-
19
- report = f"## Summary:\n\n The `{lang}` language has {len(lang_to_ext[lang])} extensions: {', '.join(sorted(['`'+ext+'`' for ext in lang_to_ext[lang]]))}.\n\n"
20
  df_lang = df[df["language"]==lang]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  for i, (ext, example) in enumerate(zip(df_lang["extension"], df_lang["content"])):
23
  example_string = f"**Example {i+1} (extension=`{ext}`):**\n\n ```\n{example}\n```\n\n"
 
7
  langs = sorted(list(df["language"].unique()))
8
  exts = list(df["extension"].unique())
9
 
 
 
 
 
 
 
 
10
  def compute(lang):
 
 
11
  df_lang = df[df["language"]==lang]
12
+ # clean up weird exts
13
+ df_lang = df_lang[df_lang["ext_fraction_per_lang"] > 0.0001].reset_index()
14
+
15
+ df_lang_uniq = df_lang.groupby("extension").first().reset_index()
16
+
17
+ report = f"## Summary:\n\n The `{lang}` language has {df_lang_uniq.shape[0]} extensions: \n\n"
18
+
19
+ for i, (ext, ext_fraction, gen_fraction, vend_fraction) in enumerate(zip(df_lang_uniq["extension"], df_lang_uniq["ext_fraction_per_lang"], df_lang_uniq["generated_fraction"], df_lang_uniq["vendor_fraction"])):
20
+ fractions_string = f"{ext_fraction*100:.2f}%"
21
+ if gen_fraction > 0.5:
22
+ fractions_string += f", autogenerated: {gen_fraction*100:.2f}%"
23
+ if vend_fraction > 0.5:
24
+ fractions_string += f", vendor files: {vend_fraction*100:.2f}%"
25
+
26
+ report += f"`{ext}` ({fractions_string}), \n\n"
27
+ report = report[:-2] + "\n\n\n\n"
28
 
29
  for i, (ext, example) in enumerate(zip(df_lang["extension"], df_lang["content"])):
30
  example_string = f"**Example {i+1} (extension=`{ext}`):**\n\n ```\n{example}\n```\n\n"