nouamanetazi HF staff committed on
Commit
5ad2e6e
·
verified ·
1 Parent(s): 6f9f426

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +34 -0
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from tabs import comparison, leaderboard, about
3
+ import pandas as pd
4
+
5
dataframe_path = "darija_tokenizers_leaderboard.jsonl"

# Columns every leaderboard record must provide. The same names define the
# schema of the empty fallback frame, so the check and the fallback cannot
# drift apart.
REQUIRED_COLUMNS = (
    "Tokenizer",
    "Vocabulary Size",
    "Token Count",
    "Tokens/Character Ratio",
    "Latin Support",
    "Tokenizer Class",
)


def load_leaderboard(path):
    """Load the leaderboard from a JSON Lines file, best-effort.

    Args:
        path: Location of the JSON Lines file to read.

    Returns:
        The parsed DataFrame when the file can be read and contains every
        name in ``REQUIRED_COLUMNS``; otherwise an empty DataFrame whose
        columns are exactly ``REQUIRED_COLUMNS``.
    """
    empty = pd.DataFrame(columns=list(REQUIRED_COLUMNS))
    try:
        frame = pd.read_json(path, lines=True)
    except Exception:
        # Deliberate best-effort: a missing or malformed file must not stop
        # the app from starting. Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        return empty
    # Explicit check instead of ``assert``: assertions are removed under
    # ``python -O``, which would silently accept a file with a wrong schema.
    if any(col not in frame.columns for col in REQUIRED_COLUMNS):
        return empty
    return frame


df = load_leaderboard(dataframe_path)
20
+
21
def main():
    """Render the app: a page title followed by three tabs.

    The "Leaderboard" and "Comparison" tabs are drawn by their modules with
    the module-level ``df``; the "About" tab takes no arguments.
    """
    st.title("Darija Tokenizer Explorer 🧭")

    # Label -> renderer pairs, in display order. Lambdas defer the attribute
    # lookups until the matching tab is actually being drawn.
    sections = (
        ("Leaderboard", lambda: leaderboard.leaderboard_tab(df)),
        ("Comparison", lambda: comparison.comparison_tab(df)),
        ("About", lambda: about.about_tab()),
    )

    containers = st.tabs([label for label, _ in sections])
    for container, (_, render) in zip(containers, sections):
        with container:
            render()


if __name__ == "__main__":
    main()