kargaranamir committed on
Commit
0582f98
•
1 Parent(s): 703f596

add aii, tly.

Browse files
Files changed (3) hide show
  1. app.py +1 -1
  2. languages/aii_Syrc.json +42 -0
  3. languages/tly_Latn.json +24 -0
app.py CHANGED
@@ -104,6 +104,6 @@ def main():
104
  render_metadata()
105
  st.markdown("**GlotWeb** is an indexing service for low-resource languages. It indexes **non-religous** sites or links written in each language. This list can be used to create raw text or parallel corpora and to study low-resource languages on the web.\n")
106
  render_home_table()
107
- st.markdown("\n\n<font color='gray'>We compare the level of support for these languages in the three big datasets ([MADLAD-400](https://huggingface.co/datasets/allenai/MADLAD-400), [FLORES200](https://huggingface.co/datasets/facebook/flores), [GLOT500](https://huggingface.co/datasets/cis-lmu/Glot500)) of low-resource languages (🟥 0/3 < 🟧 1/3 < 🟨 2/3 < 🟩 3/3). Although the support in these datasets for some of these languages could be just the religious texts.</font>", unsafe_allow_html=True)
108
 
109
  main()
 
104
  render_metadata()
105
  st.markdown("**GlotWeb** is an indexing service for low-resource languages. It indexes **non-religous** sites or links written in each language. This list can be used to create raw text or parallel corpora and to study low-resource languages on the web.\n")
106
  render_home_table()
107
+ st.markdown("\n\n<font color='gray'>We compare the level of support for these languages in the three big datasets ([MADLAD400](https://huggingface.co/datasets/allenai/MADLAD-400), [FLORES200](https://huggingface.co/datasets/facebook/flores), [GLOT500](https://huggingface.co/datasets/cis-lmu/Glot500)) of low-resource languages (🟥 0/3 < 🟧 1/3 < 🟨 2/3 < 🟩 3/3). Although the support in these datasets for some of these languages could be just the religious texts.</font>", unsafe_allow_html=True)
108
 
109
  main()
languages/aii_Syrc.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Language Name": "Assyrian Neo-Aramaic",
3
+ "Family": "Afro-Asiatic",
4
+ "Subgrouping": "Semitic",
5
+ "Number of Speakers": "800_000",
6
+ "Supported by allenai/MADLAD-400": 1,
7
+ "Supported by facebook/flores": 0,
8
+ "Supported by cis-lmu/Glot500": 0,
9
+ "Sites": [
10
+ {
11
+ "Site Name": "reimagine.today",
12
+ "Site URL": "https://reimagine.today/?lang=as",
13
+ "Category": "insurance, government ",
14
+ "Confidence": "🟩",
15
+ "Info": "confirmed by webpage metadata and glotlid",
16
+ "Possible Parallel Languages": "",
17
+ "Links": []
18
+ },
19
+ {
20
+ "Site Name": "mesopotamiaheritage.org",
21
+ "Site URL": "https://www.mesopotamiaheritage.org/sy/",
22
+ "Category": "heritage guide",
23
+ "Confidence": "🟩",
24
+ "Info": "confirmed by webpage metadata and glotlid",
25
+ "Possible Parallel Languages": "arb_Arab, eng_Latn, fra_Latn",
26
+ "Links": []
27
+ },
28
+ {
29
+ "Site Name": "parstoday.ir",
30
+ "Site URL": "https://parstoday.ir/assyrian",
31
+ "Category": "news",
32
+ "Confidence": "🟩",
33
+ "Info": "confirmed by webpage metadata and glotlid",
34
+ "Possible Parallel Languages": "many",
35
+ "Links": []
36
+ }
37
+ ]
38
+ }
39
+
40
+
41
+
42
+
languages/tly_Latn.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Language Name": "Talysh",
3
+ "Family": "Indo-European",
4
+ "Subgrouping": "Iranian",
5
+ "Number of Speakers": "230_000",
6
+ "Supported by allenai/MADLAD-400": 0,
7
+ "Supported by facebook/flores": 0,
8
+ "Supported by cis-lmu/Glot500": 0,
9
+ "Sites": [
10
+ {
11
+ "Site Name": "parstoday.ir",
12
+ "Site URL": "https://parstoday.ir/tly",
13
+ "Category": "news",
14
+ "Confidence": "🟩",
15
+ "Info": "confirmed by webpage metadata and glotlid",
16
+ "Possible Parallel Languages": "many",
17
+ "Links": []
18
+ }
19
+ ]
20
+ }
21
+
22
+
23
+
24
+