import pandas as pd

# Column order of the ``langs_id`` table. Every column except "lang" holds the
# identifier string used to look the language up in the named resource.
_LANGS_ID_COLUMNS = (
    "lang",
    "dataset_id",
    "stopwords_id",
    "flagged_words_id",
    "fasttext_id",
    "sentencepiece_id",
    "kenlm_id",
)

# One row per language, in the column order above. ``None`` means no
# identifier is listed for that resource (presumably the resource does not
# exist for that language -- confirm against the consumers of this table).
_LANGS_ID_ROWS = (
    ("Afrikaans", "af", "af", None, "af", "af", "af"),
    ("Arabic", "ar", "ar", "ar", "ar", "ar", "ar"),
    ("Egyptian Arabic", "arz", None, None, "arz", "arz", "arz"),
    ("Assamese", "as", "as", None, "as", "as", "as"),
    ("Bengali", "bn", "bn", "bn", "bn", "bn", "bn"),
    ("Catalan", "ca", "ca", "ca", "ca", "ca", "ca"),
    ("English", "en", "en", "en", "en", "en", "en"),
    ("Spanish", "es", "es", "es", "es", "es", "es"),
    ("Basque", "eu", "eu", "eu", "eu", "eu", "eu"),
    ("French", "fr", "fr", "fr", "fr", "fr", "fr"),
    ("Gujarati", "gu", "gu", None, "gu", "gu", "gu"),
    ("Hindi", "hi", "hi", "hi", "hi", "hi", "hi"),
    ("Indonesian", "id", "id", "id", "id", "id", "id"),
    ("Kannada", "kn", None, "kn", "kn", "kn", "kn"),
    ("Malayalam", "ml", None, "ml", "ml", "ml", "ml"),
    ("Marathi", "mr", "mr", "mr", "mr", "mr", "mr"),
    ("Portuguese", "pt", "pt", "pt", "pt", "pt", "pt"),
    ("Swahili", "sw", "sw", None, "sw", "sw", "sw"),
    ("Tamil", "ta", None, "ta", "ta", "ta", "ta"),
    ("Telugu", "te", None, "te", "te", "te", "te"),
    ("Urdu", "ur", "ur", "ur", "ur", "ur", "ur"),
    ("Vietnamese", "vi", "vi", "vi", "vi", "vi", "vi"),
    ("Yoruba", "yo", "yo", None, "yo", "yo", "yo"),
    ("Chinese", "zh", "zh", "zh", "zh", "zh", "zh"),
)

# Public table: a DataFrame with one row per language and the columns listed
# in ``_LANGS_ID_COLUMNS``. It is built from a list of dicts so that pandas
# receives exactly the same input shape as the original literal did.
langs_id = pd.DataFrame(
    [dict(zip(_LANGS_ID_COLUMNS, row)) for row in _LANGS_ID_ROWS]
)