Spaces:
Running
Running
import json | |
import pandas as pd | |
import plotly.express as px | |
# Language codes predicted by language detection model | |
LANG_CODES = ['ar', 'bg', 'de', 'el', 'en', 'es', 'fr', 'hi', 'it', 'ja', | |
'nl', 'pl', 'pt', 'ru', 'sw', 'th', 'tr', 'ur', 'vi', 'zh'] | |
COUNTRY_TO_LANG_CODE = { | |
'Algeria': 'ar', | |
'Chad': 'ar', | |
'Djibouti': 'ar', | |
'Egypt': 'ar', | |
'Iraq': 'ar', | |
'Jordan': 'ar', | |
'Kuwait': 'ar', | |
'Lebanon': 'ar', | |
'Libya': 'ar', | |
'Mali': 'ar', | |
'Mauritania': 'ar', | |
'Morocco': 'ar', | |
'Oman': 'ar', | |
'Palestine': 'ar', | |
'Qatar': 'ar', | |
'Saudi Arabia': 'ar', | |
'Somalia': 'ar', | |
'Sudan': 'ar', | |
'Syria': 'ar', | |
'Tunisia': 'ar', | |
'United Arab Emirates': 'ar', | |
'Yemen': 'ar', | |
'Bulgaria': 'bg', | |
'Germany': 'de', | |
'Greece': 'el', | |
'Cyprus': 'el', | |
'United States of America': 'en', | |
'Ireland': 'en', | |
'United Kingdom': 'en', | |
'Canada': 'en', | |
'Australia': 'en', | |
'Mexico': 'es', | |
'Mexico': 'es', | |
'Colombia': 'es', | |
'Spain': 'es', | |
'Argentina': 'es', | |
'Peru': 'es', | |
'Venezuela': 'es', | |
'Chile': 'es', | |
'Guatemala': 'es', | |
'Ecuador': 'es', | |
'Bolivia': 'es', | |
'Cuba': 'es', | |
'Dominican Rep.': 'es', | |
'Honduras': 'es', | |
'Paraguay': 'es', | |
'El Salvador': 'es', | |
'Nicaragua': 'es', | |
'Costa Rica': 'es', | |
'Panama': 'es', | |
'Uruguay': 'es', | |
'Guinea': 'es', | |
'France': 'fr', | |
'India': 'hi', | |
'Italy': 'it', | |
'Japan': 'ja', | |
'Netherlands': 'nl', | |
'Belgium': 'nl', | |
'Poland': 'pl', | |
'Portugal': 'pt', | |
'Russia': 'ru', | |
'Uganda': 'sw', | |
'Kenya': 'sw', | |
'Tanzania': 'sw', | |
'Thailand': 'th', | |
'Turkey': 'tr', | |
'Pakistan': 'ur', | |
'Vietnam': 'vi', | |
'China': 'zh' | |
} | |
def lang_map(df): | |
with open('data/countries.geo.json') as f: | |
countries = json.load(f) | |
country_list = [country['properties']['name'] | |
for country in dict(countries)['features']] | |
LANG_CODES = df.value_counts('predicted_language') | |
countries_data = [] | |
lang_count_data = [] | |
lang_code_data = [] | |
for country in country_list: | |
if country in COUNTRY_TO_LANG_CODE: | |
country_lang = COUNTRY_TO_LANG_CODE[country] | |
if country_lang in LANG_CODES.index: | |
countries_data.append(country) | |
lang_count = LANG_CODES.loc[COUNTRY_TO_LANG_CODE[country]] | |
lang_count_data.append(lang_count) | |
lang_code_data.append(country_lang) | |
lang_df = pd.DataFrame({ | |
'country': countries_data, | |
'count': lang_count_data, | |
'lang_code': lang_code_data | |
}) | |
fig = px.choropleth( | |
lang_df, | |
geojson=countries, | |
locations='country', | |
locationmode='country names', | |
color='count', | |
color_continuous_scale=[ | |
[0, "rgb(45,45,48)"], | |
[0.33, "rgb(116,173,209)"], | |
[0.66, "rgb(255,255,0)"], | |
[1, "rgb(255,94,5)"] | |
], | |
scope='world', | |
hover_data=['lang_code'], | |
labels={'count': "Language Count"}, | |
template='plotly_dark' | |
) | |
fig.update_geos(showcountries=True) | |
fig.update_layout( | |
title_text="Language Map", | |
margin={"r": 0, "t": 20, "l": 0, "b": 0} | |
) | |
return fig | |