{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load speakers_all.csv from the current working directory into a DataFrame.\n",
    "df = pd.read_csv(\"./speakers_all.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Derive the 'id' column by appending the \".wav\" extension to every filename.\n",
    "# Vectorised concatenation replaces the per-row apply/lambda; a missing\n",
    "# filename now propagates as NaN instead of raising a TypeError.\n",
    "df['id'] = df['filename'] + \".wav\"\n",
    "\n",
    "# Keep only the rows whose 'file_missing?' flag is False. The original row\n",
    "# labels are preserved (no index reset). `.copy()` detaches the result from\n",
    "# the unfiltered frame so later column assignments on `df` do not emit\n",
    "# SettingWithCopyWarning.\n",
    "df = df[df['file_missing?'].eq(False)].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | age | \n", "age_onset | \n", "birthplace | \n", "filename | \n", "native_language | \n", "sex | \n", "speakerid | \n", "country | \n", "file_missing? | \n", "Unnamed: 9 | \n", "Unnamed: 10 | \n", "Unnamed: 11 | \n", "id | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
32 | \n", "27.0 | \n", "9.0 | \n", "virginia, south africa | \n", "afrikaans1 | \n", "afrikaans | \n", "female | \n", "1 | \n", "south africa | \n", "False | \n", "NaN | \n", "NaN | \n", "NaN | \n", "afrikaans1.wav | \n", "
33 | \n", "40.0 | \n", "5.0 | \n", "pretoria, south africa | \n", "afrikaans2 | \n", "afrikaans | \n", "male | \n", "2 | \n", "south africa | \n", "False | \n", "NaN | \n", "NaN | \n", "NaN | \n", "afrikaans2.wav | \n", "
34 | \n", "43.0 | \n", "4.0 | \n", "pretoria, transvaal, south africa | \n", "afrikaans3 | \n", "afrikaans | \n", "male | \n", "418 | \n", "south africa | \n", "False | \n", "NaN | \n", "NaN | \n", "NaN | \n", "afrikaans3.wav | \n", "
35 | \n", "26.0 | \n", "8.0 | \n", "pretoria, south africa | \n", "afrikaans4 | \n", "afrikaans | \n", "male | \n", "1159 | \n", "south africa | \n", "False | \n", "NaN | \n", "NaN | \n", "NaN | \n", "afrikaans4.wav | \n", "
36 | \n", "19.0 | \n", "6.0 | \n", "cape town, south africa | \n", "afrikaans5 | \n", "afrikaans | \n", "male | \n", "1432 | \n", "south africa | \n", "False | \n", "NaN | \n", "NaN | \n", "NaN | \n", "afrikaans5.wav | \n", "