{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "One-off conversion / data-wrangling script, to avoid re-downloading videos." ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import base64\n", "\n", "from tqdm import tqdm" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "df = pd.read_parquet(\"../data/dataset_original.parquet\")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | video_id | \n", "frame_idx | \n", "timestamp | \n", "image_path | \n", "dim_0 | \n", "dim_1 | \n", "dim_2 | \n", "dim_3 | \n", "dim_4 | \n", "dim_5 | \n", "... | \n", "dim_502 | \n", "dim_503 | \n", "dim_504 | \n", "dim_505 | \n", "dim_506 | \n", "dim_507 | \n", "dim_508 | \n", "dim_509 | \n", "dim_510 | \n", "dim_511 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "8Ilh1ewceco | \n", "0 | \n", "0.0 | \n", "data/images/8Ilh1ewceco/0.jpg | \n", "-0.013127 | \n", "-0.022996 | \n", "-0.049374 | \n", "-0.006306 | \n", "0.013602 | \n", "-0.003762 | \n", "... | \n", "-0.006920 | \n", "0.013831 | \n", "0.056647 | \n", "0.007946 | \n", "-0.002478 | \n", "-0.030497 | \n", "-0.011770 | \n", "0.067427 | \n", "-0.031810 | \n", "-0.025615 | \n", "
1 | \n", "8Ilh1ewceco | \n", "145 | \n", "5.0 | \n", "data/images/8Ilh1ewceco/145.jpg | \n", "0.009040 | \n", "0.003338 | \n", "0.029684 | \n", "-0.033058 | \n", "0.040864 | \n", "-0.006447 | \n", "... | \n", "0.033575 | \n", "-0.019076 | \n", "0.047166 | \n", "-0.010574 | \n", "-0.018608 | \n", "-0.013465 | \n", "-0.020017 | \n", "0.086240 | \n", "-0.029653 | \n", "0.035949 | \n", "
2 | \n", "8Ilh1ewceco | \n", "290 | \n", "10.0 | \n", "data/images/8Ilh1ewceco/290.jpg | \n", "0.004891 | \n", "0.006527 | \n", "0.004417 | \n", "-0.000323 | \n", "0.006400 | \n", "-0.024191 | \n", "... | \n", "-0.043122 | \n", "-0.010695 | \n", "0.005672 | \n", "0.000172 | \n", "-0.014442 | \n", "-0.014647 | \n", "-0.016840 | \n", "0.100285 | \n", "0.013794 | \n", "0.015046 | \n", "
3 | \n", "8Ilh1ewceco | \n", "435 | \n", "15.0 | \n", "data/images/8Ilh1ewceco/435.jpg | \n", "-0.022159 | \n", "0.020703 | \n", "-0.021607 | \n", "-0.019721 | \n", "-0.006067 | \n", "-0.035070 | \n", "... | \n", "-0.017047 | \n", "-0.018341 | \n", "-0.006733 | \n", "-0.007040 | \n", "-0.008368 | \n", "0.009755 | \n", "-0.045662 | \n", "0.116601 | \n", "-0.000572 | \n", "-0.000985 | \n", "
4 | \n", "8Ilh1ewceco | \n", "580 | \n", "20.0 | \n", "data/images/8Ilh1ewceco/580.jpg | \n", "-0.015903 | \n", "0.033545 | \n", "0.009257 | \n", "-0.033540 | \n", "0.010586 | \n", "-0.028067 | \n", "... | \n", "-0.016532 | \n", "0.012388 | \n", "0.020868 | \n", "-0.012635 | \n", "0.010914 | \n", "0.009203 | \n", "-0.010078 | \n", "0.063971 | \n", "-0.038024 | \n", "0.025840 | \n", "
5 rows × 516 columns
\n", "\n", " | video_id | \n", "frame_idx | \n", "timestamp | \n", "base64_image | \n", "dim_0 | \n", "dim_1 | \n", "dim_2 | \n", "dim_3 | \n", "dim_4 | \n", "dim_5 | \n", "... | \n", "dim_502 | \n", "dim_503 | \n", "dim_504 | \n", "dim_505 | \n", "dim_506 | \n", "dim_507 | \n", "dim_508 | \n", "dim_509 | \n", "dim_510 | \n", "dim_511 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "8Ilh1ewceco | \n", "0 | \n", "0.0 | \n", "b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... | \n", "-0.013127 | \n", "-0.022996 | \n", "-0.049374 | \n", "-0.006306 | \n", "0.013602 | \n", "-0.003762 | \n", "... | \n", "-0.006920 | \n", "0.013831 | \n", "0.056647 | \n", "0.007946 | \n", "-0.002478 | \n", "-0.030497 | \n", "-0.011770 | \n", "0.067427 | \n", "-0.031810 | \n", "-0.025615 | \n", "
1 | \n", "8Ilh1ewceco | \n", "145 | \n", "5.0 | \n", "b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... | \n", "0.009040 | \n", "0.003338 | \n", "0.029684 | \n", "-0.033058 | \n", "0.040864 | \n", "-0.006447 | \n", "... | \n", "0.033575 | \n", "-0.019076 | \n", "0.047166 | \n", "-0.010574 | \n", "-0.018608 | \n", "-0.013465 | \n", "-0.020017 | \n", "0.086240 | \n", "-0.029653 | \n", "0.035949 | \n", "
2 | \n", "8Ilh1ewceco | \n", "290 | \n", "10.0 | \n", "b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... | \n", "0.004891 | \n", "0.006527 | \n", "0.004417 | \n", "-0.000323 | \n", "0.006400 | \n", "-0.024191 | \n", "... | \n", "-0.043122 | \n", "-0.010695 | \n", "0.005672 | \n", "0.000172 | \n", "-0.014442 | \n", "-0.014647 | \n", "-0.016840 | \n", "0.100285 | \n", "0.013794 | \n", "0.015046 | \n", "
3 | \n", "8Ilh1ewceco | \n", "435 | \n", "15.0 | \n", "b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... | \n", "-0.022159 | \n", "0.020703 | \n", "-0.021607 | \n", "-0.019721 | \n", "-0.006067 | \n", "-0.035070 | \n", "... | \n", "-0.017047 | \n", "-0.018341 | \n", "-0.006733 | \n", "-0.007040 | \n", "-0.008368 | \n", "0.009755 | \n", "-0.045662 | \n", "0.116601 | \n", "-0.000572 | \n", "-0.000985 | \n", "
4 | \n", "8Ilh1ewceco | \n", "580 | \n", "20.0 | \n", "b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... | \n", "-0.015903 | \n", "0.033545 | \n", "0.009257 | \n", "-0.033540 | \n", "0.010586 | \n", "-0.028067 | \n", "... | \n", "-0.016532 | \n", "0.012388 | \n", "0.020868 | \n", "-0.012635 | \n", "0.010914 | \n", "0.009203 | \n", "-0.010078 | \n", "0.063971 | \n", "-0.038024 | \n", "0.025840 | \n", "
5 rows × 516 columns
\n", "