|
from pathlib import Path |
|
|
|
import pytest |
|
|
|
from youtube_transcriber.loading.loaderiterator import LoaderIterator |
|
from youtube_transcriber.loading.serialization import JsonSerializer |
|
|
|
@pytest.fixture
def loader_iterator():
    """Provide a LoaderIterator over the JSON sample files, two per iteration.

    The path list holds five JSON files under the test folder plus one
    entry, "non-existing-path", placed between 2.json and 3.json.
    NOTE(review): that entry presumably exercises how LoaderIterator copes
    with a path it cannot load -- confirm against the loader implementation.
    """
    base_dir = Path.home() / "whisper_gpt_pipeline/youtube_transcriber/test"
    load_paths = [
        base_dir / "files/1.json",
        base_dir / "files/2.json",
        Path("non-existing-path"),
        base_dir / "files/3.json",
        base_dir / "files/4.json",
        base_dir / "files/5.json",
    ]
    # Second positional argument is the number of files per iteration.
    return LoaderIterator(JsonSerializer(), 2, load_paths)
|
|
|
def test_loader_iterator_init():
    """Check that the constructor stores its three arguments unchanged.

    Verifies the serializer instance, the number of files per iteration and
    the load paths are exposed on the matching attributes.
    """
    iterator = LoaderIterator(JsonSerializer(), 3, "dummy_paths")

    # isinstance is the idiomatic type check (PEP 8) instead of type(x) == T.
    assert isinstance(iterator, LoaderIterator)
    assert isinstance(iterator.serializer, JsonSerializer)
    assert iterator.load_paths == "dummy_paths"
    assert iterator.num_files_per_iteration == 3
|
|
|
def test_loop_through_loaded_data(loader_iterator):
    """Check that iterating the loader yields the expected batches in order.

    Each batch is compared with the matching entry of ``expected_data`` and
    the total number of batches is verified afterwards.

    NOTE(review): the second expected batch holds a single record,
    presumably because the fixture pairs "non-existing-path" with 3.json
    and the unloadable path is skipped -- confirm against LoaderIterator.

    Args:
        loader_iterator: Iterable provided by the ``loader_iterator``
            fixture; each iteration yields one batch of loaded records.
    """
    expected_data = [
        [
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=YMlTSmusEmA",
            },
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=Jzl0hHTc7Jw",
            },
        ],
        [
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=gV50hpSKHFQ",
            },
        ],
        [
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=N6ZyzoibXqg",
            },
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=q90v9FLXi1E",
            },
        ],
    ]

    batches_seen = 0
    for i, data in enumerate(loader_iterator):
        assert data == expected_data[i]
        batches_seen += 1

    # Without this check the loop above passes vacuously when the iterator
    # yields too few batches, or none at all.
    assert batches_seen == len(expected_data)