File size: 2,805 Bytes
fe3ba5f
 
cb5b71d
 
 
 
fe3ba5f
 
 
cb5b71d
 
fe3ba5f
73ebcab
fe3ba5f
cb5b71d
 
 
 
73ebcab
cb5b71d
 
 
 
 
 
 
fe3ba5f
cb5b71d
 
 
fe3ba5f
73ebcab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe3ba5f
 
 
 
cb5b71d
fe3ba5f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from unittest import mock

from etils import epath
import pandas as pd
import pytest

from core import files as files_module

FileTypes = files_module.FileTypes


@mock.patch.object(files_module, "guess_file_type", return_value=FileTypes.CSV)
def test_check_file_csv_url(guess_file_type):
    del guess_file_type
    csv = epath.Path(
        # This is the hash path for "https://my.url".
        "/tmp/croissant-editor-f76b4732c82d83daf858fae2cc0e590d352a4bceb781351243a03daab11f76bc"
    )
    # Test unescaped CSV
    if csv.exists():
        csv.unlink()
    with csv.open("w") as f:
        f.write("column1,column2\n")
        f.write("a,1\n")
        f.write("b,2\n")
        f.write("c,3\n")
    file = files_module.file_from_url("https://my.url", set(), epath.Path())
    pd.testing.assert_frame_equal(
        file.df, pd.DataFrame({"column1": ["a", "b", "c"], "column2": [1, 2, 3]})
    )

    # Test error thrown on no file
    csv.unlink()
    with pytest.raises(Exception):
        files_module.file_from_url("https://my.url", set(), epath.Path())

    # Test escaped CSV
    content = b'"This","Is"\n1,2\n3,4'
    with csv.open("wb") as f:
        f.write(content)
    file = files_module.file_from_url("https://my.url", set(), epath.Path())
    pd.testing.assert_frame_equal(file.df, pd.DataFrame({"This": [1, 3], "Is": [2, 4]}))


@mock.patch.object(files_module, "guess_file_type", return_value=FileTypes.TSV)
def test_check_file_tsv_url(guess_file_type):
    del guess_file_type
    tsv = epath.Path(
        # This is the hash path for "https://my.url".
        "/tmp/croissant-editor-f76b4732c82d83daf858fae2cc0e590d352a4bceb781351243a03daab11f76bc"
    )
    # Test unescaped CSV
    if tsv.exists():
        tsv.unlink()
    with tsv.open("w") as f:
        f.write("column1\tcolumn2\n")
        f.write("a\t1\n")
        f.write("b\t2\n")
        f.write("c\t3\n")
    file = files_module.file_from_url("https://my.url", set(), epath.Path())
    pd.testing.assert_frame_equal(
        file.df, pd.DataFrame({"column1": ["a", "b", "c"], "column2": [1, 2, 3]})
    )

    # Test error thrown on no file
    tsv.unlink()
    with pytest.raises(Exception):
        files_module.file_from_url("https://my.url", set(), epath.Path())

    # Test escaped TSV
    content = b'"This"\t"Is"\n1\t2\n3\t4'
    with tsv.open("wb") as f:
        f.write(content)
    file = files_module.file_from_url("https://my.url", set(), epath.Path())
    pd.testing.assert_frame_equal(file.df, pd.DataFrame({"This": [1, 3], "Is": [2, 4]}))


@mock.patch.object(files_module, "guess_file_type", return_value="unknown")
def test_check_file_unknown(guess_file_type):
    del guess_file_type
    with pytest.raises(NotImplementedError):
        files_module.file_from_url("https://my.url", set(), epath.Path())