croissant-editor / core /files_test.py
marcenacp's picture
Deploy (see actual commits on https://github.com/mlcommons/croissant).
73ebcab
raw
history blame contribute delete
No virus
2.81 kB
from unittest import mock
from etils import epath
import pandas as pd
import pytest
from core import files as files_module
# Short alias for `files_module.FileTypes`, used as the mocked return value of
# `guess_file_type` in the tests below.
FileTypes = files_module.FileTypes
@mock.patch.object(files_module, "guess_file_type", return_value=FileTypes.CSV)
def test_check_file_csv_url(guess_file_type):
    """Exercise `file_from_url` on comma-separated content.

    `guess_file_type` is patched to return `FileTypes.CSV`. The test writes
    content to the on-disk path associated with the URL, calls
    `file_from_url`, and compares the resulting `df` attribute with the
    expected DataFrame. Three cases run in order: plain CSV, missing file
    (expects an exception), and CSV with quoted headers.
    """
    del guess_file_type  # The mock is only needed for its return value.
    # This is the hash path for "https://my.url".
    local_path = epath.Path(
        "/tmp/croissant-editor-f76b4732c82d83daf858fae2cc0e590d352a4bceb781351243a03daab11f76bc"
    )

    # Case 1: unescaped CSV. Start from a clean slate, then write the rows.
    if local_path.exists():
        local_path.unlink()
    plain_rows = ("column1,column2\n", "a,1\n", "b,2\n", "c,3\n")
    with local_path.open("w") as handle:
        handle.write("".join(plain_rows))
    expected_plain = pd.DataFrame(
        {"column1": ["a", "b", "c"], "column2": [1, 2, 3]}
    )
    loaded = files_module.file_from_url("https://my.url", set(), epath.Path())
    pd.testing.assert_frame_equal(loaded.df, expected_plain)

    # Case 2: with the file removed, the call is expected to raise.
    local_path.unlink()
    with pytest.raises(Exception):
        files_module.file_from_url("https://my.url", set(), epath.Path())

    # Case 3: escaped CSV (quoted header cells), written as raw bytes.
    # The file written here is not removed when the test ends.
    quoted_bytes = b'"This","Is"\n1,2\n3,4'
    with local_path.open("wb") as handle:
        handle.write(quoted_bytes)
    expected_quoted = pd.DataFrame({"This": [1, 3], "Is": [2, 4]})
    loaded = files_module.file_from_url("https://my.url", set(), epath.Path())
    pd.testing.assert_frame_equal(loaded.df, expected_quoted)
@mock.patch.object(files_module, "guess_file_type", return_value=FileTypes.TSV)
def test_check_file_tsv_url(guess_file_type):
    """Exercise `file_from_url` on tab-separated content.

    `guess_file_type` is patched to return `FileTypes.TSV`. The test writes
    content to the on-disk path associated with the URL, calls
    `file_from_url`, and compares the resulting `df` attribute with the
    expected DataFrame. Three cases run in order: plain TSV, missing file
    (expects an exception), and TSV with quoted headers.
    """
    del guess_file_type  # The mock is only needed for its return value.
    # This is the hash path for "https://my.url".
    local_path = epath.Path(
        "/tmp/croissant-editor-f76b4732c82d83daf858fae2cc0e590d352a4bceb781351243a03daab11f76bc"
    )

    # Case 1: unescaped TSV. Start from a clean slate, then write the rows.
    if local_path.exists():
        local_path.unlink()
    plain_rows = ("column1\tcolumn2\n", "a\t1\n", "b\t2\n", "c\t3\n")
    with local_path.open("w") as handle:
        handle.write("".join(plain_rows))
    expected_plain = pd.DataFrame(
        {"column1": ["a", "b", "c"], "column2": [1, 2, 3]}
    )
    loaded = files_module.file_from_url("https://my.url", set(), epath.Path())
    pd.testing.assert_frame_equal(loaded.df, expected_plain)

    # Case 2: with the file removed, the call is expected to raise.
    local_path.unlink()
    with pytest.raises(Exception):
        files_module.file_from_url("https://my.url", set(), epath.Path())

    # Case 3: escaped TSV (quoted header cells), written as raw bytes.
    # The file written here is not removed when the test ends.
    quoted_bytes = b'"This"\t"Is"\n1\t2\n3\t4'
    with local_path.open("wb") as handle:
        handle.write(quoted_bytes)
    expected_quoted = pd.DataFrame({"This": [1, 3], "Is": [2, 4]})
    loaded = files_module.file_from_url("https://my.url", set(), epath.Path())
    pd.testing.assert_frame_equal(loaded.df, expected_quoted)
@mock.patch.object(files_module, "guess_file_type", return_value="unknown")
def test_check_file_unknown(guess_file_type):
    """Expect `NotImplementedError` when the guessed file type is "unknown".

    `guess_file_type` is patched to return the string "unknown"; calling
    `file_from_url` must then raise `NotImplementedError`.
    """
    del guess_file_type  # The mock is only needed for its return value.
    names = set()
    folder = epath.Path()
    with pytest.raises(NotImplementedError):
        files_module.file_from_url("https://my.url", names, folder)