import unittest

from swift.llm import load_dataset
from swift.utils import lower_bound
class TestLlmUtils(unittest.TestCase):
    """Tests exercising ``lower_bound`` and ``load_dataset`` from ``swift``."""

    def test_count_startswith(self):
        """Expect ``lower_bound`` to return the length of a leading run of -100.

        The array is 1000 copies of -100 followed by 0..999, so index 1000 is
        the first position at which the predicate ``arr[i] != -100`` holds.
        """
        arr = [-100] * 1000 + list(range(1000))
        # assertEqual (rather than assertTrue(a == b)) reports the actual
        # value when the check fails.
        self.assertEqual(lower_bound(0, len(arr), lambda i: arr[i] != -100), 1000)

    def test_count_endswith(self):
        """Expect ``lower_bound`` to return where a trailing run of -100 begins.

        The array is 0..999 followed by 1000 copies of -100, so index 1000 is
        the first position at which the predicate ``arr[i] == -100`` holds.
        """
        arr = list(range(1000)) + [-100] * 1000
        self.assertEqual(lower_bound(0, len(arr), lambda i: arr[i] == -100), 1000)

    def test_dataset(self):
        """Load two dataset specs and print items 0 and 1 of the result.

        This is a smoke test: it makes no assertions and only fails if
        ``load_dataset`` or the indexing raises.
        """
        # NOTE(review): the '#1000' / '#200' suffixes presumably cap the number
        # of sampled rows, and 'force_redownload' presumably bypasses any local
        # cache (so this test needs network access) -- confirm against swift.
        dataset = load_dataset(
            ['AI-ModelScope/alpaca-gpt4-data-zh#1000', 'AI-ModelScope/alpaca-gpt4-data-en#200'],
            num_proc=4,
            strict=False,
            download_mode='force_redownload')
        # NOTE(review): whether index 0/1 are rows or (train, val) splits
        # depends on load_dataset's return type -- confirm.
        print(f'dataset[0]: {dataset[0]}')
        print(f'dataset[1]: {dataset[1]}')
if __name__ == '__main__':
    # Run all test cases in this module when the file is executed directly.
    unittest.main()