coreutils / tests /join /tests_for_xfields.c
AryaWu's picture
Upload folder using huggingface_hub
78d2150 verified
#include "../../unity/unity.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
/* We are included within join.c and can access internal symbols:
- struct line
- xfields
- freeline
- static global mcel_t tab
- mcel_scanz
*/
/* Put the global separator `tab` back into its "no delimiter given" state,
   in which fields are separated by blanks. */
static void reset_tab_default(void)
{
  tab.err = 0;
  tab.ch = 0;
  tab.len = 0; /* a zero length selects blank-separated mode */
}
/* Install the single byte C as the field delimiter: C is placed in a
   NUL-terminated one-character string and scanned into the global `tab`. */
static void set_tab_char(char c)
{
  char delim[2];

  delim[0] = c;
  delim[1] = 0;
  tab = mcel_scanz(delim);
}
/* Initialise *LN as a fresh, field-less line owning a heap copy of
   CONTENT_WITH_NEWLINE.  Every caller in this file passes text that ends
   in '\n'.  The copy is released later through cleanup_line().  Fails the
   current test if the copy cannot be allocated. */
static void prepare_line(struct line *ln, const char *content_with_newline)
{
  /* Start from an all-zero struct, then spell out the "no fields yet" state. */
  memset(ln, 0, sizeof *ln);

  char *copy = strdup(content_with_newline);
  ln->buf.buffer = copy;
  TEST_ASSERT_NOT_NULL(ln->buf.buffer);
  ln->buf.length = (idx_t)strlen(content_with_newline);

  ln->fields = NULL;
  ln->nfields_allocated = 0;
  ln->nfields = 0;
}
/* Release whatever LN holds by delegating to freeline() from join.c.
   NOTE(review): freeline() is defined outside this file.  The tests rely on
   it freeing ln->buf.buffer (test_xfields_many_fields_reallocation hands it a
   malloc'd buffer and never frees that buffer itself) -- confirm in join.c. */
static void cleanup_line(struct line *ln)
{
freeline(ln);
}
/* Fail the current test unless LN currently holds exactly EXPECTED fields. */
static void assert_nfields(struct line *ln, unsigned expected)
{
  const unsigned actual = (unsigned)ln->nfields;

  TEST_ASSERT_EQUAL_UINT_MESSAGE(expected, actual, "Unexpected number of fields");
}
/* Fail the current test unless field INDEX of LN exists and its bytes are
   exactly the string EXPECTED.  Fields are compared by length and then by
   content, because a field is a (beg, len) slice, not a C string. */
static void assert_field_eq(struct line *ln, unsigned index, const char *expected)
{
  /* The index must be valid before ln->fields[index] may be touched. */
  char range_msg[128];
  snprintf(range_msg, sizeof range_msg, "Field index %u out of range", index);
  TEST_ASSERT_TRUE_MESSAGE(index < (unsigned)ln->nfields, range_msg);

  /* Lengths first: the memcmp below is only meaningful when they agree. */
  idx_t actual_len = ln->fields[index].len;
  unsigned expected_len = (unsigned)strlen(expected);
  TEST_ASSERT_EQUAL_UINT(expected_len, (unsigned)actual_len);

  int diff = memcmp(ln->fields[index].beg, expected, (size_t)actual_len);
  TEST_ASSERT_EQUAL_INT(0, diff);
}
/* Unity fixture hook, run before each test: restore the default
   (blank-separated) delimiter state so a delimiter installed by an earlier
   test does not carry over. */
void setUp(void)
{
reset_tab_default();
}
/* Unity fixture hook, run after each test. */
void tearDown(void)
{
/* Nothing to do: every test releases its own line via cleanup_line(). */
}
/* Default mode (blanks as separators), basic split */
void test_xfields_blank_mode_basic(void)
{
struct line ln;
prepare_line(&ln, "alpha beta gamma\n");
/* Default: tab.len == 0 means blanks (spaces/tabs) act as separators */
xfields(&ln);
assert_nfields(&ln, 3);
assert_field_eq(&ln, 0, "alpha");
assert_field_eq(&ln, 1, "beta");
assert_field_eq(&ln, 2, "gamma");
cleanup_line(&ln);
}
/* Default mode: leading/trailing/multiple blanks ignored/compressed */
void test_xfields_blank_mode_leading_trailing_and_multiple_blanks(void)
{
struct line ln;
prepare_line(&ln, " alpha \t beta gamma \n");
xfields(&ln);
assert_nfields(&ln, 3);
assert_field_eq(&ln, 0, "alpha");
assert_field_eq(&ln, 1, "beta");
assert_field_eq(&ln, 2, "gamma");
cleanup_line(&ln);
}
/* Default mode: tabs are treated as blanks */
void test_xfields_blank_mode_tabs_as_blanks(void)
{
struct line ln;
prepare_line(&ln, "\tfoo\tbar\tbaz\t\n");
xfields(&ln);
assert_nfields(&ln, 3);
assert_field_eq(&ln, 0, "foo");
assert_field_eq(&ln, 1, "bar");
assert_field_eq(&ln, 2, "baz");
cleanup_line(&ln);
}
/* Default mode: empty line (just newline) -> zero fields */
void test_xfields_blank_mode_empty_line(void)
{
struct line ln;
prepare_line(&ln, "\n");
xfields(&ln);
assert_nfields(&ln, 0);
cleanup_line(&ln);
}
/* Char separator: comma, simple split */
void test_xfields_char_sep_comma_basic(void)
{
struct line ln;
prepare_line(&ln, "x,y,z\n");
set_tab_char(',');
xfields(&ln);
assert_nfields(&ln, 3);
assert_field_eq(&ln, 0, "x");
assert_field_eq(&ln, 1, "y");
assert_field_eq(&ln, 2, "z");
cleanup_line(&ln);
}
/* Char separator: comma, consecutive and trailing separators create empty fields */
void test_xfields_char_sep_comma_allows_empty_fields(void)
{
struct line ln;
prepare_line(&ln, "a,,b,c,\n");
set_tab_char(',');
xfields(&ln);
assert_nfields(&ln, 5);
assert_field_eq(&ln, 0, "a");
assert_field_eq(&ln, 1, "");
assert_field_eq(&ln, 2, "b");
assert_field_eq(&ln, 3, "c");
assert_field_eq(&ln, 4, "");
cleanup_line(&ln);
}
/* Char separator: space.  With an explicit delimiter every occurrence is
   significant, so consecutive spaces are not compressed: two adjacent spaces
   enclose an empty field. */
void test_xfields_char_sep_space_consecutive_spaces(void)
{
  struct line ln;

  /* The input must contain TWO spaces between "a" and "b".  With a single
     space the line has only two fields, which contradicts the three-field
     expectation (with an empty middle field) asserted below. */
  prepare_line(&ln, "a  b\n");
  set_tab_char(' ');
  xfields(&ln);

  assert_nfields(&ln, 3);
  assert_field_eq(&ln, 0, "a");
  assert_field_eq(&ln, 1, "");
  assert_field_eq(&ln, 2, "b");

  cleanup_line(&ln);
}
/* Special: delimiter is newline -> no split; whole pre-newline is one field */
void test_xfields_delim_newline_single_field(void)
{
struct line ln;
prepare_line(&ln, "hello world\n");
set_tab_char('\n');
xfields(&ln);
assert_nfields(&ln, 1);
assert_field_eq(&ln, 0, "hello world");
cleanup_line(&ln);
}
/* Many fields to exercise xpalloc growth/reallocation behavior */
void test_xfields_many_fields_reallocation(void)
{
/* Build a CSV line with 100 fields: 0,1,2,...,99\n */
unsigned n = 100;
size_t cap = 1024;
char *buf = (char *)malloc(cap);
TEST_ASSERT_NOT_NULL(buf);
size_t pos = 0;
for (unsigned i = 0; i < n; i++)
{
char tmp[32];
int k = snprintf(tmp, sizeof tmp, "%u", i);
TEST_ASSERT_TRUE(k > 0);
if (pos + (size_t)k + 2 >= cap)
{
cap *= 2;
char *nb = (char *)realloc(buf, cap);
TEST_ASSERT_NOT_NULL(nb);
buf = nb;
}
memcpy(buf + pos, tmp, (size_t)k);
pos += (size_t)k;
if (i + 1 < n)
buf[pos++] = ',';
}
buf[pos++] = '\n';
buf[pos] = '\0';
struct line ln;
memset(&ln, 0, sizeof ln);
ln.buf.buffer = buf; /* Take ownership directly */
ln.buf.length = (idx_t)pos;
set_tab_char(',');
xfields(&ln);
assert_nfields(&ln, n);
/* Spot-check a few fields */
assert_field_eq(&ln, 0, "0");
assert_field_eq(&ln, 1, "1");
assert_field_eq(&ln, 50, "50");
assert_field_eq(&ln, 99, "99");
cleanup_line(&ln);
/* 'cleanup_line' freed ln.buf.buffer */
}
/* Test-runner entry point: runs every xfields test under Unity and returns
   the value of UNITY_END() as the process exit status.  The tests are listed
   individually, one RUN_TEST() call per test function. */
int main(void)
{
UNITY_BEGIN();
/* Default mode: blanks separate fields. */
RUN_TEST(test_xfields_blank_mode_basic);
RUN_TEST(test_xfields_blank_mode_leading_trailing_and_multiple_blanks);
RUN_TEST(test_xfields_blank_mode_tabs_as_blanks);
RUN_TEST(test_xfields_blank_mode_empty_line);
/* Explicit single-character delimiter. */
RUN_TEST(test_xfields_char_sep_comma_basic);
RUN_TEST(test_xfields_char_sep_comma_allows_empty_fields);
RUN_TEST(test_xfields_char_sep_space_consecutive_spaces);
RUN_TEST(test_xfields_delim_newline_single_field);
/* Field-array growth. */
RUN_TEST(test_xfields_many_fields_reallocation);
return UNITY_END();
}