/*
 * libxml/tests/tests_HTMLparser_htmlStartCharData.c
 * Last commit by AryaWu: "Upload folder using huggingface_hub" (6baed57, verified)
 */
#include "unity/unity.h"
#include <libxml/HTMLparser.h>
#include <string.h>
#include <stdlib.h>
/*
 * Test entry point for htmlStartCharData(), which is static in the parser
 * source and therefore not callable directly. The wrapper itself is defined
 * in the source module, not in this file; only its prototype lives here.
 */
void test_htmlStartCharData(htmlParserCtxtPtr ctxt);
/*
 * Recorder for SAX element events seen during a test.
 *
 * The counters tally every start/end callback; the name arrays keep a
 * duplicated copy of the element name for at most TEST_SAX_REC_CAPACITY
 * events of each kind. Unused slots are expected to be NULL (tests zero the
 * whole struct before use).
 */
enum { TEST_SAX_REC_CAPACITY = 16 };

typedef struct {
    int start_count;                              /* number of startElement callbacks */
    int end_count;                                /* number of endElement callbacks */
    xmlChar *start_names[TEST_SAX_REC_CAPACITY];  /* copies of start names, in arrival order */
    xmlChar *end_names[TEST_SAX_REC_CAPACITY];    /* copies of end names, in arrival order */
} TestSAXRec;
/*
 * SAX startElement callback that logs the element into a TestSAXRec.
 *
 * ctx:  the TestSAXRec to update (passed as SAX user data); NULL is ignored.
 * name: element name; a copy made with xmlStrdup is stored while a free slot
 *       remains in start_names.
 * atts: attribute list, unused.
 *
 * start_count is incremented for every call, including calls that arrive
 * after the name array is full, so it can exceed the array capacity.
 */
static void recordStartElement(void *ctx, const xmlChar *name, const xmlChar **atts) {
    TestSAXRec *const log = ctx;
    const int capacity = (int)(sizeof(log->start_names) / sizeof(log->start_names[0]));

    (void)atts;

    if (log != NULL) {
        const int slot = log->start_count++;
        if (slot < capacity) {
            log->start_names[slot] = xmlStrdup(name);
        }
    }
}
/*
 * SAX endElement callback that logs the element into a TestSAXRec.
 *
 * ctx:  the TestSAXRec to update (passed as SAX user data); NULL is ignored.
 * name: element name; copied with xmlStrdup into the next end_names slot
 *       when one is still available.
 *
 * end_count always advances by one, even when the name could not be stored.
 */
static void recordEndElement(void *ctx, const xmlChar *name) {
    TestSAXRec *sink = (TestSAXRec *)ctx;
    if (!sink) {
        return;
    }

    const int room = (int)(sizeof(sink->end_names) / sizeof(sink->end_names[0]));
    const int idx = sink->end_count;

    if (idx < room) {
        sink->end_names[idx] = xmlStrdup(name);
    }
    sink->end_count = idx + 1;
}
/*
 * Release every element name captured in `rec` and reset it to empty.
 *
 * The record callbacks keep counting events after the name arrays are full,
 * so start_count/end_count may be larger than the array capacity. Only the
 * slots that can actually hold a pointer are visited; iterating up to the
 * raw count would read (and free) past the end of the arrays.
 *
 * rec: recorder to clear; NULL is ignored.
 */
static void freeRec(TestSAXRec *rec) {
    if (rec == NULL) {
        return;
    }

    const int start_cap = (int)(sizeof(rec->start_names) / sizeof(rec->start_names[0]));
    const int end_cap = (int)(sizeof(rec->end_names) / sizeof(rec->end_names[0]));

    /* Clamp to capacity: the counters are event tallies, not fill levels */
    const int start_used = (rec->start_count < start_cap) ? rec->start_count : start_cap;
    const int end_used = (rec->end_count < end_cap) ? rec->end_count : end_cap;

    for (int i = 0; i < start_used; i++) {
        if (rec->start_names[i] != NULL) {
            xmlFree(rec->start_names[i]);
        }
        rec->start_names[i] = NULL;
    }
    for (int i = 0; i < end_used; i++) {
        if (rec->end_names[i] != NULL) {
            xmlFree(rec->end_names[i]);
        }
        rec->end_names[i] = NULL;
    }

    rec->start_count = 0;
    rec->end_count = 0;
}
/*
 * Reset `sax` to an all-zero handler and install the two recording callbacks.
 *
 * After this call every callback other than startElement/endElement is NULL.
 * NOTE(review): the zeroed `initialized` field presumably keeps libxml2 on
 * the SAX1 startElement/endElement path - confirm against the parser.
 *
 * sax: handler to overwrite; must not be NULL.
 */
static void initSAX(xmlSAXHandler *sax) {
    memset(sax, 0, sizeof(xmlSAXHandler));

    sax->endElement = recordEndElement;
    sax->startElement = recordStartElement;
}
/*
 * Unity fixture hook (by Unity convention, run before each RUN_TEST case):
 * initialises the libxml2 library via xmlInitParser().
 */
void setUp(void) {
xmlInitParser();
}
/*
 * Unity fixture hook (by Unity convention, run after each RUN_TEST case):
 * releases libxml2 global state via xmlCleanupParser().
 * NOTE(review): libxml2 describes xmlCleanupParser() as a process-exit call;
 * running it per test relies on the next setUp() re-initialising the
 * library - confirm this is intended.
 */
void tearDown(void) {
xmlCleanupParser();
}
/*
 * Create a fresh HTML parser context whose element events are recorded in
 * `rec`.
 *
 * The recording callbacks are written into the SAX handler that the context
 * already owns. Pointing ctxt->sax at a handler owned by the test would leak
 * the context's own handler and make htmlFreeParserCtxt() hand non-heap
 * memory to xmlFree(), because the context frees ctxt->sax on destruction.
 *
 * rec: recorder passed to the callbacks as user data (may be NULL, in which
 *      case the callbacks do nothing).
 *
 * Returns the new context with an empty name stack and no options set. The
 * caller releases it with htmlFreeParserCtxt(). Allocation failure is
 * reported as a Unity assertion failure.
 */
static htmlParserCtxtPtr create_ctxt_with_rec(TestSAXRec *rec) {
    htmlParserCtxtPtr ctxt = htmlNewParserCtxt();
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt, "htmlNewParserCtxt failed");
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt->sax, "parser context has no SAX handler");

    /* Overwrite the context-owned handler in place; ownership stays with ctxt */
    initSAX(ctxt->sax);
    ctxt->userData = rec;

    /* Ensure clean name stack */
    ctxt->name = NULL;
    ctxt->nameNr = 0;
    /*
     * nameTab/nameMax are left as htmlNewParserCtxt set them; the parser is
     * expected to grow the table itself when it pushes implied elements.
     */
    ctxt->options = 0;

    return ctxt;
}
/*
 * Test 1: with HTML_PARSE_HTML5 set, htmlStartCharData must not emit any
 * start or end element events.
 */
void test_htmlStartCharData_returns_early_with_HTML5_option(void) {
    TestSAXRec events = {0};
    htmlParserCtxtPtr parser = create_ctxt_with_rec(&events);

    parser->options = parser->options | HTML_PARSE_HTML5;
    test_htmlStartCharData(parser);

    /* Neither callback may have fired */
    TEST_ASSERT_EQUAL_INT(0, events.start_count);
    TEST_ASSERT_EQUAL_INT(0, events.end_count);

    freeRec(&events);
    htmlFreeParserCtxt(parser);
}
/*
 * Test 2: with HTML_PARSE_NOIMPLIED set, htmlStartCharData must not emit any
 * start or end element events.
 */
void test_htmlStartCharData_returns_early_with_NOIMPLIED_option(void) {
    TestSAXRec events = {0};
    htmlParserCtxtPtr parser = create_ctxt_with_rec(&events);

    parser->options = parser->options | HTML_PARSE_NOIMPLIED;
    test_htmlStartCharData(parser);

    /* Neither callback may have fired */
    TEST_ASSERT_EQUAL_INT(0, events.start_count);
    TEST_ASSERT_EQUAL_INT(0, events.end_count);

    freeRec(&events);
    htmlFreeParserCtxt(parser);
}
/*
 * Test 3: starting from an empty element stack with no options, exactly two
 * start events are expected - "html" first, then "body" - with no end
 * events, and "body" left on top of the parser's name stack.
 */
void test_htmlStartCharData_implies_html_and_body_when_stack_empty(void) {
    TestSAXRec events = {0};
    htmlParserCtxtPtr parser = create_ctxt_with_rec(&events);

    /* No options, so implied elements are allowed; stack explicitly empty */
    parser->options = 0;
    parser->nameNr = 0;
    parser->name = NULL;

    test_htmlStartCharData(parser);

    /* Event counts */
    TEST_ASSERT_EQUAL_INT(2, events.start_count);
    TEST_ASSERT_EQUAL_INT(0, events.end_count);

    /* Event order: html, then body */
    TEST_ASSERT_NOT_NULL(events.start_names[0]);
    TEST_ASSERT_NOT_NULL(events.start_names[1]);
    TEST_ASSERT_TRUE(xmlStrEqual(events.start_names[0], BAD_CAST "html"));
    TEST_ASSERT_TRUE(xmlStrEqual(events.start_names[1], BAD_CAST "body"));

    /* Resulting parser stack */
    TEST_ASSERT_TRUE_MESSAGE(parser->nameNr >= 2, "Expected at least 2 elements on the stack");
    TEST_ASSERT_TRUE(xmlStrEqual(parser->name, BAD_CAST "body"));

    freeRec(&events);
    htmlFreeParserCtxt(parser);
}
/*
 * Test 4: when the element stack holds only "html", a single implied "body"
 * start event is expected and "body" must end up on top of the stack.
 *
 * The stack is seeded by hand. If the context's name table is missing or too
 * small it is grown with xmlRealloc, so an existing table is resized rather
 * than overwritten and leaked, and nameTab/nameMax are only updated once the
 * allocation is known to have succeeded.
 */
void test_htmlStartCharData_implies_body_when_only_html_present(void) {
    TestSAXRec rec;
    memset(&rec, 0, sizeof(rec));
    htmlParserCtxtPtr ctxt = create_ctxt_with_rec(&rec);
    ctxt->options = 0;

    /* Make sure nameTab has room for at least two entries */
    if (ctxt->nameTab == NULL || ctxt->nameMax < 2) {
        const int wanted = 8;
        const xmlChar **grown = (const xmlChar **) xmlRealloc(
            (void *) ctxt->nameTab, (size_t) wanted * sizeof(const xmlChar *));
        TEST_ASSERT_NOT_NULL_MESSAGE(grown, "Failed to allocate nameTab");
        ctxt->nameTab = grown;
        ctxt->nameMax = wanted;
    }

    /* Seed the stack with only "html" */
    ctxt->nameNr = 1;
    ctxt->nameTab[0] = BAD_CAST "html";
    ctxt->name = ctxt->nameTab[0];

    test_htmlStartCharData(ctxt);

    TEST_ASSERT_EQUAL_INT(1, rec.start_count);
    TEST_ASSERT_TRUE(xmlStrEqual(BAD_CAST "body", rec.start_names[0]));
    TEST_ASSERT_TRUE_MESSAGE(ctxt->nameNr >= 2, "Expected body to be pushed on the stack");
    TEST_ASSERT_TRUE(xmlStrEqual(BAD_CAST "body", ctxt->name));

    freeRec(&rec);
    htmlFreeParserCtxt(ctxt);
}
/*
 * Test 5: when the element stack already holds "html" and "body", no further
 * start or end events are expected and the stack must be left unchanged
 * (two entries, "body" on top).
 *
 * The stack is seeded by hand. If the context's name table is missing or too
 * small it is grown with xmlRealloc, so an existing table is resized rather
 * than overwritten and leaked, and nameTab/nameMax are only updated once the
 * allocation is known to have succeeded.
 */
void test_htmlStartCharData_no_duplication_when_body_present(void) {
    TestSAXRec rec;
    memset(&rec, 0, sizeof(rec));
    htmlParserCtxtPtr ctxt = create_ctxt_with_rec(&rec);
    ctxt->options = 0;

    /* Make sure nameTab has room for at least two entries */
    if (ctxt->nameTab == NULL || ctxt->nameMax < 2) {
        const int wanted = 8;
        const xmlChar **grown = (const xmlChar **) xmlRealloc(
            (void *) ctxt->nameTab, (size_t) wanted * sizeof(const xmlChar *));
        TEST_ASSERT_NOT_NULL_MESSAGE(grown, "Failed to allocate nameTab");
        ctxt->nameTab = grown;
        ctxt->nameMax = wanted;
    }

    /* Seed the stack: "html", "body" */
    ctxt->nameNr = 2;
    ctxt->nameTab[0] = BAD_CAST "html";
    ctxt->nameTab[1] = BAD_CAST "body";
    ctxt->name = ctxt->nameTab[1];

    test_htmlStartCharData(ctxt);

    TEST_ASSERT_EQUAL_INT(0, rec.start_count);
    TEST_ASSERT_EQUAL_INT(0, rec.end_count);
    TEST_ASSERT_EQUAL_INT(2, ctxt->nameNr);
    TEST_ASSERT_TRUE(xmlStrEqual(BAD_CAST "body", ctxt->name));

    freeRec(&rec);
    htmlFreeParserCtxt(ctxt);
}
/*
 * Unity test runner: registers the five htmlStartCharData cases in order and
 * returns the value of UNITY_END() as the process exit status.
 */
int main(void) {
UNITY_BEGIN();
/* Early-return paths: options that suppress implied elements */
RUN_TEST(test_htmlStartCharData_returns_early_with_HTML5_option);
RUN_TEST(test_htmlStartCharData_returns_early_with_NOIMPLIED_option);
/* Implied-element paths, from an empty stack up to a stack that already has body */
RUN_TEST(test_htmlStartCharData_implies_html_and_body_when_stack_empty);
RUN_TEST(test_htmlStartCharData_implies_body_when_only_html_present);
RUN_TEST(test_htmlStartCharData_no_duplication_when_body_present);
return UNITY_END();
}