/*
 * libxml/tests/tests_HTMLparser_htmlParseElement.c
 *
 * Repository page residue, kept for provenance:
 *   AryaWu's picture
 *   Upload folder using huggingface_hub
 *   6baed57 verified
 */
#include "unity/unity.h"
#include <libxml/HTMLparser.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/* Simple event logger for SAX callbacks */

/* Capacity, in bytes, of the event trace buffer (terminator included). */
enum { EVENT_LOG_CAPACITY = 2048 };

typedef struct {
    /* NUL-terminated concatenation of "[S:..]", "[E:..]" and "[T:..]" records. */
    char buf[EVENT_LOG_CAPACITY];
} EventLog;
/*
 * Append the NUL-terminated string s to log->buf.
 *
 * When s does not fit in full, only the leading part that still leaves room
 * for the terminator is copied. If nothing fits, the log is left untouched.
 */
static void log_append(EventLog *log, const char *s) {
    const size_t capacity = sizeof(log->buf);
    const size_t used = strlen(log->buf);
    size_t n = strlen(s);

    /* Clamp to the free space, reserving one byte for the terminator. */
    if (used + n >= capacity) {
        n = capacity - used - 1;
    }
    if (n == 0) {
        return;
    }

    memcpy(log->buf + used, s, n);
    log->buf[used + n] = '\0';
}
/* SAX callbacks we care about */

/*
 * startElement callback: appends "[S:<name>]" to the EventLog passed as ctx.
 * Attributes are ignored. The record is formatted into a 256-byte scratch
 * buffer, so longer records are truncated by snprintf.
 */
static void onStartElement(void *ctx, const xmlChar *name, const xmlChar **atts) {
    char record[256];

    (void)atts; /* attributes are not part of the recorded trace */

    snprintf(record, sizeof record, "[S:%s]", (const char *)name);
    log_append((EventLog *)ctx, record);
}
/*
 * endElement callback: appends "[E:<name>]" to the EventLog passed as ctx.
 * The record is formatted into a 256-byte scratch buffer, so longer records
 * are truncated by snprintf.
 */
static void onEndElement(void *ctx, const xmlChar *name) {
    char record[256];

    snprintf(record, sizeof record, "[E:%s]", (const char *)name);
    log_append((EventLog *)ctx, record);
}
/*
 * characters callback: appends "[T:<text>]" to the EventLog passed as ctx.
 *
 * ch is a length-delimited run of len bytes (not necessarily terminated).
 * At most 511 bytes of it are recorded, and output stops early at an embedded
 * NUL. Calls with len <= 0 record nothing.
 */
static void onCharacters(void *ctx, const xmlChar *ch, int len) {
    enum { MAX_TEXT = 511 };
    char record[600];
    int shown;

    if (len <= 0) {
        return;
    }

    shown = (len < MAX_TEXT) ? len : MAX_TEXT;

    /* The precision bounds the read, so ch needs no terminator of its own. */
    snprintf(record, sizeof record, "[T:%.*s]", shown, (const char *)ch);
    log_append((EventLog *)ctx, record);
}
/*
 * Global SAX handler shared by every test in this file.
 * setUp() zeroes it and installs the three logging callbacks
 * (startElement, endElement, characters); run_case() uses it for
 * each parser context it creates.
 */
static xmlSAXHandler gSAX;
/* Test fixture setup: reset gSAX so that only the logging callbacks are set. */
void setUp(void) {
    /* Clear every slot first; handlers not assigned below stay zeroed. */
    memset(&gSAX, 0, sizeof gSAX);

    gSAX.characters = onCharacters;
    gSAX.endElement = onEndElement;
    gSAX.startElement = onStartElement;
}
/* Test fixture teardown: intentionally empty, there is nothing to undo. */
void tearDown(void) {
}
/*
 * Helper to run one parsing case through htmlParseElement and check events.
 *
 * html        - NUL-terminated markup handed to the parser as a memory buffer.
 * options     - parser option bits, stored directly in ctxt->options.
 * expectedLog - exact trace the SAX callbacks are expected to produce.
 *
 * Fails the current test if the parser context cannot be created or if the
 * recorded trace differs from expectedLog.
 */
static void run_case(const char *html, int options, const char *expectedLog) {
    EventLog log;
    log.buf[0] = '\0';

    htmlParserCtxtPtr ctxt = htmlCreateMemoryParserCtxt(html, (int)strlen(html));
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt, "htmlCreateMemoryParserCtxt returned NULL");

    /*
     * Copy the callbacks into the handler the context already owns instead of
     * pointing ctxt->sax at the static gSAX. htmlFreeParserCtxt() releases
     * ctxt->sax, so replacing the pointer would leak the context's own
     * handler and pass static storage to the deallocator.
     */
    TEST_ASSERT_NOT_NULL_MESSAGE(ctxt->sax, "parser context has no SAX handler");
    *ctxt->sax = gSAX;
    ctxt->userData = &log;
    ctxt->options = options;

    htmlParseElement(ctxt);

    /*
     * Release the context before asserting: a failing assertion aborts this
     * function, which would otherwise leak the context. The trace lives in
     * the local log, so it remains valid after the free.
     */
    htmlFreeParserCtxt(ctxt);

    TEST_ASSERT_EQUAL_STRING_MESSAGE(expectedLog, log.buf, "Unexpected SAX event sequence");
}
/* Tests */
/*
 * Calls htmlParseElement() with a NULL context. There is nothing to inspect
 * afterwards: the test passes as long as the call returns.
 */
void test_htmlParseElement_null_context_safe(void) {
/* Should be a no-op and must not crash */
htmlParseElement(NULL);
/* Reaching this line is the success condition. */
TEST_ASSERT_TRUE(1);
}
/*
 * An empty document is expected to produce no SAX events at all.
 *
 * NOTE(review): htmlCreateMemoryParserCtxt() may reject a zero-length buffer
 * by returning NULL, in which case run_case fails before any parsing happens.
 * Confirm against the libxml2 version under test.
 */
void test_htmlParseElement_empty_input_no_events(void) {
    static const char markup[] = "";
    static const char expected[] = "";

    run_case(markup, 0, expected);
}
/* A single element with text content is expected to yield start, text, end. */
void test_htmlParseElement_parses_simple_element(void) {
    static const char markup[] = "<p>hi</p>";
    static const char expected[] = "[S:p][T:hi][E:p]";

    run_case(markup, 0, expected);
}
/* Without HTML5 mode, an XML-style "/>" is expected to trigger an explicit endElement. */
void test_htmlParseElement_self_closing_xml_style_non_html5(void) {
    static const char markup[] = "<br/>";
    static const char expected[] = "[S:br][E:br]";

    run_case(markup, 0, expected);
}
/* With HTML_PARSE_HTML5, "/>" is expected NOT to produce an endElement callback. */
void test_htmlParseElement_self_closing_xml_style_html5(void) {
    static const char markup[] = "<br/>";
    static const char expected[] = "[S:br]";

    run_case(markup, HTML_PARSE_HTML5, expected);
}
/* Without HTML5 mode, a DTD-empty element such as <br> is expected to auto-close. */
void test_htmlParseElement_dtd_empty_element_non_html5(void) {
    static const char markup[] = "<br>";
    static const char expected[] = "[S:br][E:br]";

    run_case(markup, 0, expected);
}
/* With HTML_PARSE_HTML5, no endElement is expected for a DTD-empty element on this path. */
void test_htmlParseElement_dtd_empty_element_html5(void) {
    static const char markup[] = "<br>";
    static const char expected[] = "[S:br]";

    run_case(markup, HTML_PARSE_HTML5, expected);
}
/*
 * Without HTML5 mode, elements still open at end of input are expected to be
 * closed automatically, innermost first.
 */
void test_htmlParseElement_auto_close_on_eof_non_html5(void) {
    static const char markup[] = "<div><span>t";
    static const char expected[] = "[S:div][S:span][T:t][E:span][E:div]";

    run_case(markup, 0, expected);
}
/*
 * With HTML_PARSE_HTML5, no endElement callbacks are expected for elements
 * left open at end of input.
 */
void test_htmlParseElement_auto_close_on_eof_html5(void) {
    static const char markup[] = "<div><span>t";
    static const char expected[] = "[S:div][S:span][T:t]";

    run_case(markup, HTML_PARSE_HTML5, expected);
}
/* Test runner entry point: executes every test in this file, in order. */
int main(void) {
    UNITY_BEGIN();

    /* Degenerate inputs */
    RUN_TEST(test_htmlParseElement_null_context_safe);
    RUN_TEST(test_htmlParseElement_empty_input_no_events);

    /* Well-formed element */
    RUN_TEST(test_htmlParseElement_parses_simple_element);

    /* Self-closing and DTD-empty elements, with and without HTML5 mode */
    RUN_TEST(test_htmlParseElement_self_closing_xml_style_non_html5);
    RUN_TEST(test_htmlParseElement_self_closing_xml_style_html5);
    RUN_TEST(test_htmlParseElement_dtd_empty_element_non_html5);
    RUN_TEST(test_htmlParseElement_dtd_empty_element_html5);

    /* Unclosed elements at end of input */
    RUN_TEST(test_htmlParseElement_auto_close_on_eof_non_html5);
    RUN_TEST(test_htmlParseElement_auto_close_on_eof_html5);

    const int result = UNITY_END();
    return result;
}