/*
 * libxml/tests/tests_HTMLparser_htmlParseAttribute.c
 * AryaWu's picture
 * Upload folder using huggingface_hub
 * 6baed57 verified
 */
#include "unity/unity.h"
#include <libxml/HTMLparser.h>
#include <libxml/xmlmemory.h>
#include <libxml/xmlstring.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/* Internal type needed for the wrapper's return type */
#include "private/dict.h"
/*
 * Wrapper provided in the module for the static function htmlParseAttribute.
 *
 * ctxt:  parser context positioned on the attribute text to parse.
 * value: out-parameter for the attribute value. The tests in this file
 *        release a non-NULL result with xmlFree() and expect NULL when the
 *        attribute has no value.
 *
 * Returns the attribute name as an xmlHashedString. The tests only read
 * its .name member and never free it.
 * NOTE(review): the name is presumably owned by the parser context's
 * dictionary -- confirm against the module before relying on its lifetime.
 */
extern xmlHashedString test_htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value);
/*
 * Create an HTML parser context for the in-memory buffer `data` of `len`
 * bytes. A Unity assertion fails the running test when no context could
 * be created, so callers receive the context without checking it again.
 */
static htmlParserCtxtPtr make_ctxt_from_mem_len(const char *data, int len)
{
    htmlParserCtxtPtr parser = htmlCreateMemoryParserCtxt(data, len);

    TEST_ASSERT_NOT_NULL_MESSAGE(parser, "Failed to create HTML parser context");

    return parser;
}
/* Unity per-test setup hook; the tests in this file need no shared setup. */
void setUp(void)
{
}
/* Unity per-test teardown hook; every test releases its own resources. */
void tearDown(void)
{
}
/*
 * Simple unquoted value: "id=val" must yield the name "id" and the
 * value "val".
 */
void test_htmlParseAttribute_unquoted_simple(void) {
    const char *src = "id=val";
    htmlParserCtxtPtr ctxt = make_ctxt_from_mem_len(src, (int)strlen(src));
    /*
     * Start from NULL rather than a non-NULL sentinel: this test expects a
     * value, so if the wrapper ever left *value untouched the NOT_NULL
     * assertion below reports a clean failure instead of the string
     * comparison and xmlFree() operating on an invalid pointer.
     */
    xmlChar *val = NULL;
    xmlHashedString h = test_htmlParseAttribute(ctxt, &val);

    TEST_ASSERT_NOT_NULL(h.name);
    TEST_ASSERT_EQUAL_STRING("id", (const char*)h.name);
    TEST_ASSERT_NOT_NULL(val);
    TEST_ASSERT_EQUAL_STRING("val", (const char*)val);

    /* The value is released by the caller; the name is not freed here. */
    xmlFree(val);
    htmlFreeParserCtxt(ctxt);
}
/*
 * Double-quoted value with blanks on both sides of '='. The upper-case
 * ASCII name "CLASS" is expected back as "class"; the value keeps its case.
 */
void test_htmlParseAttribute_quoted_double_with_spaces(void)
{
    const char *input = "CLASS = \"MyVal\"";
    const int input_len = (int)strlen(input);
    xmlChar *attr_value = NULL;
    htmlParserCtxtPtr parser = make_ctxt_from_mem_len(input, input_len);
    xmlHashedString attr = test_htmlParseAttribute(parser, &attr_value);

    /* Name checks. */
    TEST_ASSERT_NOT_NULL(attr.name);
    TEST_ASSERT_EQUAL_STRING("class", (const char*)attr.name);

    /* Value checks. */
    TEST_ASSERT_NOT_NULL(attr_value);
    TEST_ASSERT_EQUAL_STRING("MyVal", (const char*)attr_value);

    xmlFree(attr_value);
    htmlFreeParserCtxt(parser);
}
/*
 * Single-quoted value containing the entity "&amp;": the returned value
 * is expected to hold the decoded text "a&b".
 */
void test_htmlParseAttribute_quoted_single_with_entity_decoding(void)
{
    const char *input = "title='a&amp;b'";
    const int input_len = (int)strlen(input);
    xmlChar *attr_value = NULL;
    htmlParserCtxtPtr parser = make_ctxt_from_mem_len(input, input_len);
    xmlHashedString attr = test_htmlParseAttribute(parser, &attr_value);

    /* Name checks. */
    TEST_ASSERT_NOT_NULL(attr.name);
    TEST_ASSERT_EQUAL_STRING("title", (const char*)attr.name);

    /* Value checks. */
    TEST_ASSERT_NOT_NULL(attr_value);
    TEST_ASSERT_EQUAL_STRING("a&b", (const char*)attr_value);

    xmlFree(attr_value);
    htmlFreeParserCtxt(parser);
}
/*
 * Attribute without '=': the name "checked" must be returned and the
 * value out-parameter must be reset to NULL.
 */
void test_htmlParseAttribute_no_value_present(void)
{
    const char *input = "checked ";
    const int input_len = (int)strlen(input);
    /* Non-NULL sentinel so the NULL check proves the wrapper wrote *value. */
    xmlChar *attr_value = (xmlChar*)0x1;
    htmlParserCtxtPtr parser = make_ctxt_from_mem_len(input, input_len);
    xmlHashedString attr = test_htmlParseAttribute(parser, &attr_value);

    TEST_ASSERT_NOT_NULL(attr.name);
    TEST_ASSERT_EQUAL_STRING("checked", (const char*)attr.name);

    /* No value was parsed, so there is nothing to free besides the context. */
    TEST_ASSERT_NULL(attr_value);

    htmlFreeParserCtxt(parser);
}
/*
 * Name mixing ASCII and non-ASCII characters: only the ASCII upper-case
 * letter is expected to be lower-cased, the UTF-8 sequences are expected
 * to come back unchanged.
 */
void test_htmlParseAttribute_non_ascii_name_lowercase_ascii_only(void)
{
    /* UTF-8 for "Dátâ=1" and for the expected name "dátâ". */
    const char input[] = "D\xc3\xa1t\xc3\xa2=1";
    const char want_name[] = "d\xc3\xa1t\xc3\xa2";
    const int input_len = (int)sizeof(input) - 1; /* drop the terminator */
    xmlChar *attr_value = NULL;
    htmlParserCtxtPtr parser = make_ctxt_from_mem_len(input, input_len);
    xmlHashedString attr = test_htmlParseAttribute(parser, &attr_value);

    /* Name checks. */
    TEST_ASSERT_NOT_NULL(attr.name);
    TEST_ASSERT_EQUAL_STRING(want_name, (const char*)attr.name);

    /* Value checks. */
    TEST_ASSERT_NOT_NULL(attr_value);
    TEST_ASSERT_EQUAL_STRING("1", (const char*)attr_value);

    xmlFree(attr_value);
    htmlFreeParserCtxt(parser);
}
/*
 * A NUL byte inside the attribute name is expected to be replaced by
 * U+FFFD, i.e. the UTF-8 bytes EF BF BD.
 */
void test_htmlParseAttribute_embedded_nul_in_name_replaced(void)
{
    /*
     * Bytes of "na\0me=1". The brace initializer keeps the array free of a
     * trailing terminator, so sizeof is exactly the input length.
     */
    const char input[] = { 'n', 'a', '\0', 'm', 'e', '=', '1' };
    /* "na" + EF BF BD + "me"; split so the hex escape ends before "me". */
    const char want_name[] = "na\xEF\xBF\xBD" "me";
    xmlChar *attr_value = NULL;
    htmlParserCtxtPtr parser = make_ctxt_from_mem_len(input, (int)sizeof(input));
    xmlHashedString attr = test_htmlParseAttribute(parser, &attr_value);

    /* Name checks. */
    TEST_ASSERT_NOT_NULL(attr.name);
    TEST_ASSERT_EQUAL_STRING(want_name, (const char*)attr.name);

    /* Value checks. */
    TEST_ASSERT_NOT_NULL(attr_value);
    TEST_ASSERT_EQUAL_STRING("1", (const char*)attr_value);

    xmlFree(attr_value);
    htmlFreeParserCtxt(parser);
}
/*
 * Zero-length input: neither a name nor a value is expected, and the
 * value out-parameter must be reset to NULL.
 */
void test_htmlParseAttribute_empty_input(void)
{
    /* Non-NULL sentinel so the NULL check proves the wrapper wrote *value. */
    xmlChar *attr_value = (xmlChar*)0x1;
    htmlParserCtxtPtr parser = make_ctxt_from_mem_len("", 0);
    xmlHashedString attr = test_htmlParseAttribute(parser, &attr_value);

    TEST_ASSERT_NULL(attr.name);
    TEST_ASSERT_NULL(attr_value);

    htmlFreeParserCtxt(parser);
}
/*
 * Test runner: initialises libxml2, runs every test case under Unity and
 * returns Unity's result as the process exit status.
 */
int main(void)
{
    int unity_result;

    xmlInitParser();
    UNITY_BEGIN();

    /* Value syntax variants. */
    RUN_TEST(test_htmlParseAttribute_unquoted_simple);
    RUN_TEST(test_htmlParseAttribute_quoted_double_with_spaces);
    RUN_TEST(test_htmlParseAttribute_quoted_single_with_entity_decoding);
    RUN_TEST(test_htmlParseAttribute_no_value_present);

    /* Name handling and degenerate input. */
    RUN_TEST(test_htmlParseAttribute_non_ascii_name_lowercase_ascii_only);
    RUN_TEST(test_htmlParseAttribute_embedded_nul_in_name_replaced);
    RUN_TEST(test_htmlParseAttribute_empty_input);

    unity_result = UNITY_END();

    /* Library cleanup runs after the Unity result has been captured. */
    xmlCleanupParser();
    return unity_result;
}