| #include "unity/unity.h" |
| #include <libxml/HTMLparser.h> |
| #include <libxml/xmlerror.h> |
| #include <libxml/parser.h> |
| #include <string.h> |
| #include <stdlib.h> |
|
|
| |
/*
 * Entry point exercised by every test in this file. It is only declared
 * here; the definition is not visible in this file.
 * NOTE(review): presumably a test-only wrapper around libxml2's internal
 * htmlCheckMeta() -- confirm against the library's test build.
 *
 * ctxt: HTML parser context; the tests below inspect ctxt->encoding and the
 *       context's last error after the call.
 * atts: flat attribute array as passed by the tests: name/value string pairs
 *       followed by a NULL terminator. The tests also pass NULL for either
 *       argument.
 */
| void test_htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts); |
|
|
| static htmlParserCtxtPtr make_ctxt(void) { |
| htmlParserCtxtPtr ctxt = htmlNewParserCtxt(); |
| TEST_ASSERT_NOT_NULL_MESSAGE(ctxt, "Failed to create HTML parser context"); |
| xmlCtxtResetLastError(ctxt); |
| return ctxt; |
| } |
|
|
/*
 * Per-test setup hook. Intentionally empty: every test builds its own
 * parser context through make_ctxt().
 * NOTE(review): Unity is expected to call this before each RUN_TEST body;
 * that wiring lives in the framework, not in this file.
 */
void setUp(void)
{
    /* nothing to prepare */
}
|
|
/*
 * Per-test teardown hook. Intentionally empty: every test frees its own
 * parser context with htmlFreeParserCtxt().
 * NOTE(review): Unity is expected to call this after each RUN_TEST body;
 * that wiring lives in the framework, not in this file.
 */
void tearDown(void)
{
    /* nothing to release */
}
|
|
| static void assert_encoding_equals_case_insensitive(const xmlChar *enc, const char *expected) { |
| TEST_ASSERT_NOT_NULL_MESSAGE(enc, "Encoding was not set"); |
| TEST_ASSERT_EQUAL_INT_MESSAGE(0, xmlStrcasecmp(enc, BAD_CAST expected), |
| "Declared encoding does not match expected (case-insensitive compare)"); |
| } |
|
|
| void test_htmlCheckMeta_charset_utf8_sets_declared_encoding(void) { |
| htmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const xmlChar *atts[] = { |
| BAD_CAST "charset", BAD_CAST "utf-8", |
| NULL |
| }; |
|
|
| TEST_ASSERT_NULL_MESSAGE(ctxt->encoding, "Precondition failed: encoding should start as NULL"); |
| xmlCtxtResetLastError(ctxt); |
|
|
| test_htmlCheckMeta(ctxt, atts); |
|
|
| assert_encoding_equals_case_insensitive(ctxt->encoding, "utf-8"); |
|
|
| xmlErrorPtr err = xmlCtxtGetLastError(ctxt); |
| TEST_ASSERT_TRUE_MESSAGE(err == NULL || err->code == 0, |
| "Unexpected error after setting UTF-8 encoding"); |
|
|
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| void test_htmlCheckMeta_http_equiv_content_type_sets_declared_encoding(void) { |
| htmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const xmlChar *atts[] = { |
| BAD_CAST "http-equiv", BAD_CAST "Content-Type", |
| BAD_CAST "content", BAD_CAST "text/html; charset=ISO-8859-1", |
| NULL |
| }; |
|
|
| TEST_ASSERT_NULL(ctxt->encoding); |
| xmlCtxtResetLastError(ctxt); |
|
|
| test_htmlCheckMeta(ctxt, atts); |
|
|
| assert_encoding_equals_case_insensitive(ctxt->encoding, "ISO-8859-1"); |
|
|
| xmlErrorPtr err = xmlCtxtGetLastError(ctxt); |
| TEST_ASSERT_TRUE_MESSAGE(err == NULL || err->code == 0, |
| "Unexpected error after setting ISO-8859-1 encoding"); |
|
|
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| void test_htmlCheckMeta_charset_precedence_over_http_equiv(void) { |
| htmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const xmlChar *atts[] = { |
| BAD_CAST "http-equiv", BAD_CAST "Content-Type", |
| BAD_CAST "content", BAD_CAST "text/html; charset=ISO-8859-1", |
| BAD_CAST "charset", BAD_CAST "utf-8", |
| NULL |
| }; |
|
|
| TEST_ASSERT_NULL(ctxt->encoding); |
| xmlCtxtResetLastError(ctxt); |
|
|
| test_htmlCheckMeta(ctxt, atts); |
|
|
| |
| assert_encoding_equals_case_insensitive(ctxt->encoding, "utf-8"); |
|
|
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| void test_htmlCheckMeta_non_ascii_incompatible_emits_error_and_ignores(void) { |
| htmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const xmlChar *atts[] = { |
| BAD_CAST "charset", BAD_CAST "utf-16", |
| NULL |
| }; |
|
|
| TEST_ASSERT_NULL(ctxt->encoding); |
| xmlCtxtResetLastError(ctxt); |
|
|
| test_htmlCheckMeta(ctxt, atts); |
|
|
| |
| TEST_ASSERT_NULL_MESSAGE(ctxt->encoding, "Encoding should not be set for non-ASCII-compatible charset"); |
|
|
| xmlErrorPtr err = xmlCtxtGetLastError(ctxt); |
| TEST_ASSERT_NOT_NULL_MESSAGE(err, "Expected an error for non-ASCII-compatible charset"); |
| TEST_ASSERT_EQUAL_INT_MESSAGE(XML_ERR_UNSUPPORTED_ENCODING, err->code, |
| "Expected XML_ERR_UNSUPPORTED_ENCODING error code"); |
|
|
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| void test_htmlCheckMeta_null_context_no_crash(void) { |
| const xmlChar *atts[] = { |
| BAD_CAST "charset", BAD_CAST "utf-8", |
| NULL |
| }; |
|
|
| |
| test_htmlCheckMeta(NULL, atts); |
|
|
| TEST_ASSERT_TRUE(1); |
| } |
|
|
| void test_htmlCheckMeta_null_atts_no_action(void) { |
| htmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| TEST_ASSERT_NULL(ctxt->encoding); |
| xmlCtxtResetLastError(ctxt); |
|
|
| test_htmlCheckMeta(ctxt, NULL); |
|
|
| TEST_ASSERT_NULL_MESSAGE(ctxt->encoding, "Encoding should remain NULL when atts is NULL"); |
|
|
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| void test_htmlCheckMeta_content_without_http_equiv_no_action(void) { |
| htmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const xmlChar *atts[] = { |
| BAD_CAST "content", BAD_CAST "text/html; charset=UTF-8", |
| NULL |
| }; |
|
|
| TEST_ASSERT_NULL(ctxt->encoding); |
| xmlCtxtResetLastError(ctxt); |
|
|
| test_htmlCheckMeta(ctxt, atts); |
|
|
| TEST_ASSERT_NULL_MESSAGE(ctxt->encoding, "Encoding should not be set without http-equiv=Content-Type"); |
|
|
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| void test_htmlCheckMeta_case_insensitive_matching(void) { |
| htmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const xmlChar *atts[] = { |
| BAD_CAST "HTTP-EQUIV", BAD_CAST "content-type", |
| BAD_CAST "CoNtEnT", BAD_CAST "text/html; charset=iso-8859-1", |
| NULL |
| }; |
|
|
| TEST_ASSERT_NULL(ctxt->encoding); |
| xmlCtxtResetLastError(ctxt); |
|
|
| test_htmlCheckMeta(ctxt, atts); |
|
|
| assert_encoding_equals_case_insensitive(ctxt->encoding, "iso-8859-1"); |
|
|
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| int main(void) { |
| xmlInitParser(); |
|
|
| UNITY_BEGIN(); |
| RUN_TEST(test_htmlCheckMeta_charset_utf8_sets_declared_encoding); |
| RUN_TEST(test_htmlCheckMeta_http_equiv_content_type_sets_declared_encoding); |
| RUN_TEST(test_htmlCheckMeta_charset_precedence_over_http_equiv); |
| RUN_TEST(test_htmlCheckMeta_non_ascii_incompatible_emits_error_and_ignores); |
| RUN_TEST(test_htmlCheckMeta_null_context_no_crash); |
| RUN_TEST(test_htmlCheckMeta_null_atts_no_action); |
| RUN_TEST(test_htmlCheckMeta_content_without_http_equiv_no_action); |
| RUN_TEST(test_htmlCheckMeta_case_insensitive_matching); |
| int res = UNITY_END(); |
|
|
| xmlCleanupParser(); |
| return res; |
| } |