| #include "unity/unity.h" |
| #include <libxml/HTMLparser.h> |
|
|
| #include <libxml/parserInternals.h> |
| #include <libxml/parser.h> |
| #include <stdlib.h> |
| #include <string.h> |
|
|
| |
| int test_htmlValidateUtf8(xmlParserCtxtPtr ctxt, const xmlChar *str, size_t len, int partial); |
|
|
| |
| static xmlParserCtxtPtr make_ctxt(void) { |
| htmlParserCtxtPtr hctxt = htmlNewParserCtxt(); |
| TEST_ASSERT_NOT_NULL(hctxt); |
| xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)hctxt; |
|
|
| xmlParserInputPtr input = xmlNewInputStream(ctxt); |
| TEST_ASSERT_NOT_NULL(input); |
| xmlPushInput(ctxt, input); |
|
|
| |
| if (ctxt->input) |
| ctxt->input->flags = 0; |
|
|
| TEST_ASSERT_NOT_NULL(ctxt->input); |
| TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
| return ctxt; |
| } |
|
|
| static void free_ctxt(xmlParserCtxtPtr ctxt) { |
| if (ctxt != NULL) { |
| htmlFreeParserCtxt((htmlParserCtxtPtr)ctxt); |
| } |
| } |
|
|
/* Unity per-test setup hook. Intentionally empty: each test builds its own context. */
void setUp(void) {
    /* nothing to prepare */
}
|
|
/* Unity per-test teardown hook. Intentionally empty: each test frees its own context. */
void tearDown(void) {
    /* nothing to release */
}
|
|
| |
|
|
| void test_htmlValidateUtf8_valid_2byte_returns_size_and_no_error_flag(void) { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const unsigned char seq[] = { 0xC2, 0xA2 }; |
| int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0); |
|
|
| TEST_ASSERT_EQUAL_INT(2, ret); |
| TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
| free_ctxt(ctxt); |
| } |
|
|
| void test_htmlValidateUtf8_invalid_start_byte_below_C2_sets_flag_and_returns_minus1(void) { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const unsigned char seq[] = { 0xC1, 0x80 }; |
| unsigned int before = ctxt->input->flags; |
| int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0); |
|
|
| TEST_ASSERT_EQUAL_INT(-1, ret); |
| TEST_ASSERT_TRUE(ctxt->input->flags != before); |
|
|
| free_ctxt(ctxt); |
| } |
|
|
| void test_htmlValidateUtf8_invalid_2byte_continuation_sets_flag(void) { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const unsigned char seq[] = { 0xC2, 0x20 }; |
| int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0); |
|
|
| TEST_ASSERT_EQUAL_INT(-1, ret); |
| TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
|
|
| free_ctxt(ctxt); |
| } |
|
|
| void test_htmlValidateUtf8_incomplete_2byte_partial_returns_zero_no_flag(void) { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const unsigned char seq[] = { 0xC2 }; |
| int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 1); |
|
|
| TEST_ASSERT_EQUAL_INT(0, ret); |
| TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
| free_ctxt(ctxt); |
| } |
|
|
| void test_htmlValidateUtf8_incomplete_2byte_nonpartial_sets_flag_and_minus1(void) { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const unsigned char seq[] = { 0xC2 }; |
| int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0); |
|
|
| TEST_ASSERT_EQUAL_INT(-1, ret); |
| TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
|
|
| free_ctxt(ctxt); |
| } |
|
|
| void test_htmlValidateUtf8_valid_3byte_general_and_minimum(void) { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const unsigned char euro[] = { 0xE2, 0x82, 0xAC }; |
| int r1 = test_htmlValidateUtf8(ctxt, (const xmlChar *)euro, sizeof(euro), 0); |
| TEST_ASSERT_EQUAL_INT(3, r1); |
| TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
| const unsigned char min3[] = { 0xE0, 0xA0, 0x80 }; |
| int r2 = test_htmlValidateUtf8(ctxt, (const xmlChar *)min3, sizeof(min3), 0); |
| TEST_ASSERT_EQUAL_INT(3, r2); |
| TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
| |
| const unsigned char ed_valid[] = { 0xED, 0x9F, 0xBF }; |
| int r3 = test_htmlValidateUtf8(ctxt, (const xmlChar *)ed_valid, sizeof(ed_valid), 0); |
| TEST_ASSERT_EQUAL_INT(3, r3); |
| TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
| free_ctxt(ctxt); |
| } |
|
|
| void test_htmlValidateUtf8_invalid_3byte_overlong_and_surrogate(void) { |
| |
| { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
| const unsigned char overlong[] = { 0xE0, 0x80, 0x80 }; |
| int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)overlong, sizeof(overlong), 0); |
| TEST_ASSERT_EQUAL_INT(-1, r); |
| TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
| free_ctxt(ctxt); |
| } |
| |
| { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
| const unsigned char surrogate[] = { 0xED, 0xA0, 0x80 }; |
| int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)surrogate, sizeof(surrogate), 0); |
| TEST_ASSERT_EQUAL_INT(-1, r); |
| TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
| free_ctxt(ctxt); |
| } |
| } |
|
|
| void test_htmlValidateUtf8_valid_4byte_boundaries(void) { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const unsigned char min4[] = { 0xF0, 0x90, 0x80, 0x80 }; |
| int r1 = test_htmlValidateUtf8(ctxt, (const xmlChar *)min4, sizeof(min4), 0); |
| TEST_ASSERT_EQUAL_INT(4, r1); |
| TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
| const unsigned char max4[] = { 0xF4, 0x8F, 0xBF, 0xBF }; |
| int r2 = test_htmlValidateUtf8(ctxt, (const xmlChar *)max4, sizeof(max4), 0); |
| TEST_ASSERT_EQUAL_INT(4, r2); |
| TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
| free_ctxt(ctxt); |
| } |
|
|
| void test_htmlValidateUtf8_invalid_4byte_overlong_and_above_max(void) { |
| |
| { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
| const unsigned char overlong4[] = { 0xF0, 0x80, 0x80, 0x80 }; |
| int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)overlong4, sizeof(overlong4), 0); |
| TEST_ASSERT_EQUAL_INT(-1, r); |
| TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
| free_ctxt(ctxt); |
| } |
| |
| { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
| const unsigned char above_max[] = { 0xF4, 0x90, 0x80, 0x80 }; |
| int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)above_max, sizeof(above_max), 0); |
| TEST_ASSERT_EQUAL_INT(-1, r); |
| TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
| free_ctxt(ctxt); |
| } |
| } |
|
|
| void test_htmlValidateUtf8_incomplete_3_and_4_byte_partial_behavior(void) { |
| |
| { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
| const unsigned char seq3[] = { 0xE2, 0x82 }; |
| int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq3, sizeof(seq3), 1); |
| TEST_ASSERT_EQUAL_INT(0, r); |
| TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
| free_ctxt(ctxt); |
| } |
| |
| { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
| const unsigned char seq4[] = { 0xF0, 0x90, 0x80 }; |
| int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq4, sizeof(seq4), 1); |
| TEST_ASSERT_EQUAL_INT(0, r); |
| TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
| free_ctxt(ctxt); |
| } |
| |
| { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
| const unsigned char seq4[] = { 0xF0, 0x90, 0x80 }; |
| int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq4, sizeof(seq4), 0); |
| TEST_ASSERT_EQUAL_INT(-1, r); |
| TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
| free_ctxt(ctxt); |
| } |
| } |
|
|
| void test_htmlValidateUtf8_error_flag_only_set_once_per_context(void) { |
| xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
| const unsigned char bad1[] = { 0xC1, 0x80 }; |
| const unsigned char bad2[] = { 0xE0, 0x80, 0x80 }; |
|
|
| int r1 = test_htmlValidateUtf8(ctxt, (const xmlChar *)bad1, sizeof(bad1), 0); |
| TEST_ASSERT_EQUAL_INT(-1, r1); |
| unsigned int flags_after_first = ctxt->input->flags; |
| TEST_ASSERT_NOT_EQUAL(0u, flags_after_first); |
|
|
| int r2 = test_htmlValidateUtf8(ctxt, (const xmlChar *)bad2, sizeof(bad2), 0); |
| TEST_ASSERT_EQUAL_INT(-1, r2); |
| unsigned int flags_after_second = ctxt->input->flags; |
|
|
| |
| TEST_ASSERT_EQUAL_UINT(flags_after_first, flags_after_second); |
|
|
| free_ctxt(ctxt); |
| } |
|
|
| int main(void) { |
| xmlInitParser(); |
|
|
| UNITY_BEGIN(); |
| RUN_TEST(test_htmlValidateUtf8_valid_2byte_returns_size_and_no_error_flag); |
| RUN_TEST(test_htmlValidateUtf8_invalid_start_byte_below_C2_sets_flag_and_returns_minus1); |
| RUN_TEST(test_htmlValidateUtf8_invalid_2byte_continuation_sets_flag); |
| RUN_TEST(test_htmlValidateUtf8_incomplete_2byte_partial_returns_zero_no_flag); |
| RUN_TEST(test_htmlValidateUtf8_incomplete_2byte_nonpartial_sets_flag_and_minus1); |
| RUN_TEST(test_htmlValidateUtf8_valid_3byte_general_and_minimum); |
| RUN_TEST(test_htmlValidateUtf8_invalid_3byte_overlong_and_surrogate); |
| RUN_TEST(test_htmlValidateUtf8_valid_4byte_boundaries); |
| RUN_TEST(test_htmlValidateUtf8_invalid_4byte_overlong_and_above_max); |
| RUN_TEST(test_htmlValidateUtf8_incomplete_3_and_4_byte_partial_behavior); |
| RUN_TEST(test_htmlValidateUtf8_error_flag_only_set_once_per_context); |
| int rc = UNITY_END(); |
|
|
| xmlCleanupParser(); |
| return rc; |
| } |