| #include "unity/unity.h" |
| #include <libxml/HTMLparser.h> |
|
|
| #include <stdlib.h> |
| #include <string.h> |
| #include <stdio.h> |
|
|
| |
| extern void test_htmlParseStartTag(htmlParserCtxtPtr ctxt); |
|
|
| |
| typedef struct { |
| xmlChar *name; |
| int att_count; |
| xmlChar **atts; |
| } StartEvent; |
|
|
| typedef struct { |
| int nevents; |
| StartEvent events[64]; |
| } SAXCapture; |
|
|
| static void capture_init(SAXCapture *cap) { |
| memset(cap, 0, sizeof(*cap)); |
| } |
|
|
| static void capture_free(SAXCapture *cap) { |
| for (int i = 0; i < cap->nevents; i++) { |
| if (cap->events[i].name) { |
| xmlFree(cap->events[i].name); |
| } |
| if (cap->events[i].atts) { |
| |
| int j = 0; |
| while (cap->events[i].atts[j] != NULL) { |
| xmlFree(cap->events[i].atts[j]); |
| j++; |
| } |
| free(cap->events[i].atts); |
| } |
| } |
| memset(cap, 0, sizeof(*cap)); |
| } |
|
|
| static void test_sax_startElement(void *ctx, const xmlChar *name, const xmlChar **atts) { |
| SAXCapture *cap = (SAXCapture *)ctx; |
| if (cap->nevents >= (int)(sizeof(cap->events)/sizeof(cap->events[0]))) |
| return; |
| StartEvent *ev = &cap->events[cap->nevents++]; |
| ev->name = xmlStrdup(name); |
|
|
| int count = 0; |
| if (atts != NULL) { |
| const xmlChar **p = atts; |
| while (*p != NULL) { |
| |
| p++; |
| |
| if (*p == NULL) break; |
| p++; |
| count++; |
| } |
| } |
| ev->att_count = count; |
|
|
| if (atts != NULL && count > 0) { |
| |
| ev->atts = (xmlChar **)calloc((size_t)(2 * count + 1), sizeof(xmlChar *)); |
| int idx = 0; |
| for (int i = 0; i < count; i++) { |
| const xmlChar *aname = atts[2*i]; |
| const xmlChar *aval = atts[2*i + 1]; |
| ev->atts[idx++] = xmlStrdup(aname); |
| ev->atts[idx++] = (aval != NULL) ? xmlStrdup(aval) : NULL; |
| } |
| ev->atts[idx] = NULL; |
| } else { |
| ev->atts = (xmlChar **)calloc(1, sizeof(xmlChar *)); |
| ev->atts[0] = NULL; |
| } |
| } |
|
|
| static const xmlChar* find_attr_value(const StartEvent *ev, const char *name) { |
| if (ev->atts == NULL) return NULL; |
| for (int i = 0; ev->atts[i] != NULL && ev->atts[i+1] != NULL; i += 2) { |
| if (xmlStrcasecmp(ev->atts[i], (const xmlChar *)name) == 0) { |
| return ev->atts[i+1]; |
| } |
| } |
| return NULL; |
| } |
|
|
| |
| static htmlParserCtxtPtr make_ctxt(const char *buf, int flags, SAXCapture *cap) { |
| htmlParserCtxtPtr ctxt = htmlCreateMemoryParserCtxt(buf, (int)strlen(buf)); |
| TEST_ASSERT_NOT_NULL_MESSAGE(ctxt, "Failed to create HTML parser context"); |
| static xmlSAXHandler sax; |
| memset(&sax, 0, sizeof(sax)); |
| sax.startElement = test_sax_startElement; |
| ctxt->sax = &sax; |
| ctxt->userData = cap; |
| ctxt->options |= flags; |
| return ctxt; |
| } |
|
|
/* Unity per-test fixture: calls xmlInitParser() before each test body runs. */
void setUp(void)
{
    xmlInitParser();
}
|
|
/* Unity per-test fixture: nothing to release here; each test cleans up itself. */
void tearDown(void)
{
}
|
|
| |
| void test_htmlParseStartTag_simple_div_noimplied(void) { |
| const char *src = "<div>"; |
| SAXCapture cap; capture_init(&cap); |
| htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
|
|
| test_htmlParseStartTag(ctxt); |
|
|
| TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
| TEST_ASSERT_NOT_NULL(cap.events[0].name); |
| TEST_ASSERT_EQUAL_STRING("div", (const char *)cap.events[0].name); |
| TEST_ASSERT_EQUAL_INT(0, cap.events[0].att_count); |
|
|
| capture_free(&cap); |
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| |
| void test_htmlParseStartTag_uppercase_and_attr_lowercased(void) { |
| const char *src = "<DIV CLASS=AbC ID=42>"; |
| SAXCapture cap; capture_init(&cap); |
| htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
|
|
| test_htmlParseStartTag(ctxt); |
|
|
| TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
| TEST_ASSERT_EQUAL_STRING("div", (const char *)cap.events[0].name); |
| TEST_ASSERT_TRUE(cap.events[0].att_count >= 2); |
|
|
| |
| const xmlChar *vclass = find_attr_value(&cap.events[0], "class"); |
| const xmlChar *vid = find_attr_value(&cap.events[0], "id"); |
| TEST_ASSERT_NOT_NULL(vclass); |
| TEST_ASSERT_NOT_NULL(vid); |
| TEST_ASSERT_EQUAL_STRING("AbC", (const char *)vclass); |
| TEST_ASSERT_EQUAL_STRING("42", (const char *)vid); |
|
|
| capture_free(&cap); |
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| |
| void test_htmlParseStartTag_duplicate_attributes_dedup(void) { |
| const char *src = "<div class='a' CLASS=\"b\" class=c>"; |
| SAXCapture cap; capture_init(&cap); |
| htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
|
|
| test_htmlParseStartTag(ctxt); |
|
|
| TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
| |
| const StartEvent *ev = &cap.events[0]; |
| |
| int class_count = 0; |
| for (int i = 0; ev->atts[i] != NULL && ev->atts[i+1] != NULL; i += 2) { |
| if (xmlStrcasecmp(ev->atts[i], BAD_CAST "class") == 0) |
| class_count++; |
| } |
| TEST_ASSERT_EQUAL_INT(1, class_count); |
| const xmlChar *v = find_attr_value(ev, "class"); |
| TEST_ASSERT_NOT_NULL(v); |
| TEST_ASSERT_EQUAL_STRING("a", (const char *)v); |
|
|
| capture_free(&cap); |
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| |
| void test_htmlParseStartTag_unexpected_solidus_ignored(void) { |
| const char *src = "<div / id='x'>"; |
| SAXCapture cap; capture_init(&cap); |
| htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
|
|
| test_htmlParseStartTag(ctxt); |
|
|
| TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
| TEST_ASSERT_EQUAL_STRING("div", (const char *)cap.events[0].name); |
| const xmlChar *vx = find_attr_value(&cap.events[0], "id"); |
| TEST_ASSERT_NOT_NULL(vx); |
| TEST_ASSERT_EQUAL_STRING("x", (const char *)vx); |
|
|
| capture_free(&cap); |
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| |
| void test_htmlParseStartTag_self_closing(void) { |
| const char *src = "<br/>"; |
| SAXCapture cap; capture_init(&cap); |
| htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
|
|
| test_htmlParseStartTag(ctxt); |
|
|
| TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
| TEST_ASSERT_EQUAL_STRING("br", (const char *)cap.events[0].name); |
| TEST_ASSERT_EQUAL_INT(0, cap.events[0].att_count); |
|
|
| capture_free(&cap); |
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| |
| void test_htmlParseStartTag_incomplete_tag_discarded(void) { |
| const char *src = "<div id='x'"; |
| SAXCapture cap; capture_init(&cap); |
| htmlParserCtxtPtr ctxt = make_ctxt(src, HTML_PARSE_NOIMPLIED, &cap); |
|
|
| test_htmlParseStartTag(ctxt); |
|
|
| TEST_ASSERT_EQUAL_INT(0, cap.nevents); |
|
|
| capture_free(&cap); |
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| |
| void test_htmlParseStartTag_many_attributes(void) { |
| |
| char buf[4096]; |
| strcpy(buf, "<span"); |
| const int N = 20; |
| char tmp[64]; |
| for (int i = 0; i < N; i++) { |
| snprintf(tmp, sizeof(tmp), " a%d='v%d'", i, i); |
| strcat(buf, tmp); |
| } |
| strcat(buf, ">"); |
|
|
| SAXCapture cap; capture_init(&cap); |
| htmlParserCtxtPtr ctxt = make_ctxt(buf, HTML_PARSE_NOIMPLIED, &cap); |
|
|
| test_htmlParseStartTag(ctxt); |
|
|
| TEST_ASSERT_EQUAL_INT(1, cap.nevents); |
| TEST_ASSERT_EQUAL_STRING("span", (const char *)cap.events[0].name); |
| TEST_ASSERT_EQUAL_INT(N, cap.events[0].att_count); |
|
|
| |
| const StartEvent *ev = &cap.events[0]; |
| const xmlChar *v0 = find_attr_value(ev, "a0"); |
| const xmlChar *v7 = find_attr_value(ev, "a7"); |
| const xmlChar *v19 = find_attr_value(ev, "a19"); |
| TEST_ASSERT_NOT_NULL(v0); |
| TEST_ASSERT_NOT_NULL(v7); |
| TEST_ASSERT_NOT_NULL(v19); |
| TEST_ASSERT_EQUAL_STRING("v0", (const char *)v0); |
| TEST_ASSERT_EQUAL_STRING("v7", (const char *)v7); |
| TEST_ASSERT_EQUAL_STRING("v19", (const char *)v19); |
|
|
| capture_free(&cap); |
| htmlFreeParserCtxt(ctxt); |
| } |
|
|
| int main(void) { |
| UNITY_BEGIN(); |
|
|
| RUN_TEST(test_htmlParseStartTag_simple_div_noimplied); |
| RUN_TEST(test_htmlParseStartTag_uppercase_and_attr_lowercased); |
| RUN_TEST(test_htmlParseStartTag_duplicate_attributes_dedup); |
| RUN_TEST(test_htmlParseStartTag_unexpected_solidus_ignored); |
| RUN_TEST(test_htmlParseStartTag_self_closing); |
| RUN_TEST(test_htmlParseStartTag_incomplete_tag_discarded); |
| RUN_TEST(test_htmlParseStartTag_many_attributes); |
|
|
| return UNITY_END(); |
| } |