00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
#if HAVE_CONFIG_H
# include <config.h>
#endif

#if HAVE_STRING_H
# include <string.h>
#else
# if HAVE_STRINGS_H
# include <strings.h>
# endif
#endif

#include <assert.h>
#include <expat.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include "common/dictionary.h"
#include "common/slist.h"
#include "common/xmalloc.h"
#include "xmlt.h"
00046
00047 typedef struct
00048 {
00049 XML_Parser parser;
00050 xml_node *root_node;
00051 xml_node *curr_node;
00052 void (*doc_finished) (void *, xml_node *);
00053
00054 void *user_callback_data;
00055 dictionary *known_tags;
00056
00057 dictionary *known_attributes;
00058 int depth;
00059 int curr_is_cdata;
00060
00061 int valid;
00062 }
00063 xml_context;
00064
00065
00066
/* Copy LEN bytes of TEXT into the buffer *_DEST while normalising
   whitespace: newline, carriage return and tab become a space, every run
   of spaces collapses into a single space, and trailing spaces are
   dropped.  A leading run of whitespace survives as one space unless the
   whole input is whitespace.

   *_DEST must have room for at least LEN bytes.  The output is NOT
   NUL-terminated.

   Returns the number of bytes stored in *_DEST; 0 when LEN is zero or
   negative, or when TEXT holds only whitespace.  */
static size_t
_cleanup_string (const char *text, char **_dest, int len)
{
  size_t count;
  size_t i;
  size_t newlen = 0;
  char *dest;

  assert (text);
  assert (_dest && *_dest);

  /* A negative length must not be converted to a huge unsigned count.  */
  if (len <= 0)
    return 0;

  count = (size_t) len;
  dest = *_dest;

  for (i = 0; i < count; ++i)
    {
      char c = text[i];

      if (c == '\n' || c == '\r' || c == '\t')
        c = ' ';

      /* Store a space only when the previous stored byte is not one.  */
      if (c != ' ' || newlen == 0 || dest[newlen - 1] != ' ')
        dest[newlen++] = c;
    }

  /* Strip trailing spaces.  */
  while (newlen > 0 && dest[newlen - 1] == ' ')
    --newlen;

  return newlen;
}
00098
00099
00100 static int
00101 _attr_compare (const void* data, const void* compare)
00102 {
00103 const xml_attribute *attr = (const xml_attribute*)data;
00104 int attr_t = *(int*)compare;
00105
00106 return attr->attribute != XML_UNKNOWN_ATTRIBUTE
00107 && attr->attribute == attr_t;
00108 }
00109
00110
00111 static void
00112 _rescan_attrib (void *data, void *userdata)
00113 {
00114 xml_attribute *attrib = (xml_attribute*)data;
00115 dictionary *attr_dict = (dictionary*)userdata;
00116 int *lookup;
00117
00118 if (attrib->attribute == XML_UNKNOWN_ATTRIBUTE)
00119 {
00120 lookup = dict_lookup (attr_dict, attrib->unknown_attrib);
00121 if (lookup != 0)
00122 {
00123 attrib->attribute = *lookup;
00124 free (attrib->unknown_attrib);
00125 attrib->unknown_attrib = 0;
00126 }
00127 }
00128 }
00129
00130
00131 static void
00132 _start_element (void *cntxt, const char *name, const char **attr)
00133 {
00134 xml_node *newnode = xmalloc (sizeof (xml_node));
00135 xml_context *context = (xml_context *) cntxt;
00136 int *node_t;
00137
00138 newnode->type = NODE;
00139
00140
00141
00142
00143 node_t = dict_lookup (context->known_tags, name);
00144 if (!node_t)
00145 {
00146 newnode->tag = XML_UNKNOWN_TAG;
00147 newnode->cdata = xstrdup (name);
00148 }
00149 else
00150 {
00151 newnode->tag = *(int *) node_t;
00152 newnode->cdata = 0;
00153 }
00154
00155 slist_init (&newnode->children);
00156 slist_init (&newnode->attributes);
00157
00158 if (! context->root_node)
00159 {
00160 context->root_node = newnode;
00161 newnode->parent = 0;
00162 }
00163 else
00164 {
00165 newnode->parent = context->curr_node;
00166 slist_append (&context->curr_node->children, newnode);
00167 }
00168
00169 while (attr && *attr)
00170 {
00171 xml_attribute *new_attr = xmalloc (sizeof (xml_attribute));
00172 int *attr_t = dict_lookup (context->known_attributes, *attr);
00173
00174 if (!attr_t)
00175 {
00176 new_attr->attribute = XML_UNKNOWN_ATTRIBUTE;
00177 new_attr->unknown_attrib = xstrdup (*attr);
00178 }
00179 else
00180 {
00181 new_attr->attribute = *(int*)attr_t;
00182 new_attr->unknown_attrib = 0;
00183 }
00184
00185 new_attr->value = xstrdup(*(attr + 1));
00186 slist_append (&newnode->attributes, new_attr);
00187 attr += 2;
00188 }
00189
00190 context->curr_node = newnode;
00191 context->curr_is_cdata = 0;
00192 ++context->depth;
00193 }
00194
00195
00196 static void
00197 _end_element (void *cntxt, const char *name)
00198 {
00199 xml_context *context = (xml_context *) cntxt;
00200
00201 --context->depth;
00202 if (!context->depth)
00203 {
00204 context->doc_finished (context->user_callback_data, context->root_node);
00205 context->curr_node = 0;
00206 context->root_node = 0;
00207 }
00208 else
00209 {
00210 context->curr_node = context->curr_node->parent;
00211 context->curr_is_cdata = 0;
00212 }
00213 }
00214
00215
00216 static void
00217 _character_data (void *cntxt, const char *text, int len)
00218 {
00219 int newlen;
00220 char *newtext;
00221 xml_node *new_node = xmalloc (sizeof (xml_node));
00222 xml_context *context = (xml_context *) cntxt;
00223 slist *siblings;
00224
00225 assert (cntxt);
00226 assert (text);
00227
00228 newtext = xmalloc (len);
00229 newlen =_cleanup_string (text, &newtext, len);
00230 if (newlen == 0)
00231 {
00232 free (newtext);
00233 return;
00234 }
00235
00236 xrealloc (newtext, newlen + 1);
00237 newtext[newlen] = 0;
00238
00239 siblings = &(context->curr_node->children);
00240 if (context->curr_is_cdata)
00241 {
00242
00243
00244
00245 xml_node *last_node = slist_last (siblings);
00246 size_t cdata_len = strlen (last_node->cdata);
00247
00248 last_node->cdata = xrealloc (last_node->cdata, cdata_len + newlen + 1);
00249 memcpy (last_node->cdata + cdata_len, newtext, newlen);
00250 last_node->cdata[cdata_len + cdata_len + newlen + 1] = 0;
00251 }
00252 else
00253 {
00254 new_node = xmalloc( sizeof (xml_node));
00255 new_node->type = CDATA;
00256 new_node->tag = -1;
00257 new_node->cdata = newtext;
00258 new_node->parent = context->curr_node;
00259
00260 slist_append (siblings, new_node);
00261 context->curr_is_cdata = 1;
00262 }
00263 }
00264
00265
00266 static void
00267 _free_attribute (void *_attr)
00268 {
00269 xml_attribute *attr = (xml_attribute *) _attr;
00270 assert (attr);
00271 free (attr->value);
00272 free (attr->unknown_attrib);
00273 free (attr);
00274 }
00275
00276
00277
00278 void *
00279 xmlt_create_context (void (*cb) (void *, xml_node *), void *cb_data,
00280 dictionary * tags, dictionary * attribs)
00281 {
00282 xml_context *context = xmalloc (sizeof (xml_context));
00283
00284 context->parser = XML_ParserCreate (0);
00285 if (!context->parser)
00286 return 0;
00287
00288 XML_SetUserData (context->parser, context);
00289 XML_SetElementHandler (context->parser, _start_element, _end_element);
00290 XML_SetCharacterDataHandler (context->parser, _character_data);
00291
00292 context->root_node = 0;
00293 context->curr_node = 0;
00294 context->user_callback_data = cb_data;
00295 context->known_tags = tags;
00296 context->known_attributes = attribs;
00297 context->depth = 0;
00298 context->curr_is_cdata = 0;
00299 context->doc_finished = cb;
00300 context->valid = 1;
00301
00302 return context;
00303 }
00304
00305
00306 int
00307 xmlt_parse (void *_context, const char *data, size_t len)
00308 {
00309 xml_context *context = (xml_context *) _context;
00310
00311 if (!context || !context->valid)
00312 return 0;
00313
00314 if (len == 0)
00315 return 1;
00316
00317 assert (context->parser);
00318 assert (data);
00319
00320 if (!XML_Parse (context->parser, data, len, 0))
00321 {
00322 context->valid = 0;
00323 return 0;
00324 }
00325
00326 return 1;
00327 }
00328
00329
00330 void
00331 xmlt_free_context (void *context)
00332 {
00333 assert (context);
00334 XML_ParserFree (((xml_context *) context)->parser);
00335 }
00336
00337
00338 void
00339 xmlt_free_document (xml_node * doc)
00340 {
00341 slist_iter iter;
00342 int is_cdata = 1;
00343 void *p;
00344
00345 assert (doc);
00346 iter = slist_begin_iter (&doc->children);
00347 slist_delete_special (&doc->attributes, _free_attribute);
00348
00349 while (iter.curr)
00350 {
00351 p = slist_iter_and_next (&iter);
00352
00353 if (is_cdata)
00354 free (p);
00355 else
00356 xmlt_free_document (p);
00357
00358
00359 is_cdata = -is_cdata + 1;
00360 }
00361
00362 free (doc->cdata);
00363 free (doc);
00364 }
00365
00366
00367 xml_node *
00368 xmlt_get_next (xml_node *iter, xml_node *doc)
00369 {
00370 assert (iter);
00371 assert (doc);
00372
00373 if (iter->children.head)
00374 return (xml_node*)iter->children.head->data;
00375
00376 while (iter->parent)
00377 {
00378 if (iter == doc)
00379 return 0;
00380
00381 slist_iter i = slist_find (&(iter->parent->children), iter);
00382 if (i.curr->next)
00383 return (xml_node *) (i.curr->next->data);
00384
00385 iter = iter->parent;
00386 }
00387
00388
00389 return 0;
00390 }
00391
00392
00393 xml_node *
00394 xmlt_get_next_shallow (xml_node *iter)
00395 {
00396 slist_iter siblings;
00397 assert (iter);
00398
00399 siblings = slist_find (&(iter->parent->children), iter);
00400
00401 if (siblings.curr->next)
00402 iter = (xml_node *)(siblings.curr->next->data);
00403 else
00404 iter = 0;
00405
00406 return iter;
00407 }
00408
00409
00410 void
00411 xmlt_rescan_document (xml_node * doc, dictionary * tags, dictionary * attribs)
00412 {
00413 int *tag_ptr;
00414 xml_node *curr_node = doc;
00415
00416 while (curr_node)
00417 {
00418 if (curr_node->type == NODE && curr_node->tag == XML_UNKNOWN_TAG)
00419 {
00420
00421
00422
00423
00424
00425
00426 tag_ptr = dict_lookup (tags, curr_node->cdata);
00427 if (tag_ptr != 0)
00428 {
00429 curr_node->tag = *tag_ptr;
00430 free (curr_node->cdata);
00431 curr_node->cdata = 0;
00432
00433 if (attribs)
00434 {
00435 slist_for_each (&curr_node->attributes, _rescan_attrib,
00436 attribs);
00437 }
00438 }
00439 }
00440
00441 curr_node = xmlt_get_next (curr_node, doc);
00442 }
00443 }
00444
00445
00446 xml_node *
00447 xmlt_find (xml_node *doc, xml_node* iter, int tag)
00448 {
00449 if (!iter)
00450 iter = xmlt_get_next (doc, doc);
00451 else
00452 iter = xmlt_get_next_shallow (iter);
00453
00454 while (iter && iter->tag != tag)
00455 iter = xmlt_get_next_shallow (iter);
00456
00457 return iter;
00458 }
00459
00460
00461 xml_node *
00462 xmlt_find_if (xml_node *doc, xml_node* iter, int(*compare)(xml_node*))
00463 {
00464 if (!iter)
00465 iter = xmlt_get_next (doc, doc);
00466 else
00467 iter = xmlt_get_next_shallow (iter);
00468
00469 while (iter && !compare (iter))
00470 iter = xmlt_get_next_shallow (iter);
00471
00472 return iter;
00473 }
00474
00475
00476 const char*
00477 xmlt_get_attrib (xml_node *node, int attribute)
00478 {
00479 slist_iter i = slist_find_if (&node->attributes, &attribute, _attr_compare);
00480
00481 if(i.curr)
00482 return ((xml_attribute*)i.curr->data)->value;
00483
00484 return 0;
00485 }
00486
00487
00488 const char*
00489 xmlt_get_first_cdata (xml_node *node)
00490 {
00491 xml_node *first_child;
00492
00493 if (!node)
00494 return 0;
00495
00496 first_child = (xml_node*)node->children.head->data;
00497
00498 return (first_child && first_child->cdata) ? first_child->cdata : 0;
00499 }
00500
00501
00502 #ifdef UNIT_TEST_XMLT_C
00503
00504
00505
00506
00507 void test_callback (void *userdata, xml_node *document)
00508 {
00509 *(xml_node**)userdata = document;
00510 }
00511
00512 void
00513 test_next_tag (xml_node **doc_iter, xml_node *doc, int tag)
00514 {
00515 *doc_iter = xmlt_get_next (*doc_iter, doc);
00516
00517 if (!doc_iter || !*doc_iter)
00518 {
00519 printf ("xmlt_get_next() failed, iterator is NULL\n");
00520 exit (1);
00521 }
00522
00523 if ((**doc_iter).type != NODE)
00524 {
00525 printf ("xmlt_get_next() failed, got CDATA, expected NODE\n");
00526 exit (1);
00527 }
00528
00529 if ((**doc_iter).tag != tag)
00530 {
00531 printf ("xmlt_get_next() failed, tag is %i instead of %i\n",
00532 (**doc_iter).tag, tag);
00533 exit (1);
00534 }
00535 }
00536
00537 void
00538 test_next_cdata (xml_node **doc_iter, xml_node *doc, const char* cdata)
00539 {
00540 *doc_iter = xmlt_get_next (*doc_iter, doc);
00541
00542 if (!doc_iter || !*doc_iter)
00543 {
00544 printf ("xmlt_get_next() failed, iterator is NULL\n");
00545 exit (1);
00546 }
00547
00548 if ((**doc_iter).type != CDATA)
00549 {
00550 printf ("xmlt_get_next() failed, got NODE, expected CDATA\n");
00551 exit (1);
00552 }
00553
00554 if (strcmp((**doc_iter).cdata, cdata))
00555 {
00556 printf ("xmlt_get_next() failed, cdata is '%s' instead of '%s'.\n",
00557 (**doc_iter).cdata, cdata);
00558 exit (1);
00559 }
00560 }
00561
00562 int main (int argc, char **argv)
00563 {
00564 int tag_array[] = { 1, 2, 3, 4 };
00565 int attr_array[] = { 10, 20, 30, 40 };
00566 xml_node *finished_doc = 0;
00567 xml_node *doc_iter;
00568 const char* attr_value;
00569
00570 dict_pair tag_dict[] =
00571 {
00572 { "four", &tag_array[3] },
00573 { "one", &tag_array[0] },
00574 { "three", &tag_array[2] },
00575 { "two", &tag_array[1] }
00576 };
00577
00578 dict_pair attr_dict[] =
00579 {
00580 { "dos", &attr_array[1] },
00581 { "quatro", &attr_array[3] },
00582 { "tres", &attr_array[2] },
00583 { "uno", &attr_array[0] }
00584 };
00585
00586 dictionary tags = { tag_dict , 4 };
00587 dictionary attrs = { attr_dict, 4 };
00588
00589 int read;
00590
00591 char *xmldata = "<one><two>Inside two</two><three uno=\"een\" dos=\"twee\">" \
00592 "<four tres=\"drie\" quatro=\"vier\">Inside four</four></three>" \
00593 "<nonexistent />Closing</one>";
00594
00595 xml_context* context = xmlt_create_context (test_callback, &finished_doc,
00596 &tags, &attrs);
00597
00598 if (!context)
00599 {
00600 printf ("xmlt_create_context() failed\n");
00601 exit (1);
00602 }
00603
00604 if (!xmlt_parse (context, xmldata, strlen(xmldata)))
00605 {
00606 printf ("xmlt_parse() failed\n");
00607 exit (1);
00608 }
00609
00610 if (!finished_doc)
00611 {
00612 printf ("xmlt_parse() failed, no document returned\n");
00613 exit (1);
00614 }
00615
00616 doc_iter = finished_doc;
00617 if (doc_iter->tag != tag_array[0])
00618 {
00619 printf ("xmlt_parse() failed, root tag is %i instead of %i\n",
00620 doc_iter->tag, tag_array[0]);
00621 exit (1);
00622 }
00623
00624 if (doc_iter->parent != 0)
00625 {
00626 printf ("xmlt_parse() failed, first tag is not root tag\n");
00627 exit (1);
00628 }
00629
00630 test_next_tag(&doc_iter, finished_doc, tag_array[1]);
00631
00632 attr_value = xmlt_get_attrib (doc_iter, attr_array[0]);
00633 if (attr_value)
00634 {
00635 printf ("xmlt_get_attribute() returned something.\n");
00636 exit (1);
00637 }
00638
00639 test_next_cdata(&doc_iter, finished_doc, "Inside two");
00640 test_next_tag(&doc_iter, finished_doc, tag_array[2]);
00641
00642 attr_value = xmlt_get_attrib (doc_iter, attr_array[0]);
00643 if (!attr_value)
00644 {
00645 printf ("xmlt_get_attribute() didn't return anything.\n");
00646 exit (1);
00647 }
00648
00649 if (strcmp(attr_value, "een"))
00650 {
00651 printf ("xmlt_get_attribute() returned '%s' instead of 'een'.\n",
00652 attr_value);
00653 exit (1);
00654 }
00655
00656 test_next_tag(&doc_iter, finished_doc, tag_array[3]);
00657 test_next_cdata(&doc_iter, finished_doc, "Inside four");
00658
00659
00660 return 0;
00661 }
00662
00663 #endif