Libxml2 の Streaming API で XML を読み出す

Libxml2 の Streaming API(xmlTextReader)は、XML を先頭から順に解析するときに役立つ API。SAX と違ってハンドラ(コールバック関数)を登録する必要がなく、ループの中で 1 ノードずつ読み進めるだけなので楽に使える。

#include <stdio.h>
#include <libxml/tree.h>
#include <libxml/xmlreader.h>

/*
 * Print a one-line description of the node the reader currently points at.
 *
 * The line starts with six columns: depth, node type, name ("--" when the
 * reader reports no name), is-empty-element flag, has-value flag and
 * has-attributes flag.  If the node has a value it is printed next,
 * truncated to 40 bytes followed by "..." when longer.  If the node has
 * attributes, each one is printed as "name = value".
 *
 * Note: after listing attributes this function does not move the reader
 * back to the owning element.
 */
void processNode(xmlTextReaderPtr reader)
{
    const xmlChar *node_name = xmlTextReaderConstName(reader);

    /* Fixed summary columns for every node. */
    printf("%d %d %s %d %d %d, ",
           xmlTextReaderDepth(reader),
           xmlTextReaderNodeType(reader),
           (node_name != NULL) ? node_name : BAD_CAST "--",
           xmlTextReaderIsEmptyElement(reader),
           xmlTextReaderHasValue(reader),
           xmlTextReaderHasAttributes(reader));

    /* Node value, shortened so long text does not flood the output. */
    if (xmlTextReaderHasValue(reader) == 1) {
        const xmlChar *text;

        fputs("value: ", stdout);
        text = xmlTextReaderConstValue(reader);
        if (text == NULL) {
            fputs("--, ", stdout);
        } else if (xmlStrlen(text) > 40) {
            printf("%.40s..., ", text);
        } else {
            printf("%s,", text);
        }
    }

    /* Walk the attribute list; the move functions return 1 while one exists. */
    if (xmlTextReaderHasAttributes(reader) == 1) {
        int more;

        fputs("attr: ", stdout);
        for (more = xmlTextReaderMoveToFirstAttribute(reader);
             more == 1;
             more = xmlTextReaderMoveToNextAttribute(reader)) {
            printf("%s = %s, ",
                   xmlTextReaderConstName(reader),
                   xmlTextReaderConstValue(reader));
        }
    }

    putchar('\n');
}

/*
 * Entry point: stream-parse the XML file named by the single command-line
 * argument and print one line per node via processNode().
 *
 * Returns 0 when the whole document was read successfully, and 1 on a
 * usage error, when the reader cannot be created, or when parsing fails.
 */
int main(int argc, char **argv)
{
    xmlTextReaderPtr  reader;
    const char       *input_file;
    int               ret;
    int               status = 0;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <xml-file>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "program");
        return 1;
    }

    input_file = argv[1];

    LIBXML_TEST_VERSION;

    /* Read document */
    reader = xmlReaderForFile(input_file, NULL, 0);
    if (NULL == reader) {
        fprintf(stderr, "Failed to parse %s\n", input_file);
        xmlCleanupParser();
        /* main returns int, so a value is required here. */
        return 1;
    }

    /* Parse XML: keep reading while the reader reports that a node is available */
    while (1 == (ret = xmlTextReaderRead(reader))) {
        processNode(reader);
    }

    /* The loop ends with 0 on a clean end of document; anything else is an error */
    if (0 != ret) {
        fprintf(stderr, "%s : failed to parse\n", input_file);
        status = 1;
    }

    /* Free reader */
    xmlFreeTextReader(reader);

    xmlCleanupParser();

    return status;
}

解析対象 XML

<?xml version="1.0" encoding="UTF-8" ?>
<cars>
  <car country="US" type="car">
    <price>150</price>
    <img file="car1.jpg"/>
  </car>
  <car country="JP" type="truck">
    <price>500</price>
    <img file="car2.jpg"/>
  </car>
</cars>

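上の XML を解析すると出力は次のようになる。各行の先頭 6 項目は順に「深さ、ノード種別、ノード名、空要素かどうか、値を持つかどうか、属性を持つかどうか」を表す。ノード種別の 1 は要素の開始、3 はテキスト、14 は意味のある空白、15 は要素の終了。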

0 1 cars 0 0 0, 
1 14 #text 0 1 0, value: 
  ,
1 1 car 0 0 1, attr: country = US, type = car, 
2 14 #text 0 1 0, value: 
    ,
2 1 price 0 0 0, 
3 3 #text 0 1 0, value: 150,
2 15 price 0 0 0, 
2 14 #text 0 1 0, value: 
    ,
2 1 img 1 0 1, attr: file = car1.jpg, 
2 14 #text 0 1 0, value: 
  ,
1 15 car 0 0 1, attr: country = US, type = car, 
1 14 #text 0 1 0, value: 
  ,
1 1 car 0 0 1, attr: country = JP, type = truck, 
2 14 #text 0 1 0, value: 
    ,
2 1 price 0 0 0, 
3 3 #text 0 1 0, value: 500,
2 15 price 0 0 0, 
2 14 #text 0 1 0, value: 
    ,
2 1 img 1 0 1, attr: file = car2.jpg, 
2 14 #text 0 1 0, value: 
  ,
1 15 car 0 0 1, attr: country = JP, type = truck, 
1 14 #text 0 1 0, value: 
,
0 15 cars 0 0 0,