DOM で tree walk (Libxml2)

DOM で XML のツリー構造を解析する。ノードはリンクリストになっており、同じ階層のノードは cur_node->next で、子ノードは cur_node->children で辿ることができる。

#include <stdio.h>
#include <string.h>
#include <libxml/tree.h>
#include <libxml/parser.h>

/**
 * Strip trailing carriage returns, newlines and spaces from a string in place.
 *
 * Every trailing '\r', '\n' or ' ' byte is overwritten with '\0'; scanning
 * stops at the first byte (from the end) that is none of those.
 *
 * @param str  NUL-terminated string to modify. A NULL pointer is ignored.
 */
void trim(char *str)
{
    size_t len;

    if (str == NULL) {
        return;
    }

    /* Keep the length as size_t: strlen() returns size_t, and an unsigned
     * "one past the last kept byte" index handles the empty string without
     * relying on a wrap-around to -1. */
    len = strlen(str);
    while (len > 0 &&
           (str[len - 1] == '\r' ||
            str[len - 1] == '\n' ||
            str[len - 1] == ' ')) {
        len--;
        str[len] = '\0';
    }
}

/**
 * Print a node, all of its following siblings, and (recursively) their
 * children to stdout, one line per node.
 *
 * Each line is indented by two spaces per depth level and has the form
 *   type(<n>):<kind>, name:<name>, content:<content>
 * where <content> is the node's content with trailing '\r', '\n' and ' '
 * removed, or "(null)" when the node has no content.
 *
 * @param node   First node of the sibling list to print; NULL prints nothing.
 * @param depth  Nesting level used for indentation; values <= 0 mean no indent.
 */
void walk(xmlNodePtr node, int depth)
{
    xmlNode *cur_node = NULL;
    /* Indentation is produced with a printf field width instead of a
     * fixed-size buffer, so arbitrarily deep documents cannot overflow. */
    int indent = (depth > 0) ? depth * 2 : 0;

    for (cur_node = node; cur_node; cur_node = cur_node->next) {
        printf("%*s", indent, "");

        /* Type */
        printf("type(%d):", (int)cur_node->type);
        switch (cur_node->type) {
            case XML_ELEMENT_NODE:
                printf("Node, ");
                break;
            case XML_ATTRIBUTE_NODE:
                printf("Attribute, ");
                break;
            case XML_TEXT_NODE:
                printf("Text, ");
                break;
            default:
                printf("Other, ");
                break;
        }

        /* Name of node, or entity. Passing NULL to %s is undefined, so
         * substitute a visible placeholder. */
        printf("name:%s, ",
               cur_node->name ? (const char *)cur_node->name : "(null)");

        /* Content */
        if (NULL == cur_node->content) {
            printf("content:(null)");
        }
        else {
            /* Print straight from the node instead of copying into a
             * fixed-size buffer: find the length without the trailing
             * CR / LF / space bytes and write exactly that many bytes. */
            const char *text = (const char *)cur_node->content;
            size_t      len  = strlen(text);

            while (len > 0 &&
                   (text[len - 1] == '\r' ||
                    text[len - 1] == '\n' ||
                    text[len - 1] == ' ')) {
                len--;
            }
            printf("content:");
            fwrite(text, 1, len, stdout);
        }
        printf("\n");

        walk(cur_node->children, depth + 1);
    }
}

/**
 * Parse the XML file named on the command line and print its node tree.
 *
 * Usage: <program> <xml-file>
 *
 * @return 0 on success, 1 when the argument count is wrong or the file
 *         cannot be parsed.
 */
int main(int argc, char **argv)
{
    xmlDocPtr  doc;
    xmlNodePtr root_element = NULL;
    char      *input_file;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <xml-file>\n",
                (argc > 0) ? argv[0] : "program");
        return 1;
    }

    input_file = argv[1];

    LIBXML_TEST_VERSION;

    /* Read document */
    doc = xmlReadFile(input_file, NULL, 0);
    if (doc == NULL) {
        fprintf(stderr, "Failed to parse %s\n", input_file);
        xmlCleanupParser();
        /* main returns int: report the failure with a non-zero status. */
        return 1;
    }

    /* Get the root element node */
    root_element = xmlDocGetRootElement(doc);

    /* Walk child */
    walk(root_element, 0);

    /* Free document */
    xmlFreeDoc(doc);

    xmlCleanupParser();

    return 0;
}

解析対象のXMLファイル

<?xml version="1.0" encoding="UTF-8" ?>
<cars>
  <car country="JP">
    <name>motorcycle</name>
    <price>150</price>
    <img file="car1.jpg"/>
  </car>
  <car country="US">
    <name>truck</name>
    <price>500</price>
    <img file="car2.jpg"/>
  </car>
  <car country="DE">
    <name>car</name>
    <price>200</price>
    <img file="car3.jpg"/>
  </car>
</cars>

解析結果

type(1):Node, name:cars, content:(null)
  type(3):Text, name:text, content:
  type(1):Node, name:car, content:(null)
    type(3):Text, name:text, content:
    type(1):Node, name:name, content:(null)
      type(3):Text, name:text, content:motorcycle
    type(3):Text, name:text, content:
    type(1):Node, name:price, content:(null)
      type(3):Text, name:text, content:150
    type(3):Text, name:text, content:
    type(1):Node, name:img, content:(null)
    type(3):Text, name:text, content:
  type(3):Text, name:text, content:
  type(1):Node, name:car, content:(null)
    type(3):Text, name:text, content:
    type(1):Node, name:name, content:(null)
      type(3):Text, name:text, content:truck
    type(3):Text, name:text, content:
    type(1):Node, name:price, content:(null)
      type(3):Text, name:text, content:500
    type(3):Text, name:text, content:
    type(1):Node, name:img, content:(null)
    type(3):Text, name:text, content:
  type(3):Text, name:text, content:
  type(1):Node, name:car, content:(null)
    type(3):Text, name:text, content:
    type(1):Node, name:name, content:(null)
      type(3):Text, name:text, content:car
    type(3):Text, name:text, content:
    type(1):Node, name:price, content:(null)
      type(3):Text, name:text, content:200
    type(3):Text, name:text, content:
    type(1):Node, name:img, content:(null)
    type(3):Text, name:text, content:
  type(3):Text, name:text, content:

参考

The XML C parser and toolkit of Gnome
やさしいXML 第3版