Libxml2 で XPath

xmlXPathEvalExpression()でXPathに一致したノードのリストを取得できる。

#include <stdio.h>
#include <string.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/xpath.h>

/*
 * Strip trailing carriage returns, line feeds and spaces from str in place.
 *
 * str: writable NUL-terminated string. NULL is accepted and ignored.
 *
 * Each stripped character is overwritten with '\0', so the string simply
 * ends earlier. Leading and interior whitespace, and any other trailing
 * character (tabs included), are left untouched.
 */
void trim(char *str)
{
    size_t len;

    if (str == NULL) {
        return;
    }

    /* Keep the length in size_t: no narrowing of strlen()'s result to int. */
    len = strlen(str);
    while (len > 0 &&
           (str[len - 1] == '\r' ||
            str[len - 1] == '\n' ||
            str[len - 1] == ' ')) {
        str[--len] = '\0';
    }
}

/*
 * Print every attribute of a list as "attr NAME=VALUE", one per line.
 *
 * attr:   head of the attribute list, followed through ->next.
 *         NULL prints nothing.
 * output: stream to write to.
 *
 * The value printed is the content of the attribute's first child node.
 * An attribute with no child node, or whose child has no content, is
 * printed with an empty value.
 */
void print_attr(xmlAttrPtr attr, FILE *output)
{
    xmlAttrPtr cur_attr;

    for (cur_attr = attr; cur_attr != NULL; cur_attr = cur_attr->next) {
        const char *value = "";

        /* Read from cur_attr, not the list head, so each attribute is
         * printed once instead of the first one being repeated. */
        if (cur_attr->children != NULL &&
            cur_attr->children->content != NULL) {
            value = (const char *)cur_attr->children->content;
        }
        fprintf(output, "attr %s=%s\n",
                (const char *)cur_attr->name, value);
    }
}

/*
 * Print a human-readable summary of every node in an XPath node set.
 *
 * nodes:  node set to print. NULL is treated as an empty set.
 * output: stream to write to.
 *
 * A "Result (N nodes):" header is printed first. Each node then gets one
 * line giving its kind and name, followed by " content TEXT", where TEXT is
 * the content of the node's first child with trailing '\r', '\n' and ' '
 * removed. " content (null)" is printed when there is no first child or it
 * has no content. For element nodes, the attributes are then listed with
 * print_attr().
 */
void print_nodes(xmlNodeSetPtr nodes, FILE *output)
{
    int size;
    int i;

    size = (nodes) ? nodes->nodeNr : 0;
    fprintf(output, "Result (%d nodes):\n", size);
    for (i = 0; i < size; ++i) {
        xmlNodePtr     cur;
        const xmlChar *content = NULL;

        if (nodes->nodeTab[i]->type == XML_NAMESPACE_DECL) {
            xmlNsPtr ns = (xmlNsPtr)nodes->nodeTab[i];

            /* The code treats ns->next as the element the namespace node
             * belongs to. NOTE(review): this relies on how libxml2 builds
             * namespace nodes for XPath results - confirm against the
             * libxml2 documentation. Anything else is reported without
             * node details instead of being dereferenced as an element. */
            cur = (xmlNodePtr)ns->next;
            if (cur == NULL || cur->type != XML_ELEMENT_NODE) {
                fprintf(output, "= namespace \"%s\"=\"%s\"\n",
                        ns->prefix, ns->href);
                continue;
            }
            if (cur->ns) {
                fprintf(output,
                        "= namespace \"%s\"=\"%s\" for node %s:%s",
                        ns->prefix,
                        ns->href, cur->ns->href, cur->name);
            }
            else {
                fprintf(output,
                        "= namespace \"%s\"=\"%s\" for node %s",
                        ns->prefix, ns->href, cur->name);
            }
        }
        else if (nodes->nodeTab[i]->type == XML_ELEMENT_NODE) {
            cur = nodes->nodeTab[i];
            if (cur->ns) {
                fprintf(output,
                        "= element node \"%s:%s\"",
                        cur->ns->href, cur->name);
            }
            else {
                fprintf(output, "= element node \"%s\"",
                        cur->name);
            }
        }
        else {
            cur = nodes->nodeTab[i];
            fprintf(output, "= node \"%s\": type %d: ", cur->name, cur->type);
        }

        /* Nodes without children (e.g. <img .../>) have no first child to
         * read content from. */
        if (cur->children != NULL) {
            content = cur->children->content;
        }

        if (content != NULL) {
            /* Write the content minus trailing CR/LF/space straight from
             * the node, so no fixed-size copy can overflow or truncate. */
            size_t len = strlen((const char *)content);

            while (len > 0 &&
                   (content[len - 1] == '\r' ||
                    content[len - 1] == '\n' ||
                    content[len - 1] == ' ')) {
                len--;
            }
            fputs(" content ", output);
            fwrite(content, 1, len, output);
            fputc('\n', output);
        }
        else {
            fprintf(output, " content (null)\n");
        }

        /* The attribute list is only read for element nodes. */
        if (cur->type == XML_ELEMENT_NODE) {
            print_attr(cur->properties, output);
        }
    }
}

/*
 * Evaluate an XPath expression against a parsed document and print the
 * matching nodes to stdout.
 *
 * doc:        parsed document to query. It stays owned by the caller and
 *             is never freed here, on success or on failure.
 * xpath_expr: XPath expression to evaluate.
 *
 * Returns 0 on success, or -1 if the XPath context could not be created or
 * the expression could not be evaluated (a message is written to stderr).
 */
int execute_xpath(xmlDocPtr doc, xmlChar *xpath_expr)
{
    xmlXPathContextPtr xpath_context;
    xmlXPathObjectPtr  xpath_obj;

    printf("---- XPath %s\n", xpath_expr);

    /* Create xpath evaluation context */
    xpath_context = xmlXPathNewContext(doc);
    if (xpath_context == NULL) {
        fprintf(stderr, "Error: unable to create new XPath context\n");
        return -1;
    }

    /* Evaluate xpath expression */
    xpath_obj = xmlXPathEvalExpression(xpath_expr, xpath_context);
    if (xpath_obj == NULL) {
        fprintf(stderr,
                "Error: unable to evaluate xpath expression \"%s\"\n",
                xpath_expr);
        xmlXPathFreeContext(xpath_context);
        return -1;
    }

    /* Print results */
    print_nodes(xpath_obj->nodesetval, stdout);

    /* Release only what this function created; doc belongs to the caller. */
    xmlXPathFreeObject(xpath_obj);
    xmlXPathFreeContext(xpath_context);

    return 0;
}

int main(int argc, char **argv)
{
    xmlDocPtr  doc;
    xmlNodePtr root_element = NULL;
    char      *input_file;
    char      *output_file;
    int        ret;

    if (argc != 2) {
        return 1;
    }

    input_file = argv[1];

    LIBXML_TEST_VERSION;

    /* Read document */
    doc = xmlReadFile(input_file, NULL, 0);
    if (doc == NULL) {
        fprintf(stderr, "Failed to parse %s\n", input_file);
        return;
    }

    /* Like getElementTagName() */
    execute_xpath(doc, "/descendant::*[name()='price']");

    execute_xpath(doc, "/cars/car");

    /* Free document */
    xmlFreeDoc(doc);

    xmlCleanupParser();

    return 0;
}

解析対象のXMLファイル

<?xml version="1.0" encoding="UTF-8" ?>
<cars>
  <car country="JP">
    <name>motorcycle</name>
    <price>150</price>
    <img file="car1.jpg"/>
  </car>
  <car country="US">
    <name>truck</name>
    <price>500</price>
    <img file="car2.jpg"/>
  </car>
  <car country="DE">
    <name>car</name>
    <price>200</price>
    <img file="car3.jpg"/>
  </car>
</cars>

解析結果

---- XPath /descendant::*[name()='price']
Result (3 nodes):
= element node "price" content 150
= element node "price" content 500
= element node "price" content 200
---- XPath /cars/car
Result (3 nodes):
= element node "car" content 
attr country=JP
= element node "car" content 
attr country=US
= element node "car" content 
attr country=DE

参考

The XML C parser and toolkit of Gnome
やさしいXML 第3版

やさしいXML 第3版

やさしいXML 第3版