Processing XML with Perl | Michel Rodriguez |
Introduction to XML::DOM | XML::Twig |
Example: a (quasi!) HTML converter
#!/bin/perl
#
# Example: a (quasi!) HTML converter built on XML::DOM.
#
# Reads a wine catalog (default: wine.xml, or the file named by the first
# command-line argument), renames its elements to HTML-ish tags and prints
# the resulting document on STDOUT:
#
#   root element                     -> html
#   class                            -> div,   preceded by an h2 holding its name
#   category                         -> table, preceded by a p holding its name
#   item                             -> tr, with every stock element removed
#   winery, type, year, rating, price -> td

use strict;
use warnings;

use XML::DOM;

# Input file: optional first argument, falling back to the original default.
my $file = @ARGV ? $ARGV[0] : 'wine.xml';

my $parser  = XML::DOM::Parser->new;           # create the DOM parser
my $doc     = $parser->parsefile($file);       # parse the document
my $catalog = $doc->getDocumentElement;        # get the root
$catalog->setTagName('html');                  # NOT part of the DOM spec!

# process class (becomes a div) and category (becomes a table)
name_to_heading( $doc, 'class',    'h2', 'div' );
name_to_heading( $doc, 'category', 'p',  'table' );

# process item (becomes a row) and remove stock
my @items = $doc->getElementsByTagName('item');    # VERY useful method
foreach my $item (@items) {
    $item->setTagName('tr');

    # getElementsByTagName can be called on an element too, but it returns
    # ALL matching descendants, not only direct children. removeChild raises
    # an error when asked to remove a node that is not a direct child, so
    # each stock element is removed through its own parent.
    my @stocks = $item->getElementsByTagName('stock');
    foreach my $stock (@stocks) {
        $stock->getParentNode->removeChild($stock);
    }
}

# process fields (become cells)
foreach my $field_name (qw(winery type year rating price)) {
    my @fields = $doc->getElementsByTagName($field_name);
    foreach my $field (@fields) {
        $field->setTagName('td');
    }
}

print $doc->toString;

# XML::DOM trees contain circular references; dispose breaks them so the
# memory can be reclaimed.
$doc->dispose;

exit;

# name_to_heading( $doc, $old_tag, $heading_tag, $new_tag)
#
# For every $old_tag element in $doc: take its 'name' attribute, remove the
# attribute, insert a $heading_tag element containing that name just before
# the element, then rename the element to $new_tag.
sub name_to_heading {
    my ( $doc, $old_tag, $heading_tag, $new_tag ) = @_;

    # The list is collected before any renaming starts.
    my @elts = $doc->getElementsByTagName($old_tag);
    foreach my $elt (@elts) {
        my $name = $elt->getAttribute('name');    # attribute processing
        $elt->removeAttribute('name');
        insert_before( $doc, $elt, $heading_tag, $name );
        $elt->setTagName($new_tag);
    }
    return;
}

# insert_before( $doc, $elt, $tag, $text)
#
# Creates a new $tag element containing the text $text and inserts it as the
# previous sibling of $elt. You WILL write that kind of subroutine.
#
#   $doc  - the document, needed because elements are created in a document
#   $elt  - the existing element before which the new one is inserted
#   $tag  - tag name of the new element
#   $text - text content of the new element
#
# Returns the newly created element. Dies if $elt has no parent node.
sub insert_before {
    my ( $doc, $elt, $tag, $text ) = @_;

    # create the element; elements are created in a document
    my $new_elt        = $doc->createElement($tag);
    my $new_elt_pcdata = $doc->createTextNode($text);
    $new_elt->appendChild($new_elt_pcdata);

    # get the parent and insert the new_elt:
    # you need the parent to insert a child
    my $parent = $elt->getParentNode
        or die "insert_before: cannot insert <$tag>, target element has no parent\n";
    $parent->insertBefore( $new_elt, $elt );

    return $new_elt;
}
Introduction to XML::DOM | XML::Twig |