File Coverage
File: | blib/lib/XML/Twig.pm |
Coverage: | 93.1% |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 |
262
262
262
|
691847
249
13395
|
use strict; | ||||
2 |
256
256
256
|
852
228
13403
|
use warnings; # > perl 5.5 | ||||
3 | |||||||
4 | # This is created in the caller's space | ||||||
5 | # I realize (now!) that it's not clean, but it's been there for 10+ years... | ||||||
6 | BEGIN | ||||||
7 |
230
6
|
1995
64
|
{ sub ::PCDATA { '#PCDATA' } ## no critic (Subroutines::ProhibitNestedSubs); | ||||
8 |
5
|
34
|
sub ::CDATA { '#CDATA' } ## no critic (Subroutines::ProhibitNestedSubs); | ||||
9 | } | ||||||
10 | |||||||
11 |
230
230
226
|
57901
6374
6460
|
use UNIVERSAL(); | ||||
12 | |||||||
13 | ## if a sub returns a scalar, it better not bloody disappear in list context | ||||||
14 | ## no critic (Subroutines::ProhibitExplicitReturnUndef); | ||||||
15 | |||||||
16 | my $perl_version; | ||||||
17 | my $parser_version; | ||||||
18 | |||||||
19 | ###################################################################### | ||||||
20 | package XML::Twig; | ||||||
21 | ###################################################################### | ||||||
22 | |||||||
23 | require 5.004; | ||||||
24 | |||||||
25 |
226
226
219
|
49922
5157
666
|
use utf8; # > perl 5.5 | ||||
26 | |||||||
27 |
219
219
211
|
4660
3916
7758
|
use vars qw($VERSION @ISA %valid_option); | ||||
28 | |||||||
29 |
211
211
211
|
556
3017
7566
|
use Carp; | ||||
30 |
211
211
211
|
557
3037
1323
|
use File::Spec; | ||||
31 |
211
211
208
|
2727
3040
10516
|
use File::Basename; | ||||
32 | |||||||
33 | *isa= *UNIVERSAL::isa; | ||||||
34 | |||||||
35 | # flag, set to true if the weaken sub is available | ||||||
36 |
208
208
206
|
591
2555
75658
|
use vars qw( $weakrefs); | ||||
37 | |||||||
38 | # flag set to true if the version of expat seems to be 1.95.2, which has annoying bugs | ||||||
39 | # wrt doctype handling. This is global for performance reasons. | ||||||
40 | my $expat_1_95_2=0; | ||||||
41 | |||||||
42 | # a slight non-xml mod: # is allowed as a first character | ||||||
43 | my $REG_TAG_FIRST_LETTER; | ||||||
44 | #$REG_TAG_FIRST_LETTER= q{(?:[^\W\d]|[:#_])}; # < perl 5.6 - does not work for leading non-ascii letters | ||||||
45 | $REG_TAG_FIRST_LETTER= q{(?:[[:alpha:]:#_])}; # >= perl 5.6 | ||||||
46 | |||||||
47 | my $REG_TAG_LETTER= q{(?:[\w_.-]*)}; | ||||||
48 | |||||||
49 | # a simple name (no colon) | ||||||
50 | my $REG_NAME_TOKEN= qq{(?:$REG_TAG_FIRST_LETTER$REG_TAG_LETTER*)}; | ||||||
51 | |||||||
52 | # a tag name, possibly including namespace | ||||||
53 | my $REG_NAME= qq{(?:(?:$REG_NAME_TOKEN:)?$REG_NAME_TOKEN)}; | ||||||
54 | |||||||
55 | # tag name (leading # allowed) | ||||||
56 | # first line is for perl 5.005, second line for modern perl, that accept character classes | ||||||
57 | my $REG_TAG_NAME=$REG_NAME; | ||||||
58 | |||||||
59 | # name or wildcard (* or '') (leading # allowed) | ||||||
60 | my $REG_NAME_W = qq{(?:$REG_NAME|[*])}; | ||||||
61 | |||||||
62 | # class and ids are deliberatly permissive | ||||||
63 | my $REG_NTOKEN_FIRST_LETTER; | ||||||
64 | #$REG_NTOKEN_FIRST_LETTER= q{(?:[^\W\d]|[:_])}; # < perl 5.6 - does not work for leading non-ascii letters | ||||||
65 | $REG_NTOKEN_FIRST_LETTER= q{(?:[[:alpha:]:_])}; # >= perl 5.6 | ||||||
66 | |||||||
67 | my $REG_NTOKEN_LETTER= q{(?:[\w_:.-]*)}; | ||||||
68 | |||||||
69 | my $REG_NTOKEN= qq{(?:$REG_NTOKEN_FIRST_LETTER$REG_NTOKEN_LETTER*)}; | ||||||
70 | my $REG_CLASS = $REG_NTOKEN; | ||||||
71 | my $REG_ID = $REG_NTOKEN; | ||||||
72 | |||||||
73 | # allow <tag> #<tag> (private elt) * <tag>.<class> *.<class> <tag>#<id> *#<id> | ||||||
74 | my $REG_TAG_PART= qq{(?:$REG_NAME_W(?:[.]$REG_CLASS|[#]$REG_ID)?|[.]$REG_CLASS)}; | ||||||
75 | |||||||
76 | my $REG_REGEXP = q{(?:/(?:[^\\/]|\\.)*/[eimsox]*)}; # regexp | ||||||
77 | my $REG_MATCH = q{[!=]~}; # match (or not) | ||||||
78 | my $REG_STRING = q{(?:"(?:[^\\"]|\\.)*"|'(?:[^\\']|\\.)*')}; # string (simple or double quoted) | ||||||
79 | my $REG_NUMBER = q{(?:\d+(?:\.\d*)?|\.\d+)}; # number | ||||||
80 | my $REG_VALUE = qq{(?:$REG_STRING|$REG_NUMBER)}; # value | ||||||
81 | my $REG_OP = q{==|!=|>|<|>=|<=|eq|ne|lt|gt|le|ge|=}; # op | ||||||
82 | my $REG_FUNCTION = q{(?:string|text)\(\s*\)}; | ||||||
83 | my $REG_STRING_ARG = qq{(?:string|text)\\(\\s*$REG_NAME_W\\s*\\)}; | ||||||
84 | my $REG_COMP = q{(?:>=|<=|!=|<|>|=)}; | ||||||
85 | |||||||
86 | my $REG_TAG_IN_PREDICATE= $REG_NAME_W . q{(?=\s*(?i:and\b|or\b|\]|$))}; | ||||||
87 | |||||||
88 | # keys in the context stack, chosen not to interfere with att names, even private (#-prefixed) ones | ||||||
89 | my $ST_TAG = '##tag'; | ||||||
90 | my $ST_ELT = '##elt'; | ||||||
91 | my $ST_NS = '##ns' ; | ||||||
92 | |||||||
93 | # used in the handler trigger code | ||||||
94 | my $REG_NAKED_PREDICATE= qq{((?:"[^"]*"|'[^']*'|$REG_STRING_ARG|$REG_FUNCTION|\@$REG_NAME_W|$REG_MATCH\\s*$REG_REGEXP|[\\s\\d><=!()+.-]|(?i:and)|(?i:or)|$REG_TAG_IN_PREDICATE)*)}; | ||||||
95 | my $REG_PREDICATE= qq{\\[$REG_NAKED_PREDICATE\\]}; | ||||||
96 | |||||||
97 | # not all axis, only supported ones (in get_xpath) | ||||||
98 | my @supported_axis= ( 'ancestor', 'ancestor-or-self', 'child', 'descendant', 'descendant-or-self', | ||||||
99 | 'following', 'following-sibling', 'parent', 'preceding', 'preceding-sibling', 'self' | ||||||
100 | ); | ||||||
101 | my $REG_AXIS = "(?:" . join( '|', @supported_axis) .")"; | ||||||
102 | |||||||
103 | # only used in the "xpath"engine (for get_xpath/findnodes) for now | ||||||
104 | my $REG_PREDICATE_ALT = qr{\[(?:(?:string\(\s*\)|\@$REG_TAG_NAME)\s*$REG_MATCH\s*$REG_REGEXP\s*|[^\]]*)\]}; | ||||||
105 | |||||||
106 | # used to convert XPath tests on strings to the perl equivalent | ||||||
107 | my %PERL_ALPHA_TEST= ( '=' => ' eq ', '!=' => ' ne ', '>' => ' gt ', '>=' => ' ge ', '<' => ' lt ', '<=' => ' le '); | ||||||
108 | |||||||
109 | my( $FB_HTMLCREF, $FB_XMLCREF); | ||||||
110 | |||||||
111 | my $NO_WARNINGS= $perl_version >= 5.006 ? 'no warnings' : 'local $^W=0'; | ||||||
112 | |||||||
113 | # default namespaces, both ways | ||||||
114 | my %DEFAULT_NS= ( xml => "http://www.w3.org/XML/1998/namespace", | ||||||
115 | xmlns => "http://www.w3.org/2000/xmlns/", | ||||||
116 | ); | ||||||
117 | my %DEFAULT_URI2NS= map { $DEFAULT_NS{$_} => $_ } keys %DEFAULT_NS; | ||||||
118 | |||||||
119 | # constants | ||||||
120 | my( $PCDATA, $CDATA, $PI, $COMMENT, $ENT, $ELT, $TEXT, $ASIS, $EMPTY, $BUFSIZE); | ||||||
121 | |||||||
122 | # used when an HTML doc only has a PUBLIC declaration, to generate the SYSTEM one | ||||||
123 | # this should really be done by HTML::TreeBuilder, but as of HTML::TreeBuilder 4.2 it isn't | ||||||
124 | # the various declarations are taken from http://en.wikipedia.org/wiki/Document_Type_Declaration | ||||||
125 | my %HTML_DECL= ( "-//W3C//DTD HTML 4.0 Transitional//EN" => "http://www.w3.org/TR/REC-html40/loose.dtd", | ||||||
126 | "-//W3C//DTD HTML 4.01//EN" => "http://www.w3.org/TR/html4/strict.dtd", | ||||||
127 | "-//W3C//DTD HTML 4.01 Transitional//EN" => "http://www.w3.org/TR/html4/loose.dtd", | ||||||
128 | "-//W3C//DTD HTML 4.01 Frameset//EN" => "http://www.w3.org/TR/html4/frameset.dtd", | ||||||
129 | "-//W3C//DTD XHTML 1.0 Strict//EN" => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd", | ||||||
130 | "-//W3C//DTD XHTML 1.0 Transitional//EN" => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", | ||||||
131 | "-//W3C//DTD XHTML 1.0 Frameset//EN" => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd", | ||||||
132 | "-//W3C//DTD XHTML 1.1//EN" => "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", | ||||||
133 | "-//W3C//DTD XHTML Basic 1.0//EN" => "http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd", | ||||||
134 | "-//W3C//DTD XHTML Basic 1.1//EN" => "http://www.w3.org/TR/xhtml-basic/xhtml-basic11.dtd", | ||||||
135 | "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" => "http://www.wapforum.org/DTD/xhtml-mobile10.dtd", | ||||||
136 | "-//WAPFORUM//DTD XHTML Mobile 1.1//EN" => "http://www.openmobilealliance.org/tech/DTD/xhtml-mobile11.dtd", | ||||||
137 | "-//WAPFORUM//DTD XHTML Mobile 1.2//EN" => "http://www.openmobilealliance.org/tech/DTD/xhtml-mobile12.dtd", | ||||||
138 | "-//W3C//DTD XHTML+RDFa 1.0//EN" => "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd", | ||||||
139 | ); | ||||||
140 | |||||||
141 | my $DEFAULT_HTML_TYPE= "-//W3C//DTD HTML 4.0 Transitional//EN"; | ||||||
142 | |||||||
143 | my $SEP= qr/\s*(?:$|\|)/; | ||||||
144 | |||||||
145 | BEGIN | ||||||
146 | { | ||||||
147 |
204
|
61105
|
$VERSION = '3.47'; | ||||
148 | |||||||
149 |
206
206
204
|
59459
927726
1319
|
use XML::Parser; | ||||
150 |
204
|
2183
|
my $needVersion = '2.23'; | ||||
151 |
203
|
431
|
($parser_version= $XML::Parser::VERSION)=~ s{_\d+}{}; # remove _<n> from version so numeric tests do not warn | ||||
152 |
203
|
1233
|
croak "need at least XML::Parser version $needVersion" unless $parser_version >= $needVersion; | ||||
153 | |||||||
154 |
202
|
2003
|
($perl_version= $])=~ s{_\d+}{}; | ||||
155 | |||||||
156 |
201
|
482
|
if( $perl_version >= 5.008) | ||||
157 |
201
187
187
187
|
9063
47386
1046721
36400
|
{ eval "use Encode qw( :all)"; | ||||
158 |
201
|
1968
|
$FB_XMLCREF = 0x0400; # Encode::FB_XMLCREF; | ||||
159 |
201
|
251
|
$FB_HTMLCREF = 0x0200; # Encode::FB_HTMLCREF; | ||||
160 | } | ||||||
161 | |||||||
162 | # test whether we can use weak references | ||||||
163 | # set local empty signal handler to trap error messages | ||||||
164 |
201
201
|
186
2121
|
{ local $SIG{__DIE__}; | ||||
165 |
194
|
6517
|
if( eval( 'require Scalar::Util') && defined( \&Scalar::Util::weaken)) | ||||
166 |
194
194
|
6328
993
|
{ import Scalar::Util( 'weaken'); $weakrefs= 1; } | ||||
167 | elsif( eval( 'require WeakRef')) | ||||||
168 |
4
4
|
13
3
|
{ import WeakRef; $weakrefs= 1; } | ||||
169 | else | ||||||
170 |
4
|
446
|
{ $weakrefs= 0; } | ||||
171 | } | ||||||
172 | |||||||
173 |
191
|
864
|
import XML::Twig::Elt; | ||||
174 |
191
|
749
|
import XML::Twig::Entity; | ||||
175 |
191
|
1141
|
import XML::Twig::Entity_list; | ||||
176 | |||||||
177 | # used to store the gi's | ||||||
178 | # should be set for each twig really, at least when there are several | ||||||
179 | # the init ensures that special gi's are always the same | ||||||
180 | |||||||
181 | # constants: element types | ||||||
182 |
191
|
351
|
$PCDATA = '#PCDATA'; | ||||
183 |
191
|
175
|
$CDATA = '#CDATA'; | ||||
184 |
191
|
595
|
$PI = '#PI'; | ||||
185 |
191
|
173
|
$COMMENT = '#COMMENT'; | ||||
186 |
191
|
19432
|
$ENT = '#ENT'; | ||||
187 | |||||||
188 | # element classes | ||||||
189 |
191
|
6289
|
$ELT = '#ELT'; | ||||
190 |
190
|
4099
|
$TEXT = '#TEXT'; | ||||
191 | |||||||
192 | # element properties | ||||||
193 |
190
|
2217
|
$ASIS = '#ASIS'; | ||||
194 |
190
|
1192
|
$EMPTY = '#EMPTY'; | ||||
195 | |||||||
196 | # used in parseurl to set the buffer size to the same size as in XML::Parser::Expat | ||||||
197 |
190
|
173
|
$BUFSIZE = 32768; | ||||
198 | |||||||
199 | |||||||
200 | # gi => index | ||||||
201 |
190
|
1481
|
%XML::Twig::gi2index=( '', 0, $PCDATA => 1, $CDATA => 2, $PI => 3, $COMMENT => 4, $ENT => 5); | ||||
202 | # list of gi's | ||||||
203 |
190
|
989
|
@XML::Twig::index2gi=( '', $PCDATA, $CDATA, $PI, $COMMENT, $ENT); | ||||
204 | |||||||
205 | # gi's under this value are special | ||||||
206 |
190
|
606
|
$XML::Twig::SPECIAL_GI= @XML::Twig::index2gi; | ||||
207 | |||||||
208 |
190
|
477
|
%XML::Twig::base_ent= ( '>' => '>', '<' => '<', '&' => '&', "'" => ''', '"' => '"',); | ||||
209 |
190
562
|
566
1876
|
foreach my $c ( "\n", "\r", "\t") { $XML::Twig::base_ent{$c}= sprintf( "&#x%02x;", ord( $c)); } | ||||
210 | |||||||
211 | # now set some aliases | ||||||
212 |
188
|
335
|
*find_nodes = *get_xpath; # same as XML::XPath | ||||
213 |
188
|
278
|
*findnodes = *get_xpath; # same as XML::LibXML | ||||
214 |
188
|
174
|
*getElementsByTagName = *descendants; | ||||
215 |
188
|
162
|
*descendants_or_self = *descendants; # valid in XML::Twig, not in XML::Twig::Elt | ||||
216 |
188
|
590
|
*find_by_tag_name = *descendants; | ||||
217 |
187
|
159
|
*getElementById = *elt_id; | ||||
218 |
187
|
564
|
*getEltById = *elt_id; | ||||
219 |
187
|
148
|
*toString = *sprint; | ||||
220 |
187
|
134891
|
*create_accessors = *att_accessors; | ||||
221 | |||||||
222 | } | ||||||
223 | |||||||
224 | @ISA = qw(XML::Parser); | ||||||
225 | |||||||
226 | # fake gi's used in twig_handlers and start_tag_handlers | ||||||
227 | my $ALL = '_all_'; # the associated function is always called | ||||||
228 | my $DEFAULT= '_default_'; # the function is called if no other handler has been | ||||||
229 | |||||||
230 | # some defaults | ||||||
231 | my $COMMENTS_DEFAULT= 'keep'; | ||||||
232 | my $PI_DEFAULT = 'keep'; | ||||||
233 | |||||||
234 | |||||||
235 | # handlers used in regular mode | ||||||
236 | my %twig_handlers=( Start => \&_twig_start, | ||||||
237 | End => \&_twig_end, | ||||||
238 | Char => \&_twig_char, | ||||||
239 | Entity => \&_twig_entity, | ||||||
240 | XMLDecl => \&_twig_xmldecl, | ||||||
241 | Doctype => \&_twig_doctype, | ||||||
242 | Element => \&_twig_element, | ||||||
243 | Attlist => \&_twig_attlist, | ||||||
244 | CdataStart => \&_twig_cdatastart, | ||||||
245 | CdataEnd => \&_twig_cdataend, | ||||||
246 | Proc => \&_twig_pi, | ||||||
247 | Comment => \&_twig_comment, | ||||||
248 | Default => \&_twig_default, | ||||||
249 | ExternEnt => \&_twig_extern_ent, | ||||||
250 | ); | ||||||
251 | |||||||
252 | # handlers used when twig_roots is used and we are outside of the roots | ||||||
253 | my %twig_handlers_roots= | ||||||
254 | ( Start => \&_twig_start_check_roots, | ||||||
255 | End => \&_twig_end_check_roots, | ||||||
256 | Doctype => \&_twig_doctype, | ||||||
257 | Char => undef, Entity => undef, XMLDecl => \&_twig_xmldecl, | ||||||
258 | Element => undef, Attlist => undef, CdataStart => undef, | ||||||
259 | CdataEnd => undef, Proc => undef, Comment => undef, | ||||||
260 | Proc => \&_twig_pi_check_roots, | ||||||
261 | Default => sub {}, # hack needed for XML::Parser 2.27 | ||||||
262 | ExternEnt => \&_twig_extern_ent, | ||||||
263 | ); | ||||||
264 | |||||||
265 | # handlers used when twig_roots and print_outside_roots are used and we are | ||||||
266 | # outside of the roots | ||||||
267 | my %twig_handlers_roots_print_2_30= | ||||||
268 | ( Start => \&_twig_start_check_roots, | ||||||
269 | End => \&_twig_end_check_roots, | ||||||
270 | Char => \&_twig_print, | ||||||
271 | Entity => \&_twig_print_entity, | ||||||
272 | ExternEnt => \&_twig_print_entity, | ||||||
273 | DoctypeFin => \&_twig_doctype_fin_print, | ||||||
274 | XMLDecl => sub { _twig_xmldecl( @_); _twig_print( @_); }, | ||||||
275 | Doctype => \&_twig_print_doctype, # because recognized_string is broken here | ||||||
276 | # Element => \&_twig_print, Attlist => \&_twig_print, | ||||||
277 | CdataStart => \&_twig_print, CdataEnd => \&_twig_print, | ||||||
278 | Proc => \&_twig_pi_check_roots, Comment => \&_twig_print, | ||||||
279 | Default => \&_twig_print_check_doctype, | ||||||
280 | ExternEnt => \&_twig_extern_ent, | ||||||
281 | ); | ||||||
282 | |||||||
283 | # handlers used when twig_roots, print_outside_roots and keep_encoding are used | ||||||
284 | # and we are outside of the roots | ||||||
285 | my %twig_handlers_roots_print_original_2_30= | ||||||
286 | ( Start => \&_twig_start_check_roots, | ||||||
287 | End => \&_twig_end_check_roots, | ||||||
288 | Char => \&_twig_print_original, | ||||||
289 | # I have no idea why I should not be using this handler! | ||||||
290 | Entity => \&_twig_print_entity, | ||||||
291 | ExternEnt => \&_twig_print_entity, | ||||||
292 | DoctypeFin => \&_twig_doctype_fin_print, | ||||||
293 | XMLDecl => sub { _twig_xmldecl( @_); _twig_print_original( @_) }, | ||||||
294 | Doctype => \&_twig_print_original_doctype, # because original_string is broken here | ||||||
295 | Element => \&_twig_print_original, Attlist => \&_twig_print_original, | ||||||
296 | CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original, | ||||||
297 | Proc => \&_twig_pi_check_roots, Comment => \&_twig_print_original, | ||||||
298 | Default => \&_twig_print_original_check_doctype, | ||||||
299 | ); | ||||||
300 | |||||||
301 | # handlers used when twig_roots and print_outside_roots are used and we are | ||||||
302 | # outside of the roots | ||||||
303 | my %twig_handlers_roots_print_2_27= | ||||||
304 | ( Start => \&_twig_start_check_roots, | ||||||
305 | End => \&_twig_end_check_roots, | ||||||
306 | Char => \&_twig_print, | ||||||
307 | # if the Entity handler is set then it prints the entity declaration | ||||||
308 | # before the entire internal subset (including the declaration!) is output | ||||||
309 | Entity => sub {}, | ||||||
310 | XMLDecl => \&_twig_print, Doctype => \&_twig_print, | ||||||
311 | CdataStart => \&_twig_print, CdataEnd => \&_twig_print, | ||||||
312 | Proc => \&_twig_pi_check_roots, Comment => \&_twig_print, | ||||||
313 | Default => \&_twig_print, | ||||||
314 | ExternEnt => \&_twig_extern_ent, | ||||||
315 | ); | ||||||
316 | |||||||
317 | # handlers used when twig_roots, print_outside_roots and keep_encoding are used | ||||||
318 | # and we are outside of the roots | ||||||
319 | my %twig_handlers_roots_print_original_2_27= | ||||||
320 | ( Start => \&_twig_start_check_roots, | ||||||
321 | End => \&_twig_end_check_roots, | ||||||
322 | Char => \&_twig_print_original, | ||||||
323 | # for some reason original_string is wrong here | ||||||
324 | # this can be a problem if the doctype includes non ascii characters | ||||||
325 | XMLDecl => \&_twig_print, Doctype => \&_twig_print, | ||||||
326 | # if the Entity handler is set then it prints the entity declaration | ||||||
327 | # before the entire internal subset (including the declaration!) is output | ||||||
328 | Entity => sub {}, | ||||||
329 | #Element => undef, Attlist => undef, | ||||||
330 | CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original, | ||||||
331 | Proc => \&_twig_pi_check_roots, Comment => \&_twig_print_original, | ||||||
332 | Default => \&_twig_print, # _twig_print_original does not work | ||||||
333 | ExternEnt => \&_twig_extern_ent, | ||||||
334 | ); | ||||||
335 | |||||||
336 | |||||||
337 | my %twig_handlers_roots_print= $parser_version > 2.27 | ||||||
338 | ? %twig_handlers_roots_print_2_30 | ||||||
339 | : %twig_handlers_roots_print_2_27; | ||||||
340 | my %twig_handlers_roots_print_original= $parser_version > 2.27 | ||||||
341 | ? %twig_handlers_roots_print_original_2_30 | ||||||
342 | : %twig_handlers_roots_print_original_2_27; | ||||||
343 | |||||||
344 | |||||||
345 | # handlers used when the finish_print method has been called | ||||||
346 | my %twig_handlers_finish_print= | ||||||
347 | ( Start => \&_twig_print, | ||||||
348 | End => \&_twig_print, Char => \&_twig_print, | ||||||
349 | Entity => \&_twig_print, XMLDecl => \&_twig_print, | ||||||
350 | Doctype => \&_twig_print, Element => \&_twig_print, | ||||||
351 | Attlist => \&_twig_print, CdataStart => \&_twig_print, | ||||||
352 | CdataEnd => \&_twig_print, Proc => \&_twig_print, | ||||||
353 | Comment => \&_twig_print, Default => \&_twig_print, | ||||||
354 | ExternEnt => \&_twig_extern_ent, | ||||||
355 | ); | ||||||
356 | |||||||
357 | # handlers used when the finish_print method has been called and the keep_encoding | ||||||
358 | # option is used | ||||||
359 | my %twig_handlers_finish_print_original= | ||||||
360 | ( Start => \&_twig_print_original, End => \&_twig_print_end_original, | ||||||
361 | Char => \&_twig_print_original, Entity => \&_twig_print_original, | ||||||
362 | XMLDecl => \&_twig_print_original, Doctype => \&_twig_print_original, | ||||||
363 | Element => \&_twig_print_original, Attlist => \&_twig_print_original, | ||||||
364 | CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original, | ||||||
365 | Proc => \&_twig_print_original, Comment => \&_twig_print_original, | ||||||
366 | Default => \&_twig_print_original, | ||||||
367 | ); | ||||||
368 | |||||||
369 | # handlers used within ignored elements | ||||||
370 | my %twig_handlers_ignore= | ||||||
371 | ( Start => \&_twig_ignore_start, | ||||||
372 | End => \&_twig_ignore_end, | ||||||
373 | Char => undef, Entity => undef, XMLDecl => undef, | ||||||
374 | Doctype => undef, Element => undef, Attlist => undef, | ||||||
375 | CdataStart => undef, CdataEnd => undef, Proc => undef, | ||||||
376 | Comment => undef, Default => undef, | ||||||
377 | ExternEnt => undef, | ||||||
378 | ); | ||||||
379 | |||||||
380 | |||||||
381 | # those handlers are only used if the entities are NOT to be expanded | ||||||
382 | my %twig_noexpand_handlers= ( ExternEnt => undef, Default => \&_twig_default ); | ||||||
383 | |||||||
384 | my @saved_default_handler; | ||||||
385 | |||||||
386 | my $ID= 'id'; # default value, set by the Id argument | ||||||
387 | my $css_sel=0; # set through the css_sel option to allow .class selectors in triggers | ||||||
388 | |||||||
389 | # all allowed options | ||||||
390 | %valid_option= | ||||||
391 | ( # XML::Twig options | ||||||
392 | TwigHandlers => 1, Id => 1, | ||||||
393 | TwigRoots => 1, TwigPrintOutsideRoots => 1, | ||||||
394 | StartTagHandlers => 1, EndTagHandlers => 1, | ||||||
395 | ForceEndTagHandlersUsage => 1, | ||||||
396 | DoNotChainHandlers => 1, | ||||||
397 | IgnoreElts => 1, | ||||||
398 | Index => 1, | ||||||
399 | AttAccessors => 1, | ||||||
400 | EltAccessors => 1, | ||||||
401 | FieldAccessors => 1, | ||||||
402 | CharHandler => 1, | ||||||
403 | TopDownHandlers => 1, | ||||||
404 | KeepEncoding => 1, DoNotEscapeAmpInAtts => 1, | ||||||
405 | ParseStartTag => 1, KeepAttsOrder => 1, | ||||||
406 | LoadDTD => 1, DTDHandler => 1, | ||||||
407 | DoNotOutputDTD => 1, NoProlog => 1, | ||||||
408 | ExpandExternalEnts => 1, | ||||||
409 | DiscardSpaces => 1, KeepSpaces => 1, DiscardAllSpaces => 1, | ||||||
410 | DiscardSpacesIn => 1, KeepSpacesIn => 1, | ||||||
411 | PrettyPrint => 1, EmptyTags => 1, | ||||||
412 | EscapeGt => 1, | ||||||
413 | Quote => 1, | ||||||
414 | Comments => 1, Pi => 1, | ||||||
415 | OutputFilter => 1, InputFilter => 1, | ||||||
416 | OutputTextFilter => 1, | ||||||
417 | OutputEncoding => 1, | ||||||
418 | RemoveCdata => 1, | ||||||
419 | EltClass => 1, | ||||||
420 | MapXmlns => 1, KeepOriginalPrefix => 1, | ||||||
421 | SkipMissingEnts => 1, | ||||||
422 | # XML::Parser options | ||||||
423 | ErrorContext => 1, ProtocolEncoding => 1, | ||||||
424 | Namespaces => 1, NoExpand => 1, | ||||||
425 | Stream_Delimiter => 1, ParseParamEnt => 1, | ||||||
426 | NoLWP => 1, Non_Expat_Options => 1, | ||||||
427 | Xmlns => 1, CssSel => 1, | ||||||
428 | UseTidy => 1, TidyOptions => 1, | ||||||
429 | OutputHtmlDoctype => 1, | ||||||
430 | ); | ||||||
431 | |||||||
432 | my $active_twig; # last active twig,for XML::Twig::s | ||||||
433 | |||||||
434 | # predefined input and output filters | ||||||
435 |
187
187
187
|
629
174
1040717
|
use vars qw( %filter); | ||||
436 | %filter= ( html => \&html_encode, | ||||||
437 | safe => \&safe_encode, | ||||||
438 | safe_hex => \&safe_encode_hex, | ||||||
439 | ); | ||||||
440 | |||||||
441 | |||||||
442 | # trigger types (used to sort them) | ||||||
443 | my ($LEVEL_TRIGGER, $REGEXP_TRIGGER, $XPATH_TRIGGER)=(1..3); | ||||||
444 | |||||||
445 | sub new | ||||||
446 |
3137
|
1
|
11024513
|
{ my ($class, %args) = @_; | |||
447 |
3137
|
2337
|
my $handlers; | ||||
448 | |||||||
449 | # change all nice_perlish_names into nicePerlishNames | ||||||
450 |
3137
|
5126
|
%args= _normalize_args( %args); | ||||
451 | |||||||
452 | # check options | ||||||
453 |
3137
|
5556
|
unless( $args{MoreOptions}) | ||||
454 |
3136
|
4931
|
{ foreach my $arg (keys %args) | ||||
455 |
5448
|
8753
|
{ carp "invalid option $arg" unless $valid_option{$arg}; } | ||||
456 | } | ||||||
457 | |||||||
458 | # a twig is really an XML::Parser | ||||||
459 | # my $self= XML::Parser->new(%args); | ||||||
460 |
3137
|
2749
|
my $self; | ||||
461 |
3137
|
8831
|
$self= XML::Parser->new(%args); | ||||
462 | |||||||
463 |
3137
|
71193
|
bless $self, $class; | ||||
464 | |||||||
465 |
3137
|
4962
|
$self->{_twig_context_stack}= []; | ||||
466 | |||||||
467 | # allow tag.class selectors in handler triggers | ||||||
468 |
3137
|
7852
|
$css_sel= $args{CssSel} || 0; | ||||
469 | |||||||
470 | |||||||
471 |
3137
|
4729
|
if( exists $args{TwigHandlers}) | ||||
472 |
230
|
243
|
{ $handlers= $args{TwigHandlers}; | ||||
473 |
230
|
415
|
$self->setTwigHandlers( $handlers); | ||||
474 |
223
|
307
|
delete $args{TwigHandlers}; | ||||
475 | } | ||||||
476 | |||||||
477 | # take care of twig-specific arguments | ||||||
478 |
3130
|
4234
|
if( exists $args{StartTagHandlers}) | ||||
479 |
30
|
65
|
{ $self->setStartTagHandlers( $args{StartTagHandlers}); | ||||
480 |
30
|
47
|
delete $args{StartTagHandlers}; | ||||
481 | } | ||||||
482 | |||||||
483 |
3130
|
4070
|
if( exists $args{DoNotChainHandlers}) | ||||
484 |
1
|
2
|
{ $self->{twig_do_not_chain_handlers}= $args{DoNotChainHandlers}; } | ||||
485 | |||||||
486 |
3130
|
4104
|
if( exists $args{IgnoreElts}) | ||||
487 | { # change array to hash so you can write ignore_elts => [ qw(foo bar baz)] | ||||||
488 |
11
1
2
1
|
32
2
5
2
|
if( isa( $args{IgnoreElts}, 'ARRAY')) { $args{IgnoreElts}= { map { $_ => 1 } @{$args{IgnoreElts}} }; } | ||||
489 |
11
|
27
|
$self->setIgnoreEltsHandlers( $args{IgnoreElts}); | ||||
490 |
11
|
13
|
delete $args{IgnoreElts}; | ||||
491 | } | ||||||
492 | |||||||
493 |
3130
|
3969
|
if( exists $args{Index}) | ||||
494 |
2
|
2
|
{ my $index= $args{Index}; | ||||
495 | # we really want a hash name => path, we turn an array into a hash if necessary | ||||||
496 |
2
|
4
|
if( ref( $index) eq 'ARRAY') | ||||
497 |
1
2
|
1
4
|
{ my %index= map { $_ => $_ } @$index; | ||||
498 |
1
|
2
|
$index= \%index; | ||||
499 | } | ||||||
500 |
2
|
5
|
while( my( $name, $exp)= each %$index) | ||||
501 |
3
4
4
4
|
12
3
7
8
|
{ $self->setTwigHandler( $exp, sub { push @{$_[0]->{_twig_index}->{$name}}, $_; 1; }); } | ||||
502 | } | ||||||
503 | |||||||
504 |
3130
|
8078
|
$self->{twig_elt_class}= $args{EltClass} || 'XML::Twig::Elt'; | ||||
505 |
3130
75
|
4702
117
|
if( defined( $args{EltClass}) && $args{EltClass} ne 'XML::Twig::Elt') { $self->{twig_alt_elt_class}=1; } | ||||
506 |
3130
76
|
3981
126
|
if( exists( $args{EltClass})) { delete $args{EltClass}; } | ||||
507 | |||||||
508 |
3130
|
3646
|
if( exists( $args{MapXmlns})) | ||||
509 |
22
|
24
|
{ $self->{twig_map_xmlns}= $args{MapXmlns}; | ||||
510 |
22
|
23
|
$self->{Namespaces}=1; | ||||
511 |
22
|
25
|
delete $args{MapXmlns}; | ||||
512 | } | ||||||
513 | |||||||
514 |
3130
|
4187
|
if( exists( $args{KeepOriginalPrefix})) | ||||
515 |
4
|
5
|
{ $self->{twig_keep_original_prefix}= $args{KeepOriginalPrefix}; | ||||
516 |
4
|
4
|
delete $args{KeepOriginalPrefix}; | ||||
517 | } | ||||||
518 | |||||||
519 |
3130
|
2932
|
$self->{twig_dtd_handler}= $args{DTDHandler}; | ||||
520 |
3130
|
2651
|
delete $args{DTDHandler}; | ||||
521 | |||||||
522 |
3130
|
3563
|
if( $args{ExpandExternalEnts}) | ||||
523 |
5
|
11
|
{ $self->set_expand_external_entities( 1); | ||||
524 |
5
|
6
|
$self->{twig_expand_external_ents}= $args{ExpandExternalEnts}; | ||||
525 |
5
|
9
|
$self->{twig_read_external_dtd}= 1; # implied by ExpandExternalEnts | ||||
526 |
5
|
13
|
if( $args{ExpandExternalEnts} == -1) | ||||
527 |
2
|
3
|
{ $self->{twig_extern_ent_nofail}= 1; | ||||
528 |
2
|
12
|
$self->setHandlers( ExternEnt => \&_twig_extern_ent_nofail); | ||||
529 | } | ||||||
530 |
5
|
47
|
delete $args{LoadDTD}; | ||||
531 |
5
|
6
|
delete $args{ExpandExternalEnts}; | ||||
532 | } | ||||||
533 | else | ||||||
534 |
3125
|
4577
|
{ $self->set_expand_external_entities( 0); } | ||||
535 | |||||||
536 |
3130
|
5999
|
if( !$args{NoLWP} && ! _use( 'URI') && ! _use( 'URI::File') && ! _use( 'LWP')) | ||||
537 |
0
|
0
|
{ $self->{twig_ext_ent_handler}= \&XML::Parser::initial_ext_ent_handler } | ||||
538 | else | ||||||
539 |
3130
|
4210
|
{ $self->{twig_ext_ent_handler}= \&XML::Parser::file_ext_ent_handler } | ||||
540 | |||||||
541 |
3130
|
3947
|
if( $args{DoNotEscapeAmpInAtts}) | ||||
542 |
1
|
1
|
{ $self->set_do_not_escape_amp_in_atts( 1); | ||||
543 |
1
|
1
|
$self->{twig_do_not_escape_amp_in_atts}=1; | ||||
544 | } | ||||||
545 | else | ||||||
546 |
3129
|
4210
|
{ $self->set_do_not_escape_amp_in_atts( 0); | ||||
547 |
3129
|
2725
|
$self->{twig_do_not_escape_amp_in_atts}=0; | ||||
548 | } | ||||||
549 | |||||||
550 | # deal with TwigRoots argument, a hash of elements for which | ||||||
551 | # subtrees will be built (and associated handlers) | ||||||
552 | |||||||
553 |
3130
|
3910
|
if( $args{TwigRoots}) | ||||
554 |
101
|
207
|
{ $self->setTwigRoots( $args{TwigRoots}); | ||||
555 |
99
|
133
|
delete $args{TwigRoots}; | ||||
556 | } | ||||||
557 | |||||||
558 |
3128
|
3781
|
if( $args{EndTagHandlers}) | ||||
559 |
15
|
51
|
{ unless ($self->{twig_roots} || $args{ForceEndTagHandlersUsage}) | ||||
560 |
1
|
153
|
{ croak "you should not use EndTagHandlers without TwigRoots\n", | ||||
561 | "if you want to use it anyway, normally because you have ", | ||||||
562 | "a start_tag_handlers that calls 'ignore' and you want to ", | ||||||
563 | "call an ent_tag_handlers at the end of the element, then ", | ||||||
564 | "pass 'force_end_tag_handlers_usage => 1' as an argument ", | ||||||
565 | "to new"; | ||||||
566 | } | ||||||
567 | |||||||
568 |
14
|
26
|
$self->setEndTagHandlers( $args{EndTagHandlers}); | ||||
569 |
14
|
164
|
delete $args{EndTagHandlers}; | ||||
570 | } | ||||||
571 | |||||||
572 |
3127
|
3709
|
if( $args{TwigPrintOutsideRoots}) | ||||
573 |
63
|
314
|
{ croak "cannot use twig_print_outside_roots without twig_roots" | ||||
574 | unless( $self->{twig_roots}); | ||||||
575 | # if the arg is a filehandle then store it | ||||||
576 |
62
|
99
|
if( _is_fh( $args{TwigPrintOutsideRoots}) ) | ||||
577 |
59
|
102
|
{ $self->{twig_output_fh}= $args{TwigPrintOutsideRoots}; } | ||||
578 |
62
|
93
|
$self->{twig_default_print}= $args{TwigPrintOutsideRoots}; | ||||
579 | } | ||||||
580 | |||||||
581 | # space policy | ||||||
582 |
3126
|
3492
|
if( $args{KeepSpaces}) | ||||
583 |
49
|
195
|
{ croak "cannot use both keep_spaces and discard_spaces" if( $args{DiscardSpaces}); | ||||
584 |
48
|
170
|
croak "cannot use both keep_spaces and discard_all_spaces" if( $args{DiscardAllSpaces}); | ||||
585 |
47
|
184
|
croak "cannot use both keep_spaces and keep_spaces_in" if( $args{KeepSpacesIn}); | ||||
586 |
46
|
50
|
$self->{twig_keep_spaces}=1; | ||||
587 |
46
|
58
|
delete $args{KeepSpaces}; | ||||
588 | } | ||||||
589 |
3123
|
3487
|
if( $args{DiscardSpaces}) | ||||
590 | { | ||||||
591 |
5
|
106
|
croak "cannot use both discard_spaces and keep_spaces_in" if( $args{KeepSpacesIn}); | ||||
592 |
4
|
97
|
croak "cannot use both discard_spaces and discard_all_spaces" if( $args{DiscardAllSpaces}); | ||||
593 |
3
|
98
|
croak "cannot use both discard_spaces and discard_spaces_in" if( $args{DiscardSpacesIn}); | ||||
594 |
2
|
2
|
$self->{twig_discard_spaces}=1; | ||||
595 |
2
|
2
|
delete $args{DiscardSpaces}; | ||||
596 | } | ||||||
597 |
3120
|
3442
|
if( $args{KeepSpacesIn}) | ||||
598 |
9
|
111
|
{ croak "cannot use both keep_spaces_in and discard_spaces_in" if( $args{DiscardSpacesIn}); | ||||
599 |
8
|
107
|
croak "cannot use both keep_spaces_in and discard_all_spaces" if( $args{DiscardAllSpaces}); | ||||
600 |
7
|
7
|
$self->{twig_discard_spaces}=1; | ||||
601 |
7
|
10
|
$self->{twig_keep_spaces_in}={}; | ||||
602 |
7
7
|
7
17
|
my @tags= @{$args{KeepSpacesIn}}; | ||||
603 |
7
9
|
10
15
|
foreach my $tag (@tags) { $self->{twig_keep_spaces_in}->{$tag}=1; } | ||||
604 |
7
|
14
|
delete $args{KeepSpacesIn}; | ||||
605 | } | ||||||
606 | |||||||
607 |
3118
|
3416
|
if( $args{DiscardAllSpaces}) | ||||
608 | { | ||||||
609 |
2
|
100
|
croak "cannot use both discard_all_spaces and discard_spaces_in" if( $args{DiscardSpacesIn}); | ||||
610 |
1
|
1
|
$self->{twig_discard_all_spaces}=1; | ||||
611 |
1
|
2
|
delete $args{DiscardAllSpaces}; | ||||
612 | } | ||||||
613 | |||||||
614 |
3117
|
3285
|
if( $args{DiscardSpacesIn}) | ||||
615 |
4
|
6
|
{ $self->{twig_keep_spaces}=1; | ||||
616 |
4
|
6
|
$self->{twig_discard_spaces_in}={}; | ||||
617 |
4
4
|
2
9
|
my @tags= @{$args{DiscardSpacesIn}}; | ||||
618 |
4
6
|
5
9
|
foreach my $tag (@tags) { $self->{twig_discard_spaces_in}->{$tag}=1; } | ||||
619 |
4
|
8
|
delete $args{DiscardSpacesIn}; | ||||
620 | } | ||||||
621 | # discard spaces by default | ||||||
622 |
3117
|
4746
|
$self->{twig_discard_spaces}= 1 unless( $self->{twig_keep_spaces}); | ||||
623 | |||||||
624 |
3117
|
5619
|
$args{Comments}||= $COMMENTS_DEFAULT; | ||||
625 |
3117
4
|
5956
7
|
if( $args{Comments} eq 'drop') { $self->{twig_keep_comments}= 0; } | ||||
626 |
2225
|
2082
|
elsif( $args{Comments} eq 'keep') { $self->{twig_keep_comments}= 1; } | ||||
627 |
887
|
701
|
elsif( $args{Comments} eq 'process') { $self->{twig_process_comments}= 1; } | ||||
628 |
1
|
82
|
else { croak "wrong value for comments argument: '$args{Comments}' (should be 'drop', 'keep' or 'process')"; } | ||||
629 |
3116
|
3508
|
delete $args{Comments}; | ||||
630 | |||||||
631 |
3116
|
5188
|
$args{Pi}||= $PI_DEFAULT; | ||||
632 |
3116
3
|
4829
4
|
if( $args{Pi} eq 'drop') { $self->{twig_keep_pi}= 0; } | ||||
633 |
2226
|
1990
|
elsif( $args{Pi} eq 'keep') { $self->{twig_keep_pi}= 1; } | ||||
634 |
886
|
739
|
elsif( $args{Pi} eq 'process') { $self->{twig_process_pi}= 1; } | ||||
635 |
1
|
81
|
else { croak "wrong value for pi argument: '$args{Pi}' (should be 'drop', 'keep' or 'process')"; } | ||||
636 |
3115
|
2493
|
delete $args{Pi}; | ||||
637 | |||||||
638 |
3115
|
2996
|
if( $args{KeepEncoding}) | ||||
639 | { | ||||||
640 | # set it in XML::Twig::Elt so print functions know what to do | ||||||
641 |
1020
|
1171
|
$self->set_keep_encoding( 1); | ||||
642 |
1020
|
2330
|
$self->{parse_start_tag}= $args{ParseStartTag} || \&_parse_start_tag; | ||||
643 |
1020
|
1323
|
delete $args{ParseStartTag} if defined( $args{ParseStartTag}) ; | ||||
644 |
1020
|
849
|
delete $args{KeepEncoding}; | ||||
645 | } | ||||||
646 | else | ||||||
647 |
2095
|
2701
|
{ $self->set_keep_encoding( 0); | ||||
648 |
2095
|
2016
|
if( $args{ParseStartTag}) | ||||
649 |
2
|
5
|
{ $self->{parse_start_tag}= $args{ParseStartTag}; } | ||||
650 | else | ||||||
651 |
2093
|
1687
|
{ delete $self->{parse_start_tag}; } | ||||
652 |
2095
|
1454
|
delete $args{ParseStartTag}; | ||||
653 | } | ||||||
654 | |||||||
655 |
3115
|
2961
|
if( $args{OutputFilter}) | ||||
656 |
5
|
8
|
{ $self->set_output_filter( $args{OutputFilter}); | ||||
657 |
5
|
6
|
delete $args{OutputFilter}; | ||||
658 | } | ||||||
659 | else | ||||||
660 |
3110
|
3612
|
{ $self->set_output_filter( 0); } | ||||
661 | |||||||
662 |
3115
|
3515
|
if( $args{RemoveCdata}) | ||||
663 |
1
|
1
|
{ $self->set_remove_cdata( $args{RemoveCdata}); | ||||
664 |
1
|
1
|
delete $args{RemoveCdata}; | ||||
665 | } | ||||||
666 | else | ||||||
667 |
3114
|
3630
|
{ $self->set_remove_cdata( 0); } | ||||
668 | |||||||
669 |
3115
|
3061
|
if( $args{OutputTextFilter}) | ||||
670 |
5
|
8
|
{ $self->set_output_text_filter( $args{OutputTextFilter}); | ||||
671 |
5
|
5
|
delete $args{OutputTextFilter}; | ||||
672 | } | ||||||
673 | else | ||||||
674 |
3110
|
3508
|
{ $self->set_output_text_filter( 0); } | ||||
675 | |||||||
676 |
3115
|
3406
|
if( exists $args{KeepAttsOrder}) | ||||
677 |
12
|
19
|
{ $self->{keep_atts_order}= $args{KeepAttsOrder}; | ||||
678 |
12
|
15
|
if( _use( 'Tie::IxHash')) | ||||
679 |
11
|
22
|
{ $self->set_keep_atts_order( $self->{keep_atts_order}); } | ||||
680 | else | ||||||
681 |
1
|
86
|
{ croak "Tie::IxHash not available, option keep_atts_order not allowed"; } | ||||
682 | } | ||||||
683 | else | ||||||
684 |
3103
|
3301
|
{ $self->set_keep_atts_order( 0); } | ||||
685 | |||||||
686 | |||||||
687 |
3114
72
|
3339
113
|
if( $args{PrettyPrint}) { $self->set_pretty_print( $args{PrettyPrint}); } | ||||
688 |
3114
1
|
3126
2
|
if( $args{EscapeGt}) { $self->escape_gt( $args{EscapeGt}); } | ||||
689 |
3114
12
|
3086
18
|
if( $args{EmptyTags}) { $self->set_empty_tag_style( $args{EmptyTags}) } | ||||
690 | |||||||
691 |
3114
1
1
|
3470
5
1
|
if( exists $args{Id}) { $ID= $args{Id}; delete $args{ID}; } | ||||
692 |
3114
4
4
|
3093
11
5
|
if( $args{NoProlog}) { $self->{no_prolog}= 1; delete $args{NoProlog}; } | ||||
693 |
3114
3
3
|
3154
7
5
|
if( $args{DoNotOutputDTD}) { $self->{no_dtd_output}= 1; delete $args{DoNotOutputDTD}; } | ||||
694 |
3114
9
9
|
3226
13
9
|
if( $args{LoadDTD}) { $self->{twig_read_external_dtd}= 1; delete $args{LoadDTD}; } | ||||
695 |
3114
1
1
|
2944
3
1
|
if( $args{CharHandler}) { $self->setCharHandler( $args{CharHandler}); delete $args{CharHandler}; } | ||||
696 | |||||||
697 |
3114
3
1
|
3078
6
1
|
if( $args{InputFilter}) { $self->set_input_filter( $args{InputFilter}); delete $args{InputFilter}; } | ||||
698 |
3112
3
3
|
3032
76
36
|
if( $args{NoExpand}) { $self->setHandlers( %twig_noexpand_handlers); $self->{twig_no_expand}=1; } | ||||
699 |
3112
2
2
|
3435
4
2
|
if( my $output_encoding= $args{OutputEncoding}) { $self->set_output_encoding( $output_encoding); delete $args{OutputFilter}; } | ||||
700 | |||||||
701 |
3112
4
4
|
4432
3
3
|
if( my $tdh= $args{TopDownHandlers}) { $self->{twig_tdh}=1; delete $args{TopDownHandlers}; } | ||||
702 | |||||||
703 |
3112
1
|
3411
2
|
if( my $acc_a= $args{AttAccessors}) { $self->att_accessors( @$acc_a); } | ||||
704 |
3112
4
|
3105
14
|
if( my $acc_e= $args{EltAccessors}) { $self->elt_accessors( isa( $acc_e, 'ARRAY') ? @$acc_e : $acc_e); } | ||||
705 |
3112
2
|
3304
9
|
if( my $acc_f= $args{FieldAccessors}) { $self->field_accessors( isa( $acc_f, 'ARRAY') ? @$acc_f : $acc_f); } | ||||
706 | |||||||
707 |
3112
24
|
3037
238
|
if( $args{UseTidy}) { $self->{use_tidy}= 1; } | ||||
708 |
3112
|
7240
|
$self->{tidy_options}= $args{TidyOptions} || {}; | ||||
709 | |||||||
710 |
3112
1
|
3397
1
|
if( $args{OutputHtmlDoctype}) { $self->{html_doctype}= 1; } | ||||
711 | |||||||
712 |
3112
|
6777
|
$self->set_quote( $args{Quote} || 'double'); | ||||
713 | |||||||
714 | # set handlers | ||||||
715 |
3112
|
3014
|
if( $self->{twig_roots}) | ||||
716 |
99
|
139
|
{ if( $self->{twig_default_print}) | ||||
717 |
62
|
84
|
{ if( $self->{twig_keep_encoding}) | ||||
718 |
36
|
255
|
{ $self->setHandlers( %twig_handlers_roots_print_original); } | ||||
719 | else | ||||||
720 |
26
|
119
|
{ $self->setHandlers( %twig_handlers_roots_print); } | ||||
721 | } | ||||||
722 | else | ||||||
723 |
37
|
156
|
{ $self->setHandlers( %twig_handlers_roots); } | ||||
724 | } | ||||||
725 | else | ||||||
726 |
3013
|
10392
|
{ $self->setHandlers( %twig_handlers); } | ||||
727 | |||||||
728 | # XML::Parser::Expat does not like these handler to be set. So in order to | ||||||
729 | # use the various sets of handlers on XML::Parser or XML::Parser::Expat | ||||||
730 | # objects when needed, these ones have to be set only once, here, at | ||||||
731 | # XML::Parser level | ||||||
732 |
3112
|
133706
|
$self->setHandlers( Init => \&_twig_init, Final => \&_twig_final); | ||||
733 | |||||||
734 |
3112
|
37252
|
$self->{twig_entity_list}= XML::Twig::Entity_list->new; | ||||
735 | |||||||
736 |
3112
|
3243
|
$self->{twig_id}= $ID; | ||||
737 |
3112
|
2838
|
$self->{twig_stored_spaces}=''; | ||||
738 | |||||||
739 |
3112
|
2440
|
$self->{twig_autoflush}= 1; # auto flush by default | ||||
740 | |||||||
741 |
3112
|
2482
|
$self->{twig}= $self; | ||||
742 |
3112
3090
|
3914
5164
|
if( $weakrefs) { weaken( $self->{twig}); } | ||||
743 | |||||||
744 |
3112
|
6152
|
return $self; | ||||
745 | } | ||||||
746 | |||||||
747 | sub parse | ||||||
748 | { | ||||||
749 |
3282
|
1
|
845397
|
my $t= shift; | |||
750 | # if called as a class method, calls nparse, which creates the twig then parses it | ||||||
751 |
3282
179
|
9080
387
|
if( !ref( $t) || !isa( $t, 'XML::Twig')) { return $t->nparse( @_); } | ||||
752 | |||||||
753 | # requires 5.006 at least (or the ${^UNICODE} causes a problem) # > perl 5.5 | ||||||
754 | # trap underlying bug in IO::Handle (see RT #17500) # > perl 5.5 | ||||||
755 | # croak if perl 5.8+, -CD (or PERL_UNICODE set to D) and parsing a pipe # > perl 5.5 | ||||||
756 |
3103
|
8790
|
if( $perl_version>=5.008 && ${^UNICODE} && (${^UNICODE} & 24) && isa( $_[0], 'GLOB') && -p $_[0] ) # > perl 5.5 | ||||
757 |
1
|
162
|
{ croak "cannot parse the output of a pipe when perl is set to use the UTF8 perlIO layer\n" # > perl 5.5 | ||||
758 | . "set the environment variable PERL_UNICODE or use the -C option (see perldoc perlrun)\n" # > perl 5.5 | ||||||
759 | . "not to include 'D'"; # > perl 5.5 | ||||||
760 | } # > perl 5.5 | ||||||
761 |
3102
3102
|
2600
7305
|
$t= eval { $t->SUPER::parse( @_); }; | ||||
762 | |||||||
763 |
3102
|
11150070
|
if( !$t | ||||
764 | && $@=~m{(syntax error at line 1, column 0, byte 0|not well-formed \(invalid token\) at line 1, column 1, byte 1)} | ||||||
765 | && -f $_[0] | ||||||
766 | ) | ||||||
767 |
1
|
84
|
{ croak "you seem to have used the parse method on a filename ($_[0]), you probably want parsefile instead"; } | ||||
768 |
3101
|
3715
|
return _checked_parse_result( $t, $@); | ||||
769 | } | ||||||
770 | |||||||
771 | sub parsefile | ||||||
772 |
82
|
1
|
154
|
{ my $t= shift; | |||
773 |
82
1
|
716
3
|
if( -f $_[0] && ! -s $_[0]) { return _checked_parse_result( undef, "empty file '$_[0]'"); } | ||||
774 |
81
81
|
93
400
|
$t= eval { $t->SUPER::parsefile( @_); }; | ||||
775 |
81
|
1599
|
return _checked_parse_result( $t, $@); | ||||
776 | } | ||||||
777 | |||||||
778 | sub _checked_parse_result | ||||||
779 |
3183
|
2748
|
{ my( $t, $returned)= @_; | ||||
780 |
3183
|
3735
|
if( !$t) | ||||
781 |
28
|
165
|
{ if( isa( $returned, 'XML::Twig') && $returned->{twig_finish_now}) | ||||
782 |
6
|
4
|
{ $t= $returned; | ||||
783 |
6
|
5
|
delete $t->{twig_finish_now}; | ||||
784 |
6
|
8
|
return $t->_twig_final; | ||||
785 | } | ||||||
786 | else | ||||||
787 |
22
|
53
|
{ _croak( $returned, 0); } | ||||
788 | } | ||||||
789 | |||||||
790 |
3155
|
2184
|
$active_twig= $t; | ||||
791 |
3155
|
4095
|
return $t; | ||||
792 | } | ||||||
793 | |||||||
794 |
2
|
1
|
7
|
sub active_twig { return $active_twig; } | |||
795 | |||||||
796 | sub finish_now | ||||||
797 |
6
|
1
|
9
|
{ my $t= shift; | |||
798 |
6
|
6
|
$t->{twig_finish_now}=1; | ||||
799 |
6
|
26
|
die $t; | ||||
800 | } | ||||||
801 | |||||||
802 | |||||||
803 |
3
|
1
|
7
|
sub parsefile_inplace { shift->_parse_inplace( parsefile => @_); } | |||
804 |
6
|
1
|
17
|
sub parsefile_html_inplace { shift->_parse_inplace( parsefile_html => @_); } | |||
805 | |||||||
806 | sub _parse_inplace | ||||||
807 |
9
|
19
|
{ my( $t, $method, $file, $suffix)= @_; | ||||
808 |
9
|
14
|
_use( 'File::Temp') || croak "need File::Temp to use inplace methods\n"; | ||||
809 |
9
|
13
|
_use( 'File::Basename'); | ||||
810 | |||||||
811 | |||||||
812 |
9
|
433
|
my $tmpdir= dirname( $file); | ||||
813 |
9
|
41
|
my( $tmpfh, $tmpfile)= File::Temp::tempfile( DIR => $tmpdir); | ||||
814 |
9
|
2032
|
my $original_fh= select $tmpfh; | ||||
815 | |||||||
816 |
9
|
47
|
unless( $t->{twig_keep_encoding} || $perl_version < 5.006) | ||||
817 |
9
|
104241
|
{ if( grep /useperlio=define/, `$^X -V`) # we can only use binmode :utf8 if perl was compiled with useperlio | ||||
818 |
9
|
66
|
{ binmode( $tmpfh, ":utf8" ); } | ||||
819 | } | ||||||
820 | |||||||
821 |
9
|
160
|
$t->$method( $file); | ||||
822 | |||||||
823 |
9
|
28
|
select $original_fh; | ||||
824 |
9
|
3139
|
close $tmpfh; | ||||
825 |
9
|
69
|
my $mode= (stat( $file))[2] & oct(7777); | ||||
826 |
9
|
79
|
chmod $mode, $tmpfile or croak "cannot change temp file mode to $mode: $!"; | ||||
827 | |||||||
828 |
9
|
18
|
if( $suffix) | ||||
829 |
6
|
10
|
{ my $backup; | ||||
830 |
6
3
|
38
29
|
if( $suffix=~ m{\*}) { ($backup = $suffix) =~ s/\*/$file/g; } | ||||
831 |
3
|
12
|
else { $backup= $file . $suffix; } | ||||
832 | |||||||
833 |
6
|
164
|
rename( $file, $backup) or croak "cannot backup initial file ($file) to $backup: $!"; | ||||
834 | } | ||||||
835 |
9
|
339
|
rename( $tmpfile, $file) or croak "cannot rename temp file ($tmpfile) to initial file ($file): $!"; | ||||
836 | |||||||
837 |
9
|
62
|
return $t; | ||||
838 | } | ||||||
839 | |||||||
840 | |||||||
841 | sub parseurl | ||||||
842 |
11
|
1
|
8591
|
{ my $t= shift; | |||
843 |
11
|
27
|
$t->_parseurl( 0, @_); | ||||
844 | } | ||||||
845 | |||||||
846 | sub safe_parseurl | ||||||
847 |
9
|
1
|
17
|
{ my $t= shift; | |||
848 |
9
|
34
|
$t->_parseurl( 1, @_); | ||||
849 | } | ||||||
850 | |||||||
851 | sub safe_parsefile_html | ||||||
852 |
2
|
1
|
2
|
{ my $t= shift; | |||
853 |
2
2
|
3
4
|
eval { $t->parsefile_html( @_); }; | ||||
854 |
2
|
8
|
return $@ ? $t->_reset_twig_after_error : $t; | ||||
855 | } | ||||||
856 | |||||||
857 | sub safe_parseurl_html | ||||||
858 |
2
|
1
|
5
|
{ my $t= shift; | |||
859 |
2
|
3
|
_use( 'LWP::Simple') or croak "missing LWP::Simple"; | ||||
860 |
2
2
|
4
5
|
eval { $t->parse_html( LWP::Simple::get( shift()), @_); } ; | ||||
861 |
2
|
10
|
return $@ ? $t->_reset_twig_after_error : $t; | ||||
862 | } | ||||||
863 | |||||||
864 | sub parseurl_html | ||||||
865 |
1
|
1
|
2
|
{ my $t= shift; | |||
866 |
1
|
2
|
_use( 'LWP::Simple') or croak "missing LWP::Simple"; | ||||
867 |
1
|
4
|
$t->parse_html( LWP::Simple::get( shift()), @_); | ||||
868 | } | ||||||
869 | |||||||
870 | |||||||
871 | # uses eval to catch the parser's death | ||||||
872 | sub safe_parse_html | ||||||
873 |
8
|
1
|
23
|
{ my $t= shift; | |||
874 |
8
8
|
10
35
|
eval { $t->parse_html( @_); } ; | ||||
875 |
8
|
72
|
return $@ ? $t->_reset_twig_after_error : $t; | ||||
876 | } | ||||||
877 | |||||||
878 | sub parsefile_html | ||||||
879 |
10
|
1
|
37
|
{ my $t= shift; | |||
880 |
10
|
26
|
my $file= shift; | ||||
881 |
10
|
41
|
my $indent= $t->{ErrorContext} ? 1 : 0; | ||||
882 |
10
|
123
|
$t->set_empty_tag_style( 'html'); | ||||
883 |
10
|
36
|
my $html2xml= $t->{use_tidy} ? \&_tidy_html : \&_html2xml; | ||||
884 |
10
|
54
|
my $options= $t->{use_tidy} ? $t->{tidy_options} || {} : { indent => $indent, html_doctype => $t->{html_doctype} }; | ||||
885 |
10
|
39
|
$t->parse( $html2xml->( _slurp( $file), $options), @_); | ||||
886 |
10
|
23
|
return $t; | ||||
887 | } | ||||||
888 | |||||||
889 | sub parse_html | ||||||
890 |
56
|
1
|
65661
|
{ my $t= shift; | |||
891 |
56
|
164
|
my $options= ref $_[0] && ref $_[0] eq 'HASH' ? shift() : {}; | ||||
892 |
56
|
102
|
my $use_tidy= exists $options->{use_tidy} ? $options->{use_tidy} : $t->{use_tidy}; | ||||
893 |
56
|
243
|
my $content= shift; | ||||
894 |
56
|
87
|
my $indent= $t->{ErrorContext} ? 1 : 0; | ||||
895 |
56
|
90
|
$t->set_empty_tag_style( 'html'); | ||||
896 |
56
|
87
|
my $html2xml= $use_tidy ? \&_tidy_html : \&_html2xml; | ||||
897 |
56
|
139
|
my $conv_options= $use_tidy ? $t->{tidy_options} || {} : { indent => $indent, html_doctype => $t->{html_doctype} }; | ||||
898 |
56
|
4408
|
$t->parse( $html2xml->( isa( $content, 'GLOB') ? _slurp_fh( $content) : $content, $conv_options), @_); | ||||
899 |
55
|
169
|
return $t; | ||||
900 | } | ||||||
901 | |||||||
902 | sub xparse | ||||||
903 |
2007
|
1
|
1446
|
{ my $t= shift; | |||
904 |
2007
|
1448
|
my $to_parse= $_[0]; | ||||
905 |
2007
2
|
9424
4
|
if( isa( $to_parse, 'GLOB')) { $t->parse( @_); } | ||||
906 |
1987
|
4668
|
elsif( $to_parse=~ m{^\s*<}) { $to_parse=~ m{<html}i ? $t->_parse_as_xml_or_html( @_) | ||||
907 | : $t->parse( @_); | ||||||
908 | } | ||||||
909 |
2
|
3
|
elsif( $to_parse=~ m{^\w+://.*\.html?$}) { _use( 'LWP::Simple') or croak "missing LWP::Simple"; | ||||
910 |
1
|
3
|
$t->_parse_as_xml_or_html( LWP::Simple::get( shift()), @_); | ||||
911 | } | ||||||
912 |
5
|
7
|
elsif( $to_parse=~ m{^\w+://}) { _use( 'LWP::Simple') or croak "missing LWP::Simple"; | ||||
913 |
4
|
10
|
my $doc= LWP::Simple::get( shift); | ||||
914 |
4
1
|
492823
6
|
if( ! defined $doc) { $doc=''; } | ||||
915 |
4
|
22
|
my $xml_parse_ok= $t->safe_parse( $doc, @_); | ||||
916 |
4
|
6
|
if( $xml_parse_ok) | ||||
917 |
1
|
3
|
{ return $xml_parse_ok; } | ||||
918 | else | ||||||
919 |
3
|
4
|
{ my $diag= $@; | ||||
920 |
3
|
10
|
if( $doc=~ m{<html}i) | ||||
921 |
2
|
3
|
{ $t->parse_html( $doc, @_); } | ||||
922 | else | ||||||
923 |
1
|
94
|
{ croak $diag; } | ||||
924 | } | ||||||
925 | } | ||||||
926 |
2
|
3
|
elsif( $to_parse=~ m{\.html?$}) { my $content= _slurp( shift); | ||||
927 |
2
|
4
|
$t->_parse_as_xml_or_html( $content, @_); | ||||
928 | } | ||||||
929 |
9
|
29
|
else { $t->parsefile( @_); } | ||||
930 | } | ||||||
931 | |||||||
932 | sub _parse_as_xml_or_html | ||||||
933 |
11
|
113823
|
{ my $t= shift; | ||||
934 |
11
|
20
|
if( _is_well_formed_xml( $_[0])) | ||||
935 |
3
|
5
|
{ $t->parse( @_) } | ||||
936 | else | ||||||
937 |
8
|
18
|
{ my $html2xml= $t->{use_tidy} ? \&_tidy_html : \&_html2xml; | ||||
938 |
8
|
24
|
my $options= $t->{use_tidy} ? $t->{tidy_options} || {} : { indent => 0, html_doctype => $t->{html_doctype} }; | ||||
939 |
8
|
16
|
my $html= $html2xml->( $_[0], $options, @_); | ||||
940 |
8
|
12
|
if( _is_well_formed_xml( $html)) | ||||
941 |
8
|
17
|
{ $t->parse( $html); } | ||||
942 | else | ||||||
943 |
0
|
0
|
{ croak $@; } # can't really test this because HTML::Parser or HTML::Tidy may change how they deal with bas HTML between versions | ||||
944 | } | ||||||
945 | } | ||||||
946 | |||||||
947 | { my $parser; | ||||||
948 | sub _is_well_formed_xml | ||||||
949 |
19
|
43
|
{ $parser ||= XML::Parser->new; | ||||
950 |
19
19
|
114
43
|
eval { $parser->parse( $_[0]); }; | ||||
951 |
19
|
2776
|
return $@ ? 0 : 1; | ||||
952 | } | ||||||
953 | } | ||||||
954 | |||||||
955 | sub nparse | ||||||
956 |
2008
|
1
|
443408
|
{ my $class= shift; | |||
957 |
2008
|
1525
|
my $to_parse= pop; | ||||
958 |
2008
|
2881
|
$class->new( @_)->xparse( $to_parse); | ||||
959 | } | ||||||
960 | |||||||
961 |
1
|
1
|
2
|
sub nparse_pp { shift()->nparse( pretty_print => 'indented', @_); } | |||
962 |
4
|
1
|
62
|
sub nparse_e { shift()->nparse( error_context => 1, @_); } | |||
963 |
1
|
1
|
2
|
sub nparse_ppe { shift()->nparse( pretty_print => 'indented', error_context => 1, @_); } | |||
964 | |||||||
965 | |||||||
966 | sub _html2xml | ||||||
967 |
49
|
233
|
{ my( $html, $options)= @_; | ||||
968 |
49
|
71
|
_use( 'HTML::TreeBuilder', '3.13') or croak "cannot parse HTML: missing HTML::TreeBuilder v >= 3.13\n"; | ||||
969 |
48
|
199
|
my $tree= HTML::TreeBuilder->new; | ||||
970 |
48
|
7106
|
$tree->ignore_ignorable_whitespace( 0); | ||||
971 |
48
|
289
|
$tree->ignore_unknown( 0); | ||||
972 |
48
|
215
|
$tree->no_space_compacting( 1); | ||||
973 |
48
|
209
|
$tree->store_comments( 1); | ||||
974 |
48
|
200
|
$tree->store_pis(1); | ||||
975 |
48
|
880
|
$tree->parse( $html); | ||||
976 |
48
|
1675799
|
$tree->eof; | ||||
977 | |||||||
978 |
48
|
809
|
my $xml=''; | ||||
979 |
48
|
126
|
if( $options->{html_doctype} && exists $tree->{_decl} ) | ||||
980 |
1
|
2
|
{ my $decl= $tree->{_decl}->as_XML; | ||||
981 | |||||||
982 | # first try to fix declarations that are missing the SYSTEM part | ||||||
983 |
1
1
|
123
5
|
$decl =~ s{^\s*<!DOCTYPE \s+ ((?i)html) \s+ PUBLIC \s+ "([^"]*)" \s* >} | ||||
984 |
1
|
4
|
{ my $system= $HTML_DECL{$2} || $HTML_DECL{$DEFAULT_HTML_TYPE}; | ||||
985 | qq{<!DOCTYPE $1 PUBLIC "$2" "$system">} | ||||||
986 | |||||||
987 | }xe; | ||||||
988 | |||||||
989 | # then check that the declaration looks OK (so it parses), if not remove it, | ||||||
990 | # better to parse without the declaration than to die stupidly | ||||||
991 |
1
|
7
|
if( $decl =~ m{<!DOCTYPE \s+ (?i:HTML) (\s+ PUBLIC \s+ "[^"]*" \s+ (SYSTEM \s+)? "[^"]*")? \s*>}x # PUBLIC then SYSTEM | ||||
992 | || $decl =~ m{<!DOCTYPE \s+ (?i:HTML) \s+ SYSTEM \s+ "[^"]*" \s*>}x # just SYSTEM | ||||||
993 | ) | ||||||
994 |
1
|
1
|
{ $xml= $decl; } | ||||
995 | } | ||||||
996 | |||||||
997 |
48
|
158
|
$xml.= _as_XML( $tree); | ||||
998 | |||||||
999 | |||||||
1000 |
48
|
127
|
_fix_xml( $tree, \$xml); | ||||
1001 | |||||||
1002 |
48
1
|
89
9
|
if( $options->{indent}) { _indent_xhtml( \$xml); } | ||||
1003 |
48
|
124
|
$tree->delete; | ||||
1004 |
48
|
99242
|
$xml=~ s{\s+$}{}s; # trim end | ||||
1005 |
48
|
980
|
return $xml; | ||||
1006 | } | ||||||
1007 | |||||||
1008 | sub _tidy_html | ||||||
1009 |
25
|
90
|
{ my( $html, $options)= @_; | ||||
1010 |
25
|
42
|
_use( 'HTML::Tidy') or croak "cannot cleanup HTML using HTML::Tidy (required by the use_tidy option): $@\n"; ; | ||||
1011 |
25
|
258
|
my $TIDY_DEFAULTS= { output_xhtml => 1, # duh! | ||||
1012 | tidy_mark => 0, # do not add the "generated by tidy" comment | ||||||
1013 | numeric_entities => 1, | ||||||
1014 | char_encoding => 'utf8', | ||||||
1015 | bare => 1, | ||||||
1016 | clean => 1, | ||||||
1017 | doctype => 'transitional', | ||||||
1018 | fix_backslash => 1, | ||||||
1019 | merge_divs => 0, | ||||||
1020 | merge_spans => 0, | ||||||
1021 | sort_attributes => 'alpha', | ||||||
1022 | indent => 0, | ||||||
1023 | wrap => 0, | ||||||
1024 | break_before_br => 0, | ||||||
1025 | }; | ||||||
1026 |
25
|
53
|
$options ||= {}; | ||||
1027 |
25
|
158
|
my $tidy_options= { %$TIDY_DEFAULTS, %$options}; | ||||
1028 |
25
|
139
|
my $tidy = HTML::Tidy->new( $tidy_options); | ||||
1029 |
25
|
2754
|
$tidy->ignore( type => 1, type => 2 ); # 1 is TIDY_WARNING, 2 is TIDY_ERROR, not clean | ||||
1030 |
25
|
406
|
my $xml= $tidy->clean( $html ); | ||||
1031 |
25
|
124838
|
return $xml; | ||||
1032 | } | ||||||
1033 | |||||||
1034 | |||||||
1035 | { my %xml_parser_encoding; | ||||||
1036 | sub _fix_xml | ||||||
1037 |
49
|
718
|
{ my( $tree, $xml)= @_; # $xml is a ref to the xml string | ||||
1038 | |||||||
1039 |
49
|
46
|
my $max_tries=5; | ||||
1040 |
49
|
36
|
my $add_decl; | ||||
1041 | |||||||
1042 |
49
|
69
|
while( ! _check_xml( $xml) && $max_tries--) | ||||
1043 | { | ||||||
1044 | # a couple of fixes for weird HTML::TreeBuilder errors | ||||||
1045 |
3
|
27
|
if( $@=~ m{^\s*xml (or text )?declaration not at start of (external )?entity}i) | ||||
1046 |
1
|
5
|
{ $$xml=~ s{<\?xml.*?\?>}{}g; | ||||
1047 | #warn " fixed xml declaration in the wrong place\n"; | ||||||
1048 | } | ||||||
1049 | elsif( $@=~ m{undefined entity}) | ||||||
1050 |
0
|
0
|
{ $$xml=~ s{&(amp;)?Amp;}{&}g if $HTML::TreeBuilder::VERSION < 4.00; | ||||
1051 |
0
0
|
0
0
|
if( _use( 'HTML::Entities::Numbered')) { $$xml=name2hex_xml( $$xml); } | ||||
1052 |
0
0
0
0
|
0
0
0
0
|
$$xml=~ s{&(\w+);}{ my $ent= $1; if( $ent !~ m{^(amp|lt|gt|apos|quote)$}) { "&$ent;" } }eg; | ||||
1053 | } | ||||||
1054 | elsif( $@=~ m{&Amp; used in html}) | ||||||
1055 | # if $Amp; is used instead of & then HTML::TreeBuilder's as_xml is tripped (old version) | ||||||
1056 |
0
|
0
|
{ $$xml=~ s{&(amp;)?Amp;}{&}g if $HTML::TreeBuilder::VERSION < 4.00; | ||||
1057 | } | ||||||
1058 | elsif( $@=~ m{^\s*not well-formed \(invalid token\)}) | ||||||
1059 |
2
|
5
|
{ if( $HTML::TreeBuilder::VERSION < 4.00) | ||||
1060 |
1
|
5
|
{ $$xml=~ s{&(amp;)?Amp;}{&}g; | ||||
1061 |
1
|
11
|
$$xml=~ s{(<[^>]* )(\d+=)"}{$1a$2"}g; # <table 1> comes out as <table 1="1">, "fix the attribute | ||||
1062 | } | ||||||
1063 |
2
|
4
|
my $q= '<img "=""" '; # extracted so vim doesn't get confused | ||||
1064 |
2
2
|
3
6
|
if( _use( 'HTML::Entities::Numbered')) { $$xml=name2hex_xml( $$xml); } | ||||
1065 |
2
|
32
|
if( $$xml=~ m{$q}) | ||||
1066 |
0
|
0
|
{ $$xml=~ s{$q}{<img }g; # happens with <img src="foo.png"" ... | ||||
1067 | } | ||||||
1068 | else | ||||||
1069 |
2
|
4
|
{ my $encoding= _encoding_from_meta( $tree); | ||||
1070 |
2
2
|
6
4
|
unless( keys %xml_parser_encoding) { %xml_parser_encoding= _xml_parser_encodings(); } | ||||
1071 | |||||||
1072 |
2
|
6
|
if( ! $add_decl) | ||||
1073 |
2
|
9
|
{ if( $xml_parser_encoding{$encoding}) | ||||
1074 |
1
|
1
|
{ $add_decl=1; } | ||||
1075 | elsif( $encoding eq 'euc-jp' && $xml_parser_encoding{'x-euc-jp-jisx0221'}) | ||||||
1076 |
0
0
|
0
0
|
{ $encoding="x-euc-jp-jisx0221"; $add_decl=1;} | ||||
1077 | elsif( $encoding eq 'shift-jis' && $xml_parser_encoding{'x-sjis-jisx0221'}) | ||||||
1078 |
0
0
|
0
0
|
{ $encoding="x-sjis-jisx0221"; $add_decl=1;} | ||||
1079 | |||||||
1080 |
2
|
3
|
if( $add_decl) | ||||
1081 |
1
|
6
|
{ $$xml=~ s{^(<\?xml.*?\?>)?}{<?xml version="1.0" encoding="$encoding"?>}s; | ||||
1082 | #warn " added decl (encoding $encoding)\n"; | ||||||
1083 | } | ||||||
1084 | else | ||||||
1085 |
1
|
2
|
{ $$xml=~ s{^(<\?xml.*?\?>)?}{}s; | ||||
1086 | #warn " converting to utf8 from $encoding\n"; | ||||||
1087 |
1
|
2
|
$$xml= _to_utf8( $encoding, $$xml); | ||||
1088 | } | ||||||
1089 | } | ||||||
1090 | else | ||||||
1091 |
0
|
0
|
{ $$xml=~ s{^(<\?xml.*?\?>)?}{}s; | ||||
1092 | #warn " converting to utf8 from $encoding\n"; | ||||||
1093 |
0
|
0
|
$$xml= _to_utf8( $encoding, $$xml); | ||||
1094 | } | ||||||
1095 | } | ||||||
1096 | } | ||||||
1097 | } | ||||||
1098 | |||||||
1099 | # some versions of HTML::TreeBuilder escape CDATA sections | ||||||
1100 |
49
0
|
243
0
|
$$xml=~ s{(<!\[CDATA\[.*?\]\]>)}{_unescape_cdata( $1)}eg; | ||||
1101 | |||||||
1102 | } | ||||||
1103 | |||||||
1104 | sub _xml_parser_encodings | ||||||
1105 |
2
|
4
|
{ my @encodings=( 'iso-8859-1'); # this one is included by default, there is no map for it in @INC | ||||
1106 |
2
|
2
|
foreach my $inc (@INC) | ||||
1107 |
18
42
|
557
1288
|
{ push @encodings, map { basename( $_, '.enc') } glob( File::Spec->catdir( $inc => XML => Parser => Encodings => '*.enc')); } | ||||
1108 |
2
44
|
5
53
|
return map { $_ => 1 } @encodings; | ||||
1109 | } | ||||||
1110 | } | ||||||
1111 | |||||||
1112 | |||||||
1113 | sub _unescape_cdata | ||||||
1114 |
1
|
220
|
{ my( $cdata)= @_; | ||||
1115 |
1
|
4
|
$cdata=~s{<}{<}g; | ||||
1116 |
1
|
2
|
$cdata=~s{>}{>}g; | ||||
1117 |
1
|
3
|
$cdata=~s{&}{&}g; | ||||
1118 |
1
|
2
|
return $cdata; | ||||
1119 | } | ||||||
1120 | |||||||
1121 | sub _as_XML { | ||||||
1122 | |||||||
1123 | # fork of HTML::Element::as_XML, which is a little too buggy and inconsistent between versions for my liking | ||||||
1124 |
48
|
51
|
my ($elt) = @_; | ||||
1125 |
48
|
42
|
my $xml= ''; | ||||
1126 |
48
|
84
|
my $empty_element_map = $elt->_empty_element_map; | ||||
1127 | |||||||
1128 |
48
|
103
|
my ( $tag, $node, $start ); # per-iteration scratch | ||||
1129 | $elt->traverse( | ||||||
1130 | sub { | ||||||
1131 |
36543
|
301506
|
( $node, $start ) = @_; | ||||
1132 |
36543
|
42774
|
if ( ref $node ) | ||||
1133 | { # it's an element | ||||||
1134 |
24447
|
18781
|
$tag = $node->{'_tag'}; | ||||
1135 |
24447
|
20021
|
if ($start) | ||||
1136 | { # on the way in | ||||||
1137 |
12237
43554
|
17250
64274
|
foreach my $att ( grep { ! m{^(_|/$)} } keys %$node ) | ||||
1138 | { # fix attribute names instead of dying | ||||||
1139 |
6034
|
4161
|
my $new_att= $att; | ||||
1140 |
6034
4
|
7708
4
|
if( $att=~ m{^\d}) { $new_att= "a$att"; } | ||||
1141 |
6034
|
6122
|
$new_att=~ s{[^\w\d:_-]}{}g; | ||||
1142 |
6034
|
5723
|
$new_att ||= 'a'; | ||||
1143 |
6034
6
|
9727
12
|
if( $new_att ne $att) { $node->{$new_att}= delete $node->{$att}; } | ||||
1144 | } | ||||||
1145 | |||||||
1146 |
12237
27
|
20239
99
|
if ( $empty_element_map->{$tag} and !@{ $node->{'_content'} || []} ) | ||||
1147 |
27
|
51
|
{ $xml.= $node->starttag_XML( undef, 1 ); } | ||||
1148 | else | ||||||
1149 |
12210
|
18161
|
{ $xml.= $node->starttag_XML(undef); } | ||||
1150 | } | ||||||
1151 | else | ||||||
1152 | { # on the way out | ||||||
1153 |
12210
0
|
18862
0
|
unless ( $empty_element_map->{$tag} and !@{ $node->{'_content'} || [] } ) | ||||
1154 |
12210
|
16487
|
{ $xml.= $node->endtag_XML(); | ||||
1155 | } # otherwise it will have been an <... /> tag. | ||||||
1156 | } | ||||||
1157 | } | ||||||
1158 | elsif( $node=~ /<!\[CDATA\[/) # the content includes CDATA | ||||||
1159 |
2
|
10
|
{ foreach my $chunk (split /(<!\[CDATA\[.*?\]\]>)/s, $node) # chunks are CDATA sections or normal text | ||||
1160 |
5
|
16
|
{ $xml.= $chunk =~ m{<!\[CDATA\[} ? $chunk : _xml_escape( $chunk); } | ||||
1161 | } | ||||||
1162 | else # it's just text | ||||||
1163 |
12094
|
10833
|
{ $xml .= _xml_escape($node); } | ||||
1164 |
36543
|
250776
|
1; # keep traversing | ||||
1165 | } | ||||||
1166 |
48
|
301
|
); | ||||
1167 |
48
|
1114
|
return $xml; | ||||
1168 | } | ||||||
1169 | |||||||
1170 | sub _xml_escape | ||||||
1171 |
12097
|
7956
|
{ my( $html)= @_; | ||||
1172 |
12097
|
6266
|
$html =~ s{&(?! # An ampersand that isn't followed by... | ||||
1173 | ( \#[0-9]+; | # A hash mark, digits and semicolon, or | ||||||
1174 | \#x[0-9a-fA-F]+; | # A hash mark, "x", hex digits and semicolon, or | ||||||
1175 | [\w]+; # A valid unicode entity name and semicolon | ||||||
1176 | ) | ||||||
1177 | ) | ||||||
1178 | } | ||||||
1179 | {&}gx if 0; # Needs to be escaped to amp | ||||||
1180 | |||||||
1181 |
12097
|
8550
|
$html=~ s{&}{&}g; | ||||
1182 | |||||||
1183 | # in old versions of HTML::TreeBuilder & can come out as &Amp; | ||||||
1184 |
12097
0
|
26576
0
|
if( $HTML::TreeBuilder::VERSION && $HTML::TreeBuilder::VERSION <= 3.23) { $html=~ s{&Amp;}{&}g; } | ||||
1185 | |||||||
1186 | # simple character escapes | ||||||
1187 |
12097
|
7379
|
$html =~ s/</</g; | ||||
1188 |
12097
|
7002
|
$html =~ s/>/>/g; | ||||
1189 |
12097
|
6512
|
$html =~ s/"/"/g; | ||||
1190 |
12097
|
8809
|
$html =~ s/'/'/g; | ||||
1191 | |||||||
1192 |
12097
|
14273
|
return $html; | ||||
1193 | } | ||||||
1194 | |||||||
1195 | |||||||
1196 | |||||||
1197 | |||||||
1198 | sub _check_xml | ||||||
1199 |
52
|
49
|
{ my( $xml)= @_; # $xml is a ref to the xml string | ||||
1200 |
52
52
|
49
193
|
my $ok= eval { XML::Parser->new->parse( $$xml); }; | ||||
1201 | #if( $ok) { warn " parse OK\n"; } | ||||||
1202 |
52
|
16080
|
return $ok; | ||||
1203 | } | ||||||
1204 | |||||||
1205 | sub _encoding_from_meta | ||||||
1206 |
2
|
2
|
{ my( $tree)= @_; | ||||
1207 |
2
|
3
|
my $enc="iso-8859-1"; | ||||
1208 |
2
|
9
|
my @meta= $tree->find( 'meta'); | ||||
1209 |
2
|
75
|
foreach my $meta (@meta) | ||||
1210 |
1
|
13
|
{ if( $meta->{'http-equiv'} && ($meta->{'http-equiv'} =~ m{^\s*content-type\s*}i) | ||||
1211 | && $meta->{content} && ($meta->{content} =~ m{^\s*text/html\s*;\s*charset\s*=\s*(\S*)\s*}i) | ||||||
1212 | ) | ||||||
1213 |
1
|
2
|
{ $enc= lc $1; | ||||
1214 | #warn " encoding from meta tag is '$enc'\n"; | ||||||
1215 |
1
|
1
|
last; | ||||
1216 | } | ||||||
1217 | } | ||||||
1218 |
2
|
3
|
return $enc; | ||||
1219 | } | ||||||
1220 | |||||||
1221 | { sub _to_utf8 | ||||||
1222 |
2
|
8
|
{ my( $encoding, $string)= @_; | ||||
1223 |
2
|
8
|
local $SIG{__DIE__}; | ||||
1224 |
2
|
5
|
if( _use( 'Encode')) | ||||
1225 |
2
|
10
|
{ Encode::from_to( $string, $encoding => 'utf8', 0x0400); } # 0x0400 is Encode::FB_XMLCREF | ||||
1226 | elsif( _use( 'Text::Iconv')) | ||||||
1227 |
0
0
|
0
0
|
{ my $converter = eval { Text::Iconv->new( $encoding => "utf8") }; | ||||
1228 |
0
0
|
0
0
|
if( $converter) { $string= $converter->convert( $string); } | ||||
1229 | } | ||||||
1230 | elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String')) | ||||||
1231 |
0
|
0
|
{ my $map= Unicode::Map8->new( $encoding); | ||||
1232 |
0
|
0
|
$string= $map->tou( $string)->utf8; | ||||
1233 | } | ||||||
1234 |
2
|
212
|
$string=~ s{[\x00-\x08\x0B\x0C\x0E-\x1F]}{}g; # get rid of control chars, portable in 5.6 | ||||
1235 |
2
|
9
|
return $string; | ||||
1236 | } | ||||||
1237 | } | ||||||
1238 | |||||||
1239 | |||||||
1240 | sub _indent_xhtml | ||||||
1241 |
2
|
30
|
{ my( $xhtml)= @_; # $xhtml is a ref | ||||
1242 |
2
78
|
7
132
|
my %block_tag= map { $_ => 1 } qw( html | ||||
1243 | head | ||||||
1244 | meta title link script base | ||||||
1245 | body | ||||||
1246 | h1 h2 h3 h4 h5 h6 | ||||||
1247 | p br address blockquote pre | ||||||
1248 | ol ul li dd dl dt | ||||||
1249 | table tr td th tbody tfoot thead col colgroup caption | ||||||
1250 | div frame frameset hr | ||||||
1251 | ); | ||||||
1252 | |||||||
1253 |
2
|
11
|
my $level=0; | ||||
1254 |
2
27
2
|
24
177
2
|
$$xhtml=~ s{( (?:<!(?:--.*?-->|[CDATA[.*?]]>)) # ignore comments and CDATA sections | ||||
1255 |
2
|
8
|
| <(\w+)((?:\s+\w+\s*=\s*(?:"[^"]*"|'[^']*'))*\s*/>) # empty tag | ||||
1256 | | <(\w+) # start tag | ||||||
1257 |
11
|
17
|
|</(\w+) # end tag | ||||
1258 |
11
|
26
|
) | ||||
1259 |
11
|
26
|
} | ||||
1260 |
11
|
59
|
{ if( $2 && $block_tag{$2}) { my $indent= " " x $level; | ||||
1261 | "\n$indent<$2$3"; | ||||||
1262 |
11
11
|
10
45
|
} | ||||
1263 |
3
|
12
|
elsif( $4 && $block_tag{$4}) { my $indent= " " x $level; | ||||
1264 | $level++ unless( $4=~ m{/>}); | ||||||
1265 | my $nl= $4 eq 'html' ? '' : "\n"; | ||||||
1266 | "$nl$indent<$4"; | ||||||
1267 | } | ||||||
1268 | elsif( $5 && $block_tag{$5}) { $level--; "</$5"; } | ||||||
1269 | else { $1; } | ||||||
1270 | }xesg; | ||||||
1271 | } | ||||||
1272 | |||||||
1273 | |||||||
1274 | sub add_stylesheet | ||||||
1275 |
2
|
1
|
4
|
{ my( $t, $type, $href)= @_; | |||
1276 |
2
4
|
2
7
|
my %text_type= map { $_ => 1 } qw( xsl css); | ||||
1277 |
2
|
3
|
my $ss= $t->{twig_elt_class}->new( $PI); | ||||
1278 |
2
|
3
|
if( $text_type{$type}) | ||||
1279 |
1
|
3
|
{ $ss->_set_pi( 'xml-stylesheet', qq{type="text/$type" href="$href"}); } | ||||
1280 | else | ||||||
1281 |
1
|
119
|
{ croak "unsupported style sheet type '$type'"; } | ||||
1282 | |||||||
1283 |
1
|
2
|
$t->_add_cpi_outside_of_root( leading_cpi => $ss); | ||||
1284 |
1
|
2
|
return $t; | ||||
1285 | } | ||||||
1286 | |||||||
1287 | { my %used; # module => 1 if require ok, 0 otherwise | ||||||
1288 | my %disallowed; # for testing, refuses to _use modules in this hash | ||||||
1289 | |||||||
1290 | sub _disallow_use ## no critic (Subroutines::ProhibitNestedSubs); | ||||||
1291 |
6
|
146
|
{ my( @modules)= @_; | ||||
1292 |
6
|
23
|
$disallowed{$_}= 1 foreach (@modules); | ||||
1293 | } | ||||||
1294 | |||||||
1295 | sub _allow_use ## no critic (Subroutines::ProhibitNestedSubs); | ||||||
1296 |
4
|
6
|
{ my( @modules)= @_; | ||||
1297 |
4
|
11
|
$disallowed{$_}= 0 foreach (@modules); | ||||
1298 | } | ||||||
1299 | |||||||
1300 | sub _use ## no critic (Subroutines::ProhibitNestedSubs); | ||||||
1301 |
3411
|
79823
|
{ my( $module, $version)= @_; | ||||
1302 |
3411
|
6813
|
$version ||= 0; | ||||
1303 |
3411
7
|
4215
325
|
if( $disallowed{$module}) { return 0; } | ||||
1304 |
3404
3145
|
4698
6116
|
if( $used{$module}) { return 1; } | ||||
1305 |
259
256
256
|
13819
1563705
2223
|
if( eval "require $module") { import $module; $used{$module}= 1; | ||||
1306 |
256
|
521
|
if( $version) | ||||
1307 | { | ||||||
1308 | ## no critic (TestingAndDebugging::ProhibitNoStrict); | ||||||
1309 |
187
187
187
|
681
157
1248673
|
no strict 'refs'; | ||||
1310 |
12
12
11
|
11
72
28
|
if( ${"${module}::VERSION"} >= $version ) { return 1; } | ||||
1311 |
1
|
14
|
else { return 0; } | ||||
1312 | } | ||||||
1313 | else | ||||||
1314 |
244
|
832
|
{ return 1; } | ||||
1315 | } | ||||||
1316 |
3
3
|
7
8
|
else { $used{$module}= 0; return 0; } | ||||
1317 | } | ||||||
1318 | } | ||||||
1319 | |||||||
1320 | # used to solve the [n] predicates while avoiding getting the entire list | ||||||
1321 | # needs a prototype to accept passing bare blocks | ||||||
1322 | sub _first_n(&$@) ## nocritic (Subroutines::ProhibitSubroutinePrototypes); | ||||||
1323 |
106
|
81
|
{ my $coderef= shift; | ||||
1324 |
106
|
66
|
my $n= shift; | ||||
1325 |
106
|
57
|
my $i=0; | ||||
1326 |
106
|
109
|
if( $n > 0) | ||||
1327 |
99
121
35
35
|
101
1518
24
457
|
{ foreach (@_) { if( &$coderef) { $i++; return $_ if( $i == $n); } } } | ||||
1328 | elsif( $n < 0) | ||||||
1329 |
6
13
11
11
|
6
105
8
62
|
{ foreach (reverse @_) { if( &$coderef) { $i--; return $_ if( $i == $n); } } } | ||||
1330 | else | ||||||
1331 |
1
|
85
|
{ croak "illegal position number 0"; } | ||||
1332 |
71
|
1085
|
return undef; | ||||
1333 | } | ||||||
1334 | |||||||
1335 | sub _slurp_uri | ||||||
1336 |
11
|
2344
|
{ my( $uri, $base)= @_; | ||||
1337 |
11
1
1
|
60
6
4
|
if( $uri=~ m{^\w+://}) { _use( 'LWP::Simple'); return LWP::Simple::get( $uri); } | ||||
1338 |
10
|
20
|
else { return _slurp( _based_filename( $uri, $base)); } | ||||
1339 | } | ||||||
1340 | |||||||
1341 | sub _based_filename | ||||||
1342 |
27
|
93
|
{ my( $filename, $base)= @_; | ||||
1343 | # cf. XML/Parser.pm's file_ext_ent_handler | ||||||
1344 |
27
|
67
|
if (defined($base) and not ($filename =~ m{^(?:[\\/]|\w+:)})) | ||||
1345 |
3
|
3
|
{ my $newpath = $base; | ||||
1346 |
3
|
9
|
$newpath =~ s{[^\\/:]*$}{$filename}; | ||||
1347 |
3
|
3
|
$filename = $newpath; | ||||
1348 | } | ||||||
1349 |
27
|
175
|
return $filename; | ||||
1350 | } | ||||||
1351 | |||||||
1352 | sub _slurp | ||||||
1353 |
23
|
59
|
{ my( $filename)= @_; | ||||
1354 |
23
|
25
|
my $to_slurp; | ||||
1355 |
23
|
1303
|
open( $to_slurp, "<$filename") or croak "cannot open '$filename': $!"; | ||||
1356 |
19
|
92
|
local $/= undef; | ||||
1357 |
19
|
223
|
my $content= <$to_slurp>; | ||||
1358 |
19
|
76
|
close $to_slurp; | ||||
1359 |
19
|
152
|
return $content; | ||||
1360 | } | ||||||
1361 | |||||||
1362 | sub _slurp_fh | ||||||
1363 |
2
|
3
|
{ my( $fh)= @_; | ||||
1364 |
2
|
6
|
local $/= undef; | ||||
1365 |
2
|
34
|
my $content= <$fh>; | ||||
1366 |
2
|
8
|
return $content; | ||||
1367 | } | ||||||
1368 | |||||||
1369 | # I should really add extra options to allow better configuration of the | ||||||
1370 | # LWP::UserAgent object | ||||||
1371 | # this method forks (except on VMS!) | ||||||
1372 | # - the child gets the data and copies it to the pipe, | ||||||
1373 | # - the parent reads the stream and sends it to XML::Parser | ||||||
1374 | # the data is cut it chunks the size of the XML::Parser::Expat buffer | ||||||
1375 | # the method returns the twig and the status | ||||||
1376 | sub _parseurl | ||||||
1377 |
20
|
38
|
{ my( $t, $safe, $url, $agent)= @_; | ||||
1378 |
20
|
46
|
_use( 'LWP') || croak "LWP not available, needed to use parseurl methods"; | ||||
1379 |
20
|
114
|
if( $^O ne 'VMS') | ||||
1380 |
20
|
251
|
{ pipe( README, WRITEME) or croak "cannot create connected pipes: $!"; | ||||
1381 |
20
|
8079
|
if( my $pid= fork) | ||||
1382 | { # parent code: parse the incoming file | ||||||
1383 |
15
|
270
|
close WRITEME; # no need to write | ||||
1384 |
15
|
387
|
my $result= $safe ? $t->safe_parse( \*README) : $t->parse( \*README); | ||||
1385 |
15
|
147
|
close README; | ||||
1386 |
15
|
94
|
return $@ ? 0 : $t; | ||||
1387 | } | ||||||
1388 | else | ||||||
1389 | { # child | ||||||
1390 |
5
|
230
|
close README; # no need to read | ||||
1391 |
5
|
223
|
local $|=1; | ||||
1392 |
5
|
409
|
$agent ||= LWP::UserAgent->new; | ||||
1393 |
5
|
2070
|
my $request = HTTP::Request->new( GET => $url); | ||||
1394 | # _pass_url_content is called with chunks of data the same size as | ||||||
1395 | # the XML::Parser buffer | ||||||
1396 | my $response = $agent->request( $request, | ||||||
1397 |
5
4
|
21626
287003
|
sub { _pass_url_content( \*WRITEME, @_); }, $BUFSIZE); | ||||
1398 |
5
|
17789
|
$response->is_success or croak "$url ", $response->message; | ||||
1399 |
4
|
79
|
close WRITEME; | ||||
1400 |
4
|
318
|
CORE::exit(); # CORE is there for mod_perl (which redefines exit) | ||||
1401 | } | ||||||
1402 | } | ||||||
1403 | else | ||||||
1404 | { # VMS branch (hard to test!) | ||||||
1405 |
0
|
0
|
local $|=1; | ||||
1406 |
0
|
0
|
$agent ||= LWP::UserAgent->new; | ||||
1407 |
0
|
0
|
my $request = HTTP::Request->new( GET => $url); | ||||
1408 |
0
|
0
|
my $response = $agent->request( $request); | ||||
1409 |
0
|
0
|
$response->is_success or croak "$url ", $response->message; | ||||
1410 |
0
|
0
|
my $result= $safe ? $t->safe_parse($response->content) : $t->parse($response->content); | ||||
1411 |
0
|
0
|
return $@ ? 0 : $t; | ||||
1412 | } | ||||||
1413 | |||||||
1414 | } | ||||||
1415 | |||||||
1416 | # get the (hopefully!) XML data from the URL and | ||||||
1417 | sub _pass_url_content | ||||||
1418 |
4
|
12
|
{ my( $fh, $data, $response, $protocol)= @_; | ||||
1419 |
4
4
|
4
35
|
print {$fh} $data; | ||||
1420 | } | ||||||
1421 | |||||||
1422 | sub add_options | ||||||
1423 |
1
1
|
1
|
23
3
|
{ my %args= map { $_, 1 } @_; | |||
1424 |
1
|
8
|
%args= _normalize_args( %args); | ||||
1425 |
1
1
|
3
2
|
foreach (keys %args) { $valid_option{$_}++; } | ||||
1426 | } | ||||||
1427 | |||||||
1428 |
27
|
60
|
sub _pretty_print_styles { return XML::Twig::Elt::_pretty_print_styles(); } | ||||
1429 | |||||||
1430 | sub _twig_store_internal_dtd | ||||||
1431 | { | ||||||
1432 | # warn " in _twig_store_internal_dtd...\n"; # DEBUG handler | ||||||
1433 |
161
|
134
|
my( $p, $string)= @_; | ||||
1434 |
161
|
101
|
my $t= $p->{twig}; | ||||
1435 |
161
3
|
189
7
|
if( $t->{twig_keep_encoding}) { $string= $p->original_string(); } | ||||
1436 |
161
|
147
|
$t->{twig_doctype}->{internal} .= $string; | ||||
1437 |
161
|
535
|
return; | ||||
1438 | } | ||||||
1439 | |||||||
1440 | sub _twig_stop_storing_internal_dtd | ||||||
1441 | { # warn " in _twig_stop_storing_internal_dtd...\n"; # DEBUG handler | ||||||
1442 |
110
|
101
|
my $p= shift; | ||||
1443 |
110
|
342
|
if( @saved_default_handler && defined $saved_default_handler[1]) | ||||
1444 |
110
|
224
|
{ $p->setHandlers( @saved_default_handler); } | ||||
1445 | else | ||||||
1446 | { | ||||||
1447 |
0
|
0
|
$p->setHandlers( Default => undef); | ||||
1448 | } | ||||||
1449 |
110
|
1798
|
$p->{twig}->{twig_doctype}->{internal}=~ s{^\s*\[}{}; | ||||
1450 |
110
|
178
|
$p->{twig}->{twig_doctype}->{internal}=~ s{\]\s*$}{}; | ||||
1451 |
110
|
365
|
return; | ||||
1452 | } | ||||||
1453 | |||||||
1454 | sub _twig_doctype_fin_print | ||||||
1455 | { # warn " in _twig_doctype_fin_print...\n"; # DEBUG handler | ||||||
1456 |
16
|
20
|
my( $p)= shift; | ||||
1457 |
16
6
|
45
7
|
if( $p->{twig}->{twig_doctype}->{has_internal} && !$expat_1_95_2) { print ' ]>'; } | ||||
1458 |
16
|
47
|
return; | ||||
1459 | } | ||||||
1460 | |||||||
1461 | |||||||
1462 | sub _normalize_args | ||||||
1463 |
8311
|
5193
|
{ my %normalized_args; | ||||
1464 |
8311
|
11813
|
while( my $key= shift ) | ||||
1465 |
5514
7679
|
6934
11029
|
{ $key= join '', map { ucfirst } split /_/, $key; | ||||
1466 | #$key= "Twig".$key unless( substr( $key, 0, 4) eq 'Twig'); | ||||||
1467 |
5514
|
10348
|
$normalized_args{$key}= shift ; | ||||
1468 | } | ||||||
1469 |
8311
|
14873
|
return %normalized_args; | ||||
1470 | } | ||||||
1471 | |||||||
1472 |
62
62
|
96
272
|
sub _is_fh { return unless $_[0]; return $_[0] if( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')); } | ||||
1473 | |||||||
1474 | sub _set_handler | ||||||
1475 |
757
|
766
|
{ my( $handlers, $whole_path, $handler)= @_; | ||||
1476 | |||||||
1477 |
757
|
6131
|
my $H_SPECIAL = qr{($ALL|$DEFAULT|$COMMENT|$TEXT)}; | ||||
1478 |
757
|
3366
|
my $H_PI = qr{(\?|$PI)\s*(([^\s]*)\s*)}; | ||||
1479 |
757
|
1068
|
my $H_LEVEL = qr{level \s* \( \s* ([0-9]+) \s* \)}x; | ||||
1480 |
757
|
1083
|
my $H_REGEXP = qr{\(\?([\^xism]*)(-[\^xism]*)?:(.*)\)}x; | ||||
1481 |
757
|
17966
|
my $H_XPATH = qr{(/?/?$REG_TAG_PART? \s* ($REG_PREDICATE\s*)?)+}x; | ||||
1482 | |||||||
1483 |
757
|
606
|
my $prev_handler; | ||||
1484 | |||||||
1485 |
757
|
609
|
my $cpath= $whole_path; | ||||
1486 | #warn "\$cpath: '$cpath\n"; | ||||||
1487 |
757
|
31315
|
while( $cpath && $cpath=~ s{^\s*($H_SPECIAL|$H_PI|$H_LEVEL|$H_REGEXP|$H_XPATH)\s*($|\|)}{}) | ||||
1488 |
770
|
1647
|
{ my $path= $1; | ||||
1489 | #warn "\$cpath: '$cpath' - $path: '$path'\n"; | ||||||
1490 |
770
|
3471
|
$prev_handler ||= $handlers->{handlers}->{string}->{$path} || undef; # $prev_handler gets the first found handler | ||||
1491 | |||||||
1492 |
770
|
1017
|
_set_special_handler ( $handlers, $path, $handler, $prev_handler) | ||||
1493 | || _set_pi_handler ( $handlers, $path, $handler, $prev_handler) | ||||||
1494 | || _set_level_handler ( $handlers, $path, $handler, $prev_handler) | ||||||
1495 | || _set_regexp_handler ( $handlers, $path, $handler, $prev_handler) | ||||||
1496 | || _set_xpath_handler ( $handlers, $path, $handler, $prev_handler) | ||||||
1497 | || croak "unrecognized expression in handler: '$whole_path'"; | ||||||
1498 | |||||||
1499 | # this both takes care of the simple (gi) handlers and store | ||||||
1500 | # the handler code reference for other handlers | ||||||
1501 |
764
|
2512
|
$handlers->{handlers}->{string}->{$path}= $handler; | ||||
1502 | } | ||||||
1503 | |||||||
1504 |
751
2
|
870
201
|
if( $cpath) { croak "unrecognized expression in handler: '$whole_path'"; } | ||||
1505 | |||||||
1506 |
749
|
2694
|
return $prev_handler; | ||||
1507 | } | ||||||
1508 | |||||||
1509 | |||||||
1510 | sub _set_special_handler | ||||||
1511 |
770
|
828
|
{ my( $handlers, $path, $handler, $prev_handler)= @_; | ||||
1512 |
770
|
2708
|
if( $path =~ m{^\s*($ALL|$DEFAULT|$COMMENT|$TEXT)\s*$}io ) | ||||
1513 |
47
|
92
|
{ $handlers->{handlers}->{$1}= $handler; | ||||
1514 |
47
|
138
|
return 1; | ||||
1515 | } | ||||||
1516 | else | ||||||
1517 |
723
|
1816
|
{ return 0; } | ||||
1518 | } | ||||||
1519 | |||||||
1520 | sub _set_xpath_handler | ||||||
1521 |
665
|
620
|
{ my( $handlers, $path, $handler, $prev_handler)= @_; | ||||
1522 |
665
|
733
|
if( my $handler_data= _parse_xpath_handler( $path, $handler)) | ||||
1523 |
659
|
935
|
{ _add_handler( $handlers, $handler_data, $path, $prev_handler); | ||||
1524 |
659
|
2149
|
return 1; | ||||
1525 | } | ||||||
1526 | else | ||||||
1527 |
4
|
425
|
{ return 0; } | ||||
1528 | } | ||||||
1529 | |||||||
1530 | sub _add_handler | ||||||
1531 |
674
|
794
|
{ my( $handlers, $handler_data, $path, $prev_handler)= @_; | ||||
1532 | |||||||
1533 |
674
|
657
|
my $tag= $handler_data->{tag}; | ||||
1534 |
674
156
|
1189
281
|
my @handlers= $handlers->{xpath_handler}->{$tag} ? @{$handlers->{xpath_handler}->{$tag}} : (); | ||||
1535 | |||||||
1536 |
674
64
176
|
813
84
311
|
if( $prev_handler) { @handlers= grep { $_->{path} ne $path } @handlers; } | ||||
1537 | |||||||
1538 |
674
|
1201
|
push @handlers, $handler_data if( $handler_data->{handler}); | ||||
1539 | |||||||
1540 |
674
|
962
|
if( @handlers > 1) | ||||
1541 |
138
686
|
283
6797
|
{ @handlers= sort { (($b->{score}->{type} || 0) <=> ($a->{score}->{type} || 0)) | ||||
1542 | || (($b->{score}->{anchored} || 0) <=> ($a->{score}->{anchored} || 0)) | ||||||
1543 | || (($b->{score}->{steps} || 0) <=> ($a->{score}->{steps} || 0)) | ||||||
1544 | || (($b->{score}->{predicates} || 0) <=> ($a->{score}->{predicates} || 0)) | ||||||
1545 | || (($b->{score}->{tests} || 0) <=> ($a->{score}->{tests} || 0)) | ||||||
1546 | || ($a->{path} cmp $b->{path}) | ||||||
1547 | } @handlers; | ||||||
1548 | } | ||||||
1549 | |||||||
1550 |
674
|
1359
|
$handlers->{xpath_handler}->{$tag}= \@handlers; | ||||
1551 | } | ||||||
1552 | |||||||
1553 | sub _set_pi_handler | ||||||
1554 |
723
|
704
|
{ my( $handlers, $path, $handler, $prev_handler)= @_; | ||||
1555 | # PI conditions ( '?target' => \&handler or '?' => \&handler | ||||||
1556 | # or '#PItarget' => \&handler or '#PI' => \&handler) | ||||||
1557 |
723
|
3330
|
if( $path=~ /^\s*(?:\?|$PI)\s*(?:([^\s]*)\s*)$/) | ||||
1558 |
43
|
116
|
{ my $target= $1 || ''; | ||||
1559 | # update the path_handlers count, knowing that | ||||||
1560 | # either the previous or the new handler can be undef | ||||||
1561 |
43
|
84
|
$handlers->{pi_handlers}->{$1}= $handler; | ||||
1562 |
43
|
131
|
return 1; | ||||
1563 | } | ||||||
1564 | else | ||||||
1565 |
680
|
1960
|
{ return 0; | ||||
1566 | } | ||||||
1567 | } | ||||||
1568 | |||||||
1569 | sub _set_level_handler | ||||||
1570 |
680
|
619
|
{ my( $handlers, $path, $handler, $prev_handler)= @_; | ||||
1571 |
680
|
831
|
if( $path =~ m{^ \s* level \s* \( \s* ([0-9]+) \s* \) \s* $}ox ) | ||||
1572 |
7
|
12
|
{ my $level= $1; | ||||
1573 |
7
31
31
|
28
25
171
|
my $sub= sub { my( $stack)= @_; return( ($stack->[-1]->{$ST_TAG} !~ m{^#}) && (scalar @$stack == $level + 1) ) }; | ||||
1574 |
7
|
26
|
my $handler_data= { tag=> '*', score => { type => $LEVEL_TRIGGER}, trigger => $sub, | ||||
1575 | path => $path, handler => $handler, test_on_text => 0 | ||||||
1576 | }; | ||||||
1577 |
7
|
12
|
_add_handler( $handlers, $handler_data, $path, $prev_handler); | ||||
1578 |
7
|
19
|
return 1; | ||||
1579 | } | ||||||
1580 | else | ||||||
1581 |
673
|
1692
|
{ return 0; } | ||||
1582 | } | ||||||
1583 | |||||||
1584 | sub _set_regexp_handler | ||||||
1585 |
673
|
637
|
{ my( $handlers, $path, $handler, $prev_handler)= @_; | ||||
1586 | # if the expression was a regexp it is now a string (it was stringified when it became a hash key) | ||||||
1587 |
673
|
758
|
if( $path=~ m{^\(\?([\^xism]*)(?:-[\^xism]*)?:(.*)\)$}) | ||||
1588 |
8
|
98
|
{ my $regexp= qr/(?$1:$2)/; # convert it back into a regexp | ||||
1589 |
8
36
36
|
23
23
161
|
my $sub= sub { my( $stack)= @_; return( $stack->[-1]->{$ST_TAG} =~ $regexp ) }; | ||||
1590 |
8
|
32
|
my $handler_data= { tag=> '*', score => { type => $REGEXP_TRIGGER} , trigger => $sub, | ||||
1591 | path => $path, handler => $handler, test_on_text => 0 | ||||||
1592 | }; | ||||||
1593 |
8
|
11
|
_add_handler( $handlers, $handler_data, $path, $prev_handler); | ||||
1594 |
8
|
21
|
return 1; | ||||
1595 | } | ||||||
1596 | else | ||||||
1597 |
665
|
1572
|
{ return 0; } | ||||
1598 | } | ||||||
1599 | |||||||
1600 | my $DEBUG_HANDLER= 0; # 0 or 1 (output the handler checking code) or 2 (super verbose) | ||||||
1601 | my $handler_string; # store the handler itself | ||||||
1602 |
2
|
47
|
sub _set_debug_handler { $DEBUG_HANDLER= shift; } | ||||
1603 |
6
0
6
|
8
0
10
|
sub _warn_debug_handler { if( $DEBUG_HANDLER < 3) { warn @_; } else { $handler_string .= join( '', @_); } } | ||||
1604 |
1
1
1
|
5
1
2
|
sub _return_debug_handler { my $string= $handler_string; $handler_string=''; return $string; } | ||||
1605 | |||||||
1606 | sub _parse_xpath_handler | ||||||
1607 |
665
|
561
|
{ my( $xpath, $handler)= @_; | ||||
1608 |
665
|
517
|
my $xpath_original= $xpath; | ||||
1609 | |||||||
1610 | |||||||
1611 |
665
1
|
856
2
|
if( $DEBUG_HANDLER >=1) { _warn_debug_handler( "\n\nparsing path '$xpath'\n"); } | ||||
1612 | |||||||
1613 |
665
|
586
|
my $path_to_check= $xpath; | ||||
1614 |
665
|
15189
|
$path_to_check=~ s{/?/?$REG_TAG_PART?\s*(?:$REG_PREDICATE\s*)?}{}g; | ||||
1615 |
665
0
|
1240
0
|
if( $DEBUG_HANDLER && $path_to_check=~ /\S/) { _warn_debug_handler( "left: $path_to_check\n"); } | ||||
1616 |
665
|
893
|
return if( $path_to_check=~ /\S/); | ||||
1617 | |||||||
1618 |
665
|
2091
|
(my $xpath_to_display= $xpath)=~ s{(["{}'\[\]\@\$])}{\\$1}g; | ||||
1619 | |||||||
1620 |
665
|
448
|
my @xpath_steps; | ||||
1621 |
665
|
441
|
my $last_token_is_sep; | ||||
1622 | |||||||
1623 |
665
|
19116
|
while( $xpath=~ s{^\s* | ||||
1624 | ( (//?) # separator | ||||||
1625 | | (?:$REG_TAG_PART\s*(?:$REG_PREDICATE\s*)?) # tag name and optional predicate | ||||||
1626 | | (?:$REG_PREDICATE) # just a predicate | ||||||
1627 | ) | ||||||
1628 | } | ||||||
1629 | {}x | ||||||
1630 | ) | ||||||
1631 | { # check that we have alternating separators and steps | ||||||
1632 |
857
|
1689
|
if( $2) # found a separator | ||||
1633 |
114
1
|
132
3
|
{ if( $last_token_is_sep) { return 0; } # 2 separators in a row | ||||
1634 |
113
|
85
|
$last_token_is_sep= 1; | ||||
1635 | } | ||||||
1636 | else | ||||||
1637 |
743
2
|
1285
5
|
{ if( defined( $last_token_is_sep) && !$last_token_is_sep) { return 0; } # 2 steps in a row | ||||
1638 |
741
|
589
|
$last_token_is_sep= 0; | ||||
1639 | } | ||||||
1640 | |||||||
1641 |
854
|
4522
|
push @xpath_steps, $1; | ||||
1642 | } | ||||||
1643 |
662
1
|
817
2
|
if( $last_token_is_sep) { return 0; } # expression cannot end with a separator | ||||
1644 | |||||||
1645 |
661
|
453
|
my $i=-1; | ||||
1646 | |||||||
1647 |
661
|
1464
|
my $perlfunc= _join_n( $NO_WARNINGS . ';', | ||||
1648 | q|my( $stack)= @_; |, | ||||||
1649 | q|my @current_elts= (scalar @$stack); |, | ||||||
1650 | q|my @new_current_elts; |, | ||||||
1651 | q|my $elt; |, | ||||||
1652 | ($DEBUG_HANDLER >= 1) && (qq#warn q{checking path '$xpath_to_display'\n};#), | ||||||
1653 | ); | ||||||
1654 | |||||||
1655 | |||||||
1656 |
661
|
535
|
my $last_tag=''; | ||||
1657 |
661
|
1068
|
my $anchored= $xpath_original=~ m{^\s*/(?!/)} ? 1 : 0; | ||||
1658 |
661
|
1236
|
my $score={ type => $XPATH_TRIGGER, anchored => $anchored }; | ||||
1659 |
661
|
821
|
my $flag= { test_on_text => 0 }; | ||||
1660 |
661
|
546
|
my $sep='/'; # '/' or '//' | ||||
1661 |
661
|
1173
|
while( my $xpath_step= pop @xpath_steps) | ||||
1662 |
738
|
8016
|
{ my( $tag, $predicate)= $xpath_step =~ m{^($REG_TAG_PART)?(?:\[(.*)\])?\s*$}; | ||||
1663 |
738
|
1039
|
$score->{steps}++; | ||||
1664 |
738
|
907
|
$tag||='*'; | ||||
1665 | |||||||
1666 |
738
|
893
|
my $warn_empty_stack= $DEBUG_HANDLER >= 2 ? qq{warn "return with empty stack\\n";} : ''; | ||||
1667 | |||||||
1668 |
738
|
865
|
if( $predicate) | ||||
1669 |
222
1
|
285
2
|
{ if( $DEBUG_HANDLER >= 2) { _warn_debug_handler( "predicate is: '$predicate'\n"); } | ||||
1670 | # changes $predicate (from an XPath expression to a Perl one) | ||||||
1671 |
222
1
|
1039
125
|
if( $predicate=~ m{^\s*$REG_NUMBER\s*$}) { croak "position selector [$predicate] not supported on twig_handlers"; } | ||||
1672 |
221
|
281
|
_parse_predicate_in_handler( $predicate, $flag, $score); | ||||
1673 |
221
1
|
409
3
|
if( $DEBUG_HANDLER >= 2) { _warn_debug_handler( "predicate becomes: '$predicate'\n"); } | ||||
1674 | } | ||||||
1675 | |||||||
1676 |
737
|
820
|
my $tag_cond= _tag_cond( $tag); | ||||
1677 |
737
|
838
|
my $cond= join( " && ", grep { $_ } $tag_cond, $predicate) || 1; | ||||
1678 | |||||||
1679 |
737
8
8
|
1084
19
12
|
if( $css_sel && $tag=~ m{\.}) { $tag=~s{\.[^.]*$}{}; $tag ||='*'; } | ||||
1680 |
737
|
679
|
$tag=~ s{(.)#.+$}{$1}; | ||||
1681 | |||||||
1682 |
737
|
1377
|
$last_tag ||= $tag; | ||||
1683 | |||||||
1684 |
737
|
842
|
if( $sep eq '/') | ||||
1685 | { | ||||||
1686 |
736
|
840
|
$perlfunc .= sprintf( _join_n( q#foreach my $current_elt (@current_elts) #, | ||||
1687 | q# { next if( !$current_elt); #, | ||||||
1688 | q# $current_elt--; #, | ||||||
1689 | q# $elt= $stack->[$current_elt]; #, | ||||||
1690 | q# if( %s) { push @new_current_elts, $current_elt;} #, | ||||||
1691 | q# } #, | ||||||
1692 | ), | ||||||
1693 | $cond | ||||||
1694 | ); | ||||||
1695 | } | ||||||
1696 | elsif( $sep eq '//') | ||||||
1697 | { | ||||||
1698 |
1
|
2
|
$perlfunc .= sprintf( _join_n( q#foreach my $current_elt (@current_elts) #, | ||||
1699 | q# { next if( !$current_elt); #, | ||||||
1700 | q# $current_elt--; #, | ||||||
1701 | q# my $candidate= $current_elt; #, | ||||||
1702 | q# while( $candidate >=0) #, | ||||||
1703 | q# { $elt= $stack->[$candidate]; #, | ||||||
1704 | q# if( %s) { push @new_current_elts, $candidate;} #, | ||||||
1705 | q# $candidate--; #, | ||||||
1706 | q# } #, | ||||||
1707 | q# } #, | ||||||
1708 | ), | ||||||
1709 | $cond | ||||||
1710 | ); | ||||||
1711 | } | ||||||
1712 |
737
|
1042
|
my $warn= $DEBUG_HANDLER >= 2 ? _join_n( qq#warn qq%fail at cond '$cond'%;#) : ''; | ||||
1713 |
737
|
758
|
$perlfunc .= sprintf( _join_n( q#unless( @new_current_elts) { %s return 0; } #, | ||||
1714 | q#@current_elts= @new_current_elts; #, | ||||||
1715 | q#@new_current_elts=(); #, | ||||||
1716 | ), | ||||||
1717 | $warn | ||||||
1718 | ); | ||||||
1719 | |||||||
1720 |
737
|
1746
|
$sep= pop @xpath_steps; | ||||
1721 | } | ||||||
1722 | |||||||
1723 |
660
|
787
|
if( $anchored) # there should be a better way, but this works | ||||
1724 | { | ||||||
1725 |
31
|
44
|
my $warn= $DEBUG_HANDLER >= 2 ? _join_n( qq#warn qq{fail, stack not empty};#) : ''; | ||||
1726 |
31
|
31
|
$perlfunc .= sprintf( _join_n( q#if( ! grep { $_ == 0 } @current_elts) { %s return 0;}#), $warn); | ||||
1727 | } | ||||||
1728 | |||||||
1729 |
660
|
743
|
$perlfunc.= qq{warn "handler for '$xpath_to_display' triggered\\n";\n} if( $DEBUG_HANDLER >=2); | ||||
1730 |
660
|
725
|
$perlfunc.= qq{return q{$xpath_original};\n}; | ||||
1731 |
660
|
724
|
_warn_debug_handler( "\nperlfunc:\n$perlfunc\n") if( $DEBUG_HANDLER>=1); | ||||
1732 |
660
18
18
18
16
16
16
21
21
21
19
19
19
16
16
16
14
14
14
16
16
16
17
15
15
13
13
13
12
12
12
5
5
5
6
6
6
7
7
7
7
7
7
5
5
5
5
5
5
4
4
4
4
4
4
7
7
7
3
3
3
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
1
1
|
34049
61
18
1780
48
18
1510
65
22
1976
58
19
1802
52
15
1741
48
15
1540
51
15
1545
346
1863
1611
88
55
1507
35
51
1343
16
47
456
17
76
542
25
6
775
26
8
764
17
5
535
15
5
467
12
6
476
13
6
495
22
9
515
10
4
383
15
3
361
10
3
330
9
3
322
8
3
442
10
2
322
11
4
350
10
3
419
8
3
404
9
4
324
10
3
436
10
2
337
7
3
272
7
2
277
9
2
261
11
2
382
8
2
293
9
2
323
9
3
363
9
3
369
8
2
236
13
4
341
12
4
322
11
4
288
12
4
314
12
5
330
13
5
263
12
5
239
4
1
155
3
1
106
3
1
161
4
1
120
4
1
108
3
2
107
3
1
113
3
1
117
4
1
116
3
2
113
3
1
111
3
1
116
3
5
115
3
1
122
3
1
124
3
1
119
3
2
118
3
1
110
3
1
109
4
1
128
4
1
122
4
1
102
3
2
124
3
1
130
3
1
125
3
1
127
4
1
126
3
1
125
4
1
179
3
1
109
3
1
117
3
1
102
3
2
133
3
1
124
3
1
173
3
1
107
4
1
134
8
2
121
7
2
77
8
2
104
8
2
161
6
2
139
3
1
105
|
my $s= eval "sub { $perlfunc }"; | ||||
1733 |
660
|
1302
|
if( $@) | ||||
1734 |
1
|
134
|
{ croak "wrong handler condition '$xpath' ($@);" } | ||||
1735 | |||||||
1736 |
659
|
948
|
_warn_debug_handler( "last tag: '$last_tag', test_on_text: '$flag->{test_on_text}'\n") if( $DEBUG_HANDLER >=1); | ||||
1737 |
659
4
|
815
8
|
_warn_debug_handler( "score: ", join( ' ', map { "$_: $score->{$_}" } sort keys %$score), "\n") if( $DEBUG_HANDLER >=1); | ||||
1738 |
659
|
4392
|
return { tag=> $last_tag, score => $score, trigger => $s, path => $xpath_original, handler => $handler, test_on_text => $flag->{test_on_text} }; | ||||
1739 | } | ||||||
1740 | |||||||
1741 |
2167
|
7018
|
sub _join_n { return join( "\n", @_, ''); } | ||||
1742 | |||||||
1743 | # the "tag" part can be <tag>, <tag>.<class> or <tag>#<id> (where tag can be *, or start with # for hidden tags) | ||||||
1744 | sub _tag_cond | ||||||
1745 |
737
|
667
|
{ my( $full_tag)= @_; | ||||
1746 | |||||||
1747 |
737
|
418
|
my( $tag, $class, $id); | ||||
1748 |
737
|
894
|
if( $full_tag=~ m{^(.+)#(.+)$}) | ||||
1749 |
4
|
8
|
{ ($tag, $id)= ($1, $2); } # <tag>#<id> | ||||
1750 | else | ||||||
1751 |
733
|
1351
|
{ ( $tag, $class)= $css_sel ? $full_tag=~ m{^(.*?)(?:\.([^.]*))?$} : ($full_tag, undef); } | ||||
1752 | |||||||
1753 |
737
|
2405
|
my $tag_cond = $tag && $tag ne '*' ? qq#(\$elt->{'$ST_TAG'} eq "$tag")# : ''; | ||||
1754 |
737
|
772
|
my $id_cond = defined $id ? qq#(\$elt->{id} eq "$id")# : ''; | ||||
1755 |
737
|
721
|
my $class_cond = defined $class ? qq#(\$elt->{class}=~ m{(^| )$class( |\$)})# : ''; | ||||
1756 | |||||||
1757 |
737
2211
|
713
2540
|
my $full_cond= join( ' && ', grep { $_ } ( $tag_cond, $class_cond, $id_cond)); | ||||
1758 | |||||||
1759 |
737
|
1153
|
return $full_cond; | ||||
1760 | } | ||||||
1761 | |||||||
1762 | # input: the predicate ($_[0]) which will be changed in place | ||||||
1763 | # flags, a hashref with various flags (like test_on_text) | ||||||
1764 | # the score | ||||||
1765 | sub _parse_predicate_in_handler | ||||||
1766 |
221
|
332
|
{ my( $flag, $score)= @_[1..2]; | ||||
1767 |
221
585
|
5291
1991
|
$_[0]=~ s{( ($REG_STRING) # strings | ||||
1768 | |\@($REG_TAG_NAME)(?=\s*(?:[><=!]|!~|=~)) # @att (followed by a comparison operator) | ||||||
1769 | |\@($REG_TAG_NAME) # @att (not followed by a comparison operator) | ||||||
1770 |
585
|
527
|
|=~|!~ # matching operators | ||||
1771 | |([><]=?|=|!=)(?=\s*[\d+-]) # test before a number | ||||||
1772 | |([><]=?|=|!=) # test, other cases | ||||||
1773 |
585
44
|
2895
55
|
|($REG_FUNCTION) # no arg functions | ||||
1774 | # this bit is a mess, but it is the only solution with this half-baked parser | ||||||
1775 |
585
139
|
1833
346
|
|(string\(\s*$REG_NAME\s*\)\s*$REG_MATCH\s*$REG_REGEXP) # string( child)=~ /regexp/ | ||||
1776 |
1
|
4
|
|(string\(\s*$REG_NAME\s*\)\s*$REG_COMP\s*$REG_STRING) # string( child) = "value" (or other test) | ||||
1777 |
161
|
709
|
|(string\(\s*$REG_NAME\s*\)\s*$REG_COMP\s*$REG_NUMBER) # string( child) = nb (or other test) | ||||
1778 | |(and|or) | ||||||
1779 | # |($REG_NAME(?=\s*(and|or|$))) # nested tag name (needs to be after all other unquoted strings) | ||||||
1780 | |($REG_TAG_IN_PREDICATE) # nested tag name (needs to be after all other unquoted strings) | ||||||
1781 | |||||||
1782 | )} | ||||||
1783 | { my( $token, $str, $att, $bare_att, $num_test, $alpha_test, $func, $str_regexp, $str_test_alpha, $str_test_num, $and_or, $tag) | ||||||
1784 |
2
|
10
|
= ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12); | ||||
1785 | |||||||
1786 | $score->{predicates}++; | ||||||
1787 | |||||||
1788 | # store tests on text (they are not always allowed) | ||||||
1789 |
2
|
11
|
if( $func || $str_regexp || $str_test_num || $str_test_alpha ) { $flag->{test_on_text}= 1; } | ||||
1790 | |||||||
1791 |
2
|
5
|
if( defined $str) { $token } | ||||
1792 |
2
|
2
|
elsif( $tag) { qq{(\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->has_child( '$tag'))} } | ||||
1793 |
2
|
4
|
elsif( $att) { $att=~ m{^#} ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$att'})} | ||||
1794 |
2
|
2
|
: qq{\$elt->{'$att'}} | ||||
1795 |
2
|
10
|
} | ||||
1796 | # for some reason Devel::Cover flags the following lines as not tested. They are though. | ||||||
1797 |
0
|
0
|
elsif( $bare_att) { $bare_att=~ m{^#} ? qq{(\$elt->{'$ST_ELT'} && defined(\$elt->{'$ST_ELT'}->{att}->{'$bare_att'}))} | ||||
1798 |
0
|
0
|
: qq{defined( \$elt->{'$bare_att'})} | ||||
1799 | } | ||||||
1800 |
12
12
|
14
54
|
elsif( $num_test && ($num_test eq '=') ) { "==" } # others tests are unchanged | ||||
1801 |
60
|
911
|
elsif( $alpha_test) { $PERL_ALPHA_TEST{$alpha_test} } | ||||
1802 | elsif( $func && $func=~ m{^string}) | ||||||
1803 | { "\$elt->{'$ST_ELT'}->text"; } | ||||||
1804 | elsif( $str_regexp && $str_regexp =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)}) | ||||||
1805 | { "defined( _first_n { \$_->text $2 $3 } 1, \$elt->{'$ST_ELT'}->_children( '$1'))"; } | ||||||
1806 | elsif( $str_test_alpha && $str_test_alpha =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_COMP)\s*($REG_STRING)}) | ||||||
1807 | { my( $tag, $op, $str)= ($1, $2, $3); | ||||||
1808 | $str=~ s{(?<=.)'(?=.)}{\\'}g; # escape a quote within the string | ||||||
1809 | $str=~ s{^"}{'}; | ||||||
1810 | $str=~ s{"$}{'}; | ||||||
1811 | "defined( _first_n { \$_->text $PERL_ALPHA_TEST{$op} $str } 1, \$elt->{'$ST_ELT'}->children( '$tag'))"; } | ||||||
1812 | elsif( $str_test_num && $str_test_num =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_COMP)\s*($REG_NUMBER)}) | ||||||
1813 | { my $test= ($2 eq '=') ? '==' : $2; | ||||||
1814 | "defined( _first_n { \$_->text $test $3 } 1, \$elt->{'$ST_ELT'}->children( '$1'))"; | ||||||
1815 | } | ||||||
1816 | elsif( $and_or) { $score->{tests}++; $and_or eq 'and' ? '&&' : '||' ; } | ||||||
1817 | else { $token; } | ||||||
1818 | }gexs; | ||||||
1819 | } | ||||||
1820 | |||||||
1821 | |||||||
1822 | sub setCharHandler | ||||||
1823 |
1
|
1
|
1
|
{ my( $t, $handler)= @_; | |||
1824 |
1
|
2
|
$t->{twig_char_handler}= $handler; | ||||
1825 | } | ||||||
1826 | |||||||
1827 | |||||||
1828 | sub _reset_handlers | ||||||
1829 |
423
|
681
|
{ my $handlers= shift; | ||||
1830 |
423
|
508
|
delete $handlers->{handlers}; | ||||
1831 |
423
|
329
|
delete $handlers->{path_handlers}; | ||||
1832 |
423
|
351
|
delete $handlers->{subpath_handlers}; | ||||
1833 |
423
|
595
|
$handlers->{attcond_handlers_exp}=[] if( $handlers->{attcond_handlers}); | ||||
1834 |
423
|
539
|
delete $handlers->{attcond_handlers}; | ||||
1835 | } | ||||||
1836 | |||||||
1837 | sub _set_handlers | ||||||
1838 |
423
|
651
|
{ my $handlers= shift || return; | ||||
1839 |
422
|
394
|
my $set_handlers= {}; | ||||
1840 |
422
422
|
363
892
|
foreach my $path (keys %{$handlers}) | ||||
1841 |
613
|
926
|
{ _set_handler( $set_handlers, $path, $handlers->{$path}); } | ||||
1842 | |||||||
1843 |
415
|
2077
|
return $set_handlers; | ||||
1844 | } | ||||||
1845 | |||||||
1846 | |||||||
1847 | sub setTwigHandler | ||||||
1848 |
64
|
1
|
2918
|
{ my( $t, $path, $handler)= @_; | |||
1849 |
64
|
251
|
$t->{twig_handlers} ||={}; | ||||
1850 |
64
|
112
|
return _set_handler( $t->{twig_handlers}, $path, $handler); | ||||
1851 | } | ||||||
1852 | |||||||
1853 | sub setTwigHandlers | ||||||
1854 |
263
|
1
|
507
|
{ my( $t, $handlers)= @_; | |||
1855 |
263
|
660
|
my $previous_handlers= $t->{twig_handlers} || undef; | ||||
1856 |
263
|
668
|
_reset_handlers( $t->{twig_handlers}); | ||||
1857 |
263
|
433
|
$t->{twig_handlers}= _set_handlers( $handlers); | ||||
1858 |
256
|
471
|
return $previous_handlers; | ||||
1859 | } | ||||||
1860 | |||||||
1861 | sub setStartTagHandler | ||||||
1862 |
3
|
1
|
8
|
{ my( $t, $path, $handler)= @_; | |||
1863 |
3
|
5
|
$t->{twig_starttag_handlers}||={}; | ||||
1864 |
3
|
4
|
return _set_handler( $t->{twig_starttag_handlers}, $path, $handler); | ||||
1865 | } | ||||||
1866 | |||||||
1867 | sub setStartTagHandlers | ||||||
1868 |
34
|
1
|
293
|
{ my( $t, $handlers)= @_; | |||
1869 |
34
|
101
|
my $previous_handlers= $t->{twig_starttag_handlers} || undef; | ||||
1870 |
34
|
77
|
_reset_handlers( $t->{twig_starttag_handlers}); | ||||
1871 |
34
|
56
|
$t->{twig_starttag_handlers}= _set_handlers( $handlers); | ||||
1872 |
34
|
60
|
return $previous_handlers; | ||||
1873 | } | ||||||
1874 | |||||||
1875 | sub setIgnoreEltsHandler | ||||||
1876 |
2
|
1
|
499
|
{ my( $t, $path, $action)= @_; | |||
1877 |
2
|
12
|
$t->{twig_ignore_elts_handlers}||={}; | ||||
1878 |
2
|
5
|
return _set_handler( $t->{twig_ignore_elts_handlers}, $path, $action ); | ||||
1879 | } | ||||||
1880 | |||||||
1881 | sub setIgnoreEltsHandlers | ||||||
1882 |
11
|
1
|
14
|
{ my( $t, $handlers)= @_; | |||
1883 |
11
|
9
|
my $previous_handlers= $t->{twig_ignore_elts_handlers}; | ||||
1884 |
11
|
35
|
_reset_handlers( $t->{twig_ignore_elts_handlers}); | ||||
1885 |
11
|
20
|
$t->{twig_ignore_elts_handlers}= _set_handlers( $handlers); | ||||
1886 |
11
|
12
|
return $previous_handlers; | ||||
1887 | } | ||||||
1888 | |||||||
1889 | sub setEndTagHandler | ||||||
1890 |
2
|
1
|
48
|
{ my( $t, $path, $handler)= @_; | |||
1891 |
2
|
10
|
$t->{twig_endtag_handlers}||={}; | ||||
1892 |
2
|
5
|
return _set_handler( $t->{twig_endtag_handlers}, $path,$handler); | ||||
1893 | } | ||||||
1894 | |||||||
1895 | sub setEndTagHandlers | ||||||
1896 |
14
|
1
|
12
|
{ my( $t, $handlers)= @_; | |||
1897 |
14
|
16
|
my $previous_handlers= $t->{twig_endtag_handlers}; | ||||
1898 |
14
|
30
|
_reset_handlers( $t->{twig_endtag_handlers}); | ||||
1899 |
14
|
21
|
$t->{twig_endtag_handlers}= _set_handlers( $handlers); | ||||
1900 |
14
|
13
|
return $previous_handlers; | ||||
1901 | } | ||||||
1902 | |||||||
1903 | # a little more complex: set the twig_handlers only if a code ref is given | ||||||
1904 | sub setTwigRoots | ||||||
1905 |
101
|
1
|
96
|
{ my( $t, $handlers)= @_; | |||
1906 |
101
|
100
|
my $previous_roots= $t->{twig_roots}; | ||||
1907 |
101
|
231
|
_reset_handlers($t->{twig_roots}); | ||||
1908 |
101
|
151
|
$t->{twig_roots}= _set_handlers( $handlers); | ||||
1909 | |||||||
1910 |
101
|
178
|
_check_illegal_twig_roots_handlers( $t->{twig_roots}); | ||||
1911 | |||||||
1912 |
99
99
|
71
145
|
foreach my $path (keys %{$handlers}) | ||||
1913 |
98
|
315
|
{ $t->{twig_handlers}||= {}; | ||||
1914 |
98
|
450
|
_set_handler( $t->{twig_handlers}, $path, $handlers->{$path}) | ||||
1915 | if( ref($handlers->{$path}) && isa( $handlers->{$path}, 'CODE')); | ||||||
1916 | } | ||||||
1917 |
99
|
115
|
return $previous_roots; | ||||
1918 | } | ||||||
1919 | |||||||
1920 | sub _check_illegal_twig_roots_handlers | ||||||
1921 |
101
|
91
|
{ my( $handlers)= @_; | ||||
1922 |
101
101
|
84
229
|
foreach my $tag_handlers (values %{$handlers->{xpath_handler}}) | ||||
1923 |
82
|
101
|
{ foreach my $handler_data (@$tag_handlers) | ||||
1924 |
82
|
231
|
{ if( my $type= $handler_data->{test_on_text}) | ||||
1925 |
2
|
204
|
{ croak "string() condition not supported on twig_roots option"; } | ||||
1926 | } | ||||||
1927 | } | ||||||
1928 |
99
|
91
|
return; | ||||
1929 | } | ||||||
1930 | |||||||
1931 | |||||||
1932 | # just store the reference to the expat object in the twig | ||||||
1933 | sub _twig_init | ||||||
1934 | { # warn " in _twig_init...\n"; # DEBUG handler | ||||||
1935 | |||||||
1936 |
3102
|
423328
|
my $p= shift; | ||||
1937 |
3102
|
3357
|
my $t=$p->{twig}; | ||||
1938 | |||||||
1939 |
3102
1
|
4244
104
|
if( $t->{twig_parsing} ) { croak "cannot reuse a twig that is already parsing"; } | ||||
1940 |
3101
|
2725
|
$t->{twig_parsing}=1; | ||||
1941 | |||||||
1942 |
3101
|
3048
|
$t->{twig_parser}= $p; | ||||
1943 |
3101
3078
|
3498
5174
|
if( $weakrefs) { weaken( $t->{twig_parser}); } | ||||
1944 | |||||||
1945 | # in case they had been created by a previous parse | ||||||
1946 |
3101
|
2504
|
delete $t->{twig_dtd}; | ||||
1947 |
3101
|
2339
|
delete $t->{twig_doctype}; | ||||
1948 |
3101
|
2059
|
delete $t->{twig_xmldecl}; | ||||
1949 |
3101
|
2539
|
delete $t->{twig_root}; | ||||
1950 | |||||||
1951 | # if needed set the output filehandle | ||||||
1952 |
3101
|
4135
|
$t->_set_fh_to_twig_output_fh(); | ||||
1953 |
3101
|
3708
|
return; | ||||
1954 | } | ||||||
1955 | |||||||
1956 | # uses eval to catch the parser's death | ||||||
1957 | sub safe_parse | ||||||
1958 |
13
|
1
|
92
|
{ my $t= shift; | |||
1959 |
13
13
|
84
138
|
eval { $t->parse( @_); } ; | ||||
1960 |
13
|
77
|
return $@ ? $t->_reset_twig_after_error : $t; | ||||
1961 | } | ||||||
1962 | |||||||
1963 | sub safe_parsefile | ||||||
1964 |
25
|
1
|
33
|
{ my $t= shift; | |||
1965 |
25
25
|
27
55
|
eval { $t->parsefile( @_); } ; | ||||
1966 |
25
|
73
|
return $@ ? $t->_reset_twig_after_error : $t; | ||||
1967 | } | ||||||
1968 | |||||||
1969 | # restore a twig in a proper state so it can be reused for a new parse | ||||||
1970 | sub _reset_twig | ||||||
1971 |
10
|
7
|
{ my $t= shift; | ||||
1972 |
10
|
12
|
$t->{twig_parsing}= 0; | ||||
1973 |
10
|
16
|
delete $t->{twig_current}; | ||||
1974 |
10
|
13
|
delete $t->{extra_data}; | ||||
1975 |
10
|
12
|
delete $t->{twig_dtd}; | ||||
1976 |
10
|
9
|
delete $t->{twig_in_pcdata}; | ||||
1977 |
10
|
10
|
delete $t->{twig_in_cdata}; | ||||
1978 |
10
|
14
|
delete $t->{twig_stored_space}; | ||||
1979 |
10
|
31
|
delete $t->{twig_entity_list}; | ||||
1980 |
10
|
41
|
$t->root->delete if( $t->root); | ||||
1981 |
10
|
28
|
delete $t->{twig_root}; | ||||
1982 |
10
|
11
|
return $t; | ||||
1983 | } | ||||||
1984 | |||||||
1985 | sub _reset_twig_after_error | ||||||
1986 |
10
|
12
|
{ my $t= shift; | ||||
1987 |
10
|
24
|
$t->_reset_twig; | ||||
1988 |
10
|
18
|
return undef; | ||||
1989 | } | ||||||
1990 | |||||||
1991 | |||||||
1992 | sub _add_or_discard_stored_spaces | ||||||
1993 |
87121
|
60829
|
{ my $t= shift; | ||||
1994 | |||||||
1995 |
87121
|
62892
|
$t->{twig_right_after_root}=0; #XX | ||||
1996 | |||||||
1997 |
87121
|
106393
|
my $current= $t->{twig_current} or return; # ugly hack, with ignore on, twig_current can disappear | ||||
1998 |
84026
|
116208
|
return unless length $t->{twig_stored_spaces}; | ||||
1999 |
8375
|
8362
|
my $current_gi= $XML::Twig::index2gi[$current->{'gi'}]; | ||||
2000 | |||||||
2001 |
8375
|
10365
|
if( ! $t->{twig_discard_all_spaces}) | ||||
2002 |
8374
|
13634
|
{ if( ! defined( $t->{twig_space_policy}->{$current_gi})) | ||||
2003 |
594
|
828
|
{ $t->{twig_space_policy}->{$current_gi}= _space_policy( $t, $current_gi); } | ||||
2004 |
8374
|
40966
|
if( $t->{twig_space_policy}->{$current_gi} || ($t->{twig_stored_spaces}!~ m{\n}) || $t->{twig_preserve_space}) | ||||
2005 |
113
|
131
|
{ _insert_pcdata( $t, $t->{twig_stored_spaces} ); } | ||||
2006 | } | ||||||
2007 | |||||||
2008 |
8375
|
7236
|
$t->{twig_stored_spaces}=''; | ||||
2009 | |||||||
2010 |
8375
|
6714
|
return; | ||||
2011 | } | ||||||
2012 | |||||||
2013 | # the default twig handlers, which build the tree | ||||||
2014 | sub _twig_start | ||||||
2015 | { # warn " in _twig_start...\n"; # DEBUG handler | ||||||
2016 | |||||||
2017 | #foreach my $s (@_) { next if ref $s; warn "$s: ", is_utf8( $s) ? "has flag" : "FLAG NOT SET"; } # YYY | ||||||
2018 | |||||||
2019 |
43600
|
1688675
|
my ($p, $gi, @att)= @_; | ||||
2020 |
43600
|
33292
|
my $t=$p->{twig}; | ||||
2021 | |||||||
2022 | # empty the stored pcdata (space stored in case they are really part of | ||||||
2023 | # a pcdata element) or stored it if the space policy dictates so | ||||||
2024 | # create a pcdata element with the spaces if need be | ||||||
2025 |
43600
|
41682
|
_add_or_discard_stored_spaces( $t); | ||||
2026 |
43600
|
27768
|
my $parent= $t->{twig_current}; | ||||
2027 | |||||||
2028 | # if we were parsing PCDATA then we exit the pcdata | ||||||
2029 |
43600
|
46316
|
if( $t->{twig_in_pcdata}) | ||||
2030 |
18139
|
11352
|
{ $t->{twig_in_pcdata}= 0; | ||||
2031 |
18139
|
14295
|
delete $parent->{'twig_current'}; | ||||
2032 |
18139
|
15077
|
$parent= $parent->{parent}; | ||||
2033 | } | ||||||
2034 | |||||||
2035 | # if we choose to keep the encoding then we need to parse the tag | ||||||
2036 |
43600
|
67428
|
if( my $func = $t->{parse_start_tag}) | ||||
2037 |
1654
|
2668
|
{ ($gi, @att)= &$func($p->original_string); } | ||||
2038 | elsif( $t->{twig_entities_in_attribute}) | ||||||
2039 | { | ||||||
2040 |
0
|
0
|
($gi,@att)= _parse_start_tag( $p->recognized_string); | ||||
2041 |
0
|
0
|
$t->{twig_entities_in_attribute}=0; | ||||
2042 | } | ||||||
2043 | |||||||
2044 | # if we are using an external DTD, we need to fill the default attributes | ||||||
2045 |
43600
49
|
51037
59
|
if( $t->{twig_read_external_dtd}) { _fill_default_atts( $t, $gi, \@att); } | ||||
2046 | |||||||
2047 | # filter the input data if need be | ||||||
2048 |
43600
|
48990
|
if( my $filter= $t->{twig_input_filter}) | ||||
2049 |
5
|
10
|
{ $gi= $filter->( $gi); | ||||
2050 |
5
4
|
18
8
|
foreach my $att (@att) { $att= $filter->($att); } | ||||
2051 | } | ||||||
2052 | |||||||
2053 |
43600
|
22410
|
my $ns_decl; | ||||
2054 |
43600
|
42909
|
if( $t->{twig_map_xmlns}) | ||||
2055 |
69
|
83
|
{ $ns_decl= _replace_ns( $t, \$gi, \@att); } | ||||
2056 | |||||||
2057 |
43600
|
63829
|
my $elt= $t->{twig_elt_class}->new( $gi); | ||||
2058 |
43600
|
54639
|
$elt->set_atts( @att); | ||||
2059 | |||||||
2060 | # now we can store the tag and atts | ||||||
2061 |
43600
|
80260
|
my $context= { $ST_TAG => $gi, $ST_ELT => $elt, @att}; | ||||
2062 |
43600
|
47374
|
$context->{$ST_NS}= $ns_decl if $ns_decl; | ||||
2063 |
43600
25523
|
43401
36742
|
if( $weakrefs) { weaken( $context->{$ST_ELT}); } | ||||
2064 |
43600
43600
|
26442
49970
|
push @{$t->{_twig_context_stack}}, $context; | ||||
2065 | |||||||
2066 |
43600
|
61504
|
delete $parent->{'twig_current'} if( $parent); | ||||
2067 |
43600
|
30548
|
$t->{twig_current}= $elt; | ||||
2068 |
43600
|
33434
|
$elt->{'twig_current'}=1; | ||||
2069 | |||||||
2070 |
43600
|
35955
|
if( $parent) | ||||
2071 |
40505
|
29693
|
{ my $prev_sibling= $parent->{last_child}; | ||||
2072 |
40505
|
40097
|
if( $prev_sibling) | ||||
2073 |
37932
|
32161
|
{ $prev_sibling->{next_sibling}= $elt; | ||||
2074 |
37932
37932
19924
|
29610
39780
24006
|
$elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; | ||||
2075 | } | ||||||
2076 | |||||||
2077 |
40505
40505
22451
|
33195
37737
21625
|
$elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; | ||||
2078 |
40505
2573
|
48028
2271
|
unless( $parent->{first_child}) { $parent->{first_child}= $elt; } | ||||
2079 |
40505
40505
40505
22451
|
27549
26892
42096
25789
|
$parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; | ||||
2080 | } | ||||||
2081 | else | ||||||
2082 | { # processing root | ||||||
2083 |
3095
|
3881
|
$t->set_root( $elt); | ||||
2084 | # call dtd handler if need be | ||||||
2085 |
3095
|
3918
|
$t->{twig_dtd_handler}->($t, $t->{twig_dtd}) | ||||
2086 | if( defined $t->{twig_dtd_handler}); | ||||||
2087 | |||||||
2088 | # set this so we can catch external entities | ||||||
2089 | # (the handler was modified during DTD processing) | ||||||
2090 |
3095
|
4352
|
if( $t->{twig_default_print}) | ||||
2091 |
62
|
167
|
{ $p->setHandlers( Default => \&_twig_print); } | ||||
2092 | elsif( $t->{twig_roots}) | ||||||
2093 |
42
127
|
159
221
|
{ $p->setHandlers( Default => sub { return }); } | ||||
2094 | else | ||||||
2095 |
2991
|
6364
|
{ $p->setHandlers( Default => \&_twig_default); } | ||||
2096 | } | ||||||
2097 | |||||||
2098 |
43600
|
113512
|
$elt->{empty}= $p->recognized_string=~ m{/\s*>$}s ? 1 : 0; | ||||
2099 | |||||||
2100 |
43600
|
252830
|
$elt->{extra_data}= $t->{extra_data} if( $t->{extra_data}); | ||||
2101 |
43600
|
32700
|
$t->{extra_data}=''; | ||||
2102 | |||||||
2103 | # if the element is ID-ed then store that info | ||||||
2104 |
43600
|
36540
|
my $id= $elt->{'att'}->{$ID}; | ||||
2105 |
43600
|
47062
|
if( defined $id) | ||||
2106 |
18684
|
21739
|
{ $t->{twig_id_list}->{$id}= $elt; | ||||
2107 |
18684
9681
|
19634
11298
|
if( $weakrefs) { weaken( $t->{twig_id_list}->{$id}); } | ||||
2108 | } | ||||||
2109 | |||||||
2110 | # call user handler if need be | ||||||
2111 |
43600
|
50639
|
if( $t->{twig_starttag_handlers}) | ||||
2112 | { # call all appropriate handlers | ||||||
2113 |
156
|
188
|
my @handlers= _handler( $t, $t->{twig_starttag_handlers}, $gi); | ||||
2114 | |||||||
2115 |
156
|
117
|
local $_= $elt; | ||||
2116 | |||||||
2117 |
156
|
143
|
foreach my $handler ( @handlers) | ||||
2118 |
66
|
107
|
{ $handler->($t, $elt) || last; } | ||||
2119 | # call _all_ handler if needed | ||||||
2120 |
156
|
340
|
if( my $all= $t->{twig_starttag_handlers}->{handlers}->{$ALL}) | ||||
2121 |
3
|
6
|
{ $all->($t, $elt); } | ||||
2122 | } | ||||||
2123 | |||||||
2124 | # check if the tag is in the list of tags to be ignored | ||||||
2125 |
43600
|
47518
|
if( $t->{twig_ignore_elts_handlers}) | ||||
2126 |
81
|
105
|
{ my @handlers= _handler( $t, $t->{twig_ignore_elts_handlers}, $gi); | ||||
2127 | # only the first handler counts, it contains the action (discard/print/string) | ||||||
2128 |
81
24
24
|
98
26
37
|
if( @handlers) { my $action= shift @handlers; $t->ignore( $elt, $action); } | ||||
2129 | } | ||||||
2130 | |||||||
2131 |
43600
2
|
59475
4
|
if( $elt->{'att'}->{'xml:space'} && ( $elt->{'att'}->{'xml:space'} eq 'preserve')) { $t->{twig_preserve_space}++; } | ||||
2132 | |||||||
2133 | |||||||
2134 |
43600
|
103148
|
return; | ||||
2135 | } | ||||||
2136 | |||||||
2137 | sub _replace_ns | ||||||
2138 |
83
|
72
|
{ my( $t, $gi, $atts)= @_; | ||||
2139 |
83
|
39
|
my $decls; | ||||
2140 |
83
|
110
|
foreach my $new_prefix ( $t->parser->new_ns_prefixes) | ||||
2141 |
37
|
116
|
{ my $uri= $t->parser->expand_ns_prefix( $new_prefix); | ||||
2142 | # replace the prefix if it is mapped | ||||||
2143 |
37
|
189
|
$decls->{$new_prefix}= $uri; | ||||
2144 |
37
|
105
|
if( !$t->{twig_keep_original_prefix} && (my $mapped_prefix= $t->{twig_map_xmlns}->{$uri})) | ||||
2145 |
25
|
20
|
{ $new_prefix= $mapped_prefix; } | ||||
2146 | # now put the namespace declaration back in the element | ||||||
2147 |
37
|
40
|
if( $new_prefix eq '#default') | ||||
2148 |
3
|
21
|
{ push @$atts, "xmlns" => $uri; } | ||||
2149 | else | ||||||
2150 |
34
|
65
|
{ push @$atts, "xmlns:$new_prefix" => $uri; } | ||||
2151 | } | ||||||
2152 | |||||||
2153 |
83
|
352
|
if( $t->{twig_keep_original_prefix}) | ||||
2154 | { # things become more complex: we need to find the original prefix | ||||||
2155 | # and store both prefixes | ||||||
2156 |
13
|
18
|
my $ns_info= $t->_ns_info( $$gi); | ||||
2157 |
13
|
9
|
my $map_att; | ||||
2158 |
13
|
19
|
if( $ns_info->{mapped_prefix}) | ||||
2159 |
11
|
15
|
{ $$gi= "$ns_info->{mapped_prefix}:$$gi"; | ||||
2160 |
11
|
19
|
$map_att->{$ns_info->{mapped_prefix}}= $ns_info->{prefix}; | ||||
2161 | } | ||||||
2162 |
13
|
8
|
my $att_name=1; | ||||
2163 |
13
|
14
|
foreach( @$atts) | ||||
2164 |
30
|
29
|
{ if( $att_name) | ||||
2165 | { | ||||||
2166 |
15
|
14
|
my $ns_info= $t->_ns_info( $_); | ||||
2167 |
15
|
18
|
if( $ns_info->{mapped_prefix}) | ||||
2168 |
4
|
6
|
{ $_= "$ns_info->{mapped_prefix}:$_"; | ||||
2169 |
4
|
6
|
$map_att->{$ns_info->{mapped_prefix}}= $ns_info->{prefix}; | ||||
2170 | } | ||||||
2171 |
15
|
17
|
$att_name=0; | ||||
2172 | } | ||||||
2173 | else | ||||||
2174 |
15
|
12
|
{ $att_name=1; } | ||||
2175 | } | ||||||
2176 |
13
|
28
|
push @$atts, '#original_gi', $map_att if( $map_att); | ||||
2177 | } | ||||||
2178 | else | ||||||
2179 |
70
|
80
|
{ $$gi= $t->_replace_prefix( $$gi); | ||||
2180 |
70
|
52
|
my $att_name=1; | ||||
2181 |
70
|
71
|
foreach( @$atts) | ||||
2182 |
182
91
91
|
152
84
73
|
{ if( $att_name) { $_= $t->_replace_prefix( $_); $att_name=0; } | ||||
2183 |
91
|
75
|
else { $att_name=1; } | ||||
2184 | } | ||||||
2185 | } | ||||||
2186 |
83
|
85
|
return $decls; | ||||
2187 | } | ||||||
2188 | |||||||
2189 | |||||||
2190 | # extract prefix, local_name, uri, mapped_prefix from a name | ||||||
2191 | # will only work if called from a start or end tag handler | ||||||
2192 | sub _ns_info | ||||||
2193 |
28
|
21
|
{ my( $t, $name)= @_; | ||||
2194 |
28
|
23
|
my $ns_info={}; | ||||
2195 |
28
|
23
|
my $p= $t->parser; | ||||
2196 |
28
|
43
|
$ns_info->{uri}= $p->namespace( $name); | ||||
2197 |
28
|
150
|
return $ns_info unless( $ns_info->{uri}); | ||||
2198 | |||||||
2199 |
15
|
16
|
$ns_info->{prefix}= _a_proper_ns_prefix( $p, $ns_info->{uri}); | ||||
2200 |
15
|
37
|
$ns_info->{mapped_prefix}= $t->{twig_map_xmlns}->{$ns_info->{uri}} || $ns_info->{prefix}; | ||||
2201 | |||||||
2202 |
15
|
11
|
return $ns_info; | ||||
2203 | } | ||||||
2204 | |||||||
2205 | sub _a_proper_ns_prefix | ||||||
2206 |
22
|
20
|
{ my( $p, $uri)= @_; | ||||
2207 |
22
|
32
|
foreach my $prefix ($p->current_ns_prefixes) | ||||
2208 |
38
|
280
|
{ if( $p->expand_ns_prefix( $prefix) eq $uri) | ||||
2209 |
22
|
112
|
{ return $prefix; } | ||||
2210 | } | ||||||
2211 |
0
|
0
|
return; | ||||
2212 | } | ||||||
2213 | |||||||
2214 | # returns the uri bound to a prefix in the original document | ||||||
2215 | # only works in a handler | ||||||
2216 | # can be used to deal with xsi:type attributes | ||||||
2217 | sub original_uri | ||||||
2218 |
2
|
1
|
16
|
{ my( $t, $prefix)= @_; | |||
2219 |
2
|
2
|
my $ST_NS = '##ns' ; | ||||
2220 |
2
6
2
|
1
12
3
|
foreach my $ns (map { $_->{$ST_NS} if $_->{$ST_NS} } reverse @{$t->{_twig_context_stack}}) | ||||
2221 |
6
|
17
|
{ return $ns->{$prefix} || next; } | ||||
2222 |
0
|
0
|
return; | ||||
2223 | } | ||||||
2224 | |||||||
2225 | |||||||
2226 | sub _fill_default_atts | ||||||
2227 |
49
|
46
|
{ my( $t, $gi, $atts)= @_; | ||||
2228 |
49
|
37
|
my $dtd= $t->{twig_dtd}; | ||||
2229 |
49
|
51
|
my $attlist= $dtd->{att}->{$gi}; | ||||
2230 |
49
|
68
|
my %value= @$atts; | ||||
2231 |
49
|
92
|
foreach my $att (keys %$attlist) | ||||
2232 |
66
|
170
|
{ if( !exists( $value{$att}) | ||||
2233 | && exists( $attlist->{$att}->{default}) | ||||||
2234 | && ( $attlist->{$att}->{default} ne '#IMPLIED') | ||||||
2235 | ) | ||||||
2236 | { # the quotes are included in the default, so we need to remove them | ||||||
2237 |
13
|
22
|
my $default_value= substr( $attlist->{$att}->{default}, 1, -1); | ||||
2238 |
13
|
16
|
push @$atts, $att, $default_value; | ||||
2239 | } | ||||||
2240 | } | ||||||
2241 |
49
|
73
|
return; | ||||
2242 | } | ||||||
2243 | |||||||
2244 | |||||||
2245 | # the default function to parse a start tag (in keep_encoding mode) | ||||||
2246 | # can be overridden with the parse_start_tag method | ||||||
2247 | # only works for 1-byte character sets | ||||||
2248 | sub _parse_start_tag | ||||||
2249 |
1682
|
5826
|
{ my $string= shift; | ||||
2250 |
1682
|
921
|
my( $gi, @atts); | ||||
2251 | |||||||
2252 | # get the gi (between < and the first space, / or > character) | ||||||
2253 | #if( $string=~ s{^<\s*([^\s>/]*)[\s>/]*}{}s) | ||||||
2254 |
1682
|
14189
|
if( $string=~ s{^<\s*($REG_TAG_NAME)\s*[\s>/]}{}s) | ||||
2255 |
1664
|
2665
|
{ $gi= $1; } | ||||
2256 | else | ||||||
2257 |
18
|
1223
|
{ croak "error parsing tag '$string'"; } | ||||
2258 |
1664
|
2455
|
while( $string=~ s{^([^\s=]*)\s*=\s*(["'])(.*?)\2\s*}{}s) | ||||
2259 |
200
|
531
|
{ push @atts, $1, $3; } | ||||
2260 |
1664
|
3767
|
return $gi, @atts; | ||||
2261 | } | ||||||
2262 | |||||||
2263 | sub set_root | ||||||
2264 |
3137
|
1
|
2615
|
{ my( $t, $elt)= @_; | |||
2265 |
3137
|
2923
|
$t->{twig_root}= $elt; | ||||
2266 |
3137
|
3658
|
if( $elt) | ||||
2267 |
3137
|
2708
|
{ $elt->{twig}= $t; | ||||
2268 |
3137
3111
|
3404
3973
|
if( $weakrefs) { weaken( $elt->{twig}); } | ||||
2269 | } | ||||||
2270 |
3137
|
2329
|
return $t; | ||||
2271 | } | ||||||
2272 | |||||||
2273 | sub _twig_end | ||||||
2274 | { # warn " in _twig_end...\n"; # DEBUG handler | ||||||
2275 |
43486
|
33715
|
my ($p, $gi) = @_; | ||||
2276 | |||||||
2277 |
43486
|
31401
|
my $t=$p->{twig}; | ||||
2278 | |||||||
2279 |
43486
|
80840
|
if( $t->{twig_in_pcdata} && (my $text_handler= $t->{TwigHandlers}->{$TEXT}) ) | ||||
2280 |
0
0
|
0
0
|
{ local $_= $t->{twig_current}; $text_handler->( $t, $_) if $_; | ||||
2281 | } | ||||||
2282 | |||||||
2283 |
43486
69
|
44753
83
|
if( $t->{twig_map_xmlns}) { $gi= $t->_replace_prefix( $gi); } | ||||
2284 | |||||||
2285 |
43486
|
34325
|
_add_or_discard_stored_spaces( $t); | ||||
2286 | |||||||
2287 | # the new twig_current is the parent | ||||||
2288 |
43486
|
27268
|
my $elt= $t->{twig_current}; | ||||
2289 |
43486
|
34855
|
delete $elt->{'twig_current'}; | ||||
2290 | |||||||
2291 | # if we were parsing PCDATA then we exit the pcdata too | ||||||
2292 |
43486
|
45867
|
if( $t->{twig_in_pcdata}) | ||||
2293 | { | ||||||
2294 |
20602
|
12304
|
$t->{twig_in_pcdata}= 0; | ||||
2295 |
20602
|
28992
|
$elt= $elt->{parent} if($elt->{parent}); | ||||
2296 |
20602
|
14882
|
delete $elt->{'twig_current'}; | ||||
2297 | } | ||||||
2298 | |||||||
2299 | # parent is the new current element | ||||||
2300 |
43486
|
27200
|
my $parent= $elt->{parent}; | ||||
2301 |
43486
|
30450
|
$t->{twig_current}= $parent; | ||||
2302 | |||||||
2303 |
43486
|
41196
|
if( $parent) | ||||
2304 |
40415
|
29684
|
{ $parent->{'twig_current'}=1; | ||||
2305 | # twig_to_be_normalized | ||||||
2306 |
40415
5
5
|
44164
9
6
|
if( $parent->{twig_to_be_normalized}) { $parent->normalize; $parent->{twig_to_be_normalized}=0; } | ||||
2307 | } | ||||||
2308 | |||||||
2309 |
43486
|
47085
|
if( $t->{extra_data}) | ||||
2310 |
324
|
454
|
{ $elt->_set_extra_data_before_end_tag( $t->{extra_data}); | ||||
2311 |
324
|
242
|
$t->{extra_data}=''; | ||||
2312 | } | ||||||
2313 | |||||||
2314 |
43486
|
44700
|
if( $t->{twig_handlers}) | ||||
2315 | { # look for handlers | ||||||
2316 |
3773
|
5123
|
my @handlers= _handler( $t, $t->{twig_handlers}, $gi); | ||||
2317 | |||||||
2318 |
3773
|
4091
|
if( $t->{twig_tdh}) | ||||
2319 |
48
29
29
|
44
17
54
|
{ if( @handlers) { push @{$t->{twig_handlers_to_trigger}}, [ $elt, \@handlers ]; } | ||||
2320 |
48
|
71
|
if( my $all= $t->{twig_handlers}->{handlers}->{$ALL}) | ||||
2321 |
12
12
|
9
20
|
{ push @{$t->{twig_handlers_to_trigger}}, [ $elt, [$all] ]; } | ||||
2322 | } | ||||||
2323 | else | ||||||
2324 | { | ||||||
2325 |
3725
|
2691
|
local $_= $elt; # so we can use $_ in the handlers | ||||
2326 | |||||||
2327 |
3725
|
3301
|
foreach my $handler ( @handlers) | ||||
2328 |
711
|
1867
|
{ $handler->($t, $elt) || last; } | ||||
2329 | # call _all_ handler if needed | ||||||
2330 |
3713
|
4950
|
my $all= $t->{twig_handlers}->{handlers}->{$ALL}; | ||||
2331 |
3713
|
3720
|
if( $all) | ||||
2332 |
283
|
394
|
{ $all->($t, $elt); } | ||||
2333 |
3713
883
|
8363
1204
|
if( @handlers || $all) { $t->{twig_right_after_root}=0; } | ||||
2334 | } | ||||||
2335 | } | ||||||
2336 | |||||||
2337 | # if twig_roots is set for the element then set appropriate handler | ||||||
2338 |
43474
|
53155
|
if( $t->{twig_root_depth} and ($p->depth == $t->{twig_root_depth}) ) | ||||
2339 |
142
|
652
|
{ if( $t->{twig_default_print}) | ||||
2340 | { # select the proper fh (and store the currently selected one) | ||||||
2341 |
79
|
99
|
$t->_set_fh_to_twig_output_fh(); | ||||
2342 |
79
0
|
110
0
|
if( !$p->depth==1) { $t->{twig_right_after_root}=1; } #XX | ||||
2343 |
79
|
282
|
if( $t->{twig_keep_encoding}) | ||||
2344 |
60
|
194
|
{ $p->setHandlers( %twig_handlers_roots_print_original); } | ||||
2345 | else | ||||||
2346 |
19
|
64
|
{ $p->setHandlers( %twig_handlers_roots_print); } | ||||
2347 | } | ||||||
2348 | else | ||||||
2349 |
63
|
210
|
{ $p->setHandlers( %twig_handlers_roots); } | ||||
2350 | } | ||||||
2351 | |||||||
2352 |
43474
2
|
63319
2
|
if( $elt->{'att'}->{'xml:space'} && ( $elt->{'att'}->{'xml:space'} eq 'preserve')) { $t->{twig_preserve_space}--; } | ||||
2353 | |||||||
2354 |
43474
43474
|
23024
35381
|
pop @{$t->{_twig_context_stack}}; | ||||
2355 |
43474
|
101240
|
return; | ||||
2356 | } | ||||||
2357 | |||||||
2358 | sub _trigger_tdh | ||||||
2359 |
4
|
4
|
{ my( $t)= @_; | ||||
2360 | |||||||
2361 |
4
4
|
0
10
|
if( @{$t->{twig_handlers_to_trigger}}) | ||||
2362 |
4
78
4
|
1
78
9
|
{ my @handlers_to_trigger_now= sort { $a->[0]->cmp( $b->[0]) } @{$t->{twig_handlers_to_trigger}}; | ||||
2363 |
4
|
5
|
foreach my $elt_handlers (@handlers_to_trigger_now) | ||||
2364 |
41
|
25
|
{ my( $handled_elt, $handlers_to_trigger)= @$elt_handlers; | ||||
2365 |
41
|
26
|
foreach my $handler ( @$handlers_to_trigger) | ||||
2366 |
41
41
|
24
38
|
{ local $_= $handled_elt; $handler->($t, $handled_elt) || last; } | ||||
2367 | } | ||||||
2368 | } | ||||||
2369 |
4
|
4
|
return; | ||||
2370 | } | ||||||
2371 | |||||||
2372 | # return the list of handler that can be activated for an element | ||||||
2373 | # (either of CODE ref's or 1's for twig_roots) | ||||||
2374 | |||||||
2375 | sub _handler | ||||||
2376 |
4645
|
3760
|
{ my( $t, $handlers, $gi)= @_; | ||||
2377 | |||||||
2378 |
4645
|
3275
|
my @found_handlers=(); | ||||
2379 |
4645
|
2444
|
my $found_handler; | ||||
2380 | |||||||
2381 |
4645
1140
9290
|
8330
1769
10486
|
foreach my $handler ( map { @$_ } grep { $_ } $handlers->{xpath_handler}->{$gi}, $handlers->{xpath_handler}->{'*'}) | ||||
2382 |
1493
|
1094
|
{ my $trigger= $handler->{trigger}; | ||||
2383 |
1493
|
26337
|
if( my $found_path= $trigger->( $t->{_twig_context_stack})) | ||||
2384 |
992
|
815
|
{ my $found_handler= $handler->{handler}; | ||||
2385 |
992
|
1436
|
push @found_handlers, $found_handler; | ||||
2386 | } | ||||||
2387 | } | ||||||
2388 | |||||||
2389 | # if no handler found call default handler if defined | ||||||
2390 |
4645
|
11566
|
if( !@found_handlers && defined $handlers->{handlers}->{$DEFAULT}) | ||||
2391 |
41
|
49
|
{ push @found_handlers, $handlers->{handlers}->{$DEFAULT}; } | ||||
2392 | |||||||
2393 |
4645
|
7341
|
if( @found_handlers and $t->{twig_do_not_chain_handlers}) | ||||
2394 |
1
|
1
|
{ @found_handlers= ($found_handlers[0]); } | ||||
2395 | |||||||
2396 |
4645
|
5666
|
return @found_handlers; # empty if no handler found | ||||
2397 | |||||||
2398 | } | ||||||
2399 | |||||||
2400 | |||||||
2401 | sub _replace_prefix | ||||||
2402 |
234
|
169
|
{ my( $t, $name)= @_; | ||||
2403 |
234
|
201
|
my $p= $t->parser; | ||||
2404 |
234
|
359
|
my $uri= $p->namespace( $name); | ||||
2405 | # try to get the namespace from default if none is found (for attributes) | ||||||
2406 | # this should probably be an option | ||||||
2407 |
234
98
|
1312
135
|
if( !$uri and( $name!~/^xml/)) { $uri= $p->expand_ns_prefix( '#default'); } | ||||
2408 |
234
|
508
|
if( $uri) | ||||
2409 |
112
|
217
|
{ if (my $mapped_prefix= $t->{twig_map_xmlns}->{$uri} || $DEFAULT_URI2NS{$uri}) | ||||
2410 |
105
|
166
|
{ return "$mapped_prefix:$name"; } | ||||
2411 | else | ||||||
2412 |
7
|
10
|
{ my $prefix= _a_proper_ns_prefix( $p, $uri); | ||||
2413 |
7
1
|
13
1
|
if( $prefix eq '#default') { $prefix=''; } | ||||
2414 |
7
|
16
|
return $prefix ? "$prefix:$name" : $name; | ||||
2415 | } | ||||||
2416 | } | ||||||
2417 | else | ||||||
2418 |
122
|
231
|
{ return $name; } | ||||
2419 | } | ||||||
2420 | |||||||
2421 | |||||||
2422 | sub _twig_char | ||||||
2423 | { # warn " in _twig_char...\n"; # DEBUG handler | ||||||
2424 | |||||||
2425 |
55618
|
44898
|
my ($p, $string)= @_; | ||||
2426 |
55618
|
41272
|
my $t=$p->{twig}; | ||||
2427 | |||||||
2428 |
55618
|
62518
|
if( $t->{twig_keep_encoding}) | ||||
2429 |
7407
|
5683
|
{ if( !$t->{twig_in_cdata}) | ||||
2430 |
1309
|
1913
|
{ $string= $p->original_string(); } | ||||
2431 | else | ||||||
2432 | { | ||||||
2433 |
187
187
187
|
749
203
739
|
use bytes; # > perl 5.5 | ||||
2434 |
6098
|
4486
|
if( length( $string) < 1024) | ||||
2435 |
6058
|
6846
|
{ $string= $p->original_string(); } | ||||
2436 | else | ||||||
2437 | { #warn "dodgy case"; | ||||||
2438 | # TODO original_string does not hold the entire string, but $string is wrong | ||||||
2439 | # I believe due to a bug in XML::Parser | ||||||
2440 | # for now, we use the original string, even if it means that it's been converted to utf8 | ||||||
2441 | } | ||||||
2442 | } | ||||||
2443 | } | ||||||
2444 | |||||||
2445 |
55618
2
|
72011
4
|
if( $t->{twig_input_filter}) { $string= $t->{twig_input_filter}->( $string); } | ||||
2446 |
55618
2
|
55632
3
|
if( $t->{twig_char_handler}) { $string= $t->{twig_char_handler}->( $string); } | ||||
2447 | |||||||
2448 |
55618
|
36156
|
my $elt= $t->{twig_current}; | ||||
2449 | |||||||
2450 |
55618
|
70396
|
if( $t->{twig_in_cdata}) | ||||
2451 | { # text is the continuation of a previously created cdata | ||||||
2452 |
6156
|
5153
|
$elt->{cdata}.= $t->{twig_stored_spaces} . $string; | ||||
2453 | } | ||||||
2454 | elsif( $t->{twig_in_pcdata}) | ||||||
2455 | { # text is the continuation of a previously created pcdata | ||||||
2456 |
314
|
370
|
if( $t->{extra_data}) | ||||
2457 |
75
|
139
|
{ $elt->_push_extra_data_in_pcdata( $t->{extra_data}, length( $elt->{pcdata})); | ||||
2458 |
75
|
70
|
$t->{extra_data}=''; | ||||
2459 | } | ||||||
2460 |
314
|
266
|
$elt->{pcdata}.= $string; | ||||
2461 | } | ||||||
2462 | else | ||||||
2463 | { | ||||||
2464 | # text is just space, which might be discarded later | ||||||
2465 |
49148
|
80653
|
if( $string=~/\A\s*\Z/s) | ||||
2466 | { | ||||||
2467 |
10362
|
9799
|
if( $t->{extra_data}) | ||||
2468 | { # we got extra data (comment, pi), lets add the spaces to it | ||||||
2469 |
34
|
35
|
$t->{extra_data} .= $string; | ||||
2470 | } | ||||||
2471 | else | ||||||
2472 | { # no extra data, just store the spaces | ||||||
2473 |
10328
|
11534
|
$t->{twig_stored_spaces}.= $string; | ||||
2474 | } | ||||||
2475 | } | ||||||
2476 | else | ||||||
2477 |
38786
|
52616
|
{ my $new_elt= _insert_pcdata( $t, $t->{twig_stored_spaces}.$string); | ||||
2478 |
38786
|
36402
|
delete $elt->{'twig_current'}; | ||||
2479 |
38786
|
27994
|
$new_elt->{'twig_current'}=1; | ||||
2480 |
38786
|
24620
|
$t->{twig_current}= $new_elt; | ||||
2481 |
38786
|
25802
|
$t->{twig_in_pcdata}=1; | ||||
2482 |
38786
|
51802
|
if( $t->{extra_data}) | ||||
2483 |
23
|
36
|
{ $new_elt->_push_extra_data_in_pcdata( $t->{extra_data}, 0); | ||||
2484 |
23
|
24
|
$t->{extra_data}=''; | ||||
2485 | } | ||||||
2486 | } | ||||||
2487 | } | ||||||
2488 |
55618
|
126368
|
return; | ||||
2489 | } | ||||||
2490 | |||||||
2491 | sub _twig_cdatastart | ||||||
2492 | { # warn " in _twig_cdatastart...\n"; # DEBUG handler | ||||||
2493 | |||||||
2494 |
103
|
74
|
my $p= shift; | ||||
2495 |
103
|
88
|
my $t=$p->{twig}; | ||||
2496 | |||||||
2497 |
103
|
90
|
$t->{twig_in_cdata}=1; | ||||
2498 |
103
|
147
|
my $cdata= $t->{twig_elt_class}->new( $CDATA); | ||||
2499 |
103
|
86
|
my $twig_current= $t->{twig_current}; | ||||
2500 | |||||||
2501 |
103
|
113
|
if( $t->{twig_in_pcdata}) | ||||
2502 | { # create the node as a sibling of the PCDATA | ||||||
2503 |
20
20
19
|
20
28
31
|
$cdata->{prev_sibling}=$twig_current; if( $XML::Twig::weakrefs) { weaken( $cdata->{prev_sibling});} ; | ||||
2504 |
20
|
18
|
$twig_current->{next_sibling}= $cdata; | ||||
2505 |
20
|
24
|
my $parent= $twig_current->{parent}; | ||||
2506 |
20
20
19
|
13
25
27
|
$cdata->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $cdata->{parent});} ; | ||||
2507 |
20
20
20
19
|
21
17
24
24
|
$parent->{empty}=0; $parent->{last_child}=$cdata; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; | ||||
2508 |
20
|
21
|
$t->{twig_in_pcdata}=0; | ||||
2509 | } | ||||||
2510 | else | ||||||
2511 | { # we have to create a PCDATA element if we need to store spaces | ||||||
2512 |
83
|
132
|
if( $t->_space_policy($XML::Twig::index2gi[$twig_current->{'gi'}]) && $t->{twig_stored_spaces}) | ||||
2513 |
5
|
6
|
{ _insert_pcdata( $t, $t->{twig_stored_spaces}); } | ||||
2514 |
83
|
76
|
$t->{twig_stored_spaces}=''; | ||||
2515 | |||||||
2516 | # create the node as a child of the current element | ||||||
2517 |
83
83
83
|
69
94
121
|
$cdata->{parent}=$twig_current; if( $XML::Twig::weakrefs) { weaken( $cdata->{parent});} ; | ||||
2518 |
83
|
103
|
if( my $prev_sibling= $twig_current->{last_child}) | ||||
2519 |
8
8
8
|
9
12
9
|
{ $cdata->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $cdata->{prev_sibling});} ; | ||||
2520 |
8
|
8
|
$prev_sibling->{next_sibling}= $cdata; | ||||
2521 | } | ||||||
2522 | else | ||||||
2523 |
75
|
73
|
{ $twig_current->{first_child}= $cdata; } | ||||
2524 |
83
83
83
83
|
56
74
96
114
|
$twig_current->{empty}=0; $twig_current->{last_child}=$cdata; if( $XML::Twig::weakrefs) { weaken( $twig_current->{last_child});} ; | ||||
2525 | |||||||
2526 | } | ||||||
2527 | |||||||
2528 |
103
|
118
|
delete $twig_current->{'twig_current'}; | ||||
2529 |
103
|
68
|
$t->{twig_current}= $cdata; | ||||
2530 |
103
|
74
|
$cdata->{'twig_current'}=1; | ||||
2531 |
103
4
4
|
131
6
3
|
if( $t->{extra_data}) { $cdata->set_extra_data( $t->{extra_data}); $t->{extra_data}='' }; | ||||
2532 |
103
|
429
|
return; | ||||
2533 | } | ||||||
2534 | |||||||
2535 | sub _twig_cdataend | ||||||
2536 | { # warn " in _twig_cdataend...\n"; # DEBUG handler | ||||||
2537 | |||||||
2538 |
103
|
75
|
my $p= shift; | ||||
2539 |
103
|
74
|
my $t=$p->{twig}; | ||||
2540 | |||||||
2541 |
103
|
83
|
$t->{twig_in_cdata}=0; | ||||
2542 | |||||||
2543 |
103
|
71
|
my $elt= $t->{twig_current}; | ||||
2544 |
103
|
92
|
delete $elt->{'twig_current'}; | ||||
2545 |
103
|
94
|
my $cdata= $elt->{cdata}; | ||||
2546 |
103
|
113
|
$elt->_set_cdata( $cdata); | ||||
2547 | |||||||
2548 |
103
103
|
65
199
|
push @{$t->{_twig_context_stack}}, { $ST_TAG => $CDATA }; | ||||
2549 | |||||||
2550 |
103
|
151
|
if( $t->{twig_handlers}) | ||||
2551 | { # look for handlers | ||||||
2552 |
19
|
28
|
my @handlers= _handler( $t, $t->{twig_handlers}, $CDATA); | ||||
2553 |
19
|
15
|
local $_= $elt; # so we can use $_ in the handlers | ||||
2554 |
19
4
|
33
6
|
foreach my $handler ( @handlers) { $handler->($t, $elt) || last; } | ||||
2555 | } | ||||||
2556 | |||||||
2557 |
103
103
|
52
96
|
pop @{$t->{_twig_context_stack}}; | ||||
2558 | |||||||
2559 |
103
|
130
|
$elt= $elt->{parent}; | ||||
2560 |
103
|
82
|
$t->{twig_current}= $elt; | ||||
2561 |
103
|
82
|
$elt->{'twig_current'}=1; | ||||
2562 | |||||||
2563 |
103
|
98
|
$t->{twig_long_cdata}=0; | ||||
2564 |
103
|
197
|
return; | ||||
2565 | } | ||||||
2566 | |||||||
2567 | sub _pi_elt_handlers | ||||||
2568 |
1525
|
1072
|
{ my( $t, $pi)= @_; | ||||
2569 |
1525
|
3177
|
my $pi_handlers= $t->{twig_handlers}->{pi_handlers} || return; | ||||
2570 |
66
|
101
|
foreach my $handler ( $pi_handlers->{$pi->{target}}, $pi_handlers->{''}) | ||||
2571 |
132
66
66
|
239
47
101
|
{ if( $handler) { local $_= $pi; $handler->( $t, $pi) || last; } } | ||||
2572 | } | ||||||
2573 | |||||||
2574 | sub _pi_text_handler | ||||||
2575 |
1506
|
1123
|
{ my( $t, $target, $data)= @_; | ||||
2576 |
1506
|
2290
|
if( my $handler= $t->{twig_handlers}->{pi_handlers}->{$target}) | ||||
2577 |
1
|
3
|
{ return $handler->( $t, $target, $data); } | ||||
2578 |
1505
|
1620
|
if( my $handler= $t->{twig_handlers}->{pi_handlers}->{''}) | ||||
2579 |
1
|
3
|
{ return $handler->( $t, $target, $data); } | ||||
2580 |
1504
|
4942
|
return defined( $data) && $data ne '' ? "<?$target $data?>" : "<?$target?>" ; | ||||
2581 | } | ||||||
2582 | |||||||
2583 | sub _comment_elt_handler | ||||||
2584 |
1755
|
1125
|
{ my( $t, $comment)= @_; | ||||
2585 |
1755
|
4068
|
if( my $handler= $t->{twig_handlers}->{handlers}->{$COMMENT}) | ||||
2586 |
6
6
|
5
10
|
{ local $_= $comment; $handler->($t, $comment); } | ||||
2587 | } | ||||||
2588 | |||||||
2589 | sub _comment_text_handler | ||||||
2590 |
1892
|
1269
|
{ my( $t, $comment)= @_; | ||||
2591 |
1892
|
3046
|
if( my $handler= $t->{twig_handlers}->{handlers}->{$COMMENT}) | ||||
2592 |
4
|
10
|
{ $comment= $handler->($t, $comment); | ||||
2593 |
4
2
|
22
3
|
if( !defined $comment || $comment eq '') { return ''; } | ||||
2594 | } | ||||||
2595 |
1890
|
2999
|
return "<!--$comment-->"; | ||||
2596 | } | ||||||
2597 | |||||||
2598 | |||||||
2599 | |||||||
2600 | sub _twig_comment | ||||||
2601 | { # warn " in _twig_comment...\n"; # DEBUG handler | ||||||
2602 | |||||||
2603 |
3653
|
16883
|
my( $p, $comment_text)= @_; | ||||
2604 |
3653
|
2788
|
my $t=$p->{twig}; | ||||
2605 | |||||||
2606 |
3653
1751
|
4334
2595
|
if( $t->{twig_keep_encoding}) { $comment_text= substr( $p->original_string(), 4, -3); } | ||||
2607 | |||||||
2608 |
3653
|
12427
|
$t->_twig_pi_comment( $p, $COMMENT, $t->{twig_keep_comments}, $t->{twig_process_comments}, | ||||
2609 | '_set_comment', '_comment_elt_handler', '_comment_text_handler', $comment_text | ||||||
2610 | ); | ||||||
2611 |
3653
|
9919
|
return; | ||||
2612 | } | ||||||
2613 | |||||||
2614 | sub _twig_pi | ||||||
2615 | { # warn " in _twig_pi...\n"; # DEBUG handler | ||||||
2616 | |||||||
2617 |
3038
|
12526
|
my( $p, $target, $data)= @_; | ||||
2618 |
3038
|
2430
|
my $t=$p->{twig}; | ||||
2619 | |||||||
2620 |
3038
|
3539
|
if( $t->{twig_keep_encoding}) | ||||
2621 |
1518
|
2351
|
{ my $pi_text= substr( $p->original_string(), 2, -2); | ||||
2622 |
1518
|
6841
|
($target, $data)= split( /\s+/, $pi_text, 2); | ||||
2623 | } | ||||||
2624 | |||||||
2625 |
3038
|
5763
|
$t->_twig_pi_comment( $p, $PI, $t->{twig_keep_pi}, $t->{twig_process_pi}, | ||||
2626 | '_set_pi', '_pi_elt_handlers', '_pi_text_handler', $target, $data | ||||||
2627 | ); | ||||||
2628 |
3038
|
8531
|
return; | ||||
2629 | } | ||||||
2630 | |||||||
2631 | sub _twig_pi_comment | ||||||
2632 |
6691
|
12309
|
{ my( $t, $p, $type, $keep, $process, $set, $elt_handler, $text_handler, @parser_args)= @_; | ||||
2633 | |||||||
2634 |
6691
|
7295
|
if( $t->{twig_input_filter}) | ||||
2635 |
2
3
|
2
7
|
{ foreach my $arg (@parser_args) { $arg= $t->{twig_input_filter}->( $arg); } } | ||||
2636 | |||||||
2637 | # if pi/comments are to be kept then we piggyback them to the current element | ||||||
2638 |
6691
|
7645
|
if( $keep) | ||||
2639 | { # first add spaces | ||||||
2640 |
3398
|
3854
|
if( $t->{twig_stored_spaces}) | ||||
2641 |
22
|
26
|
{ $t->{extra_data}.= $t->{twig_stored_spaces}; | ||||
2642 |
22
|
23
|
$t->{twig_stored_spaces}= ''; | ||||
2643 | } | ||||||
2644 | |||||||
2645 |
3398
|
4810
|
my $extra_data= $t->$text_handler( @parser_args); | ||||
2646 |
3398
|
4849
|
$t->{extra_data}.= $extra_data; | ||||
2647 | |||||||
2648 | } | ||||||
2649 | elsif( $process) | ||||||
2650 | { | ||||||
2651 |
3280
|
2267
|
my $twig_current= $t->{twig_current}; # defined unless we are outside of the root | ||||
2652 | |||||||
2653 |
3280
|
4861
|
my $elt= $t->{twig_elt_class}->new( $type); | ||||
2654 |
3280
|
4396
|
$elt->$set( @parser_args); | ||||
2655 |
3280
|
3887
|
if( $t->{extra_data}) | ||||
2656 |
676
|
755
|
{ $elt->set_extra_data( $t->{extra_data}); | ||||
2657 |
676
|
458
|
$t->{extra_data}=''; | ||||
2658 | } | ||||||
2659 | |||||||
2660 |
3280
|
2900
|
unless( $t->root) | ||||
2661 |
1451
|
1278
|
{ $t->_add_cpi_outside_of_root( leading_cpi => $elt); | ||||
2662 | } | ||||||
2663 | elsif( $t->{twig_in_pcdata}) | ||||||
2664 | { # create the node as a sibling of the PCDATA | ||||||
2665 |
7
|
13
|
$elt->paste_after( $twig_current); | ||||
2666 |
7
|
4
|
$t->{twig_in_pcdata}=0; | ||||
2667 | } | ||||||
2668 | elsif( $twig_current) | ||||||
2669 | { # we have to create a PCDATA element if we need to store spaces | ||||||
2670 |
376
|
572
|
if( $t->_space_policy($XML::Twig::index2gi[$twig_current->{'gi'}]) && $t->{twig_stored_spaces}) | ||||
2671 |
1
|
2
|
{ _insert_pcdata( $t, $t->{twig_stored_spaces}); } | ||||
2672 |
376
|
363
|
$t->{twig_stored_spaces}=''; | ||||
2673 | # create the node as a child of the current element | ||||||
2674 |
376
|
419
|
$elt->paste_last_child( $twig_current); | ||||
2675 | } | ||||||
2676 | else | ||||||
2677 |
1446
|
1437
|
{ $t->_add_cpi_outside_of_root( trailing_cpi => $elt); } | ||||
2678 | |||||||
2679 |
3280
|
3052
|
if( $twig_current) | ||||
2680 |
383
|
396
|
{ delete $twig_current->{'twig_current'}; | ||||
2681 |
383
|
256
|
my $parent= $elt->{parent}; | ||||
2682 |
383
|
271
|
$t->{twig_current}= $parent; | ||||
2683 |
383
|
330
|
$parent->{'twig_current'}=1; | ||||
2684 | } | ||||||
2685 | |||||||
2686 |
3280
|
4082
|
$t->$elt_handler( $elt); | ||||
2687 | } | ||||||
2688 | |||||||
2689 | } | ||||||
2690 | |||||||
2691 | |||||||
2692 | # add a comment or pi before the first element | ||||||
2693 | sub _add_cpi_outside_of_root | ||||||
2694 |
2902
|
2106
|
{ my($t, $type, $elt)= @_; # $type is 'leading_cpi' or 'trailing_cpi' | ||||
2695 |
2902
|
4782
|
$t->{$type} ||= $t->{twig_elt_class}->new( '#CPI'); | ||||
2696 | # create the node as a child of the current element | ||||||
2697 |
2902
|
3299
|
$elt->paste_last_child( $t->{$type}); | ||||
2698 |
2902
|
1743
|
return $t; | ||||
2699 | } | ||||||
2700 | |||||||
2701 | sub _twig_final | ||||||
2702 | { # warn " in _twig_final...\n"; # DEBUG handler | ||||||
2703 | |||||||
2704 |
3084
|
21097
|
my $p= shift; | ||||
2705 |
3084
|
9197
|
my $t= $p->isa( 'XML::Twig') ? $p : $p->{twig}; | ||||
2706 | |||||||
2707 | # store trailing data | ||||||
2708 |
3084
579
579
|
3966
983
464
|
if( $t->{extra_data}) { $t->{trailing_cpi_text} = $t->{extra_data}; $t->{extra_data}=''; } | ||||
2709 |
3084
|
8195
|
$t->{trailing_spaces}= $t->{twig_stored_spaces} || ''; | ||||
2710 |
3084
3084
|
2190
3749
|
my $s= $t->{twig_stored_spaces}; $s=~s{\n}{\\n}g; | ||||
2711 |
3084
287
|
3539
346
|
if( $t->{twig_stored_spaces}) { my $s= $t->{twig_stored_spaces}; } | ||||
2712 | |||||||
2713 | # restore the selected filehandle if needed | ||||||
2714 |
3084
|
3535
|
$t->_set_fh_to_selected_fh(); | ||||
2715 | |||||||
2716 |
3084
|
3741
|
$t->_trigger_tdh if( $t->{twig_tdh}); | ||||
2717 | |||||||
2718 |
3084
|
3262
|
select $t->{twig_original_selected_fh} if($t->{twig_original_selected_fh}); # probably dodgy | ||||
2719 | |||||||
2720 |
3084
|
3869
|
if( exists $t->{twig_autoflush_data}) | ||||
2721 |
36
|
39
|
{ my @args; | ||||
2722 |
36
|
78
|
push @args, $t->{twig_autoflush_data}->{fh} if( $t->{twig_autoflush_data}->{fh}); | ||||
2723 |
36
1
|
73
2
|
push @args, @{$t->{twig_autoflush_data}->{args}} if( $t->{twig_autoflush_data}->{args}); | ||||
2724 |
36
|
59
|
$t->flush( @args); | ||||
2725 |
36
|
59
|
delete $t->{twig_autoflush_data}; | ||||
2726 |
36
|
76
|
$t->root->delete if $t->root; | ||||
2727 | } | ||||||
2728 | |||||||
2729 | # tries to clean-up (probably not very well at the moment) | ||||||
2730 | #undef $p->{twig}; | ||||||
2731 |
3084
|
3046
|
undef $t->{twig_parser}; | ||||
2732 |
3084
|
2972
|
delete $t->{twig_parsing}; | ||||
2733 |
3084
3084
|
2263
5072
|
@{$t}{ qw( twig_parser twig_parsing _twig_context_stack twig_current) }=(); | ||||
2734 | |||||||
2735 |
3084
|
5042
|
return $t; | ||||
2736 | } | ||||||
2737 | |||||||
2738 | sub _insert_pcdata | ||||||
2739 |
38905
|
28236
|
{ my( $t, $string)= @_; | ||||
2740 | # create a new PCDATA element | ||||||
2741 |
38905
|
25050
|
my $parent= $t->{twig_current}; # always defined | ||||
2742 |
38905
|
19729
|
my $elt; | ||||
2743 |
38905
|
38709
|
if( exists $t->{twig_alt_elt_class}) | ||||
2744 |
162
|
235
|
{ $elt= $t->{twig_elt_class}->new( $PCDATA); | ||||
2745 |
162
|
157
|
$elt->_set_pcdata( $string); | ||||
2746 | } | ||||||
2747 | else | ||||||
2748 |
38743
|
109928
|
{ $elt= bless( { gi => $XML::Twig::gi2index{$PCDATA}, pcdata => $string }, 'XML::Twig::Elt'); } | ||||
2749 | |||||||
2750 |
38905
|
29744
|
my $prev_sibling= $parent->{last_child}; | ||||
2751 |
38905
|
32449
|
if( $prev_sibling) | ||||
2752 |
191
|
182
|
{ $prev_sibling->{next_sibling}= $elt; | ||||
2753 |
191
191
185
|
154
244
283
|
$elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; | ||||
2754 | } | ||||||
2755 | else | ||||||
2756 |
38714
|
45673
|
{ $parent->{first_child}= $elt; } | ||||
2757 | |||||||
2758 |
38905
38905
20880
|
29084
39477
22940
|
$elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; | ||||
2759 |
38905
38905
38905
20880
|
29307
42881
43015
21995
|
$parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; | ||||
2760 |
38905
|
32758
|
$t->{twig_stored_spaces}=''; | ||||
2761 |
38905
|
40251
|
return $elt; | ||||
2762 | } | ||||||
2763 | |||||||
2764 | sub _space_policy | ||||||
2765 |
1053
|
939
|
{ my( $t, $gi)= @_; | ||||
2766 |
1053
|
654
|
my $policy; | ||||
2767 |
1053
|
1408
|
$policy=0 if( $t->{twig_discard_spaces}); | ||||
2768 |
1053
|
1280
|
$policy=1 if( $t->{twig_keep_spaces}); | ||||
2769 |
1053
|
1655
|
$policy=1 if( $t->{twig_keep_spaces_in} | ||||
2770 | && $t->{twig_keep_spaces_in}->{$gi}); | ||||||
2771 |
1053
|
1528
|
$policy=0 if( $t->{twig_discard_spaces_in} | ||||
2772 | && $t->{twig_discard_spaces_in}->{$gi}); | ||||||
2773 |
1053
|
1849
|
return $policy; | ||||
2774 | } | ||||||
2775 | |||||||
2776 | |||||||
2777 | sub _twig_entity | ||||||
2778 | { # warn " in _twig_entity...\n"; # DEBUG handler | ||||||
2779 |
76
|
94
|
my( $p, $name, $val, $sysid, $pubid, $ndata, $param)= @_; | ||||
2780 |
76
|
79
|
my $t=$p->{twig}; | ||||
2781 | |||||||
2782 | #{ no warnings; my $base= $p->base; warn "_twig_entity called: expand: '$t->{twig_expand_external_ents}', base: '$base', name: '$name', val: '$val', sysid: '$sysid', pubid: '$pubid', ndata: '$ndata', param: '$param'\n";} | ||||||
2783 | |||||||
2784 |
76
|
55
|
my $missing_entity=0; | ||||
2785 | |||||||
2786 |
76
|
125
|
if( $sysid) | ||||
2787 |
38
|
49
|
{ if($ndata) | ||||
2788 |
15
14
|
35
16
|
{ if( ! -f _based_filename( $sysid, $p->base)) { $missing_entity= 1; } | ||||
2789 | } | ||||||
2790 | else | ||||||
2791 |
23
|
41
|
{ if( $t->{twig_expand_external_ents}) | ||||
2792 |
5
5
|
9
16
|
{ $val= eval { _slurp_uri( $sysid, $p->base) }; | ||||
2793 |
5
|
22
|
if( ! defined $val) | ||||
2794 |
3
|
10
|
{ if( $t->{twig_extern_ent_nofail}) | ||||
2795 |
2
|
3
|
{ $missing_entity= 1; } | ||||
2796 | else | ||||||
2797 |
1
|
6
|
{ _croak( "cannot load SYSTEM entity '$name' from '$sysid': $@", 3); } | ||||
2798 | } | ||||||
2799 | } | ||||||
2800 | } | ||||||
2801 | } | ||||||
2802 | |||||||
2803 |
75
|
200
|
my $ent=XML::Twig::Entity->new( $name, $val, $sysid, $pubid, $ndata, $param); | ||||
2804 |
75
16
|
103
34
|
if( $missing_entity) { $t->{twig_missing_system_entities}->{$name}= $ent; } | ||||
2805 | |||||||
2806 |
75
|
107
|
my $entity_list= $t->entity_list; | ||||
2807 |
75
75
|
131
107
|
if( $entity_list) { $entity_list->add( $ent); } | ||||
2808 | |||||||
2809 |
75
|
110
|
if( $parser_version > 2.27) | ||||
2810 | { # this is really ugly, but with some versions of XML::Parser the value | ||||||
2811 | # of the entity is not properly returned by the default handler | ||||||
2812 |
75
|
105
|
my $ent_decl= $ent->text; | ||||
2813 |
75
|
114
|
if( $t->{twig_keep_encoding}) | ||||
2814 |
3
|
18
|
{ if( defined $ent->{val} && ($ent_decl !~ /["']/)) | ||||
2815 |
0
|
0
|
{ my $val= $ent->{val}; | ||||
2816 |
0
|
0
|
$ent_decl .= $val =~ /"/ ? qq{'$val' } : qq{"$val" }; | ||||
2817 | } | ||||||
2818 | # for my solaris box (perl 5.6.1, XML::Parser 2.31, expat?) | ||||||
2819 |
3
0
|
39
0
|
$t->{twig_doctype}->{internal}=~ s{<!ENTITY\s+$name\s+$}{substr( $ent_decl, 0, -1)}e; | ||||
2820 | } | ||||||
2821 |
75
|
1014
|
$t->{twig_doctype}->{internal} .= $ent_decl | ||||
2822 | unless( $t->{twig_doctype}->{internal}=~ m{<!ENTITY\s+$name\s+}); | ||||||
2823 | } | ||||||
2824 | |||||||
2825 |
75
|
244
|
return; | ||||
2826 | } | ||||||
2827 | |||||||
2828 | |||||||
2829 | sub _twig_extern_ent | ||||||
2830 | { # warn " in _twig_extern_ent...I (", $_[0]->original_string, ")\n"; # DEBUG handler | ||||||
2831 |
16
|
22
|
my( $p, $base, $sysid, $pubid)= @_; | ||||
2832 |
16
|
19
|
my $t= $p->{twig}; | ||||
2833 |
16
|
27
|
if( $t->{twig_no_expand}) | ||||
2834 |
8
|
16
|
{ my $ent_name= $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string; | ||||
2835 |
8
|
33
|
_twig_insert_ent( $t, $ent_name); | ||||
2836 |
8
|
102
|
return ''; | ||||
2837 | } | ||||||
2838 |
8
8
|
12
22
|
my $ent_content= eval { $t->{twig_ext_ent_handler}->( $p, $base, $sysid) }; | ||||
2839 |
8
|
15939
|
if( ! defined $ent_content) | ||||
2840 | { | ||||||
2841 |
2
|
6
|
my $ent_name = $p->recognized_string; | ||||
2842 |
2
|
12
|
my $file = _based_filename( $sysid, $base); | ||||
2843 |
2
|
6
|
my $error_message= "cannot expand $ent_name - cannot load '$file'"; | ||||
2844 |
2
1
|
5
14
|
if( $t->{twig_extern_ent_nofail}) { return "<!-- $error_message -->"; } | ||||
2845 |
1
|
1
|
else { _croak( $error_message); } | ||||
2846 | } | ||||||
2847 |
6
|
104
|
return $ent_content; | ||||
2848 | } | ||||||
2849 | |||||||
2850 | # I use this so I can change the $Carp::CarpLevel (which determines how many call frames to skip when reporting an error) | ||||||
2851 | sub _croak | ||||||
2852 |
50
|
70
|
{ my( $message, $level)= @_; | ||||
2853 |
50
|
145
|
$Carp::CarpLevel= $level || 0; | ||||
2854 |
50
|
5819
|
croak $message; | ||||
2855 | } | ||||||
2856 | |||||||
2857 | sub _twig_xmldecl | ||||||
2858 | { # warn " in _twig_xmldecl...\n"; # DEBUG handler | ||||||
2859 | |||||||
2860 |
123
|
31700
|
my $p= shift; | ||||
2861 |
123
|
139
|
my $t=$p->{twig}; | ||||
2862 |
123
|
395
|
$t->{twig_xmldecl}||={}; # could have been set by set_output_encoding | ||||
2863 |
123
|
190
|
$t->{twig_xmldecl}->{version}= shift; | ||||
2864 |
123
|
138
|
$t->{twig_xmldecl}->{encoding}= shift; | ||||
2865 |
123
|
128
|
$t->{twig_xmldecl}->{standalone}= shift; | ||||
2866 |
123
|
455
|
return; | ||||
2867 | } | ||||||
2868 | |||||||
2869 | sub _twig_doctype | ||||||
2870 | { # warn " in _twig_doctype...\n"; # DEBUG handler | ||||||
2871 |
111
|
22616
|
my( $p, $name, $sysid, $pub, $internal)= @_; | ||||
2872 |
111
|
134
|
my $t=$p->{twig}; | ||||
2873 |
111
|
385
|
$t->{twig_doctype}||= {}; # create | ||||
2874 |
111
|
203
|
$t->{twig_doctype}->{name}= $name; # always there | ||||
2875 |
111
|
145
|
$t->{twig_doctype}->{sysid}= $sysid; # | ||||
2876 |
111
|
156
|
$t->{twig_doctype}->{pub}= $pub; # | ||||
2877 | |||||||
2878 | # now let's try to cope with XML::Parser 2.28 and above | ||||||
2879 |
111
|
190
|
if( $parser_version > 2.27) | ||||
2880 |
111
|
328
|
{ @saved_default_handler= $p->setHandlers( Default => \&_twig_store_internal_dtd, | ||||
2881 | Entity => \&_twig_entity, | ||||||
2882 | ); | ||||||
2883 |
111
|
2122
|
$p->setHandlers( DoctypeFin => \&_twig_stop_storing_internal_dtd); | ||||
2884 |
111
|
1014
|
$t->{twig_doctype}->{internal}=''; | ||||
2885 | } | ||||||
2886 | else | ||||||
2887 | # for XML::Parser before 2.28 | ||||||
2888 |
0
|
0
|
{ $internal||=''; | ||||
2889 |
0
|
0
|
$internal=~ s{^\s*\[}{}; | ||||
2890 |
0
|
0
|
$internal=~ s{]\s*$}{}; | ||||
2891 |
0
|
0
|
$t->{twig_doctype}->{internal}=$internal; | ||||
2892 | } | ||||||
2893 | |||||||
2894 | # now check if we want to get the DTD info | ||||||
2895 |
111
|
250
|
if( $t->{twig_read_external_dtd} && $sysid) | ||||
2896 | { # let's build a fake document with an internal DTD | ||||||
2897 |
5
|
17
|
my $dtd= "<!DOCTYPE $name [" . _slurp_uri( $sysid) . "]><$name/>"; | ||||
2898 | |||||||
2899 |
5
|
14
|
$t->save_global_state(); # save the globals (they will be reset by the following new) | ||||
2900 |
5
|
27
|
my $t_dtd= XML::Twig->new( load_DTD => 1, ParseParamEnt => 1, error_context => $t->{ErrorContext} || 0); # create a temp twig | ||||
2901 |
5
|
12
|
$t_dtd->parse( $dtd); # parse it | ||||
2902 |
5
|
9
|
$t->{twig_dtd}= $t_dtd->{twig_dtd}; # grab the dtd info | ||||
2903 | #$t->{twig_dtd_is_external}=1; | ||||||
2904 |
5
|
10
|
$t->entity_list->_add_list( $t_dtd->entity_list) if( $t_dtd->entity_list); # grab the entity info | ||||
2905 |
5
|
8
|
$t->restore_global_state(); | ||||
2906 | } | ||||||
2907 |
111
|
427
|
return; | ||||
2908 | } | ||||||
2909 | |||||||
2910 | sub _twig_element | ||||||
2911 | { # warn " in _twig_element...\n"; # DEBUG handler | ||||||
2912 | |||||||
2913 |
45
|
48
|
my( $p, $name, $model)= @_; | ||||
2914 |
45
|
37
|
my $t=$p->{twig}; | ||||
2915 |
45
|
100
|
$t->{twig_dtd}||= {}; # may create the dtd | ||||
2916 |
45
|
90
|
$t->{twig_dtd}->{model}||= {}; # may create the model hash | ||||
2917 |
45
|
89
|
$t->{twig_dtd}->{elt_list}||= []; # ordered list of elements | ||||
2918 |
45
45
|
22
490
|
push @{$t->{twig_dtd}->{elt_list}}, $name; # store the elt | ||||
2919 |
45
|
70
|
$t->{twig_dtd}->{model}->{$name}= $model; # store the model | ||||
2920 |
45
|
582
|
if( ($parser_version > 2.27) && ($t->{twig_doctype}->{internal}=~ m{(^|>)\s*$}) ) | ||||
2921 |
45
|
106
|
{ my $text= $XML::Twig::Elt::keep_encoding ? $p->original_string : $p->recognized_string; | ||||
2922 |
45
|
226
|
unless( $text) | ||||
2923 | { # this version of XML::Parser does not return the text in the *_string method | ||||||
2924 | # we need to rebuild it | ||||||
2925 |
45
|
334
|
$text= "<!ELEMENT $name $model>"; | ||||
2926 | } | ||||||
2927 |
45
|
582
|
$t->{twig_doctype}->{internal} .= $text; | ||||
2928 | } | ||||||
2929 |
45
|
119
|
return; | ||||
2930 | } | ||||||
2931 | |||||||
2932 | sub _twig_attlist | ||||||
2933 | { # warn " in _twig_attlist...\n"; # DEBUG handler | ||||||
2934 | |||||||
2935 |
37
|
39
|
my( $p, $gi, $att, $type, $default, $fixed)= @_; | ||||
2936 | #warn "in attlist: gi: '$gi', att: '$att', type: '$type', default: '$default', fixed: '$fixed'\n"; | ||||||
2937 |
37
|
26
|
my $t=$p->{twig}; | ||||
2938 |
37
|
45
|
$t->{twig_dtd}||= {}; # create dtd if need be | ||||
2939 |
37
|
76
|
$t->{twig_dtd}->{$gi}||= {}; # create elt if need be | ||||
2940 | #$t->{twig_dtd}->{$gi}->{att}||= {}; # create att if need be | ||||||
2941 |
37
|
889
|
if( ($parser_version > 2.27) && ($t->{twig_doctype}->{internal}=~ m{(^|>)\s*$}) ) | ||||
2942 |
37
|
65
|
{ my $text= $XML::Twig::Elt::keep_encoding ? $p->original_string : $p->recognized_string; | ||||
2943 |
37
|
154
|
unless( $text) | ||||
2944 | { # this version of XML::Parser does not return the text in the *_string method | ||||||
2945 | # we need to rebuild it | ||||||
2946 |
37
|
44
|
my $att_decl="$att $type"; | ||||
2947 |
37
|
42
|
$att_decl .= " #FIXED" if( $fixed); | ||||
2948 |
37
|
46
|
$att_decl .= " $default" if( defined $default); | ||||
2949 | # 2 cases: there is already an attlist on that element or not | ||||||
2950 |
37
|
55
|
if( $t->{twig_dtd}->{att}->{$gi}) | ||||
2951 | { # there is already an attlist, add to it | ||||||
2952 |
15
15
|
236
77
|
$t->{twig_doctype}->{internal}=~ s{(<!ATTLIST\s*$gi )(.*?)\n?>} | ||||
2953 | { "$1$2\n" . ' ' x length( $1) . "$att_decl\n>"}es; | ||||||
2954 | } | ||||||
2955 | else | ||||||
2956 | { # create the attlist | ||||||
2957 |
22
|
47
|
$t->{twig_doctype}->{internal}.= "<!ATTLIST $gi $att_decl>" | ||||
2958 | } | ||||||
2959 | } | ||||||
2960 | } | ||||||
2961 |
37
|
73
|
$t->{twig_dtd}->{att}->{$gi}->{$att}= {} ; | ||||
2962 |
37
|
57
|
$t->{twig_dtd}->{att}->{$gi}->{$att}->{type}= $type; | ||||
2963 |
37
|
78
|
$t->{twig_dtd}->{att}->{$gi}->{$att}->{default}= $default if( defined $default); | ||||
2964 |
37
|
57
|
$t->{twig_dtd}->{att}->{$gi}->{$att}->{fixed}= $fixed; | ||||
2965 |
37
|
97
|
return; | ||||
2966 | } | ||||||
2967 | |||||||
2968 | sub _twig_default | ||||||
2969 | { # warn " in _twig_default...\n"; # DEBUG handler | ||||||
2970 | |||||||
2971 |
415
|
4351
|
my( $p, $string)= @_; | ||||
2972 | |||||||
2973 |
415
|
523
|
my $t= $p->{twig}; | ||||
2974 | |||||||
2975 | # we need to process the data in 2 cases: entity, or spaces after the closing tag | ||||||
2976 | |||||||
2977 | # after the closing tag (no twig_current and root has been created) | ||||||
2978 |
415
287
|
2076
340
|
if( ! $t->{twig_current} && $t->{twig_root} && $string=~ m{^\s+$}m) { $t->{twig_stored_spaces} .= $string; } | ||||
2979 | |||||||
2980 | # process only if we have an entity | ||||||
2981 |
415
|
2765
|
if( $string=~ m{^&([^;]*);$}) | ||||
2982 | { # the entity has to be pure pcdata, or we have a problem | ||||||
2983 |
36
|
451
|
if( ($p->original_string=~ m{^<}) && ($p->original_string=~ m{>$}) ) | ||||
2984 | { # string is a tag, entity is in an attribute | ||||||
2985 |
0
|
0
|
$t->{twig_entities_in_attribute}=1 if( $t->{twig_do_not_escape_amp_in_atts}); | ||||
2986 | } | ||||||
2987 | else | ||||||
2988 |
36
|
199
|
{ my $ent; | ||||
2989 |
36
|
39
|
if( $t->{twig_keep_encoding}) | ||||
2990 |
9
|
10
|
{ _twig_char( $p, $string); | ||||
2991 |
9
|
20
|
$ent= substr( $string, 1, -1); | ||||
2992 | } | ||||||
2993 | else | ||||||
2994 |
27
|
34
|
{ $ent= _twig_insert_ent( $t, $string); | ||||
2995 | } | ||||||
2996 | |||||||
2997 |
36
|
130
|
return $ent; | ||||
2998 | } | ||||||
2999 | } | ||||||
3000 | } | ||||||
3001 | |||||||
3002 | sub _twig_insert_ent | ||||||
3003 | { | ||||||
3004 |
35
|
34
|
my( $t, $string)=@_; | ||||
3005 | |||||||
3006 |
35
|
31
|
my $twig_current= $t->{twig_current}; | ||||
3007 | |||||||
3008 |
35
|
53
|
my $ent= $t->{twig_elt_class}->new( $ENT); | ||||
3009 |
35
|
39
|
$ent->{ent}= $string; | ||||
3010 | |||||||
3011 |
35
|
40
|
_add_or_discard_stored_spaces( $t); | ||||
3012 | |||||||
3013 |
35
|
43
|
if( $t->{twig_in_pcdata}) | ||||
3014 | { # create the node as a sibling of the #PCDATA | ||||||
3015 | |||||||
3016 |
16
16
13
|
17
22
27
|
$ent->{prev_sibling}=$twig_current; if( $XML::Twig::weakrefs) { weaken( $ent->{prev_sibling});} ; | ||||
3017 |
16
|
16
|
$twig_current->{next_sibling}= $ent; | ||||
3018 |
16
|
13
|
my $parent= $twig_current->{parent}; | ||||
3019 |
16
16
13
|
14
21
17
|
$ent->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $ent->{parent});} ; | ||||
3020 |
16
16
16
13
|
13
12
25
16
|
$parent->{empty}=0; $parent->{last_child}=$ent; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; | ||||
3021 | # the twig_current is now the parent | ||||||
3022 |
16
|
18
|
delete $twig_current->{'twig_current'}; | ||||
3023 |
16
|
13
|
$t->{twig_current}= $parent; | ||||
3024 | # we left pcdata | ||||||
3025 |
16
|
13
|
$t->{twig_in_pcdata}=0; | ||||
3026 | } | ||||||
3027 | else | ||||||
3028 | { # create the node as a child of the current element | ||||||
3029 |
19
19
15
|
21
26
20
|
$ent->{parent}=$twig_current; if( $XML::Twig::weakrefs) { weaken( $ent->{parent});} ; | ||||
3030 |
19
|
24
|
if( my $prev_sibling= $twig_current->{last_child}) | ||||
3031 |
10
10
8
|
11
15
13
|
{ $ent->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $ent->{prev_sibling});} ; | ||||
3032 |
10
|
10
|
$prev_sibling->{next_sibling}= $ent; | ||||
3033 | } | ||||||
3034 | else | ||||||
3035 |
9
9
|
14
11
|
{ if( $twig_current) { $twig_current->{first_child}= $ent; } } | ||||
3036 |
19
19
19
19
15
|
20
17
22
26
24
|
if( $twig_current) { $twig_current->{empty}=0; $twig_current->{last_child}=$ent; if( $XML::Twig::weakrefs) { weaken( $twig_current->{last_child});} ; } | ||||
3037 | } | ||||||
3038 | |||||||
3039 | # meant to trigger entity handler, does not seem to be activated at this time | ||||||
3040 | #if( my $handler= $t->{twig_handlers}->{gi}->{$ENT}) | ||||||
3041 | # { local $_= $ent; $handler->( $t, $ent); } | ||||||
3042 | |||||||
3043 |
35
|
36
|
return $ent; | ||||
3044 | } | ||||||
3045 | |||||||
3046 | sub parser | ||||||
3047 |
384
|
1
|
486
|
{ return $_[0]->{twig_parser}; } | |||
3048 | |||||||
3049 | # returns the declaration text (or a default one) | ||||||
3050 | sub xmldecl | ||||||
3051 |
2462
|
1
|
1480
|
{ my $t= shift; | |||
3052 |
2462
|
9273
|
return '' unless( $t->{twig_xmldecl} || $t->{output_encoding}); | ||||
3053 |
76
|
70
|
my $decl_string; | ||||
3054 |
76
|
83
|
my $decl= $t->{twig_xmldecl}; | ||||
3055 |
76
|
110
|
if( $decl) | ||||
3056 |
72
|
85
|
{ my $version= $decl->{version}; | ||||
3057 |
72
|
72
|
$decl_string= q{<?xml}; | ||||
3058 |
72
|
145
|
$decl_string .= qq{ version="$version"}; | ||||
3059 | |||||||
3060 | # encoding can either have been set (in $decl->{output_encoding}) | ||||||
3061 | # or come from the document (in $decl->{encoding}) | ||||||
3062 |
72
|
185
|
if( $t->{output_encoding}) | ||||
3063 |
13
|
11
|
{ my $encoding= $t->{output_encoding}; | ||||
3064 |
13
|
18
|
$decl_string .= qq{ encoding="$encoding"}; | ||||
3065 | } | ||||||
3066 | elsif( $decl->{encoding}) | ||||||
3067 |
16
|
13
|
{ my $encoding= $decl->{encoding}; | ||||
3068 |
16
|
24
|
$decl_string .= qq{ encoding="$encoding"}; | ||||
3069 | } | ||||||
3070 | |||||||
3071 |
72
|
123
|
if( defined( $decl->{standalone})) | ||||
3072 |
12
|
17
|
{ $decl_string .= q{ standalone="}; | ||||
3073 |
12
|
28
|
$decl_string .= $decl->{standalone} ? "yes" : "no"; | ||||
3074 |
12
|
15
|
$decl_string .= q{"}; | ||||
3075 | } | ||||||
3076 | |||||||
3077 |
72
|
91
|
$decl_string .= "?>\n"; | ||||
3078 | } | ||||||
3079 | else | ||||||
3080 |
4
|
7
|
{ my $encoding= $t->{output_encoding}; | ||||
3081 |
4
|
11
|
$decl_string= qq{<?xml version="1.0" encoding="$encoding"?>}; | ||||
3082 | } | ||||||
3083 | |||||||
3084 |
76
|
201
|
my $output_filter= XML::Twig::Elt::output_filter(); | ||||
3085 |
76
|
586
|
return $output_filter ? $output_filter->( $decl_string) : $decl_string; | ||||
3086 | } | ||||||
3087 | |||||||
3088 | sub set_doctype | ||||||
3089 |
4
|
1
|
13
|
{ my( $t, $name, $system, $public, $internal)= @_; | |||
3090 |
4
|
11
|
$t->{twig_doctype}= {} unless defined $t->{twig_doctype}; | ||||
3091 |
4
|
4
|
my $doctype= $t->{twig_doctype}; | ||||
3092 |
4
|
9
|
$doctype->{name} = $name if( defined $name); | ||||
3093 |
4
|
10
|
$doctype->{sysid} = $system if( defined $system); | ||||
3094 |
4
|
6
|
$doctype->{pub} = $public if( defined $public); | ||||
3095 |
4
|
11
|
$doctype->{internal} = $internal if( defined $internal); | ||||
3096 | } | ||||||
3097 | |||||||
3098 | sub doctype_name | ||||||
3099 |
4
|
1
|
9
|
{ my $t= shift; | |||
3100 |
4
|
9
|
my $doctype= $t->{twig_doctype} or return ''; | ||||
3101 |
3
|
9
|
return $doctype->{name} || ''; | ||||
3102 | } | ||||||
3103 | |||||||
3104 | sub system_id | ||||||
3105 |
4
|
1
|
4
|
{ my $t= shift; | |||
3106 |
4
|
8
|
my $doctype= $t->{twig_doctype} or return ''; | ||||
3107 |
3
|
9
|
return $doctype->{sysid} || ''; | ||||
3108 | } | ||||||
3109 | |||||||
3110 | sub public_id | ||||||
3111 |
4
|
1
|
6
|
{ my $t= shift; | |||
3112 |
4
|
11
|
my $doctype= $t->{twig_doctype} or return ''; | ||||
3113 |
3
|
11
|
return $doctype->{pub} || ''; | ||||
3114 | } | ||||||
3115 | |||||||
3116 | sub internal_subset | ||||||
3117 |
4
|
1
|
4
|
{ my $t= shift; | |||
3118 |
4
|
7
|
my $doctype= $t->{twig_doctype} or return ''; | ||||
3119 |
3
|
10
|
return $doctype->{internal} || ''; | ||||
3120 | } | ||||||
3121 | |||||||
3122 | # return the dtd object | ||||||
3123 | sub dtd | ||||||
3124 |
7
|
1
|
89
|
{ my $t= shift; | |||
3125 |
7
|
35
|
return $t->{twig_dtd}; | ||||
3126 | } | ||||||
3127 | |||||||
3128 | # return an element model, or the list of element models | ||||||
3129 | sub model | ||||||
3130 |
3
|
1
|
54
|
{ my $t= shift; | |||
3131 |
3
|
5
|
my $elt= shift; | ||||
3132 |
3
|
10
|
return $t->dtd->{model}->{$elt} if( $elt); | ||||
3133 |
1
1
|
2
3
|
return (sort keys %{$t->dtd->{model}}); | ||||
3134 | } | ||||||
3135 | |||||||
3136 | |||||||
3137 | # return the entity_list object | ||||||
3138 | sub entity_list | ||||||
3139 |
165
|
1
|
305
|
{ my $t= shift; | |||
3140 |
165
|
345
|
return $t->{twig_entity_list}; | ||||
3141 | } | ||||||
3142 | |||||||
3143 | # return the list of entity names | ||||||
3144 | sub entity_names | ||||||
3145 |
6
|
1
|
132
|
{ my $t= shift; | |||
3146 |
6
|
12
|
return $t->entity_list->entity_names; | ||||
3147 | } | ||||||
3148 | |||||||
3149 | # return the entity object | ||||||
3150 | sub entity | ||||||
3151 |
14
|
1
|
47
|
{ my $t= shift; | |||
3152 |
14
|
16
|
my $entity_name= shift; | ||||
3153 |
14
|
18
|
return $t->entity_list->ent( $entity_name); | ||||
3154 | } | ||||||
3155 | |||||||
3156 | |||||||
3157 | sub print_prolog | ||||||
3158 |
911
|
1
|
522
|
{ my $t= shift; | |||
3159 |
911
|
5349
|
my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : $t->{twig_output_fh} || select() || \*STDOUT; | ||||
3160 | ## no critic (TestingAndDebugging::ProhibitNoStrict); | ||||||
3161 |
187
187
187
|
688199
216
326971
|
no strict 'refs'; | ||||
3162 |
911
911
|
977
1259
|
print {$fh} $t->prolog( @_); | ||||
3163 | } | ||||||
3164 | |||||||
3165 | sub prolog | ||||||
3166 |
2464
|
1
|
1635
|
{ my $t= shift; | |||
3167 |
2464
4
|
2889
10
|
if( $t->{no_prolog}){ return ''; } | ||||
3168 | |||||||
3169 |
2460
|
5154
|
return $t->{no_prolog} ? '' | ||||
3170 | : defined $t->{no_dtd_output} ? $t->xmldecl | ||||||
3171 | : $t->xmldecl . $t->doctype( @_); | ||||||
3172 | } | ||||||
3173 | |||||||
3174 | sub doctype | ||||||
3175 |
2462
|
1
|
2218
|
{ my $t= shift; | |||
3176 |
2462
|
2371
|
my %args= _normalize_args( @_); | ||||
3177 |
2462
|
4710
|
my $update_dtd = $args{UpdateDTD} || ''; | ||||
3178 |
2462
|
1489
|
my $doctype_text=''; | ||||
3179 | |||||||
3180 |
2462
|
1904
|
my $doctype= $t->{twig_doctype}; | ||||
3181 | |||||||
3182 |
2462
|
2538
|
if( $doctype) | ||||
3183 |
64
|
162
|
{ $doctype_text .= qq{<!DOCTYPE $doctype->{name}} if( $doctype->{name}); | ||||
3184 |
64
|
120
|
$doctype_text .= qq{ PUBLIC "$doctype->{pub}"} if( $doctype->{pub}); | ||||
3185 |
64
|
259
|
$doctype_text .= qq{ SYSTEM} if( $doctype->{sysid} && !$doctype->{pub}); | ||||
3186 |
64
|
145
|
$doctype_text .= qq{ "$doctype->{sysid}"} if( $doctype->{sysid}); | ||||
3187 | } | ||||||
3188 | |||||||
3189 |
2462
|
3656
|
if( $update_dtd) | ||||
3190 |
10
|
32
|
{ if( $doctype) | ||||
3191 |
7
2
|
12
2
|
{ my $internal=$doctype->{internal}; | ||||
3192 | # awful hack, but at least it works a little better that what was there before | ||||||
3193 |
7
|
18
|
if( $internal) | ||||
3194 | { # remove entity declarations (they will be re-generated from the updated entity list) | ||||||
3195 |
5
11
11
11
|
709
36
12
1243
|
$internal=~ s{<! \s* ENTITY \s+ $REG_TAG_NAME \s+ ( ("[^"]*"|'[^']*') \s* | SYSTEM [^>]*) >\s*}{}xg; | ||||
3196 |
5
|
29541
|
$internal=~ s{^\n}{}; | ||||
3197 | } | ||||||
3198 |
7
|
17
|
$internal .= $t->entity_list->text ||'' if( $t->entity_list); | ||||
3199 |
7
4
|
17
13
|
if( $internal) { $doctype_text .= "[\n$internal]>\n"; } | ||||
3200 | } | ||||||
3201 | elsif( !$t->{'twig_dtd'} && keys %{$t->entity_list}) | ||||||
3202 |
2
|
2
|
{ $doctype_text .= "<!DOCTYPE " . $t->root->gi . " [\n" . $t->entity_list->text . "\n]>";;} | ||||
3203 | else | ||||||
3204 |
1
|
2
|
{ $doctype_text= $t->{twig_dtd}; | ||||
3205 |
1
|
2
|
$doctype_text .= $t->dtd_text; | ||||
3206 | } | ||||||
3207 | } | ||||||
3208 | elsif( $doctype) | ||||||
3209 |
57
|
110
|
{ if( my $internal= $doctype->{internal}) | ||||
3210 | { # add opening and closing brackets if not already there | ||||||
3211 | # plus some spaces and newlines for a nice formating | ||||||
3212 | # I test it here because I can't remember which version of | ||||||
3213 | # XML::Parser need it or not, nor guess which one will in the | ||||||
3214 | # future, so this about the best I can do | ||||||
3215 |
25
|
127
|
$internal=~ s{^\s*(\[\s*)?}{ [\n}; | ||||
3216 |
25
|
972
|
$internal=~ s{\s*(\]\s*(>\s*)?)?\s*$}{\n]>\n}; | ||||
3217 |
25
|
37
|
$doctype_text .= $internal; | ||||
3218 | } | ||||||
3219 | } | ||||||
3220 | |||||||
3221 |
2462
|
2217
|
if( $doctype_text) | ||||
3222 | { | ||||||
3223 | # terrible hack, as I can't figure out in which case the darn prolog | ||||||
3224 | # should get an extra > (depends on XML::Parser and expat versions) | ||||||
3225 |
67
|
1607
|
$doctype_text=~ s/(>\s*)*$/>\n/; # if($doctype_text); | ||||
3226 | |||||||
3227 |
67
|
95
|
my $output_filter= XML::Twig::Elt::output_filter(); | ||||
3228 |
67
|
294
|
return $output_filter ? $output_filter->( $doctype_text) : $doctype_text; | ||||
3229 | } | ||||||
3230 | else | ||||||
3231 |
2395
|
6798
|
{ return $doctype_text; } | ||||
3232 | } | ||||||
3233 | |||||||
3234 | sub _leading_cpi | ||||||
3235 |
2463
|
1596
|
{ my $t= shift; | ||||
3236 |
2463
|
6050
|
my $leading_cpi= $t->{leading_cpi} || return ''; | ||||
3237 |
777
|
901
|
return $leading_cpi->sprint( 1); | ||||
3238 | } | ||||||
3239 | |||||||
3240 | sub _trailing_cpi | ||||||
3241 |
2454
|
1567
|
{ my $t= shift; | ||||
3242 |
2454
|
4717
|
my $trailing_cpi= $t->{trailing_cpi} || return ''; | ||||
3243 |
772
|
804
|
return $trailing_cpi->sprint( 1); | ||||
3244 | } | ||||||
3245 | |||||||
3246 | sub _trailing_cpi_text | ||||||
3247 |
2454
|
1473
|
{ my $t= shift; | ||||
3248 |
2454
|
6876
|
return $t->{trailing_cpi_text} || ''; | ||||
3249 | } | ||||||
3250 | |||||||
3251 | sub print_to_file | ||||||
3252 |
3
|
1
|
5
|
{ my( $t, $filename)= (shift, shift); | |||
3253 |
3
|
4
|
my $out_fh; | ||||
3254 | # open( $out_fh, ">$filename") or _croak( "cannot create file $filename: $!"); # < perl 5.8 | ||||||
3255 |
3
|
14
|
my $mode= $t->{twig_keep_encoding} ? '>' : '>:utf8'; # >= perl 5.8 | ||||
3256 |
3
|
79
|
open( $out_fh, $mode, $filename) or _croak( "cannot create file $filename: $!"); # >= perl 5.8 | ||||
3257 |
2
|
6
|
$t->print( $out_fh, @_); | ||||
3258 |
2
|
69
|
close $out_fh; | ||||
3259 |
2
|
15
|
return $t; | ||||
3260 | } | ||||||
3261 | |||||||
3262 | # probably only works on *nix (at least the chmod bit) | ||||||
3263 | # first print to a temporary file, then rename that file to the desired file name, then change permissions | ||||||
3264 | # to the original file permissions (or to the current umask) | ||||||
3265 | sub safe_print_to_file | ||||||
3266 |
2
|
1
|
5
|
{ my( $t, $filename)= (shift, shift); | |||
3267 |
2
|
14
|
my $perm= -f $filename ? (stat $filename)[2] & 07777 : ~umask() ; | ||||
3268 |
2
|
3
|
XML::Twig::_use( 'File::Temp') || croak "need File::Temp to use safe_print_to_file\n"; | ||||
3269 |
2
|
72
|
my $tmpdir= dirname( $filename); | ||||
3270 |
2
|
7
|
my( $fh, $tmpfilename) = File::Temp::tempfile( DIR => $tmpdir); | ||||
3271 |
1
|
212
|
$t->print_to_file( $tmpfilename, @_); | ||||
3272 |
1
|
23
|
rename( $tmpfilename, $filename) or unlink $tmpfilename && _croak( "cannot move temporary file to $filename: $!"); | ||||
3273 |
1
|
6
|
chmod $perm, $filename; | ||||
3274 |
1
|
12
|
return $t; | ||||
3275 | } | ||||||
3276 | |||||||
3277 | |||||||
3278 | sub print | ||||||
3279 |
56
|
1
|
371
|
{ my $t= shift; | |||
3280 |
56
|
204
|
my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef; | ||||
3281 |
56
|
81
|
my %args= _normalize_args( @_); | ||||
3282 | |||||||
3283 |
56
|
135
|
my $old_select = defined $fh ? select $fh : undef; | ||||
3284 |
56
|
95
|
my $old_pretty = defined ($args{PrettyPrint}) ? $t->set_pretty_print( $args{PrettyPrint}) : undef; | ||||
3285 |
56
|
69
|
my $old_empty_tag = defined ($args{EmptyTags}) ? $t->set_empty_tag_style( $args{EmptyTags}) : undef; | ||||
3286 | |||||||
3287 | #if( !$t->{encoding} || lc( $t->{encoding}) eq 'utf-8') { my $out= $fh || \*STDOUT; binmode $out, ':utf8'; } | ||||||
3288 | |||||||
3289 |
56
|
332
|
if( $perl_version > 5.006 && ! $t->{twig_keep_encoding}) | ||||
3290 |
22
|
265673
|
{ if( grep /useperlio=define/, `$^X -V`) # we can only use binmode :utf8 if perl was compiled with useperlio | ||||
3291 |
22
|
327
|
{ binmode( $fh || \*STDOUT, ":utf8" ); } | ||||
3292 | } | ||||||
3293 | |||||||
3294 |
56
|
411
|
print $t->prolog( %args) . $t->_leading_cpi( %args); | ||||
3295 |
56
|
297
|
$t->{twig_root}->print; | ||||
3296 |
56
|
109
|
print $t->_trailing_cpi # trailing comments and pi's (elements, in 'process' mode) | ||||
3297 | . $t->_trailing_cpi_text # trailing comments and pi's (in 'keep' mode) | ||||||
3298 | . ( ($t->{twig_keep_spaces}||'') && ($t->{trailing_spaces} || '')) | ||||||
3299 | ; | ||||||
3300 | |||||||
3301 | |||||||
3302 |
56
|
268
|
$t->set_pretty_print( $old_pretty) if( defined $old_pretty); | ||||
3303 |
56
|
95
|
$t->set_empty_tag_style( $old_empty_tag) if( defined $old_empty_tag); | ||||
3304 |
56
53
|
84
138
|
if( $fh) { select $old_select; } | ||||
3305 | |||||||
3306 |
56
|
128
|
return $t; | ||||
3307 | } | ||||||
3308 | |||||||
3309 | |||||||
3310 | sub flush | ||||||
3311 |
1181
|
1
|
864
|
{ my $t= shift; | |||
3312 | |||||||
3313 |
1181
|
1461
|
$t->_trigger_tdh if $t->{twig_tdh}; | ||||
3314 | |||||||
3315 |
1181
|
1320
|
return if( $t->{twig_completely_flushed}); | ||||
3316 | |||||||
3317 |
1152
|
2829
|
my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef; | ||||
3318 |
1152
|
2230
|
my $old_select= defined $fh ? select $fh : undef; | ||||
3319 |
1152
|
1211
|
my $up_to= ref $_[0] ? shift : undef; | ||||
3320 |
1152
|
1258
|
my %args= _normalize_args( @_); | ||||
3321 | |||||||
3322 |
1152
|
644
|
my $old_pretty; | ||||
3323 |
1152
|
1539
|
if( defined $args{PrettyPrint}) | ||||
3324 |
6
|
7
|
{ $old_pretty= $t->set_pretty_print( $args{PrettyPrint}); | ||||
3325 |
6
|
8
|
delete $args{PrettyPrint}; | ||||
3326 | } | ||||||
3327 | |||||||
3328 |
1152
|
586
|
my $old_empty_tag_style; | ||||
3329 |
1152
|
1200
|
if( $args{EmptyTags}) | ||||
3330 |
8
|
10
|
{ $old_empty_tag_style= $t->set_empty_tag_style( $args{EmptyTags}); | ||||
3331 |
8
|
6
|
delete $args{EmptyTags}; | ||||
3332 | } | ||||||
3333 | |||||||
3334 | |||||||
3335 | # the "real" last element processed, as _twig_end has closed it | ||||||
3336 |
1152
|
580
|
my $last_elt; | ||||
3337 |
1152
|
736
|
my $flush_trailing_data=0; | ||||
3338 |
1152
|
1538
|
if( $up_to) | ||||
3339 |
22
|
18
|
{ $last_elt= $up_to; } | ||||
3340 | elsif( $t->{twig_current}) | ||||||
3341 |
228
|
285
|
{ $last_elt= $t->{twig_current}->_last_child; } | ||||
3342 | else | ||||||
3343 |
902
|
601
|
{ $last_elt= $t->{twig_root}; | ||||
3344 |
902
|
496
|
$flush_trailing_data=1; | ||||
3345 |
902
|
1140
|
$t->{twig_completely_flushed}=1; | ||||
3346 | } | ||||||
3347 | |||||||
3348 | # flush the DTD unless it has ready flushed (ie root has been flushed) | ||||||
3349 |
1152
|
804
|
my $elt= $t->{twig_root}; | ||||
3350 |
1152
|
1263
|
unless( $elt->_flushed) | ||||
3351 | { # store flush info so we can auto-flush later | ||||||
3352 |
911
|
1019
|
if( $t->{twig_autoflush}) | ||||
3353 |
902
|
1060
|
{ $t->{twig_autoflush_data}={}; | ||||
3354 |
902
|
1576
|
$t->{twig_autoflush_data}->{fh} = $fh if( $fh); | ||||
3355 |
902
|
1022
|
$t->{twig_autoflush_data}->{args} = \@_ if( @_); | ||||
3356 | } | ||||||
3357 |
911
|
1124
|
$t->print_prolog( %args); | ||||
3358 |
911
|
9727
|
print $t->_leading_cpi; | ||||
3359 | } | ||||||
3360 | |||||||
3361 |
1152
|
8005
|
while( $elt) | ||||
3362 |
1596
|
867
|
{ my $next_elt; | ||||
3363 |
1596
|
2507
|
if( $last_elt && $last_elt->in( $elt)) | ||||
3364 | { | ||||||
3365 |
422
|
371
|
unless( $elt->_flushed) | ||||
3366 | { # just output the front tag | ||||||
3367 |
107
|
149
|
print $elt->start_tag(); | ||||
3368 |
107
|
325
|
$elt->_set_flushed; | ||||
3369 | } | ||||||
3370 |
422
|
322
|
$next_elt= $elt->{first_child}; | ||||
3371 | } | ||||||
3372 | else | ||||||
3373 | { # an element before the last one or the last one, | ||||||
3374 |
1174
|
860
|
$next_elt= $elt->{next_sibling}; | ||||
3375 |
1174
|
1259
|
$elt->_flush(); | ||||
3376 |
1174
|
1215
|
$elt->delete; | ||||
3377 |
1174
|
3007
|
last if( $last_elt && ($elt == $last_elt)); | ||||
3378 | } | ||||||
3379 |
444
|
599
|
$elt= $next_elt; | ||||
3380 | } | ||||||
3381 | |||||||
3382 |
1152
|
2066
|
if( $flush_trailing_data) | ||||
3383 |
902
|
1048
|
{ print $t->_trailing_cpi # trailing comments and pi's (elements, in 'process' mode) | ||||
3384 | , $t->_trailing_cpi_text # trailing comments and pi's (in 'keep' mode) | ||||||
3385 | } | ||||||
3386 | |||||||
3387 |
1152
|
9376
|
select $old_select if( defined $old_select); | ||||
3388 |
1152
|
1301
|
$t->set_pretty_print( $old_pretty) if( defined $old_pretty); | ||||
3389 |
1152
|
1240
|
$t->set_empty_tag_style( $old_empty_tag_style) if( defined $old_empty_tag_style); | ||||
3390 | |||||||
3391 |
1152
|
1431
|
if( my $ids= $t->{twig_id_list}) | ||||
3392 |
2
|
5
|
{ while( my ($id, $elt)= each %$ids) | ||||
3393 |
7
|
13
|
{ if( ! defined $elt) | ||||
3394 |
2
|
4
|
{ delete $t->{twig_id_list}->{$id} } | ||||
3395 | } | ||||||
3396 | } | ||||||
3397 | |||||||
3398 |
1152
|
3174
|
return $t; | ||||
3399 | } | ||||||
3400 | |||||||
3401 | |||||||
3402 | # flushes up to an element | ||||||
3403 | # this method just reorders the arguments and calls flush | ||||||
3404 | sub flush_up_to | ||||||
3405 |
22
|
1
|
27
|
{ my $t= shift; | |||
3406 |
22
|
19
|
my $up_to= shift; | ||||
3407 |
22
|
120
|
if( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')) | ||||
3408 |
11
|
9
|
{ my $fh= shift; | ||||
3409 |
11
|
19
|
$t->flush( $fh, $up_to, @_); | ||||
3410 | } | ||||||
3411 | else | ||||||
3412 |
11
|
28
|
{ $t->flush( $up_to, @_); } | ||||
3413 | |||||||
3414 |
22
|
89
|
return $t; | ||||
3415 | } | ||||||
3416 | |||||||
3417 | |||||||
3418 | # same as print except the entire document text is returned as a string | ||||||
3419 | sub sprint | ||||||
3420 |
1496
|
1
|
2516
|
{ my $t= shift; | |||
3421 |
1496
|
1805
|
my %args= _normalize_args( @_); | ||||
3422 | |||||||
3423 |
1496
|
976
|
my $old_pretty; | ||||
3424 |
1496
|
1883
|
if( defined $args{PrettyPrint}) | ||||
3425 |
8
|
23
|
{ $old_pretty= $t->set_pretty_print( $args{PrettyPrint}); | ||||
3426 |
8
|
15
|
delete $args{PrettyPrint}; | ||||
3427 | } | ||||||
3428 | |||||||
3429 |
1496
|
1229
|
my $old_empty_tag_style; | ||||
3430 |
1496
|
1774
|
if( defined $args{EmptyTags}) | ||||
3431 |
12
|
30
|
{ $old_empty_tag_style= $t->set_empty_tag_style( $args{EmptyTags}); | ||||
3432 |
12
|
18
|
delete $args{EmptyTags}; | ||||
3433 | } | ||||||
3434 | |||||||
3435 |
1496
|
1911
|
my $string= $t->prolog( %args) # xml declaration and doctype | ||||
3436 | . $t->_leading_cpi( %args) # leading comments and pi's in 'process' mode | ||||||
3437 | . ( ($t->{twig_root} && $t->{twig_root}->sprint) || '') | ||||||
3438 | . $t->_trailing_cpi # trailing comments and pi's (elements, in 'process' mode) | ||||||
3439 | . $t->_trailing_cpi_text # trailing comments and pi's (in 'keep' mode) | ||||||
3440 | ; | ||||||
3441 |
1496
4
|
2932
5
|
if( $t->{twig_keep_spaces} && $t->{trailing_spaces}) { $string .= $t->{trailing_spaces}; } | ||||
3442 | |||||||
3443 |
1496
|
1820
|
$t->set_pretty_print( $old_pretty) if( defined $old_pretty); | ||||
3444 |
1496
|
1540
|
$t->set_empty_tag_style( $old_empty_tag_style) if( defined $old_empty_tag_style); | ||||
3445 | |||||||
3446 |
1496
|
4118
|
return $string; | ||||
3447 | } | ||||||
3448 | |||||||
3449 | |||||||
3450 | # this method discards useless elements in a tree | ||||||
3451 | # it does the same thing as a flush except it does not print it | ||||||
3452 | # the second argument is an element, the last purged element | ||||||
3453 | # (this argument is usually set through the purge_up_to method) | ||||||
3454 | sub purge | ||||||
3455 |
10
|
1
|
13
|
{ my $t= shift; | |||
3456 |
10
|
7
|
my $up_to= shift; | ||||
3457 | |||||||
3458 |
10
|
19
|
$t->_trigger_tdh if $t->{twig_tdh}; | ||||
3459 | |||||||
3460 | # the "real" last element processed, as _twig_end has closed it | ||||||
3461 |
10
|
9
|
my $last_elt; | ||||
3462 |
10
|
15
|
if( $up_to) | ||||
3463 |
7
|
5
|
{ $last_elt= $up_to; } | ||||
3464 | elsif( $t->{twig_current}) | ||||||
3465 |
2
|
3
|
{ $last_elt= $t->{twig_current}->_last_child; } | ||||
3466 | else | ||||||
3467 |
1
|
2
|
{ $last_elt= $t->{twig_root}; } | ||||
3468 | |||||||
3469 |
10
|
14
|
my $elt= $t->{twig_root}; | ||||
3470 | |||||||
3471 |
10
|
15
|
while( $elt) | ||||
3472 |
29
|
22
|
{ my $next_elt; | ||||
3473 |
29
|
50
|
if( $last_elt && $last_elt->in( $elt)) | ||||
3474 |
13
|
15
|
{ $elt->_set_flushed; | ||||
3475 |
13
|
9
|
$next_elt= $elt->{first_child}; | ||||
3476 | } | ||||||
3477 | else | ||||||
3478 | { # an element before the last one or the last one, | ||||||
3479 |
16
|
10
|
$next_elt= $elt->{next_sibling}; | ||||
3480 |
16
|
19
|
$elt->delete; | ||||
3481 |
16
|
44
|
last if( $last_elt && ($elt == $last_elt) ); | ||||
3482 | } | ||||||
3483 |
19
|
32
|
$elt= $next_elt; | ||||
3484 | } | ||||||
3485 | |||||||
3486 |
10
|
18
|
if( my $ids= $t->{twig_id_list}) | ||||
3487 |
2
8
4
|
5
13
7
|
{ while( my ($id, $elt)= each %$ids) { if( ! defined $elt) { delete $t->{twig_id_list}->{$id} } } } | ||||
3488 | |||||||
3489 |
10
|
40
|
return $t; | ||||
3490 | } | ||||||
3491 | |||||||
3492 | # flushes up to an element. This method just calls purge | ||||||
3493 | sub purge_up_to | ||||||
3494 |
7
|
1
|
6
|
{ my $t= shift; | |||
3495 |
7
|
11
|
return $t->purge( @_); | ||||
3496 | } | ||||||
3497 | |||||||
3498 | sub root | ||||||
3499 |
5179
|
1
|
12263
|
{ return $_[0]->{twig_root}; } | |||
3500 | |||||||
3501 | sub normalize | ||||||
3502 |
1
|
1
|
3
|
{ return $_[0]->root->normalize; } | |||
3503 | |||||||
3504 | |||||||
3505 | # create accessor methods on attribute names | ||||||
3506 | { my %accessor; # memorize accessor names so re-creating them won't trigger an error | ||||||
3507 | sub att_accessors | ||||||
3508 | { | ||||||
3509 |
6
|
1
|
44
|
my $twig_or_class= shift; | |||
3510 |
6
|
11
|
my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class} | ||||
3511 | : 'XML::Twig::Elt' | ||||||
3512 | ; | ||||||
3513 | ## no critic (TestingAndDebugging::ProhibitNoStrict); | ||||||
3514 |
187
187
187
|
647
163
36164
|
no strict 'refs'; | ||||
3515 |
6
|
9
|
foreach my $att (@_) | ||||
3516 |
8
|
44
|
{ _croak( "attempt to redefine existing method $att using att_accessors") | ||||
3517 | if( $elt_class->can( $att) && !$accessor{$att}); | ||||||
3518 | |||||||
3519 |
7
|
11
|
if( !$accessor{$att}) | ||||
3520 |
5
|
9
|
{ *{"$elt_class\::$att"}= | ||||
3521 | sub | ||||||
3522 | :lvalue # > perl 5.5 | ||||||
3523 |
15
|
22
|
{ my $elt= shift; | ||||
3524 |
15
2
|
19
2
|
if( @_) { $elt->{att}->{$att}= $_[0]; } | ||||
3525 |
15
|
34
|
$elt->{att}->{$att}; | ||||
3526 |
5
|
11
|
}; | ||||
3527 |
5
|
10
|
$accessor{$att}=1; | ||||
3528 | } | ||||||
3529 | } | ||||||
3530 |
5
|
5
|
return $twig_or_class; | ||||
3531 | } | ||||||
3532 | } | ||||||
3533 | |||||||
3534 | { my %accessor; # memorize accessor names so re-creating them won't trigger an error | ||||||
3535 | sub elt_accessors | ||||||
3536 | { | ||||||
3537 |
8
|
1
|
13
|
my $twig_or_class= shift; | |||
3538 |
8
|
16
|
my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class} | ||||
3539 | : 'XML::Twig::Elt' | ||||||
3540 | ; | ||||||
3541 | |||||||
3542 | # if arg is a hash ref, it's exp => name, otherwise it's a list of tags | ||||||
3543 |
3
9
|
9
18
|
my %exp_to_alias= ref( $_[0]) && isa( $_[0], 'HASH') ? %{$_[0]} | ||||
3544 |
8
|
30
|
: map { $_ => $_ } @_; | ||||
3545 | ## no critic (TestingAndDebugging::ProhibitNoStrict); | ||||||
3546 |
187
187
187
|
594
160
35994
|
no strict 'refs'; | ||||
3547 |
8
|
22
|
while( my( $alias, $exp)= each %exp_to_alias ) | ||||
3548 |
16
|
80
|
{ if( $elt_class->can( $alias) && !$accessor{$alias}) | ||||
3549 |
1
|
2
|
{ _croak( "attempt to redefine existing method $alias using elt_accessors"); } | ||||
3550 | |||||||
3551 |
15
|
25
|
if( !$accessor{$alias}) | ||||
3552 |
9
|
21
|
{ *{"$elt_class\::$alias"}= | ||||
3553 | sub | ||||||
3554 |
18
|
29
|
{ my $elt= shift; | ||||
3555 |
18
|
48
|
return wantarray ? $elt->children( $exp) : $elt->first_child( $exp); | ||||
3556 |
9
|
18
|
}; | ||||
3557 |
9
|
23
|
$accessor{$alias}=1; | ||||
3558 | } | ||||||
3559 | } | ||||||
3560 |
7
|
11
|
return $twig_or_class; | ||||
3561 | } | ||||||
3562 | } | ||||||
3563 | |||||||
3564 | { my %accessor; # memorize accessor names so re-creating them won't trigger an error | ||||||
3565 | sub field_accessors | ||||||
3566 | { | ||||||
3567 |
4
|
1
|
29
|
my $twig_or_class= shift; | |||
3568 |
4
|
8
|
my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class} | ||||
3569 | : 'XML::Twig::Elt' | ||||||
3570 | ; | ||||||
3571 |
1
4
|
3
8
|
my %exp_to_alias= ref( $_[0]) && isa( $_[0], 'HASH') ? %{$_[0]} | ||||
3572 |
4
|
14
|
: map { $_ => $_ } @_; | ||||
3573 | |||||||
3574 | ## no critic (TestingAndDebugging::ProhibitNoStrict); | ||||||
3575 |
187
187
187
|
588
165
248677
|
no strict 'refs'; | ||||
3576 |
4
|
11
|
while( my( $alias, $exp)= each %exp_to_alias ) | ||||
3577 |
5
|
31
|
{ if( $elt_class->can( $alias) && !$accessor{$alias}) | ||||
3578 |
1
|
2
|
{ _croak( "attempt to redefine existing method $exp using field_accessors"); } | ||||
3579 |
4
|
7
|
if( !$accessor{$alias}) | ||||
3580 |
3
|
74
|
{ *{"$elt_class\::$alias"}= | ||||
3581 | sub | ||||||
3582 |
4
|
9
|
{ my $elt= shift; | ||||
3583 |
4
|
15
|
$elt->field( $exp) | ||||
3584 |
3
|
11
|
}; | ||||
3585 |
3
|
10
|
$accessor{$alias}=1; | ||||
3586 | } | ||||||
3587 | } | ||||||
3588 |
3
|
5
|
return $twig_or_class; | ||||
3589 | } | ||||||
3590 | } | ||||||
3591 | |||||||
3592 | sub first_elt | ||||||
3593 |
122
|
1
|
352
|
{ my( $t, $cond)= @_; | |||
3594 |
122
|
155
|
my $root= $t->root || return undef; | ||||
3595 |
121
|
192
|
return $root if( $root->passes( $cond)); | ||||
3596 |
116
|
223
|
return $root->next_elt( $cond); | ||||
3597 | } | ||||||
3598 | |||||||
3599 | sub last_elt | ||||||
3600 |
8
|
1
|
62
|
{ my( $t, $cond)= @_; | |||
3601 |
8
|
10
|
my $root= $t->root || return undef; | ||||
3602 |
7
|
15
|
return $root->last_descendant( $cond); | ||||
3603 | } | ||||||
3604 | |||||||
3605 | sub next_n_elt | ||||||
3606 |
4
|
1
|
9
|
{ my( $t, $offset, $cond)= @_; | |||
3607 |
4
|
6
|
$offset -- if( $t->root->matches( $cond) ); | ||||
3608 |
4
|
6
|
return $t->root->next_n_elt( $offset, $cond); | ||||
3609 | } | ||||||
3610 | |||||||
3611 | sub get_xpath | ||||||
3612 |
155
|
1
|
2177
|
{ my $twig= shift; | |||
3613 |
155
|
546
|
if( isa( $_[0], 'ARRAY')) | ||||
3614 |
1
|
2
|
{ my $elt_array= shift; | ||||
3615 |
1
2
|
2
5
|
return _unique_elts( map { $_->get_xpath( @_) } @$elt_array); | ||||
3616 | } | ||||||
3617 | else | ||||||
3618 |
154
|
180
|
{ return $twig->root->get_xpath( @_); } | ||||
3619 | } | ||||||
3620 | |||||||
3621 | # get a list of elts and return a sorted list of unique elts | ||||||
3622 | sub _unique_elts | ||||||
3623 |
195
203
|
356
229
|
{ my @sorted= sort { $a ->cmp( $b) } @_; | ||||
3624 |
195
|
126
|
my @unique; | ||||
3625 |
195
|
291
|
while( my $current= shift @sorted) | ||||
3626 |
313
|
885
|
{ push @unique, $current unless( @unique && ($unique[-1] == $current)); } | ||||
3627 |
195
|
470
|
return @unique; | ||||
3628 | } | ||||||
3629 | |||||||
3630 | sub findvalue | ||||||
3631 |
16
|
1
|
260
|
{ my $twig= shift; | |||
3632 |
16
|
61
|
if( isa( $_[0], 'ARRAY')) | ||||
3633 |
1
|
2
|
{ my $elt_array= shift; | ||||
3634 |
1
2
|
2
5
|
return join( '', map { $_->findvalue( @_) } @$elt_array); | ||||
3635 | } | ||||||
3636 | else | ||||||
3637 |
15
|
22
|
{ return $twig->root->findvalue( @_); } | ||||
3638 | } | ||||||
3639 | |||||||
3640 | sub findvalues | ||||||
3641 |
3
|
1
|
9
|
{ my $twig= shift; | |||
3642 |
3
|
13
|
if( isa( $_[0], 'ARRAY')) | ||||
3643 |
1
|
1
|
{ my $elt_array= shift; | ||||
3644 |
1
2
|
1
5
|
return map { $_->findvalues( @_) } @$elt_array; | ||||
3645 | } | ||||||
3646 | else | ||||||
3647 |
2
|
4
|
{ return $twig->root->findvalues( @_); } | ||||
3648 | } | ||||||
3649 | |||||||
3650 | sub set_id_seed | ||||||
3651 |
2
|
1
|
7
|
{ my $t= shift; | |||
3652 |
2
|
4
|
XML::Twig::Elt->set_id_seed( @_); | ||||
3653 |
2
|
3
|
return $t; | ||||
3654 | } | ||||||
3655 | |||||||
3656 | # return an array ref to an index, or undef | ||||||
3657 | sub index | ||||||
3658 |
10
|
1
|
11
|
{ my( $twig, $name, $index)= @_; | |||
3659 |
10
|
25
|
return defined( $index) ? $twig->{_twig_index}->{$name}->[$index] : $twig->{_twig_index}->{$name}; | ||||
3660 | } | ||||||
3661 | |||||||
3662 | # return a list with just the root | ||||||
3663 | # if a condition is given then return an empty list unless the root matches | ||||||
3664 | sub children | ||||||
3665 |
82
|
1
|
89
|
{ my( $t, $cond)= @_; | |||
3666 |
82
|
92
|
my $root= $t->root; | ||||
3667 |
82
|
174
|
unless( $cond && !($root->passes( $cond)) ) | ||||
3668 |
79
|
862
|
{ return ($root); } | ||||
3669 | else | ||||||
3670 |
3
|
45
|
{ return (); } | ||||
3671 | } | ||||||
3672 | |||||||
3673 |
1
|
3
|
sub _children { return ($_[0]->root); } | ||||
3674 | |||||||
3675 | # weird, but here for completude | ||||||
3676 | # used to solve (non-sensical) /doc[1] XPath queries | ||||||
3677 | sub child | ||||||
3678 |
2
|
1
|
3
|
{ my $t= shift; | |||
3679 |
2
|
1
|
my $nb= shift; | ||||
3680 |
2
|
4
|
return ($t->children( @_))[$nb]; | ||||
3681 | } | ||||||
3682 | |||||||
3683 | sub descendants | ||||||
3684 |
95
|
1
|
152
|
{ my( $t, $cond)= @_; | |||
3685 |
95
|
113
|
my $root= $t->root; | ||||
3686 |
95
|
131
|
if( $root->passes( $cond) ) | ||||
3687 |
44
|
58
|
{ return ($root, $root->descendants( $cond)); } | ||||
3688 | else | ||||||
3689 |
51
|
88
|
{ return ( $root->descendants( $cond)); } | ||||
3690 | } | ||||||
3691 | |||||||
3692 |
3
3
|
1
|
40
7
|
sub simplify { my $t= shift; $t->root->simplify( @_); } | |||
3693 |
18
18
|
1
|
55
23
|
sub subs_text { my $t= shift; $t->root->subs_text( @_); } | |||
3694 |
18
18
|
1
|
12
20
|
sub trim { my $t= shift; $t->root->trim( @_); } | |||
3695 | |||||||
3696 | |||||||
3697 | sub set_keep_encoding | ||||||
3698 |
3151
|
1
|
2488
|
{ my( $t, $keep)= @_; | |||
3699 |
3151
|
3424
|
$t->{twig_keep_encoding}= $keep; | ||||
3700 |
3151
|
3325
|
$t->{NoExpand}= $keep; | ||||
3701 |
3151
|
3810
|
return XML::Twig::Elt::set_keep_encoding( $keep); | ||||
3702 | } | ||||||
3703 | |||||||
3704 | sub set_expand_external_entities | ||||||
3705 |
3131
|
1
|
3872
|
{ return XML::Twig::Elt::set_expand_external_entities( @_); } | |||
3706 | |||||||
3707 | sub escape_gt | ||||||
3708 |
2
2
2
|
1
|
2
4
3
|
{ my $t= shift; $t->{twig_escape_gt}= 1; return XML::Twig::Elt::escape_gt( @_); } | |||
3709 | |||||||
3710 | sub do_not_escape_gt | ||||||
3711 |
1
1
1
|
1
|
2
1
2
|
{ my $t= shift; $t->{twig_escape_gt}= 0; return XML::Twig::Elt::do_not_escape_gt( @_); } | |||
3712 | |||||||
3713 | sub elt_id | ||||||
3714 |
70
|
1
|
511
|
{ return $_[0]->{twig_id_list}->{$_[1]}; } | |||
3715 | |||||||
3716 | # change it in ALL twigs at the moment | ||||||
3717 | sub change_gi | ||||||
3718 |
4
|
1
|
10
|
{ my( $twig, $old_gi, $new_gi)= @_; | |||
3719 |
4
|
49
|
my $index; | ||||
3720 |
4
|
13
|
return unless($index= $XML::Twig::gi2index{$old_gi}); | ||||
3721 |
2
|
3
|
$XML::Twig::index2gi[$index]= $new_gi; | ||||
3722 |
2
|
3
|
delete $XML::Twig::gi2index{$old_gi}; | ||||
3723 |
2
|
3
|
$XML::Twig::gi2index{$new_gi}= $index; | ||||
3724 |
2
|
1
|
return $twig; | ||||
3725 | } | ||||||
3726 | |||||||
3727 | |||||||
3728 | # builds the DTD from the stored (possibly updated) data | ||||||
3729 | sub dtd_text | ||||||
3730 |
5
|
1
|
9
|
{ my $t= shift; | |||
3731 |
5
|
7
|
my $dtd= $t->{twig_dtd}; | ||||
3732 |
5
|
14
|
my $doctype= $t->{twig_doctype} or return ''; | ||||
3733 |
4
|
10
|
my $string= "<!DOCTYPE ".$doctype->{name}; | ||||
3734 | |||||||
3735 |
4
|
7
|
$string .= " [\n"; | ||||
3736 | |||||||
3737 |
4
4
|
7
10
|
foreach my $gi (@{$dtd->{elt_list}}) | ||||
3738 |
5
|
19
|
{ $string.= "<!ELEMENT $gi ".$dtd->{model}->{$gi}.">\n" ; | ||||
3739 |
5
|
58
|
if( $dtd->{att}->{$gi}) | ||||
3740 |
2
|
3
|
{ my $attlist= $dtd->{att}->{$gi}; | ||||
3741 |
2
|
3
|
$string.= "<!ATTLIST $gi\n"; | ||||
3742 |
2
2
|
2
4
|
foreach my $att ( sort keys %{$attlist}) | ||||
3743 | { | ||||||
3744 |
4
|
7
|
if( $attlist->{$att}->{fixed}) | ||||
3745 |
1
|
3
|
{ $string.= " $att $attlist->{$att}->{type} #FIXED $attlist->{$att}->{default}"; } | ||||
3746 | else | ||||||
3747 |
3
|
24
|
{ $string.= " $att $attlist->{$att}->{type} $attlist->{$att}->{default}"; } | ||||
3748 |
4
|
5
|
$string.= "\n"; | ||||
3749 | } | ||||||
3750 |
2
|
5
|
$string.= ">\n"; | ||||
3751 | } | ||||||
3752 | } | ||||||
3753 |
4
|
9
|
$string.= $t->entity_list->text if( $t->entity_list); | ||||
3754 |
4
|
6
|
$string.= "\n]>\n"; | ||||
3755 |
4
|
13
|
return $string; | ||||
3756 | } | ||||||
3757 | |||||||
3758 | # prints the DTD from the stored (possibly updated) data | ||||||
3759 | sub dtd_print | ||||||
3760 |
3
|
1
|
11
|
{ my $t= shift; | |||
3761 |
3
|
17
|
my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef; | ||||
3762 |
3
2
|
6
6
|
if( $fh) { print $fh $t->dtd_text; } | ||||
3763 |
1
|
1
|
else { print $t->dtd_text; } | ||||
3764 |
3
|
27
|
return $t; | ||||
3765 | } | ||||||
3766 | |||||||
3767 | # build the subs that call directly expat | ||||||
3768 | BEGIN | ||||||
3769 |
187
|
821
|
{ my @expat_methods= qw( depth in_element within_element context | ||||
3770 | current_line current_column current_byte | ||||||
3771 | recognized_string original_string | ||||||
3772 | xpcroak xpcarp | ||||||
3773 | base current_element element_index | ||||||
3774 | xml_escape | ||||||
3775 | position_in_context); | ||||||
3776 |
187
|
275
|
foreach my $method (@expat_methods) | ||||
3777 | { | ||||||
3778 | ## no critic (TestingAndDebugging::ProhibitNoStrict); | ||||||
3779 |
187
187
187
|
640
176
11955
|
no strict 'refs'; | ||||
3780 |
2992
34
|
280949
1067
|
*{$method}= sub { my $t= shift; | ||||
3781 |
34
|
80
|
_croak( "calling $method after parsing is finished") unless( $t->{twig_parsing}); | ||||
3782 |
18
|
51
|
return $t->{twig_parser}->$method(@_); | ||||
3783 |
2992
|
4751
|
}; | ||||
3784 | } | ||||||
3785 | } | ||||||
3786 | |||||||
3787 | sub path | ||||||
3788 |
19
|
1
|
32
|
{ my( $t, $gi)= @_; | |||
3789 |
19
|
23
|
if( $t->{twig_map_xmlns}) | ||||
3790 |
2
4
|
5
10
|
{ return "/" . join( "/", map { $t->_replace_prefix( $_)} ($t->{twig_parser}->context, $gi)); } | ||||
3791 | else | ||||||
3792 |
17
|
29
|
{ return "/" . join( "/", ($t->{twig_parser}->context, $gi)); } | ||||
3793 | } | ||||||
3794 | |||||||
3795 | sub finish | ||||||
3796 |
1
|
1
|
3
|
{ my $t= shift; | |||
3797 |
1
|
3
|
return $t->{twig_parser}->finish; | ||||
3798 | } | ||||||
3799 | |||||||
3800 | # just finish the parse by printing the rest of the document | ||||||
3801 | sub finish_print | ||||||
3802 |
6
|
1
|
26
|
{ my( $t, $fh)= @_; | |||
3803 |
6
|
5
|
my $old_fh; | ||||
3804 |
6
|
12
|
unless( defined $fh) | ||||
3805 |
2
|
3
|
{ $t->_set_fh_to_twig_output_fh(); } | ||||
3806 | elsif( defined $fh) | ||||||
3807 |
4
|
13
|
{ $old_fh= select $fh; | ||||
3808 |
4
|
10
|
$t->{twig_original_selected_fh}= $old_fh if( $old_fh); | ||||
3809 | } | ||||||
3810 | |||||||
3811 |
6
|
7
|
my $p=$t->{twig_parser}; | ||||
3812 |
6
|
7
|
if( $t->{twig_keep_encoding}) | ||||
3813 |
1
|
4
|
{ $p->setHandlers( %twig_handlers_finish_print); } | ||||
3814 | else | ||||||
3815 |
5
|
19
|
{ $p->setHandlers( %twig_handlers_finish_print_original); } | ||||
3816 |
6
|
353
|
return $t; | ||||
3817 | } | ||||||
3818 | |||||||
3819 |
3115
|
1
|
3338
|
sub set_remove_cdata { return XML::Twig::Elt::set_remove_cdata( @_); } | |||
3820 | |||||||
3821 |
19
|
1
|
33
|
sub output_filter { return XML::Twig::Elt::output_filter( @_); } | |||
3822 |
3138
|
1
|
3427
|
sub set_output_filter { return XML::Twig::Elt::set_output_filter( @_); } | |||
3823 | |||||||
3824 |
2
|
1
|
6
|
sub output_text_filter { return XML::Twig::Elt::output_text_filter( @_); } | |||
3825 |
3124
|
1
|
3082
|
sub set_output_text_filter { return XML::Twig::Elt::set_output_text_filter( @_); } | |||
3826 | |||||||
3827 | sub set_input_filter | ||||||
3828 |
4
|
1
|
4
|
{ my( $t, $input_filter)= @_; | |||
3829 |
4
|
5
|
my $old_filter= $t->{twig_input_filter}; | ||||
3830 |
4
|
21
|
if( !$input_filter || isa( $input_filter, 'CODE') ) | ||||
3831 |
2
|
4
|
{ $t->{twig_input_filter}= $input_filter; } | ||||
3832 | elsif( $input_filter eq 'latin1') | ||||||
3833 |
0
|
0
|
{ $t->{twig_input_filter}= latin1(); } | ||||
3834 | elsif( $filter{$input_filter}) | ||||||
3835 |
0
|
0
|
{ $t->{twig_input_filter}= $filter{$input_filter}; } | ||||
3836 | else | ||||||
3837 |
2
|
3
|
{ _croak( "invalid input filter: $input_filter"); } | ||||
3838 | |||||||
3839 |
2
|
2
|
return $old_filter; | ||||
3840 | } | ||||||
3841 | |||||||
3842 | sub set_empty_tag_style | ||||||
3843 |
131
|
1
|
272
|
{ return XML::Twig::Elt::set_empty_tag_style( @_); } | |||
3844 | |||||||
3845 | sub set_pretty_print | ||||||
3846 |
121
|
1
|
18122
|
{ return XML::Twig::Elt::set_pretty_print( @_); } | |||
3847 | |||||||
3848 | sub set_quote | ||||||
3849 |
3115
|
1
|
3059
|
{ return XML::Twig::Elt::set_quote( @_); } | |||
3850 | |||||||
3851 | sub set_indent | ||||||
3852 |
4
|
1
|
9
|
{ return XML::Twig::Elt::set_indent( @_); } | |||
3853 | |||||||
3854 | sub set_keep_atts_order | ||||||
3855 |
3115
3115
|
1
|
1915
3140
|
{ shift; return XML::Twig::Elt::set_keep_atts_order( @_); } | |||
3856 | |||||||
3857 | sub keep_atts_order | ||||||
3858 |
2
|
1
|
4
|
{ return XML::Twig::Elt::keep_atts_order( @_); } | |||
3859 | |||||||
3860 | sub set_do_not_escape_amp_in_atts | ||||||
3861 |
3130
|
1
|
3466
|
{ return XML::Twig::Elt::set_do_not_escape_amp_in_atts( @_); } | |||
3862 | |||||||
3863 | # save and restore package globals (the ones in XML::Twig::Elt) | ||||||
3864 | # should probably return the XML::Twig object itself, but instead | ||||||
3865 | # returns the state (as a hashref) for backward compatibility | ||||||
3866 | sub save_global_state | ||||||
3867 |
10
|
1
|
3528
|
{ my $t= shift; | |||
3868 |
10
|
20
|
return $t->{twig_saved_state}= XML::Twig::Elt::global_state(); | ||||
3869 | } | ||||||
3870 | |||||||
3871 | sub restore_global_state | ||||||
3872 |
16
|
1
|
63
|
{ my $t= shift; | |||
3873 |
16
|
34
|
XML::Twig::Elt::set_global_state( $t->{twig_saved_state}); | ||||
3874 | } | ||||||
3875 | |||||||
3876 | sub global_state | ||||||
3877 |
1
|
1
|
3
|
{ return XML::Twig::Elt::global_state(); } | |||
3878 | |||||||
3879 | sub set_global_state | ||||||
3880 |
1
|
1
|
3
|
{ return XML::Twig::Elt::set_global_state( $_[1]); } | |||
3881 | |||||||
3882 | sub dispose | ||||||
3883 |
3
|
1
|
2
|
{ my $t= shift; | |||
3884 |
3
|
4
|
$t->DESTROY; | ||||
3885 |
3
|
4
|
return; | ||||
3886 | } | ||||||
3887 | |||||||
3888 | sub DESTROY | ||||||
3889 |
2965
|
5092
|
{ my $t= shift; | ||||
3890 |
2965
|
8445
|
if( $t->{twig_root} && isa( $t->{twig_root}, 'XML::Twig::Elt')) | ||||
3891 |
1994
|
2694
|
{ $t->{twig_root}->delete } | ||||
3892 | |||||||
3893 | # added to break circular references | ||||||
3894 |
2965
|
2711
|
undef $t->{twig}; | ||||
3895 |
2965
|
3385
|
undef $t->{twig_root}->{twig} if( $t->{twig_root}); | ||||
3896 |
2965
|
2094
|
undef $t->{twig_parser}; | ||||
3897 | |||||||
3898 |
2965
|
22762
|
undef %$t;# prevents memory leaks (especially when using mod_perl) | ||||
3899 |
2965
|
17067
|
undef $t; | ||||
3900 | } | ||||||
3901 | |||||||
3902 | |||||||
3903 | # | ||||||
3904 | # non standard handlers | ||||||
3905 | # | ||||||
3906 | |||||||
3907 | # kludge: expat 1.95.2 calls both Default AND Doctype handlers | ||||||
3908 | # so if the default handler finds '<!DOCTYPE' then it must | ||||||
3909 | # unset itself (_twig_print_doctype will reset it) | ||||||
3910 | sub _twig_print_check_doctype | ||||||
3911 | { # warn " in _twig_print_check_doctype...\n"; # DEBUG handler | ||||||
3912 | |||||||
3913 |
15
|
9
|
my $p= shift; | ||||
3914 |
15
|
26
|
my $string= $p->recognized_string(); | ||||
3915 |
15
|
60
|
if( $string eq '<!DOCTYPE') | ||||
3916 | { | ||||||
3917 |
0
|
0
|
$p->setHandlers( Default => undef); | ||||
3918 |
0
|
0
|
$p->setHandlers( Entity => undef); | ||||
3919 |
0
|
0
|
$expat_1_95_2=1; | ||||
3920 | } | ||||||
3921 | else | ||||||
3922 |
15
|
16
|
{ print $string; } | ||||
3923 | |||||||
3924 |
15
|
83
|
return; | ||||
3925 | } | ||||||
3926 | |||||||
3927 | |||||||
3928 | sub _twig_print | ||||||
3929 | { # warn " in _twig_print...\n"; # DEBUG handler | ||||||
3930 |
489
|
472
|
my $p= shift; | ||||
3931 |
489
|
742
|
if( $expat_1_95_2 && ($p->recognized_string eq '[') && !$p->{twig}->{expat_1_95_2_seen_bracket}) | ||||
3932 | { # otherwise the opening square bracket of the doctype gets printed twice | ||||||
3933 |
0
|
0
|
$p->{twig}->{expat_1_95_2_seen_bracket}=1; | ||||
3934 | } | ||||||
3935 | else | ||||||
3936 |
489
|
476
|
{ if( $p->{twig}->{twig_right_after_root}) | ||||
3937 |
0
0
|
0
0
|
{ my $s= $p->recognized_string(); print $s if $s=~ m{\S}; } | ||||
3938 | else | ||||||
3939 |
489
|
646
|
{ print $p->recognized_string(); } | ||||
3940 | } | ||||||
3941 |
489
|
2377
|
return; | ||||
3942 | } | ||||||
3943 | # recognized_string does not seem to work for entities, go figure! | ||||||
3944 | # so this handler is used to print them anyway | ||||||
3945 | sub _twig_print_entity | ||||||
3946 | { # warn " in _twig_print_entity...\n"; # DEBUG handler | ||||||
3947 |
6
|
42
|
my $p= shift; | ||||
3948 |
6
|
21
|
XML::Twig::Entity->new( @_)->print; | ||||
3949 | } | ||||||
3950 | |||||||
3951 | # kludge: expat 1.95.2 calls both Default AND Doctype handlers | ||||||
3952 | # so if the default handler finds '<!DOCTYPE' then it must | ||||||
3953 | # unset itself (_twig_print_doctype will reset it) | ||||||
3954 | sub _twig_print_original_check_doctype | ||||||
3955 | { # warn " in _twig_print_original_check_doctype...\n"; # DEBUG handler | ||||||
3956 | |||||||
3957 |
21
|
23
|
my $p= shift; | ||||
3958 |
21
|
44
|
my $string= $p->original_string(); | ||||
3959 |
21
|
92
|
if( $string eq '<!DOCTYPE') | ||||
3960 |
0
|
0
|
{ $p->setHandlers( Default => undef); | ||||
3961 |
0
|
0
|
$p->setHandlers( Entity => undef); | ||||
3962 |
0
|
0
|
$expat_1_95_2=1; | ||||
3963 | } | ||||||
3964 | else | ||||||
3965 |
21
|
27
|
{ print $string; } | ||||
3966 | |||||||
3967 |
21
|
198
|
return; | ||||
3968 | } | ||||||
3969 | |||||||
3970 | sub _twig_print_original | ||||||
3971 | { # warn " in _twig_print_original...\n"; # DEBUG handler | ||||||
3972 |
435
|
301
|
my $p= shift; | ||||
3973 |
435
|
663
|
print $p->original_string(); | ||||
3974 |
435
|
2093
|
return; | ||||
3975 | } | ||||||
3976 | |||||||
3977 | |||||||
3978 | sub _twig_print_original_doctype | ||||||
3979 | { # warn " in _twig_print_original_doctype...\n"; # DEBUG handler | ||||||
3980 | |||||||
3981 |
5
|
108
|
my( $p, $name, $sysid, $pubid, $internal)= @_; | ||||
3982 |
5
|
13
|
if( $name) | ||||
3983 | { # with recent versions of XML::Parser original_string does not work, | ||||||
3984 | # hence we need to rebuild the doctype declaration | ||||||
3985 |
5
|
5
|
my $doctype=''; | ||||
3986 |
5
|
15
|
$doctype .= qq{<!DOCTYPE $name} if( $name); | ||||
3987 |
5
|
10
|
$doctype .= qq{ PUBLIC "$pubid"} if( $pubid); | ||||
3988 |
5
|
15
|
$doctype .= qq{ SYSTEM} if( $sysid && !$pubid); | ||||
3989 |
5
|
9
|
$doctype .= qq{ "$sysid"} if( $sysid); | ||||
3990 |
5
|
18
|
$doctype .= ' [' if( $internal && !$expat_1_95_2) ; | ||||
3991 |
5
|
21
|
$doctype .= qq{>} unless( $internal || $expat_1_95_2); | ||||
3992 |
5
|
18
|
$p->{twig}->{twig_doctype}->{has_internal}=$internal; | ||||
3993 |
5
|
17
|
print $doctype; | ||||
3994 | } | ||||||
3995 |
5
|
16
|
$p->setHandlers( Default => \&_twig_print_original); | ||||
3996 |
5
|
73
|
return; | ||||
3997 | } | ||||||
3998 | |||||||
3999 | sub _twig_print_doctype | ||||||
4000 | { # warn " in _twig_print_doctype...\n"; # DEBUG handler | ||||||
4001 |
11
|
99
|
my( $p, $name, $sysid, $pubid, $internal)= @_; | ||||
4002 |
11
|
19
|
if( $name) | ||||
4003 | { # with recent versions of XML::Parser original_string does not work, | ||||||
4004 | # hence we need to rebuild the doctype declaration | ||||||
4005 |
11
|
9
|
my $doctype=''; | ||||
4006 |
11
|
24
|
$doctype .= qq{<!DOCTYPE $name} if( $name); | ||||
4007 |
11
|
17
|
$doctype .= qq{ PUBLIC "$pubid"} if( $pubid); | ||||
4008 |
11
|
31
|
$doctype .= qq{ SYSTEM} if( $sysid && !$pubid); | ||||
4009 |
11
|
26
|
$doctype .= qq{ "$sysid"} if( $sysid); | ||||
4010 |
11
|
15
|
$doctype .= ' [' if( $internal) ; | ||||
4011 |
11
|
33
|
$doctype .= qq{>} unless( $internal || $expat_1_95_2); | ||||
4012 |
11
|
29
|
$p->{twig}->{twig_doctype}->{has_internal}=$internal; | ||||
4013 |
11
|
23
|
print $doctype; | ||||
4014 | } | ||||||
4015 |
11
|
28
|
$p->setHandlers( Default => \&_twig_print); | ||||
4016 |
11
|
127
|
return; | ||||
4017 | } | ||||||
4018 | |||||||
4019 | |||||||
4020 | sub _twig_print_original_default | ||||||
4021 | { # warn " in _twig_print_original_default...\n"; # DEBUG handler | ||||||
4022 |
2
|
110
|
my $p= shift; | ||||
4023 |
2
|
7
|
print $p->original_string(); | ||||
4024 |
2
|
16
|
return; | ||||
4025 | } | ||||||
4026 | |||||||
4027 | # account for the case where the element is empty | ||||||
4028 | sub _twig_print_end_original | ||||||
4029 | { # warn " in _twig_print_end_original...\n"; # DEBUG handler | ||||||
4030 |
10
|
89
|
my $p= shift; | ||||
4031 |
10
|
18
|
print $p->original_string(); | ||||
4032 |
10
|
82
|
return; | ||||
4033 | } | ||||||
4034 | |||||||
4035 | sub _twig_start_check_roots | ||||||
4036 | { # warn " in _twig_start_check_roots...\n"; # DEBUG handler | ||||||
4037 |
524
|
73185
|
my $p= shift; | ||||
4038 |
524
|
375
|
my $gi= shift; | ||||
4039 | |||||||
4040 |
524
|
400
|
my $t= $p->{twig}; | ||||
4041 | |||||||
4042 |
524
|
1208
|
my $fh= $t->{twig_output_fh} || select() || \*STDOUT; | ||||
4043 | |||||||
4044 |
524
|
321
|
my $ns_decl; | ||||
4045 |
524
|
804
|
unless( $p->depth == 0) | ||||
4046 |
420
14
|
1556
22
|
{ if( $t->{twig_map_xmlns}) { $ns_decl= _replace_ns( $t, \$gi, \@_); } | ||||
4047 | } | ||||||
4048 | |||||||
4049 |
524
|
1307
|
my $context= { $ST_TAG => $gi, @_}; | ||||
4050 |
524
|
584
|
$context->{$ST_NS}= $ns_decl if $ns_decl; | ||||
4051 |
524
524
|
314
508
|
push @{$t->{_twig_context_stack}}, $context; | ||||
4052 |
524
|
551
|
my %att= @_; | ||||
4053 | |||||||
4054 |
524
|
584
|
if( _handler( $t, $t->{twig_roots}, $gi)) | ||||
4055 |
144
|
492
|
{ $p->setHandlers( %twig_handlers); # restore regular handlers | ||||
4056 |
144
|
9051
|
$t->{twig_root_depth}= $p->depth; | ||||
4057 |
144
144
|
452
144
|
pop @{$t->{_twig_context_stack}}; # will be pushed back in _twig_start | ||||
4058 |
144
|
186
|
_twig_start( $p, $gi, @_); | ||||
4059 |
144
|
529
|
return; | ||||
4060 | } | ||||||
4061 | |||||||
4062 | # $tag will always be true if it needs to be printed (the tag string is never empty) | ||||||
4063 |
380
|
786
|
my $tag= $t->{twig_default_print} ? $t->{twig_keep_encoding} ? $p->original_string | ||||
4064 | : $p->recognized_string | ||||||
4065 | : ''; | ||||||
4066 | |||||||
4067 |
380
|
1146
|
if( $p->depth == 0) | ||||
4068 | { | ||||||
4069 | ## no critic (TestingAndDebugging::ProhibitNoStrict); | ||||||
4070 |
187
187
187
|
619
176
18405
|
no strict 'refs'; | ||||
4071 |
102
62
|
412
238
|
print {$fh} $tag if( $tag); | ||||
4072 |
102
102
|
96
117
|
pop @{$t->{_twig_context_stack}}; # will be pushed back in _twig_start | ||||
4073 |
102
|
156
|
_twig_start( $p, $gi, @_); | ||||
4074 |
102
|
165
|
$t->root->_set_flushed; # or the root start tag gets output the first time we flush | ||||
4075 | } | ||||||
4076 | elsif( $t->{twig_starttag_handlers}) | ||||||
4077 | { # look for start tag handlers | ||||||
4078 | |||||||
4079 |
38
|
140
|
my @handlers= _handler( $t, $t->{twig_starttag_handlers}, $gi); | ||||
4080 |
38
|
24
|
my $last_handler_res; | ||||
4081 |
38
|
36
|
foreach my $handler ( @handlers) | ||||
4082 |
13
|
30
|
{ $last_handler_res= $handler->($t, $gi, %att); | ||||
4083 |
13
|
77
|
last unless $last_handler_res; | ||||
4084 | } | ||||||
4085 | ## no critic (TestingAndDebugging::ProhibitNoStrict); | ||||||
4086 |
187
187
187
|
561
154
6156
|
no strict 'refs'; | ||||
4087 |
38
27
|
98
44
|
print {$fh} $tag if( $tag && (!@handlers || $last_handler_res)); | ||||
4088 | } | ||||||
4089 | else | ||||||
4090 | { | ||||||
4091 | ## no critic (TestingAndDebugging::ProhibitNoStrict); | ||||||
4092 |
187
187
187
|
494
161
28858
|
no strict 'refs'; | ||||
4093 |
240
121
|
819
136
|
print {$fh} $tag if( $tag); | ||||
4094 | } | ||||||
4095 |
380
|
1054
|
return; | ||||
4096 | } | ||||||
4097 | |||||||
4098 | sub _twig_end_check_roots | ||||||
4099 | { # warn " in _twig_end_check_roots...\n"; # DEBUG handler | ||||||
4100 | |||||||
4101 |
380
|
446
|
my( $p, $gi, %att)= @_; | ||||
4102 |
380
|
303
|
my $t= $p->{twig}; | ||||
4103 | # $tag can be empty (<elt/>), hence the undef and the tests for defined | ||||||
4104 |
380
|
687
|
my $tag= $t->{twig_default_print} ? $t->{twig_keep_encoding} ? $p->original_string | ||||
4105 | : $p->recognized_string | ||||||
4106 | : undef; | ||||||
4107 |
380
|
1373
|
my $fh= $t->{twig_output_fh} || select() || \*STDOUT; | ||||
4108 | |||||||
4109 |
380
|
460
|
if( $t->{twig_endtag_handlers}) | ||||
4110 | { # look for end tag handlers | ||||||
4111 |
40
|
51
|
my @handlers= _handler( $t, $t->{twig_endtag_handlers}, $gi); | ||||
4112 |
40
|
24
|
my $last_handler_res=1; | ||||
4113 |
40
|
40
|
foreach my $handler ( @handlers) | ||||
4114 |
13
|
26
|
{ $last_handler_res= $handler->($t, $gi) || last; } | ||||
4115 | #if( ! $last_handler_res) | ||||||
4116 | # { pop @{$t->{_twig_context_stack}}; warn "tested"; | ||||||
4117 | # return; | ||||||
4118 | # } | ||||||
4119 | } | ||||||
4120 | { | ||||||
4121 | ## no critic (TestingAndDebugging::ProhibitNoStrict); | ||||||
4122 |
187
187
187
380
|
574
160
34846
281
|
no strict 'refs'; | ||||
4123 |
380
210
|
427
240
|
print {$fh} $tag if( defined $tag); | ||||
4124 | } | ||||||
4125 |
380
|
610
|
if( $p->depth == 0) | ||||
4126 | { | ||||||
4127 |
102
|
419
|
_twig_end( $p, $gi); | ||||
4128 |
102
|
145
|
$t->root->{end_tag_flushed}=1; | ||||
4129 | } | ||||||
4130 | |||||||
4131 |
380
380
|
919
323
|
pop @{$t->{_twig_context_stack}}; | ||||
4132 |
380
|
906
|
return; | ||||
4133 | } | ||||||
4134 | |||||||
4135 | sub _twig_pi_check_roots | ||||||
4136 | { # warn " in _twig_pi_check_roots...\n"; # DEBUG handler | ||||||
4137 |
78
|
114
|
my( $p, $target, $data)= @_; | ||||
4138 |
78
|
70
|
my $t= $p->{twig}; | ||||
4139 |
78
|
194
|
my $pi= $t->{twig_default_print} ? $t->{twig_keep_encoding} ? $p->original_string | ||||
4140 | : $p->recognized_string | ||||||
4141 | : undef; | ||||||
4142 |
78
|
338
|
my $fh= $t->{twig_output_fh} || select() || \*STDOUT; | ||||
4143 | |||||||
4144 |
78
|
201
|
if( my $handler= $t->{twig_handlers}->{pi_handlers}->{$target} | ||||
4145 | || $t->{twig_handlers}->{pi_handlers}->{''} | ||||||
4146 | ) | ||||||
4147 | { # if handler is called on pi, then it needs to be processed as a regular node | ||||||
4148 |
60
|
102
|
my @flags= qw( twig_process_pi twig_keep_pi); | ||||
4149 |
60
60
|
43
90
|
my @save= @{$t}{@flags}; # save pi related flags | ||||
4150 |
60
60
|
48
66
|
@{$t}{@flags}= (1, 0); # override them, pi needs to be processed | ||||
4151 |
60
|
76
|
_twig_pi( @_); # call handler on the pi | ||||
4152 |
60
60
|
54
112
|
@{$t}{@flags}= @save;; # restore flag | ||||
4153 | } | ||||||
4154 | else | ||||||
4155 | { | ||||||
4156 | ## no critic (TestingAndDebugging::ProhibitNoStrict); | ||||||
4157 |
187
187
187
|
571
165
363718
|
no strict 'refs'; | ||||
4158 |
18
18
|
23
23
|
print {$fh} $pi if( defined( $pi)); | ||||
4159 | } | ||||||
4160 |
78
|
190
|
return; | ||||
4161 | } | ||||||
4162 | |||||||
4163 | |||||||
4164 | sub _output_ignored | ||||||
4165 |
32
|
25
|
{ my( $t, $p)= @_; | ||||
4166 |
32
|
23
|
my $action= $t->{twig_ignore_action}; | ||||
4167 | |||||||
4168 |
32
|
35
|
my $get_string= $t->{twig_keep_encoding} ? 'original_string' : 'recognized_string'; | ||||
4169 | |||||||
4170 |
32
8
|
29
14
|
if( $action eq 'print' ) { print $p->$get_string; } | ||||
4171 | else | ||||||
4172 |
24
|
14
|
{ my $string_ref; | ||||
4173 |
24
|
43
|
if( $action eq 'string') | ||||
4174 |
12
|
10
|
{ $string_ref= \$t->{twig_buffered_string}; } | ||||
4175 | elsif( ref( $action) && ref( $action) eq 'SCALAR') | ||||||
4176 |
12
|
8
|
{ $string_ref= $action; } | ||||
4177 | else | ||||||
4178 |
0
|
0
|
{ _croak( "wrong ignore action: $action"); } | ||||
4179 | |||||||
4180 |
24
|
43
|
$$string_ref .= $p->$get_string; | ||||
4181 | } | ||||||
4182 | } | ||||||
4183 | |||||||
4184 | |||||||
4185 | |||||||
4186 | sub _twig_ignore_start | ||||||
4187 | { # warn " in _twig_ignore_start...\n"; # DEBUG handler | ||||||
4188 | |||||||
4189 |
55
|
95
|
my( $p, $gi)= @_; | ||||
4190 |
55
|
42
|
my $t= $p->{twig}; | ||||
4191 |
55
|
40
|
$t->{twig_ignore_level}++; | ||||
4192 |
55
|
38
|
my $action= $t->{twig_ignore_action}; | ||||
4193 | |||||||
4194 |
55
|
94
|
$t->_output_ignored( $p) unless $action eq 'discard'; | ||||
4195 |
55
|
107
|
return; | ||||
4196 | } | ||||||
4197 | |||||||
4198 | sub _twig_ignore_end | ||||||
4199 | { # warn " in _twig_ignore_end...\n"; # DEBUG handler | ||||||
4200 | |||||||
4201 |
128
|
154
|
my( $p, $gi)= @_; | ||||
4202 |
128
|
82
|
my $t= $p->{twig}; | ||||
4203 | |||||||
4204 |
128
|
98
|
my $action= $t->{twig_ignore_action}; | ||||
4205 |
128
|
137
|
$t->_output_ignored( $p) unless $action eq 'discard'; | ||||
4206 | |||||||
4207 |
128
|
162
|
$t->{twig_ignore_level}--; | ||||
4208 | |||||||
4209 |
128
|
144
|
if( ! $t->{twig_ignore_level}) | ||||
4210 | { | ||||||
4211 |
57
|
41
|
$t->{twig_current} = $t->{twig_ignore_elt}; | ||||
4212 |
57
|
91
|
$t->{twig_current}->set_twig_current; | ||||
4213 | |||||||
4214 |
57
|
72
|
$t->{twig_ignore_elt}->cut; # there could possibly be a memory leak here (delete would avoid it, | ||||
4215 | # but could also delete elements that should not be deleted) | ||||||
4216 | |||||||
4217 | # restore the saved stack to the current level | ||||||
4218 |
57
57
|
24
154
|
splice( @{$t->{_twig_context_stack}}, $p->depth+ 1 ); | ||||
4219 | #warn "stack: ", _dump_stack( $t->{_twig_context_stack}), "\n"; | ||||||
4220 | |||||||
4221 |
57
57
|
206
119
|
$p->setHandlers( @{$t->{twig_saved_handlers}}); | ||||
4222 | # test for handlers | ||||||
4223 |
57
|
3714
|
if( $t->{twig_endtag_handlers}) | ||||
4224 | { # look for end tag handlers | ||||||
4225 |
14
|
19
|
my @handlers= _handler( $t, $t->{twig_endtag_handlers}, $gi); | ||||
4226 |
14
|
11
|
my $last_handler_res=1; | ||||
4227 |
14
|
11
|
foreach my $handler ( @handlers) | ||||
4228 |
15
|
28
|
{ $last_handler_res= $handler->($t, $gi) || last; } | ||||
4229 | } | ||||||
4230 |
57
57
|
82
58
|
pop @{$t->{_twig_context_stack}}; | ||||
4231 | }; | ||||||
4232 |
128
|
312
|
return; | ||||
4233 | } | ||||||
4234 | |||||||
4235 | #sub _dump_stack { my( $stack)= @_; return join( ":", map { $_->{$ST_TAG} } @$stack); } | ||||||
4236 | |||||||
4237 | sub ignore | ||||||
4238 |
60
|
1
|
63
|
{ my( $t, $elt, $action)= @_; | |||
4239 |
60
|
42
|
my $current= $t->{twig_current}; | ||||
4240 | |||||||
4241 |
60
6
|
740
6
|
if( ! ($elt && ref( $elt) && isa( $elt, 'XML::Twig::Elt'))) { $elt= $current; } | ||||
4242 | |||||||
4243 | #warn "ignore: current = ", $current->tag, ", elt = ", $elt->tag, ")\n"; | ||||||
4244 | |||||||
4245 | # we need the ($elt == $current->{last_child}) test because the current element is set to the | ||||||
4246 | # parent _before_ handlers are called (and I can't figure out how to fix this) | ||||||
4247 |
60
|
153
|
unless( ($elt == $current) || ($current->{last_child} && ($elt == $current->{last_child})) || $current->in( $elt)) | ||||
4248 |
2
|
3
|
{ _croak( "element to be ignored must be ancestor of current element"); } | ||||
4249 | |||||||
4250 |
58
|
101
|
$t->{twig_ignore_level}= $current == $elt ? 1 : $t->_level_in_stack( $current) - $t->_level_in_stack($elt) + 1; | ||||
4251 | #warn "twig_ignore_level: $t->{twig_ignore_level} (current: ", $current->tag, ", elt: ", $elt->tag, ")\n"; | ||||||
4252 |
58
|
189
|
$t->{twig_ignore_elt} = $elt; # save it, so we can delete it later | ||||
4253 | |||||||
4254 |
58
|
159
|
$action ||= 'discard'; | ||||
4255 |
58
|
238
|
if( !($action eq 'print' || $action eq 'string' || ( ref( $action) && ref( $action) eq 'SCALAR'))) | ||||
4256 |
49
|
41
|
{ $action= 'discard'; } | ||||
4257 | |||||||
4258 |
58
|
61
|
$t->{twig_ignore_action}= $action; | ||||
4259 | |||||||
4260 |
58
|
43
|
my $p= $t->{twig_parser}; | ||||
4261 |
58
|
225
|
my @saved_handlers= $p->setHandlers( %twig_handlers_ignore); # set handlers | ||||
4262 | |||||||
4263 |
58
|
3555
|
my $get_string= $t->{twig_keep_encoding} ? 'original_string' : 'recognized_string'; | ||||
4264 | |||||||
4265 |
58
|
46
|
my $default_handler; | ||||
4266 | |||||||
4267 |
58
|
93
|
if( $action ne 'discard') | ||||
4268 |
9
|
10
|
{ if( $action eq 'print') | ||||
4269 |
2
4
|
10
8
|
{ $p->setHandlers( Default => sub { print $_[0]->$get_string; }); } | ||||
4270 | else | ||||||
4271 |
7
|
6
|
{ my $string_ref; | ||||
4272 |
7
|
20
|
if( $action eq 'string') | ||||
4273 |
4
2
|
8
5
|
{ if( ! exists $t->{twig_buffered_string}) { $t->{twig_buffered_string}=''; } | ||||
4274 |
4
|
5
|
$string_ref= \$t->{twig_buffered_string}; | ||||
4275 | } | ||||||
4276 | elsif( ref( $action) && ref( $action) eq 'SCALAR') | ||||||
4277 |
3
|
3
|
{ $string_ref= $action; } | ||||
4278 | |||||||
4279 |
7
14
|
29
41
|
$p->setHandlers( Default => sub { $$string_ref .= $_[0]->$get_string; }); | ||||
4280 | } | ||||||
4281 |
9
|
86
|
$t->_output_ignored( $p, $action); | ||||
4282 | } | ||||||
4283 | |||||||
4284 | |||||||
4285 |
58
|
252
|
$t->{twig_saved_handlers}= \@saved_handlers; # save current handlers | ||||
4286 | } | ||||||
4287 | |||||||
4288 | sub _level_in_stack | ||||||
4289 |
30
|
25
|
{ my( $t, $elt)= @_; | ||||
4290 |
30
|
18
|
my $level=1; | ||||
4291 |
30
30
|
18
36
|
foreach my $elt_in_stack ( @{$t->{_twig_context_stack}} ) | ||||
4292 |
83
30
|
201
54
|
{ if( $elt_in_stack->{$ST_ELT} && ($elt == $elt_in_stack->{$ST_ELT})) { return $level } | ||||
4293 |
53
|
36
|
$level++; | ||||
4294 | } | ||||||
4295 | } | ||||||
4296 | |||||||
4297 | |||||||
4298 | |||||||
4299 | # select $t->{twig_output_fh} and store the current selected fh | ||||||
4300 | sub _set_fh_to_twig_output_fh | ||||||
4301 |
3182
|
2080
|
{ my $t= shift; | ||||
4302 |
3182
|
2477
|
my $output_fh= $t->{twig_output_fh}; | ||||
4303 |
3182
|
5264
|
if( $output_fh && !$t->{twig_output_fh_selected}) | ||||
4304 | { # there is an output fh | ||||||
4305 |
59
|
150
|
$t->{twig_selected_fh}= select(); # store the currently selected fh | ||||
4306 |
59
|
71
|
$t->{twig_output_fh_selected}=1; | ||||
4307 |
59
|
90
|
select $output_fh; # select the output fh for the twig | ||||
4308 | } | ||||||
4309 | } | ||||||
4310 | |||||||
4311 | # select the fh that was stored in $t->{twig_selected_fh} | ||||||
4312 | # (before $t->{twig_output_fh} was selected) | ||||||
4313 | sub _set_fh_to_selected_fh | ||||||
4314 |
3084
|
1948
|
{ my $t= shift; | ||||
4315 |
3084
|
4264
|
return unless( $t->{twig_output_fh}); | ||||
4316 |
59
|
66
|
my $selected_fh= $t->{twig_selected_fh}; | ||||
4317 |
59
|
50
|
$t->{twig_output_fh_selected}=0; | ||||
4318 |
59
|
119
|
select $selected_fh; | ||||
4319 |
59
|
65
|
return; | ||||
4320 | } | ||||||
4321 | |||||||
4322 | |||||||
4323 | sub encoding | ||||||
4324 |
8
|
1
|
34
|
{ return $_[0]->{twig_xmldecl}->{encoding} if( $_[0]->{twig_xmldecl}); } | |||
4325 | |||||||
4326 | sub set_encoding | ||||||
4327 |
5
|
1
|
11
|
{ my( $t, $encoding)= @_; | |||
4328 |
5
|
25
|
$t->{twig_xmldecl} ||={}; | ||||
4329 |
5
|
10
|
$t->set_xml_version( "1.0") unless( $t->xml_version); | ||||
4330 |
5
|
7
|
$t->{twig_xmldecl}->{encoding}= $encoding; | ||||
4331 |
5
|
7
|
return $t; | ||||
4332 | } | ||||||
4333 | |||||||
4334 | sub output_encoding | ||||||
4335 |
2
|
1
|
7
|
{ return $_[0]->{output_encoding}; } | |||
4336 | |||||||
4337 | sub set_output_encoding | ||||||
4338 |
18
|
1
|
79
|
{ my( $t, $encoding)= @_; | |||
4339 |
18
|
26
|
my $output_filter= $t->output_filter || ''; | ||||
4340 | |||||||
4341 |
18
|
147
|
if( ($encoding && $encoding !~ m{^utf-?8$}i) || $t->{twig_keep_encoding} || $output_filter) | ||||
4342 |
16
|
45
|
{ $t->set_output_filter( _encoding_filter( $encoding || '')); } | ||||
4343 | |||||||
4344 |
18
|
86
|
$t->{output_encoding}= $encoding; | ||||
4345 |
18
|
32
|
return $t; | ||||
4346 | } | ||||||
4347 | |||||||
4348 | sub xml_version | ||||||
4349 |
12
|
1
|
53
|
{ return $_[0]->{twig_xmldecl}->{version} if( $_[0]->{twig_xmldecl}); } | |||
4350 | |||||||
4351 | sub set_xml_version | ||||||
4352 |
5
|
1
|
9
|
{ my( $t, $version)= @_; | |||
4353 |
5
|
11
|
$t->{twig_xmldecl} ||={}; | ||||
4354 |
5
|
8
|
$t->{twig_xmldecl}->{version}= $version; | ||||
4355 |
5
|
7
|
return $t; | ||||
4356 | } | ||||||
4357 | |||||||
4358 | sub standalone | ||||||
4359 |
6
|
1
|
24
|
{ return $_[0]->{twig_xmldecl}->{standalone} if( $_[0]->{twig_xmldecl}); } | |||
4360 | |||||||
4361 | sub set_standalone | ||||||
4362 |
3
|
1
|
9
|
{ my( $t, $standalone)= @_; | |||
4363 |
3
|
9
|
$t->{twig_xmldecl} ||={}; | ||||
4364 |
3
|
5
|
$t->set_xml_version( "1.0") unless( $t->xml_version); | ||||
4365 |
3
|
4
|
$t->{twig_xmldecl}->{standalone}= $standalone; | ||||
4366 |
3
|
5
|
return $t; | ||||
4367 | } | ||||||
4368 | |||||||
4369 | |||||||
4370 | # SAX methods | ||||||
4371 | |||||||
4372 | sub toSAX1 | ||||||
4373 |
2
|
1
|
16
|
{ _croak( "cannot use toSAX1 while parsing (use flush_toSAX1)") if (defined $_[0]->{twig_parser}); | |||
4374 |
1
|
4
|
shift(@_)->_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX1, | ||||
4375 | \&XML::Twig::Elt::_end_tag_data_SAX1 | ||||||
4376 | ); | ||||||
4377 | } | ||||||
4378 | |||||||
4379 | sub toSAX2 | ||||||
4380 |
5
|
1
|
280
|
{ _croak( "cannot use toSAX2 while parsing (use flush_toSAX2)") if (defined $_[0]->{twig_parser}); | |||
4381 |
4
|
9
|
shift(@_)->_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX2, | ||||
4382 | \&XML::Twig::Elt::_end_tag_data_SAX2 | ||||||
4383 | ); | ||||||
4384 | } | ||||||
4385 | |||||||
4386 | |||||||
4387 | sub _toSAX | ||||||
4388 |
5
|
8
|
{ my( $t, $handler, $start_tag_data, $end_tag_data) = @_; | ||||
4389 | |||||||
4390 |
5
|
25
|
if( my $start_document = $handler->can( 'start_document')) | ||||
4391 |
4
|
12
|
{ $start_document->( $handler); } | ||||
4392 | |||||||
4393 |
5
|
750
|
$t->_prolog_toSAX( $handler); | ||||
4394 | |||||||
4395 |
5
5
|
57
6
|
if( $t->root) { $t->root->_toSAX( $handler, $start_tag_data, $end_tag_data) ; } | ||||
4396 |
5
|
45
|
if( my $end_document = $handler->can( 'end_document')) | ||||
4397 |
5
|
11
|
{ $end_document->( $handler); } | ||||
4398 | } | ||||||
4399 | |||||||
4400 | |||||||
4401 | sub flush_toSAX1 | ||||||
4402 |
3
|
1
|
9
|
{ shift(@_)->_flush_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX1, | |||
4403 | \&XML::Twig::Elt::_end_tag_data_SAX1 | ||||||
4404 | ); | ||||||
4405 | } | ||||||
4406 | |||||||
4407 | sub flush_toSAX2 | ||||||
4408 |
3
|
1
|
16
|
{ shift(@_)->_flush_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX2, | |||
4409 | \&XML::Twig::Elt::_end_tag_data_SAX2 | ||||||
4410 | ); | ||||||
4411 | } | ||||||
4412 | |||||||
4413 | sub _flush_toSAX | ||||||
4414 |
6
|
7
|
{ my( $t, $handler, $start_tag_data, $end_tag_data)= @_; | ||||
4415 | |||||||
4416 | # the "real" last element processed, as _twig_end has closed it | ||||||
4417 |
6
|
4
|
my $last_elt; | ||||
4418 |
6
|
7
|
if( $t->{twig_current}) | ||||
4419 |
4
|
8
|
{ $last_elt= $t->{twig_current}->_last_child; } | ||||
4420 | else | ||||||
4421 |
2
|
2
|
{ $last_elt= $t->{twig_root}; } | ||||
4422 | |||||||
4423 |
6
|
5
|
my $elt= $t->{twig_root}; | ||||
4424 |
6
|
10
|
unless( $elt->_flushed) | ||||
4425 | { # init unless already done (ie root has been flushed) | ||||||
4426 |
2
|
7
|
if( my $start_document = $handler->can( 'start_document')) | ||||
4427 |
2
|
7
|
{ $start_document->( $handler); } | ||||
4428 | # flush the DTD | ||||||
4429 |
2
|
394
|
$t->_prolog_toSAX( $handler) | ||||
4430 | } | ||||||
4431 | |||||||
4432 |
6
|
7
|
while( $elt) | ||||
4433 |
18
|
8
|
{ my $next_elt; | ||||
4434 |
18
|
33
|
if( $last_elt && $last_elt->in( $elt)) | ||||
4435 | { | ||||||
4436 |
6
|
6
|
unless( $elt->_flushed) | ||||
4437 | { # just output the front tag | ||||||
4438 |
4
|
10
|
if( my $start_element = $handler->can( 'start_element')) | ||||
4439 |
4
|
7
|
{ if( my $tag_data= $start_tag_data->( $elt)) | ||||
4440 |
4
|
11
|
{ $start_element->( $handler, $tag_data); } | ||||
4441 | } | ||||||
4442 |
4
|
100
|
$elt->_set_flushed; | ||||
4443 | } | ||||||
4444 |
6
|
5
|
$next_elt= $elt->{first_child}; | ||||
4445 | } | ||||||
4446 | else | ||||||
4447 | { # an element before the last one or the last one, | ||||||
4448 |
12
|
10
|
$next_elt= $elt->{next_sibling}; | ||||
4449 |
12
|
13
|
$elt->_toSAX( $handler, $start_tag_data, $end_tag_data); | ||||
4450 |
12
|
60
|
$elt->delete; | ||||
4451 |
12
|
31
|
last if( $last_elt && ($elt == $last_elt)); | ||||
4452 | } | ||||||
4453 |
12
|
29
|
$elt= $next_elt; | ||||
4454 | } | ||||||
4455 |
6
|
18
|
if( !$t->{twig_parsing}) | ||||
4456 |
2
|
7
|
{ if( my $end_document = $handler->can( 'end_document')) | ||||
4457 |
2
|
6
|
{ $end_document->( $handler); } | ||||
4458 | } | ||||||
4459 | } | ||||||
4460 | |||||||
4461 | |||||||
4462 | sub _prolog_toSAX | ||||||
4463 |
7
|
11
|
{ my( $t, $handler)= @_; | ||||
4464 |
7
|
9
|
$t->_xmldecl_toSAX( $handler); | ||||
4465 |
7
|
87
|
$t->_DTD_toSAX( $handler); | ||||
4466 | } | ||||||
4467 | |||||||
4468 | sub _xmldecl_toSAX | ||||||
4469 |
7
|
5
|
{ my( $t, $handler)= @_; | ||||
4470 |
7
|
7
|
my $decl= $t->{twig_xmldecl}; | ||||
4471 |
7
|
19
|
my $data= { Version => $decl->{version}, | ||||
4472 | Encoding => $decl->{encoding}, | ||||||
4473 | Standalone => $decl->{standalone}, | ||||||
4474 | }; | ||||||
4475 |
7
|
29
|
if( my $xml_decl= $handler->can( 'xml_decl')) | ||||
4476 |
4
|
8
|
{ $xml_decl->( $handler, $data); } | ||||
4477 | } | ||||||
4478 | |||||||
4479 | sub _DTD_toSAX | ||||||
4480 |
7
|
6
|
{ my( $t, $handler)= @_; | ||||
4481 |
7
|
7
|
my $doctype= $t->{twig_doctype}; | ||||
4482 |
7
|
11
|
return unless( $doctype); | ||||
4483 |
3
|
7
|
my $data= { Name => $doctype->{name}, | ||||
4484 | PublicId => $doctype->{pub}, | ||||||
4485 | SystemId => $doctype->{sysid}, | ||||||
4486 | }; | ||||||
4487 | |||||||
4488 |
3
|
13
|
if( my $start_dtd= $handler->can( 'start_dtd')) | ||||
4489 |
2
|
5
|
{ $start_dtd->( $handler, $data); } | ||||
4490 | |||||||
4491 | # I should call code to export the internal subset here | ||||||
4492 | |||||||
4493 |
3
|
50
|
if( my $end_dtd= $handler->can( 'end_dtd')) | ||||
4494 |
2
|
5
|
{ $end_dtd->( $handler); } | ||||
4495 | } | ||||||
4496 | |||||||
4497 | # input/output filters | ||||||
4498 | |||||||
4499 | sub latin1 | ||||||
4500 |
2
|
1
|
6
|
{ local $SIG{__DIE__}; | |||
4501 |
2
|
6
|
if( _use( 'Encode')) | ||||
4502 |
2
|
4
|
{ return encode_convert( 'ISO-8859-15'); } | ||||
4503 | elsif( _use( 'Text::Iconv')) | ||||||
4504 |
0
|
0
|
{ return iconv_convert( 'ISO-8859-15'); } | ||||
4505 | elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String')) | ||||||
4506 |
0
|
0
|
{ return unicode_convert( 'ISO-8859-15'); } | ||||
4507 | else | ||||||
4508 |
0
|
0
|
{ return \®exp2latin1; } | ||||
4509 | } | ||||||
4510 | |||||||
4511 | sub _encoding_filter | ||||||
4512 | { | ||||||
4513 |
16
16
|
9
46
|
{ local $SIG{__DIE__}; | ||||
4514 |
16
|
42
|
my $encoding= $_[1] || $_[0]; | ||||
4515 |
16
|
24
|
if( _use( 'Encode')) | ||||
4516 |
16
|
22
|
{ my $sub= encode_convert( $encoding); | ||||
4517 |
16
|
53
|
return $sub; | ||||
4518 | } | ||||||
4519 | elsif( _use( 'Text::Iconv')) | ||||||
4520 |
0
|
0
|
{ return iconv_convert( $encoding); } | ||||
4521 | elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String')) | ||||||
4522 |
0
|
0
|
{ return unicode_convert( $encoding); } | ||||
4523 | } | ||||||
4524 |
0
|
0
|
_croak( "Encode, Text::Iconv or Unicode::Map8 and Unicode::String need to be installed in order to use encoding options"); | ||||
4525 | } | ||||||
4526 | |||||||
4527 | # shamelessly lifted from XML::TyePYX (works only with XML::Parse 2.27) | ||||||
4528 | sub regexp2latin1 | ||||||
4529 |
1
|
1
|
7
|
{ my $text=shift; | |||
4530 |
1
0
|
4
0
|
$text=~s{([\xc0-\xc3])(.)}{ my $hi = ord($1); | ||||
4531 |
0
|
0
|
my $lo = ord($2); | ||||
4532 |
0
|
0
|
chr((($hi & 0x03) <<6) | ($lo & 0x3F)) | ||||
4533 | }ge; | ||||||
4534 |
1
|
2
|
return $text; | ||||
4535 | } | ||||||
4536 | |||||||
4537 | |||||||
4538 | sub html_encode | ||||||
4539 |
5
|
1
|
6
|
{ _use( 'HTML::Entities') or croak "cannot use html_encode: missing HTML::Entities"; | |||
4540 |
5
|
16
|
return HTML::Entities::encode_entities($_[0] ); | ||||
4541 | } | ||||||
4542 | |||||||
4543 | sub safe_encode | ||||||
4544 |
19
|
1
|
16
|
{ my $str= shift; | |||
4545 |
19
|
21
|
if( $perl_version < 5.008) | ||||
4546 | { # the no utf8 makes the regexp work in 5.6 | ||||||
4547 |
187
187
187
|
636
153
742
|
no utf8; # = perl 5.6 | ||||
4548 |
0
0
|
0
0
|
$str =~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)} | ||||
4549 | {_XmlUtf8Decode($1)}egs; | ||||||
4550 | } | ||||||
4551 | else | ||||||
4552 |
19
|
30
|
{ $str= encode( ascii => $str, $FB_HTMLCREF); } | ||||
4553 |
19
|
278
|
return $str; | ||||
4554 | } | ||||||
4555 | |||||||
4556 | sub safe_encode_hex | ||||||
4557 |
19
|
1
|
13
|
{ my $str= shift; | |||
4558 |
19
|
18
|
if( $perl_version < 5.008) | ||||
4559 | { # the no utf8 makes the regexp work in 5.6 | ||||||
4560 |
187
187
187
|
19617
212
409
|
no utf8; # = perl 5.6 | ||||
4561 |
0
0
|
0
0
|
$str =~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)} | ||||
4562 | {_XmlUtf8Decode($1, 1)}egs; | ||||||
4563 | } | ||||||
4564 | else | ||||||
4565 |
19
|
27
|
{ $str= encode( ascii => $str, $FB_XMLCREF); } | ||||
4566 |
19
|
251
|
return $str; | ||||
4567 | } | ||||||
4568 | |||||||
4569 | # this one shamelessly lifted from XML::DOM | ||||||
4570 | # does NOT work on 5.8.0 | ||||||
4571 | sub _XmlUtf8Decode | ||||||
4572 |
4
|
15
|
{ my ($str, $hex) = @_; | ||||
4573 |
4
|
4
|
my $len = length ($str); | ||||
4574 |
4
|
2
|
my $n; | ||||
4575 | |||||||
4576 |
4
|
4
|
if ($len == 2) | ||||
4577 |
4
|
15
|
{ my @n = unpack "C2", $str; | ||||
4578 |
4
|
5
|
$n = (($n[0] & 0x3f) << 6) + ($n[1] & 0x3f); | ||||
4579 | } | ||||||
4580 | elsif ($len == 3) | ||||||
4581 |
0
|
0
|
{ my @n = unpack "C3", $str; | ||||
4582 |
0
|
0
|
$n = (($n[0] & 0x1f) << 12) + (($n[1] & 0x3f) << 6) + ($n[2] & 0x3f); | ||||
4583 | } | ||||||
4584 | elsif ($len == 4) | ||||||
4585 |
0
|
0
|
{ my @n = unpack "C4", $str; | ||||
4586 |
0
|
0
|
$n = (($n[0] & 0x0f) << 18) + (($n[1] & 0x3f) << 12) | ||||
4587 | + (($n[2] & 0x3f) << 6) + ($n[3] & 0x3f); | ||||||
4588 | } | ||||||
4589 | elsif ($len == 1) # just to be complete... | ||||||
4590 |
0
|
0
|
{ $n = ord ($str); } | ||||
4591 | else | ||||||
4592 |
0
|
0
|
{ croak "bad value [$str] for _XmlUtf8Decode"; } | ||||
4593 | |||||||
4594 |
4
|
7
|
my $char= $hex ? sprintf ("&#x%x;", $n) : "&#$n;"; | ||||
4595 |
4
|
10
|
return $char; | ||||
4596 | } | ||||||
4597 | |||||||
4598 | |||||||
4599 | sub unicode_convert | ||||||
4600 |
1
|
1
|
857185
|
{ my $enc= $_[1] ? $_[1] : $_[0]; # so the method can be called on the twig or directly | |||
4601 |
1
|
4
|
_use( 'Unicode::Map8') or croak "Unicode::Map8 not available, needed for encoding filter: $!"; | ||||
4602 |
1
|
2
|
_use( 'Unicode::String') or croak "Unicode::String not available, needed for encoding filter: $!"; | ||||
4603 |
1
|
28
|
import Unicode::String qw(utf8); | ||||
4604 |
1
1
1
1
1
|
47
106
3
1
116
|
my $sub= eval qq{ { $NO_WARNINGS; | ||||
4605 | my \$cnv; | ||||||
4606 | BEGIN { \$cnv= Unicode::Map8->new(\$enc) | ||||||
4607 | or croak "Can't create converter to \$enc"; | ||||||
4608 | } | ||||||
4609 | sub { return \$cnv->to8 (utf8(\$_[0])->ucs2); } | ||||||
4610 | } | ||||||
4611 | }; | ||||||
4612 |
1
0
|
3
0
|
unless( $sub) { croak $@; } | ||||
4613 |
1
|
6
|
return $sub; | ||||
4614 | } | ||||||
4615 | |||||||
4616 | sub iconv_convert | ||||||
4617 |
4
|
1
|
225002
|
{ my $enc= $_[1] ? $_[1] : $_[0]; # so the method can be called on the twig or directly | |||
4618 |
4
|
10
|
_use( 'Text::Iconv') or croak "Text::Iconv not available, needed for encoding filter: $!"; | ||||
4619 |
4
1
1
1
1
1
1
1
1
|
169
3
1
106
3
1
117
4
1
|
my $sub= eval qq{ { $NO_WARNINGS; | ||||
4620 | my \$cnv; | ||||||
4621 | BEGIN { \$cnv = Text::Iconv->new( 'utf8', \$enc) | ||||||
4622 | or croak "Can't create iconv converter to \$enc"; | ||||||
4623 | } | ||||||
4624 | sub { return \$cnv->convert( \$_[0]); } | ||||||
4625 | } | ||||||
4626 | }; | ||||||
4627 |
4
|
9
|
unless( $sub) | ||||
4628 |
1
|
3
|
{ if( $@=~ m{^Unsupported conversion: Invalid argument}) | ||||
4629 |
0
|
0
|
{ croak "Unsupported encoding: $enc"; } | ||||
4630 | else | ||||||
4631 |
1
|
94
|
{ croak $@; } | ||||
4632 | } | ||||||
4633 | |||||||
4634 |
3
|
14
|
return $sub; | ||||
4635 | } | ||||||
4636 | |||||||
4637 | sub encode_convert | ||||||
4638 |
18
|
1
|
27
|
{ my $enc= $_[1] ? $_[1] : $_[0]; # so the method can be called on the twig or directly | |||
4639 |
18
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
|
1051
4
1
108
4
1
101
3
1
116
4
1
103
3
2
106
2
2
112
3
2
102
3
1
109
3
2
109
3
0
102
112
3
1
117
3
1
3
1
116
3
1
114
3
1
117
3
2
117
|
my $sub= eval qq{sub { $NO_WARNINGS; return encode( "$enc", \$_[0]); } }; | ||||
4640 |
18
|
33
|
croak "can't create Encode-based filter: $@" unless( $sub); | ||||
4641 |
18
|
28
|
return $sub; | ||||
4642 | } | ||||||
4643 | |||||||
4644 | |||||||
4645 | # XML::XPath compatibility | ||||||
4646 |
136
|
1
|
89050
|
sub getRootNode { return $_[0]; } | |||
4647 |
17
|
1
|
103
|
sub getParentNode { return undef; } | |||
4648 |
184
184
|
1
|
8726
446
|
sub getChildNodes { my @children= ($_[0]->root); return wantarray ? @children : \@children; } | |||
4649 | |||||||
4650 |
3
|
76269
|
sub _weakrefs { return $weakrefs; } | ||||
4651 |
13
13
|
294
39
|
sub _set_weakrefs { $weakrefs=shift() || 0; XML::Twig::Elt::set_destroy()if ! $weakrefs; } # for testing purposes | ||||
4652 | |||||||
4653 | sub _dump | ||||||
4654 |
6
|
12
|
{ my $t= shift; | ||||
4655 |
6
|
7
|
my $dump=''; | ||||
4656 | |||||||
4657 |
6
|
5
|
$dump="document\n"; # should dump twig level data here | ||||
4658 |
6
5
|
12
7
|
if( $t->root) { $dump .= $t->root->_dump( @_); } | ||||
4659 | |||||||
4660 |
6
|
12
|
return $dump; | ||||
4661 | |||||||
4662 | } | ||||||
4663 | |||||||
4664 | |||||||
4665 | 1; | ||||||
4666 | |||||||
4667 | ###################################################################### | ||||||
4668 | package XML::Twig::Entity_list; | ||||||
4669 | ###################################################################### | ||||||
4670 | |||||||
4671 | *isa= *UNIVERSAL::isa; | ||||||
4672 | |||||||
4673 | sub new | ||||||
4674 |
3114
|
2541
|
{ my $class = shift; | ||||
4675 |
3114
|
5362
|
my $self={ entities => {}, updated => 0}; | ||||
4676 | |||||||
4677 |
3114
|
4488
|
bless $self, $class; | ||||
4678 |
3114
|
4149
|
return $self; | ||||
4679 | |||||||
4680 | } | ||||||
4681 | |||||||
4682 | sub add_new_ent | ||||||
4683 |
5
|
5
|
{ my $ent_list= shift; | ||||
4684 |
5
|
10
|
my $ent= XML::Twig::Entity->new( @_); | ||||
4685 |
5
|
8
|
$ent_list->add( $ent); | ||||
4686 |
5
|
7
|
return $ent_list; | ||||
4687 | } | ||||||
4688 | |||||||
4689 | sub _add_list | ||||||
4690 |
5
|
6
|
{ my( $ent_list, $to_add)= @_; | ||||
4691 |
5
|
6
|
my $ents_to_add= $to_add->{entities}; | ||||
4692 |
5
|
18
|
return $ent_list unless( $ents_to_add && %$ents_to_add); | ||||
4693 |
3
3
|
8
8
|
@{$ent_list->{entities}}{keys %$ents_to_add}= values %$ents_to_add; | ||||
4694 |
3
|
4
|
$ent_list->{updated}=1; | ||||
4695 |
3
|
4
|
return $ent_list; | ||||
4696 | } | ||||||
4697 | |||||||
4698 | sub add | ||||||
4699 |
82
|
80
|
{ my( $ent_list, $ent)= @_; | ||||
4700 |
82
|
156
|
$ent_list->{entities}->{$ent->{name}}= $ent; | ||||
4701 |
82
|
72
|
$ent_list->{updated}=1; | ||||
4702 |
82
|
78
|
return $ent_list; | ||||
4703 | } | ||||||
4704 | |||||||
4705 | sub ent | ||||||
4706 |
16
|
20
|
{ my( $ent_list, $ent_name)= @_; | ||||
4707 |
16
|
112
|
return $ent_list->{entities}->{$ent_name}; | ||||
4708 | } | ||||||
4709 | |||||||
4710 | # can be called with an entity or with an entity name | ||||||
4711 | sub delete | ||||||
4712 |
4
|
6
|
{ my $ent_list= shift; | ||||
4713 |
4
|
16
|
if( isa( ref $_[0], 'XML::Twig::Entity')) | ||||
4714 | { # the second arg is an entity | ||||||
4715 |
1
|
2
|
my $ent= shift; | ||||
4716 |
1
|
3
|
delete $ent_list->{entities}->{$ent->{name}}; | ||||
4717 | } | ||||||
4718 | else | ||||||
4719 | { # the second arg was not entity, must be a string then | ||||||
4720 |
3
|
5
|
my $name= shift; | ||||
4721 |
3
|
5
|
delete $ent_list->{entities}->{$name}; | ||||
4722 | } | ||||||
4723 |
4
|
7
|
$ent_list->{updated}=1; | ||||
4724 |
4
|
6
|
return $ent_list; | ||||
4725 | } | ||||||
4726 | |||||||
4727 | sub print | ||||||
4728 |
2
|
3
|
{ my ($ent_list, $fh)= @_; | ||||
4729 |
2
|
6
|
my $old_select= defined $fh ? select $fh : undef; | ||||
4730 | |||||||
4731 |
2
2
|
2
12
|
foreach my $ent_name ( sort keys %{$ent_list->{entities}}) | ||||
4732 |
4
|
18
|
{ my $ent= $ent_list->{entities}->{$ent_name}; | ||||
4733 | # we have to test what the entity is or un-defined entities can creep in | ||||||
4734 |
4
4
|
11
5
|
if( isa( $ent, 'XML::Twig::Entity')) { $ent->print(); } | ||||
4735 | } | ||||||
4736 |
2
|
16
|
select $old_select if( defined $old_select); | ||||
4737 |
2
|
3
|
return $ent_list; | ||||
4738 | } | ||||||
4739 | |||||||
4740 | sub text | ||||||
4741 |
16
|
19
|
{ my ($ent_list)= @_; | ||||
4742 |
16
19
16
|
14
44
54
|
return join "\n", map { $ent_list->{entities}->{$_}->text} sort keys %{$ent_list->{entities}}; | ||||
4743 | } | ||||||
4744 | |||||||
4745 | # return the list of entity names | ||||||
4746 | sub entity_names | ||||||
4747 |
6
|
8
|
{ my $ent_list= shift; | ||||
4748 |
6
6
|
7
54
|
return (sort keys %{$ent_list->{entities}}) ; | ||||
4749 | } | ||||||
4750 | |||||||
4751 | |||||||
4752 | sub list | ||||||
4753 |
5
|
9
|
{ my ($ent_list)= @_; | ||||
4754 |
5
10
5
|
6
26
14
|
return map { $ent_list->{entities}->{$_} } sort keys %{$ent_list->{entities}}; | ||||
4755 | } | ||||||
4756 | |||||||
4757 | 1; | ||||||
4758 | |||||||
4759 | ###################################################################### | ||||||
4760 | package XML::Twig::Entity; | ||||||
4761 | ###################################################################### | ||||||
4762 | |||||||
4763 | #*isa= *UNIVERSAL::isa; | ||||||
4764 | |||||||
4765 | sub new | ||||||
4766 |
88
|
170
|
{ my( $class, $name, $val, $sysid, $pubid, $ndata, $param)= @_; | ||||
4767 |
88
|
213
|
$class= ref( $class) || $class; | ||||
4768 | |||||||
4769 |
88
|
88
|
my $self={}; | ||||
4770 | |||||||
4771 |
88
|
137
|
$self->{name} = $name; | ||||
4772 |
88
|
160
|
$self->{val} = $val if( defined $val ); | ||||
4773 |
88
|
241
|
$self->{sysid} = $sysid if( defined $sysid); | ||||
4774 |
88
|
112
|
$self->{pubid} = $pubid if( defined $pubid); | ||||
4775 |
88
|
108
|
$self->{ndata} = $ndata if( defined $ndata); | ||||
4776 |
88
|
112
|
$self->{param} = $param if( defined $param); | ||||
4777 | |||||||
4778 |
88
|
133
|
bless $self, $class; | ||||
4779 |
88
|
120
|
return $self; | ||||
4780 | } | ||||||
4781 | |||||||
4782 | |||||||
4783 |
8
|
115
|
sub name { return $_[0]->{name}; } | ||||
4784 |
2
|
7
|
sub val { return $_[0]->{val}; } | ||||
4785 |
3
|
14
|
sub sysid { return defined( $_[0]->{sysid}) ? $_[0]->{sysid} : ''; } | ||||
4786 |
2
|
9
|
sub pubid { return defined( $_[0]->{pubid}) ? $_[0]->{pubid} : ''; } | ||||
4787 |
2
|
9
|
sub ndata { return defined( $_[0]->{ndata}) ? $_[0]->{ndata} : ''; } | ||||
4788 |
2
|
13
|
sub param { return defined( $_[0]->{param}) ? $_[0]->{param} : ''; } | ||||
4789 | |||||||
4790 | |||||||
4791 | sub print | ||||||
4792 |
14
|
188
|
{ my ($ent, $fh)= @_; | ||||
4793 |
14
|
21
|
my $text= $ent->text; | ||||
4794 |
14
0
|
27
0
|
if( $fh) { print $fh $text . "\n"; } | ||||
4795 |
14
|
62
|
else { print $text . "\n"; } | ||||
4796 | } | ||||||
4797 | |||||||
4798 | sub sprint | ||||||
4799 |
3
|
3
|
{ my ($ent)= @_; | ||||
4800 |
3
|
3
|
return $ent->text; | ||||
4801 | } | ||||||
4802 | |||||||
4803 | sub text | ||||||
4804 |
119
|
115
|
{ my ($ent)= @_; | ||||
4805 | #warn "text called: '", $ent->_dump, "'\n"; | ||||||
4806 |
119
|
189
|
return '' if( !$ent->{name}); | ||||
4807 |
117
|
90
|
my @tokens; | ||||
4808 |
117
|
125
|
push @tokens, '<!ENTITY'; | ||||
4809 | |||||||
4810 |
117
|
165
|
push @tokens, '%' if( $ent->{param}); | ||||
4811 |
117
|
117
|
push @tokens, $ent->{name}; | ||||
4812 | |||||||
4813 |
117
|
442
|
if( defined $ent->{val} && !defined( $ent->{sysid}) && !defined($ent->{pubid}) ) | ||||
4814 |
58
|
86
|
{ push @tokens, _quoted_val( $ent->{val}); | ||||
4815 | } | ||||||
4816 | elsif( defined $ent->{sysid}) | ||||||
4817 |
59
|
88
|
{ push @tokens, 'PUBLIC', _quoted_val( $ent->{pubid}) if( $ent->{pubid}); | ||||
4818 |
59
|
105
|
push @tokens, 'SYSTEM' unless( $ent->{pubid}); | ||||
4819 |
59
|
88
|
push @tokens, _quoted_val( $ent->{sysid}); | ||||
4820 |
59
|
130
|
push @tokens, 'NDATA', $ent->{ndata} if( $ent->{ndata}); | ||||
4821 | } | ||||||
4822 |
117
|
402
|
return join( ' ', @tokens) . '>'; | ||||
4823 | } | ||||||
4824 | |||||||
4825 | sub _quoted_val | ||||||
4826 |
121
|
261
|
{ my $q= $_[0]=~ m{"} ? q{'} : q{"}; | ||||
4827 |
121
|
238
|
return qq{$q$_[0]$q}; | ||||
4828 | } | ||||||
4829 | |||||||
4830 | sub _dump | ||||||
4831 |
1
1
2
2
|
1
4
6
3
|
{ my( $ent)= @_; return join( " - ", map { "$_ => '$ent->{$_}'" } grep { defined $ent->{$_} } sort keys %$ent); } | ||||
4832 | |||||||
4833 | 1; | ||||||
4834 | |||||||
4835 | ###################################################################### | ||||||
4836 | package XML::Twig::Elt; | ||||||
4837 | ###################################################################### | ||||||
4838 | |||||||
4839 |
187
187
187
|
327966
1105
71838
|
use Carp; | ||||
4840 | *isa= *UNIVERSAL::isa; | ||||||
4841 | |||||||
4842 | my $CDATA_START = "<![CDATA["; | ||||||
4843 | my $CDATA_END = "]]>"; | ||||||
4844 | my $PI_START = "<?"; | ||||||
4845 | my $PI_END = "?>"; | ||||||
4846 | my $COMMENT_START = "<!--"; | ||||||
4847 | my $COMMENT_END = "-->"; | ||||||
4848 | |||||||
4849 | my $XMLNS_URI = 'http://www.w3.org/2000/xmlns/'; | ||||||
4850 | |||||||
4851 | |||||||
4852 | BEGIN | ||||||
4853 | { # set some aliases for methods | ||||||
4854 |
187
|
370
|
*tag = *gi; | ||||
4855 |
187
|
179
|
*name = *gi; | ||||
4856 |
187
|
176
|
*set_tag = *set_gi; | ||||
4857 |
187
|
150
|
*set_name = *set_gi; | ||||
4858 |
187
|
152
|
*find_nodes = *get_xpath; # as in XML::DOM | ||||
4859 |
187
|
160
|
*findnodes = *get_xpath; # as in XML::LibXML | ||||
4860 |
187
|
160
|
*field = *first_child_text; | ||||
4861 |
187
|
156
|
*trimmed_field = *first_child_trimmed_text; | ||||
4862 |
187
|
155
|
*is_field = *contains_only_text; | ||||
4863 |
187
|
163
|
*is = *passes; | ||||
4864 |
187
|
148
|
*matches = *passes; | ||||
4865 |
187
|
155
|
*has_child = *first_child; | ||||
4866 |
187
|
159
|
*has_children = *first_child; | ||||
4867 |
187
|
148
|
*all_children_pass = *all_children_are; | ||||
4868 |
187
|
160
|
*all_children_match= *all_children_are; | ||||
4869 |
187
|
150
|
*getElementsByTagName= *descendants; | ||||
4870 |
187
|
145
|
*find_by_tag_name= *descendants_or_self; | ||||
4871 |
187
|
153
|
*unwrap = *erase; | ||||
4872 |
187
|
152
|
*inner_xml = *xml_string; | ||||
4873 |
187
|
163
|
*outer_xml = *sprint; | ||||
4874 |
187
|
177
|
*add_class = *add_to_class; | ||||
4875 | |||||||
4876 |
187
|
168
|
*first_child_is = *first_child_matches; | ||||
4877 |
187
|
164
|
*last_child_is = *last_child_matches; | ||||
4878 |
187
|
159
|
*next_sibling_is = *next_sibling_matches; | ||||
4879 |
187
|
178
|
*prev_sibling_is = *prev_sibling_matches; | ||||
4880 |
187
|
161
|
*next_elt_is = *next_elt_matches; | ||||
4881 |
187
|
168
|
*prev_elt_is = *prev_elt_matches; | ||||
4882 |
187
|
160
|
*parent_is = *parent_matches; | ||||
4883 |
187
|
172
|
*child_is = *child_matches; | ||||
4884 |
187
|
158
|
*inherited_att = *inherit_att; | ||||
4885 | |||||||
4886 |
187
|
172
|
*sort_children_by_value= *sort_children_on_value; | ||||
4887 | |||||||
4888 |
187
|
188
|
*has_atts= *att_nb; | ||||
4889 | |||||||
4890 | # imports from XML::Twig | ||||||
4891 |
187
|
242
|
*_is_fh= *XML::Twig::_is_fh; | ||||
4892 | |||||||
4893 | # XML::XPath compatibility | ||||||
4894 |
187
|
164
|
*string_value = *text; | ||||
4895 |
187
|
165
|
*toString = *sprint; | ||||
4896 |
187
|
155
|
*getName = *gi; | ||||
4897 |
187
|
155
|
*getRootNode = *twig; | ||||
4898 |
187
|
169
|
*getNextSibling = *_next_sibling; | ||||
4899 |
187
|
156
|
*getPreviousSibling = *_prev_sibling; | ||||
4900 |
187
|
148
|
*isElementNode = *is_elt; | ||||
4901 |
187
|
157
|
*isTextNode = *is_text; | ||||
4902 |
187
|
154
|
*isPI = *is_pi; | ||||
4903 |
187
|
164
|
*isPINode = *is_pi; | ||||
4904 |
187
|
153
|
*isProcessingInstructionNode= *is_pi; | ||||
4905 |
187
|
154
|
*isComment = *is_comment; | ||||
4906 |
187
|
159
|
*isCommentNode = *is_comment; | ||||
4907 |
187
|
158
|
*getTarget = *target; | ||||
4908 |
187
|
162
|
*getFirstChild = *_first_child; | ||||
4909 |
187
|
162
|
*getLastChild = *_last_child; | ||||
4910 | |||||||
4911 | # try using weak references | ||||||
4912 | # test whether we can use weak references | ||||||
4913 |
187
187
|
171
540
|
{ local $SIG{__DIE__}; | ||||
4914 |
187
|
8537
|
if( eval 'require Scalar::Util' && defined( &Scalar::Util::weaken) ) | ||||
4915 |
187
|
1342222
|
{ import Scalar::Util qw(weaken); } | ||||
4916 | elsif( eval 'require WeakRef') | ||||||
4917 |
0
|
0
|
{ import WeakRef; } | ||||
4918 | } | ||||||
4919 | } | ||||||
4920 | |||||||
4921 | |||||||
4922 | # can be called as XML::Twig::Elt->new( [[$gi, $atts, [@content]]) | ||||||
4923 | # - gi is an optional gi given to the element | ||||||
4924 | # - $atts is a hashref to attributes for the element | ||||||
4925 | # - @content is an optional list of text and elements that will | ||||||
4926 | # be inserted under the element | ||||||
4927 | sub new | ||||||
4928 |
49253
|
4632834
|
{ my $class= shift; | ||||
4929 |
49253
|
90909
|
$class= ref $class || $class; | ||||
4930 |
49253
|
41632
|
my $elt = {}; | ||||
4931 |
49253
|
63941
|
bless ($elt, $class); | ||||
4932 | |||||||
4933 |
49253
|
50473
|
return $elt unless @_; | ||||
4934 | |||||||
4935 |
49249
1
|
149507
3
|
if( @_ == 1 && $_[0]=~ m{^\s*<}) { return $class->parse( @_); } | ||||
4936 | |||||||
4937 | # if a gi is passed then use it | ||||||
4938 |
49248
|
34562
|
my $gi= shift; | ||||
4939 |
49248
|
92449
|
$elt->{gi}=$XML::Twig::gi2index{$gi} or $elt->set_gi( $gi); | ||||
4940 | |||||||
4941 | |||||||
4942 |
49248
|
52132
|
my $atts= ref $_[0] eq 'HASH' ? shift : undef; | ||||
4943 | |||||||
4944 |
49248
|
55962
|
if( $atts && defined $atts->{$CDATA}) | ||||
4945 |
4
|
4
|
{ delete $atts->{$CDATA}; | ||||
4946 | |||||||
4947 |
4
|
9
|
my $cdata= $class->new( $CDATA => @_); | ||||
4948 |
3
|
5
|
return $class->new( $gi, $atts, $cdata); | ||||
4949 | } | ||||||
4950 | |||||||
4951 |
49244
|
107600
|
if( $gi eq $PCDATA) | ||||
4952 |
352
151
1
|
412
308
78
|
{ if( grep { ref $_ } @_) { croak "element $PCDATA can only be created from text"; } | ||||
4953 |
351
|
602
|
$elt->_set_pcdata( join( '', @_)); | ||||
4954 | } | ||||||
4955 | elsif( $gi eq $ENT) | ||||||
4956 |
53
|
54
|
{ $elt->{ent}= shift; } | ||||
4957 | elsif( $gi eq $CDATA) | ||||||
4958 |
112
9
1
|
169
19
105
|
{ if( grep { ref $_ } @_) { croak "element $CDATA can only be created from text"; } | ||||
4959 |
111
|
201
|
$elt->_set_cdata( join( '', @_)); | ||||
4960 | } | ||||||
4961 | elsif( $gi eq $COMMENT) | ||||||
4962 |
1761
4
1
|
2071
10
77
|
{ if( grep { ref $_ } @_) { croak "element $COMMENT can only be created from text"; } | ||||
4963 |
1760
|
2403
|
$elt->_set_comment( join( '', @_)); | ||||
4964 | } | ||||||
4965 | elsif( $gi eq $PI) | ||||||
4966 |
1552
3
1
|
1863
4
80
|
{ if( grep { ref $_ } @_) { croak "element $PI can only be created from text"; } | ||||
4967 |
1551
|
2217
|
$elt->_set_pi( shift, join( '', @_)); | ||||
4968 | } | ||||||
4969 | else | ||||||
4970 | { # the rest of the arguments are the content of the element | ||||||
4971 |
45414
|
37427
|
if( @_) | ||||
4972 |
135
|
197
|
{ $elt->set_content( @_); } | ||||
4973 | else | ||||||
4974 |
45279
|
40994
|
{ $elt->{empty}= 1; } | ||||
4975 | } | ||||||
4976 | |||||||
4977 |
49240
|
47567
|
if( $atts) | ||||
4978 | { # the attribute hash can be used to pass the asis status | ||||||
4979 |
106
2
2
|
148
6
2
|
if( defined $atts->{$ASIS}) { $elt->set_asis( $atts->{$ASIS} ); delete $atts->{$ASIS}; } | ||||
4980 |
106
2
2
|
574
3
9
|
if( defined $atts->{$EMPTY}) { $elt->{empty}= $atts->{$EMPTY}; delete $atts->{$EMPTY}; } | ||||
4981 |
106
55
|
142
69
|
if( keys %$atts) { $elt->set_atts( $atts); } | ||||
4982 |
106
|
129
|
$elt->_set_id( $atts->{$ID}) if( $atts->{$ID}); | ||||
4983 | } | ||||||
4984 | |||||||
4985 |
49240
|
49062
|
return $elt; | ||||
4986 | } | ||||||
4987 | |||||||
4988 | # optimized version of $elt->new( PCDATA, $text); | ||||||
4989 | sub _new_pcdata | ||||||
4990 |
166
|
118
|
{ my $class= $_[0]; | ||||
4991 |
166
|
242
|
$class= ref $class || $class; | ||||
4992 |
166
|
146
|
my $elt = {}; | ||||
4993 |
166
|
225
|
bless $elt, $class; | ||||
4994 |
166
|
308
|
$elt->{gi}=$XML::Twig::gi2index{$PCDATA} or $elt->set_gi( $PCDATA); | ||||
4995 |
166
|
243
|
$elt->_set_pcdata( $_[1]); | ||||
4996 |
166
|
148
|
return $elt; | ||||
4997 | } | ||||||
4998 | |||||||
4999 | # this function creates an XM:::Twig::Elt from a string | ||||||
5000 | # it is quite clumsy at the moment, as it just creates a | ||||||
5001 | # new twig then returns its root | ||||||
5002 | # there might also be memory leaks there | ||||||
5003 | # additional arguments are passed to new XML::Twig | ||||||
5004 | sub parse | ||||||
5005 |
28
|
529
|
{ my $class= shift; | ||||
5006 |
28
4
|
56
4
|
if( ref( $class)) { $class= ref( $class); } | ||||
5007 |
28
|
33
|
my $string= shift; | ||||
5008 |
28
|
44
|
my %args= @_; | ||||
5009 |
28
|
78
|
my $t= XML::Twig->new(%args); | ||||
5010 |
28
|
42
|
$t->parse( $string); | ||||
5011 |
28
|
43
|
my $elt= $t->root; | ||||
5012 | # clean-up the node | ||||||
5013 |
28
|
35
|
delete $elt->{twig}; # get rid of the twig data | ||||
5014 |
28
|
27
|
delete $elt->{twig_current}; # better get rid of this too | ||||
5015 |
28
7
|
41
10
|
if( $t->{twig_id_list}) { $elt->{twig_id_list}= $t->{twig_id_list}; } | ||||
5016 |
28
|
36
|
$elt->cut; | ||||
5017 |
28
|
26
|
undef $t->{twig_root}; | ||||
5018 |
28
|
51
|
return $elt; | ||||
5019 | } | ||||||
5020 | |||||||
5021 | sub set_inner_xml | ||||||
5022 |
2
|
2
|
{ my( $elt, $xml, @args)= @_; | ||||
5023 |
2
|
4
|
my $new_elt= $elt->parse( "<dummy>$xml</dummy>", @args); | ||||
5024 |
2
|
2
|
$elt->cut_children; | ||||
5025 |
2
|
2
|
$new_elt->paste_first_child( $elt); | ||||
5026 |
2
|
3
|
$new_elt->erase; | ||||
5027 |
2
|
8
|
return $elt; | ||||
5028 | } | ||||||
5029 | |||||||
5030 | sub set_outer_xml | ||||||
5031 |
2
|
3
|
{ my( $elt, $xml, @args)= @_; | ||||
5032 |
2
|
5
|
my $new_elt= $elt->parse( "<dummy>$xml</dummy>", @args); | ||||
5033 |
2
|
2
|
$elt->cut_children; | ||||
5034 |
2
|
3
|
$new_elt->replace( $elt); | ||||
5035 |
2
|
4
|
$new_elt->erase; | ||||
5036 |
2
|
4
|
return $new_elt; | ||||
5037 | } | ||||||
5038 | |||||||
5039 | |||||||
5040 | sub set_inner_html | ||||||
5041 |
8
|
104
|
{ my( $elt, $html)= @_; | ||||
5042 |
8
|
15
|
my $t= XML::Twig->new->parse_html( "<html>$html</html>"); | ||||
5043 |
8
|
10
|
my $new_elt= $t->root; | ||||
5044 |
8
|
11
|
if( $elt->tag eq 'head') | ||||
5045 |
2
|
4
|
{ $new_elt->first_child( 'head')->unwrap; | ||||
5046 |
2
|
5
|
$new_elt->first_child( 'body')->cut; | ||||
5047 | } | ||||||
5048 | elsif( $elt->tag ne 'html') | ||||||
5049 |
4
|
6
|
{ $new_elt->first_child( 'head')->cut; | ||||
5050 |
4
|
12
|
$new_elt->first_child( 'body')->unwrap; | ||||
5051 | } | ||||||
5052 |
8
|
18
|
$new_elt->cut; | ||||
5053 |
8
|
10
|
$elt->cut_children; | ||||
5054 |
8
|
12
|
$new_elt->paste_first_child( $elt); | ||||
5055 |
8
|
9
|
$new_elt->erase; | ||||
5056 |
8
|
15
|
return $elt; | ||||
5057 | } | ||||||
5058 | |||||||
5059 | sub set_gi | ||||||
5060 |
966
|
881
|
{ my ($elt, $gi)= @_; | ||||
5061 |
966
|
1396
|
unless( defined $XML::Twig::gi2index{$gi}) | ||||
5062 | { # new gi, create entries in %gi2index and @index2gi | ||||||
5063 |
960
|
1166
|
push @XML::Twig::index2gi, $gi; | ||||
5064 |
960
|
1487
|
$XML::Twig::gi2index{$gi}= $#XML::Twig::index2gi; | ||||
5065 | } | ||||||
5066 |
966
|
1073
|
$elt->{gi}= $XML::Twig::gi2index{$gi}; | ||||
5067 |
966
|
833
|
return $elt; | ||||
5068 | } | ||||||
5069 | |||||||
5070 |
8283
|
31251
|
sub gi { return $XML::Twig::index2gi[$_[0]->{gi}]; } | ||||
5071 | |||||||
5072 | sub local_name | ||||||
5073 |
114
|
776
|
{ my $elt= shift; | ||||
5074 |
114
|
128
|
return _local_name( $XML::Twig::index2gi[$elt->{'gi'}]); | ||||
5075 | } | ||||||
5076 | |||||||
5077 | sub ns_prefix | ||||||
5078 |
86
|
60
|
{ my $elt= shift; | ||||
5079 |
86
|
88
|
return _ns_prefix( $XML::Twig::index2gi[$elt->{'gi'}]); | ||||
5080 | } | ||||||
5081 | |||||||
5082 | # namespace prefix for any qname (can be used for elements or attributes) | ||||||
5083 | sub _ns_prefix | ||||||
5084 |
139
|
84
|
{ my $qname= shift; | ||||
5085 |
139
|
279
|
if( $qname=~ m{^([^:]*):}) | ||||
5086 |
61
|
137
|
{ return $1; } | ||||
5087 | else | ||||||
5088 |
78
|
112
|
{ return( ''); } # should it be '' ? | ||||
5089 | } | ||||||
5090 | |||||||
5091 | # local name for any qname (can be used for elements or attributes) | ||||||
5092 | sub _local_name | ||||||
5093 |
127
|
120
|
{ my $qname= shift; | ||||
5094 |
127
|
241
|
(my $local= $qname)=~ s{^[^:]*:}{}; | ||||
5095 |
127
|
195
|
return $local; | ||||
5096 | } | ||||||
5097 | |||||||
5098 | #sub get_namespace | ||||||
5099 | sub namespace ## no critic (Subroutines::ProhibitNestedSubs); | ||||||
5100 |
81
|
65
|
{ my $elt= shift; | ||||
5101 |
81
|
111
|
my $prefix= defined $_[0] ? shift() : $elt->ns_prefix; | ||||
5102 |
81
|
96
|
my $ns_att= $prefix ? "xmlns:$prefix" : "xmlns"; | ||||
5103 |
81
|
179
|
my $expanded= $DEFAULT_NS{$prefix} || $elt->_inherit_att_through_cut( $ns_att) || ''; | ||||
5104 |
81
|
153
|
return $expanded; | ||||
5105 | } | ||||||
5106 | |||||||
5107 | sub declare_missing_ns ## no critic (Subroutines::ProhibitNestedSubs); | ||||||
5108 |
1
|
2
|
{ my $root= shift; | ||||
5109 |
1
|
1
|
my %missing_prefix; | ||||
5110 |
1
|
12
|
my $map= $root->_current_ns_prefix_map; | ||||
5111 | |||||||
5112 |
1
|
2
|
foreach my $prefix (keys %$map) | ||||
5113 |
1
|
2
|
{ my $prefix_att= $prefix eq '#default' ? 'xmlns' : "xmlns:$prefix"; | ||||
5114 |
1
|
3
|
if( ! $root->{'att'}->{$prefix_att}) | ||||
5115 |
1
|
2
|
{ $root->set_att( $prefix_att => $map->{$prefix}); } | ||||
5116 | } | ||||||
5117 |
1
|
3
|
return $root; | ||||
5118 | } | ||||||
5119 | |||||||
5120 | sub _current_ns_prefix_map | ||||||
5121 |
1
|
1
|
{ my( $elt)= shift; | ||||
5122 |
1
|
1
|
my $map; | ||||
5123 |
1
|
2
|
while( $elt) | ||||
5124 |
2
|
2
|
{ foreach my $att ($elt->att_names) | ||||
5125 |
1
|
5
|
{ my $prefix= $att eq 'xmlns' ? '#default' | ||||
5126 | : $att=~ m{^xmlns:(.*)$} ? $1 | ||||||
5127 | : next | ||||||
5128 | ; | ||||||
5129 |
1
1
|
3
2
|
if( ! exists $map->{$prefix}) { $map->{$prefix}= $elt->{'att'}->{$att}; } | ||||
5130 | } | ||||||
5131 |
2
|
9
|
$elt= $elt->{parent} || ($elt->{former} && $elt->{former}->{parent}); | ||||
5132 | } | ||||||
5133 |
1
|
1
|
return $map; | ||||
5134 | } | ||||||
5135 | |||||||
5136 | sub set_ns_decl | ||||||
5137 |
5
|
3
|
{ my( $elt, $uri, $prefix)= @_; | ||||
5138 |
5
|
9
|
my $ns_att= $prefix ? "xmlns:$prefix" : 'xmlns'; | ||||
5139 |
5
|
6
|
$elt->set_att( $ns_att => $uri); | ||||
5140 |
5
|
8
|
return $elt; | ||||
5141 | } | ||||||
5142 | |||||||
5143 | sub set_ns_as_default | ||||||
5144 |
1
|
2
|
{ my( $root, $uri)= @_; | ||||
5145 |
1
|
0
|
my @ns_decl_to_remove; | ||||
5146 |
1
|
3
|
foreach my $elt ($root->descendants_or_self) | ||||
5147 |
7
|
7
|
{ if( $elt->_ns_prefix && $elt->namespace eq $uri) | ||||
5148 |
2
|
3
|
{ $elt->set_tag( $elt->local_name); } | ||||
5149 | # store any namespace declaration for that uri | ||||||
5150 |
7
1
|
8
7
|
foreach my $ns_decl (grep { $_=~ m{xmlns(:|$)} && $elt->{'att'}->{$_} eq $uri } $elt->att_names) | ||||
5151 |
1
|
3
|
{ push @ns_decl_to_remove, [$elt, $ns_decl]; } | ||||
5152 | } | ||||||
5153 |
1
|
2
|
$root->set_ns_decl( $uri); | ||||
5154 | # now remove the ns declarations (if done earlier then descendants of an element with the ns declaration | ||||||
5155 | # are not considered being in the namespace | ||||||
5156 |
1
|
1
|
foreach my $ns_decl_to_remove ( @ns_decl_to_remove) | ||||
5157 |
1
|
1
|
{ my( $elt, $ns_decl)= @$ns_decl_to_remove; | ||||
5158 |
1
|
1
|
$elt->del_att( $ns_decl); | ||||
5159 | } | ||||||
5160 | |||||||
5161 |
1
|
3
|
return $root; | ||||
5162 | } | ||||||
5163 | |||||||
5164 | |||||||
5165 | |||||||
5166 | # return #ELT for an element and #PCDATA... for others | ||||||
5167 | sub get_type | ||||||
5168 |
6
|
10
|
{ my $gi_nb= $_[0]->{gi}; # the number, not the string | ||||
5169 |
6
|
13
|
return $ELT if( $gi_nb >= $XML::Twig::SPECIAL_GI); | ||||
5170 |
5
|
9
|
return $_[0]->gi; | ||||
5171 | } | ||||||
5172 | |||||||
5173 | # return the gi if it's a "real" element, 0 otherwise | ||||||
5174 | sub is_elt | ||||||
5175 |
4947
|
17992
|
{ if( $_[0]->{gi} >= $XML::Twig::SPECIAL_GI) | ||||
5176 |
4663
|
5016
|
{ return $_[0]->gi; } | ||||
5177 | else | ||||||
5178 |
284
|
663
|
{ return 0; } | ||||
5179 | } | ||||||
5180 | |||||||
5181 | |||||||
5182 | sub is_pcdata | ||||||
5183 |
46
|
38
|
{ my $elt= shift; | ||||
5184 |
46
|
252
|
return (exists $elt->{'pcdata'}); | ||||
5185 | } | ||||||
5186 | |||||||
5187 | sub is_cdata | ||||||
5188 |
1
|
3
|
{ my $elt= shift; | ||||
5189 |
1
|
2
|
return (exists $elt->{'cdata'}); | ||||
5190 | } | ||||||
5191 | |||||||
5192 | sub is_pi | ||||||
5193 |
9
|
7
|
{ my $elt= shift; | ||||
5194 |
9
|
19
|
return (exists $elt->{'target'}); | ||||
5195 | } | ||||||
5196 | |||||||
5197 | sub is_comment | ||||||
5198 |
5
|
5
|
{ my $elt= shift; | ||||
5199 |
5
|
11
|
return (exists $elt->{'comment'}); | ||||
5200 | } | ||||||
5201 | |||||||
5202 | sub is_ent | ||||||
5203 |
1
|
1
|
{ my $elt= shift; | ||||
5204 |
1
|
4
|
return (exists $elt->{ent} || $elt->{ent_name}); | ||||
5205 | } | ||||||
5206 | |||||||
5207 | |||||||
5208 | sub is_text | ||||||
5209 |
2293
|
1570
|
{ my $elt= shift; | ||||
5210 |
2293
|
10875
|
return (exists( $elt->{'pcdata'}) || (exists $elt->{'cdata'})); | ||||
5211 | } | ||||||
5212 | |||||||
5213 | sub is_empty | ||||||
5214 |
11
|
48
|
{ return $_[0]->{empty} || 0; } | ||||
5215 | |||||||
5216 | sub set_empty | ||||||
5217 |
7
7
|
24
11
|
{ $_[0]->{empty}= defined( $_[1]) ? $_[1] : 1; return $_[0]; } | ||||
5218 | |||||||
5219 | sub set_not_empty | ||||||
5220 |
2
2
|
9
4
|
{ delete $_[0]->{empty} if( $_[0]->{'empty'}); return $_[0]; } | ||||
5221 | |||||||
5222 | |||||||
5223 | sub set_asis | ||||||
5224 |
20
|
21
|
{ my $elt=shift; | ||||
5225 | |||||||
5226 |
20
|
28
|
foreach my $descendant ($elt, $elt->_descendants ) | ||||
5227 |
27
|
29
|
{ $descendant->{asis}= 1; | ||||
5228 |
27
|
52
|
if( (exists $descendant->{'cdata'})) | ||||
5229 |
2
|
3
|
{ $descendant->{gi}=$XML::Twig::gi2index{$PCDATA} or $descendant->set_gi( $PCDATA); | ||||
5230 |
2
|
3
|
$descendant->_set_pcdata( $descendant->{cdata}); | ||||
5231 | } | ||||||
5232 | |||||||
5233 | } | ||||||
5234 |
20
|
28
|
return $elt; | ||||
5235 | } | ||||||
5236 | |||||||
5237 | sub set_not_asis | ||||||
5238 |
1
|
2
|
{ my $elt=shift; | ||||
5239 |
1
|
2
|
foreach my $descendant ($elt, $elt->descendants) | ||||
5240 |
2
|
4
|
{ delete $descendant->{asis} if $descendant->{asis};} | ||||
5241 |
1
|
2
|
return $elt; | ||||
5242 | } | ||||||
5243 | |||||||
5244 | sub is_asis | ||||||
5245 |
89
|
123
|
{ return $_[0]->{asis}; } | ||||
5246 | |||||||
5247 | sub closed | ||||||
5248 |
17
|
60
|
{ my $elt= shift; | ||||
5249 |
17
|
39
|
my $t= $elt->twig || return; | ||||
5250 |
16
|
25
|
my $curr_elt= $t->{twig_current}; | ||||
5251 |
16
|
29
|
return 1 unless( $curr_elt); | ||||
5252 |
14
|
32
|
return $curr_elt->in( $elt); | ||||
5253 | } | ||||||
5254 | |||||||
5255 | sub set_pcdata | ||||||
5256 |
46
|
49
|
{ my( $elt, $pcdata)= @_; | ||||
5257 | |||||||
5258 |
46
|
63
|
if( $elt->{extra_data_in_pcdata}) | ||||
5259 |
11
|
14
|
{ _try_moving_extra_data( $elt, $pcdata); | ||||
5260 | } | ||||||
5261 |
46
|
222
|
$elt->{pcdata}= $pcdata; | ||||
5262 |
46
|
41
|
return $elt; | ||||
5263 | } | ||||||
5264 | |||||||
5265 |
51
|
82
|
sub _extra_data_in_pcdata { return $_[0]->{extra_data_in_pcdata}; } | ||||
5266 |
12
12
|
13
11
|
sub _set_extra_data_in_pcdata { $_[0]->{extra_data_in_pcdata}= $_[1]; return $_[0]; } | ||||
5267 |
3
3
|
6
9
|
sub _del_extra_data_in_pcdata { delete $_[0]->{extra_data_in_pcdata}; return $_[0]; } | ||||
5268 | sub _unshift_extra_data_in_pcdata | ||||||
5269 |
14
|
7
|
{ my $e= shift; | ||||
5270 |
14
|
29
|
$e->{extra_data_in_pcdata}||=[]; | ||||
5271 |
14
14
|
10
34
|
unshift @{$e->{extra_data_in_pcdata}}, { text => shift(), offset => shift() }; | ||||
5272 | } | ||||||
5273 | sub _push_extra_data_in_pcdata | ||||||
5274 |
116
|
78
|
{ my $e= shift; | ||||
5275 |
116
|
258
|
$e->{extra_data_in_pcdata}||=[]; | ||||
5276 |
116
116
|
65
277
|
push @{$e->{extra_data_in_pcdata}}, { text => shift(), offset => shift() }; | ||||
5277 | } | ||||||
5278 | |||||||
5279 |
10
|
44
|
sub _extra_data_before_end_tag { return $_[0]->{extra_data_before_end_tag} || ''; } | ||||
5280 |
332
332
|
487
236
|
sub _set_extra_data_before_end_tag { $_[0]->{extra_data_before_end_tag}= $_[1]; return $_[0]} | ||||
5281 |
2
2
|
3
3
|
sub _del_extra_data_before_end_tag { delete $_[0]->{extra_data_before_end_tag}; return $_[0]} | ||||
5282 | sub _prefix_extra_data_before_end_tag | ||||||
5283 |
9
|
7
|
{ my( $elt, $data)= @_; | ||||
5284 |
9
|
103
|
if($elt->{extra_data_before_end_tag}) | ||||
5285 |
2
|
4
|
{ $elt->{extra_data_before_end_tag}= $data . $elt->{extra_data_before_end_tag}; } | ||||
5286 | else | ||||||
5287 |
7
|
11
|
{ $elt->{extra_data_before_end_tag}= $data; } | ||||
5288 |
9
|
10
|
return $elt; | ||||
5289 | } | ||||||
5290 | |||||||
5291 | # internal, in cases where we know there is no extra_data (inlined anyway!) | ||||||
5292 |
681
|
977
|
sub _set_pcdata { $_[0]->{pcdata}= $_[1]; } | ||||
5293 | |||||||
5294 | # try to figure out if we can keep the extra_data around | ||||||
5295 | sub _try_moving_extra_data | ||||||
5296 |
11
|
9
|
{ my( $elt, $modified)=@_; | ||||
5297 |
11
|
12
|
my $initial= $elt->{pcdata}; | ||||
5298 |
11
|
7
|
my $cpis= $elt->{extra_data_in_pcdata}; | ||||
5299 | |||||||
5300 |
11
|
40
|
if( (my $offset= index( $modified, $initial)) != -1) | ||||
5301 | { # text has been added | ||||||
5302 |
1
1
|
2
2
|
foreach (@$cpis) { $_->{offset}+= $offset; } | ||||
5303 | } | ||||||
5304 | elsif( ($offset= index( $initial, $modified)) != -1) | ||||||
5305 | { # text has been cut | ||||||
5306 |
4
|
3
|
my $len= length( $modified); | ||||
5307 |
4
4
|
4
6
|
foreach my $cpi (@$cpis) { $cpi->{offset} -= $offset; } | ||||
5308 |
4
4
|
5
15
|
$elt->_set_extra_data_in_pcdata( [ grep { $_->{offset} >= 0 && $_->{offset} < $len } @$cpis ]); | ||||
5309 | } | ||||||
5310 | else | ||||||
5311 |
6
|
10
|
{ _match_extra_data_words( $elt, $initial, $modified) | ||||
5312 | || _match_extra_data_chars( $elt, $initial, $modified) | ||||||
5313 | || $elt->_del_extra_data_in_pcdata; | ||||||
5314 | } | ||||||
5315 | } | ||||||
5316 | |||||||
5317 | sub _match_extra_data_words | ||||||
5318 |
6
|
8
|
{ my( $elt, $initial, $modified)= @_; | ||||
5319 |
6
|
82
|
my @initial= split /\b/, $initial; | ||||
5320 |
6
|
35867
|
my @modified= split /\b/, $modified; | ||||
5321 | |||||||
5322 |
6
|
18
|
return _match_extra_data( $elt, length( $initial), \@initial, \@modified); | ||||
5323 | } | ||||||
5324 | |||||||
5325 | sub _match_extra_data_chars | ||||||
5326 |
4
|
6
|
{ my( $elt, $initial, $modified)= @_; | ||||
5327 |
4
|
10
|
my @initial= split //, $initial; | ||||
5328 |
4
|
12
|
my @modified= split //, $modified; | ||||
5329 | |||||||
5330 |
4
|
8
|
return _match_extra_data( $elt, length( $initial), \@initial, \@modified); | ||||
5331 | } | ||||||
5332 | |||||||
5333 | sub _match_extra_data | ||||||
5334 |
10
|
8
|
{ my( $elt, $length, $initial, $modified)= @_; | ||||
5335 | |||||||
5336 |
10
|
17
|
my $cpis= $elt->{extra_data_in_pcdata}; | ||||
5337 | |||||||
5338 |
10
|
18
|
if( @$initial <= @$modified) | ||||
5339 | { | ||||||
5340 |
5
|
7
|
my( $ok, $positions, $offsets)= _pos_offset( $initial, $modified); | ||||
5341 |
5
|
9
|
if( $ok) | ||||
5342 |
2
|
3
|
{ my $offset=0; | ||||
5343 |
2
|
3
|
my $pos= shift @$positions; | ||||
5344 |
2
|
3
|
foreach my $cpi (@$cpis) | ||||
5345 |
2
|
3
|
{ while( $cpi->{offset} >= $pos) | ||||
5346 |
2
|
1
|
{ $offset= shift @$offsets; | ||||
5347 |
2
|
6
|
$pos= shift @$positions || $length +1; | ||||
5348 | } | ||||||
5349 |
2
|
3
|
$cpi->{offset} += $offset; | ||||
5350 | } | ||||||
5351 |
2
|
7
|
return 1; | ||||
5352 | } | ||||||
5353 | } | ||||||
5354 | else | ||||||
5355 |
5
|
8
|
{ my( $ok, $positions, $offsets)= _pos_offset( $modified, $initial); | ||||
5356 |
5
|
9
|
if( $ok) | ||||
5357 | { #print STDERR "pos: ", join( ':', @$positions), "\n", | ||||||
5358 | # "offset: ", join( ':', @$offsets), "\n"; | ||||||
5359 |
1
|
1
|
my $offset=0; | ||||
5360 |
1
|
1
|
my $pos= shift @$positions; | ||||
5361 |
1
|
1
|
my $prev_pos= 0; | ||||
5362 | |||||||
5363 |
1
|
1
|
foreach my $cpi (@$cpis) | ||||
5364 |
1
|
2
|
{ while( $cpi->{offset} >= $pos) | ||||
5365 |
1
|
1
|
{ $offset= shift @$offsets; | ||||
5366 |
1
|
1
|
$prev_pos= $pos; | ||||
5367 |
1
|
3
|
$pos= shift @$positions || $length +1; | ||||
5368 | } | ||||||
5369 |
1
|
2
|
$cpi->{offset} -= $offset; | ||||
5370 |
1
1
|
2
2
|
if( $cpi->{offset} < $prev_pos) { delete $cpi->{text}; } | ||||
5371 | } | ||||||
5372 |
1
1
|
1
3
|
$elt->_set_extra_data_in_pcdata( [ grep { exists $_->{text} } @$cpis ]); | ||||
5373 |
1
|
4
|
return 1; | ||||
5374 | } | ||||||
5375 | } | ||||||
5376 |
7
|
25
|
return 0; | ||||
5377 | } | ||||||
5378 | |||||||
5379 | |||||||
5380 | sub _pos_offset | ||||||
5381 |
10
|
8
|
{ my( $short, $long)= @_; | ||||
5382 |
10
|
6
|
my( @pos, @offset); | ||||
5383 |
10
|
7
|
my( $s_length, $l_length)=(0,0); | ||||
5384 |
10
|
15
|
while (@$short) | ||||
5385 |
22
|
19
|
{ my $s_word= shift @$short; | ||||
5386 |
22
|
13
|
my $l_word= shift @$long; | ||||
5387 |
22
|
26
|
if( $s_word ne $l_word) | ||||
5388 |
10
|
26
|
{ while( @$long && $s_word ne $l_word) | ||||
5389 |
31
|
22
|
{ $l_length += length( $l_word); | ||||
5390 |
31
|
60
|
$l_word= shift @$long; | ||||
5391 | } | ||||||
5392 |
10
7
|
24
13
|
if( !@$long && $s_word ne $l_word) { return 0; } | ||||
5393 |
3
|
4
|
push @pos, $s_length; | ||||
5394 |
3
|
2
|
push @offset, $l_length - $s_length; | ||||
5395 | } | ||||||
5396 |
15
|
10
|
my $length= length( $s_word); | ||||
5397 |
15
|
10
|
$s_length += $length; | ||||
5398 |
15
|
16
|
$l_length += $length; | ||||
5399 | } | ||||||
5400 |
3
|
4
|
return( 1, \@pos, \@offset); | ||||
5401 | } | ||||||
5402 | |||||||
5403 | sub append_pcdata | ||||||
5404 |
1
|
2
|
{ $_[0]->{'pcdata'}.= $_[1]; | ||||
5405 |
1
|
1
|
return $_[0]; | ||||
5406 | } | ||||||
5407 | |||||||
5408 |
13
|
47
|
sub pcdata { return $_[0]->{pcdata}; } | ||||
5409 | |||||||
5410 | |||||||
5411 | sub append_extra_data | ||||||
5412 |
1
|
3
|
{ $_[0]->{extra_data}.= $_[1]; | ||||
5413 |
1
|
1
|
return $_[0]; | ||||
5414 | } | ||||||
5415 | |||||||
5416 | sub set_extra_data | ||||||
5417 |
695
|
739
|
{ $_[0]->{extra_data}= $_[1]; | ||||
5418 |
695
|
446
|
return $_[0]; | ||||
5419 | } | ||||||
5420 |
124
|
363
|
sub extra_data { return $_[0]->{extra_data} || ''; } | ||||
5421 | |||||||
5422 | sub set_target | ||||||
5423 |
3103
|
2113
|
{ my( $elt, $target)= @_; | ||||
5424 |
3103
|
2554
|
$elt->{target}= $target; | ||||
5425 |
3103
|
2149
|
return $elt; | ||||
5426 | } | ||||||
5427 |
1
|
5
|
sub target { return $_[0]->{target}; } | ||||
5428 | |||||||
5429 | sub set_data | ||||||
5430 |
5
|
12
|
{ $_[0]->{'data'}= $_[1]; | ||||
5431 |
5
|
10
|
return $_[0]; | ||||
5432 | } | ||||||
5433 |
64
|
152
|
sub data { return $_[0]->{data}; } | ||||
5434 | |||||||
5435 | sub set_pi | ||||||
5436 |
22
|
16
|
{ my $elt= shift; | ||||
5437 |
22
|
34
|
unless( $elt->{gi} == $XML::Twig::gi2index{$PI}) | ||||
5438 |
1
|
1
|
{ $elt->cut_children; | ||||
5439 |
1
|
2
|
$elt->{gi}=$XML::Twig::gi2index{$PI} or $elt->set_gi( $PI); | ||||
5440 | } | ||||||
5441 |
22
|
23
|
return $elt->_set_pi( @_); | ||||
5442 | } | ||||||
5443 | |||||||
5444 | sub _set_pi | ||||||
5445 |
3100
|
3862
|
{ $_[0]->set_target( $_[1]); | ||||
5446 |
3100
|
2791
|
$_[0]->{data}= $_[2]; | ||||
5447 |
3100
|
2762
|
return $_[0]; | ||||
5448 | } | ||||||
5449 | |||||||
5450 |
1488
|
1502
|
sub pi_string { my $string= $PI_START . $_[0]->{target}; | ||||
5451 |
1488
|
1030
|
my $data= $_[0]->{data}; | ||||
5452 |
1488
1487
|
3112
1111
|
if( defined( $data) && $data ne '') { $string .= " $data"; } | ||||
5453 |
1488
|
1013
|
$string .= $PI_END ; | ||||
5454 |
1488
|
1768
|
return $string; | ||||
5455 | } | ||||||
5456 | |||||||
5457 | sub set_comment | ||||||
5458 |
7
|
11
|
{ my $elt= shift; | ||||
5459 |
7
|
15
|
unless( $elt->{gi} == $XML::Twig::gi2index{$COMMENT}) | ||||
5460 |
1
|
1
|
{ $elt->cut_children; | ||||
5461 |
1
|
3
|
$elt->{gi}=$XML::Twig::gi2index{$COMMENT} or $elt->set_gi( $COMMENT); | ||||
5462 | } | ||||||
5463 |
7
|
11
|
return $elt->_set_comment( @_); | ||||
5464 | } | ||||||
5465 | |||||||
5466 |
3523
3523
|
3522
2285
|
sub _set_comment { $_[0]->{comment}= $_[1]; return $_[0]; } | ||||
5467 |
3
|
15
|
sub comment { return $_[0]->{comment}; } | ||||
5468 |
1767
|
1811
|
sub comment_string { return $COMMENT_START . _comment_escaped_string( $_[0]->{comment}) . $COMMENT_END; } | ||||
5469 | # comments cannot start or end with | ||||||
5470 | sub _comment_escaped_string | ||||||
5471 |
1767
|
1264
|
{ my( $c)= @_; | ||||
5472 |
1767
|
1450
|
$c=~ s{^-}{ -}; | ||||
5473 |
1767
|
1265
|
$c=~ s{-$}{- }; | ||||
5474 |
1767
|
1040
|
$c=~ s{--}{- -}g; | ||||
5475 |
1767
|
3076
|
return $c; | ||||
5476 | } | ||||||
5477 | |||||||
5478 |
1
1
|
2
2
|
sub set_ent { $_[0]->{ent}= $_[1]; return $_[0]; } | ||||
5479 |
2
|
7
|
sub ent { return $_[0]->{ent}; } | ||||
5480 |
12
|
50
|
sub ent_name { return substr( $_[0]->{ent}, 1, -1);} | ||||
5481 | |||||||
5482 | sub set_cdata | ||||||
5483 |
2
|
8
|
{ my $elt= shift; | ||||
5484 |
2
|
7
|
unless( $elt->{gi} == $XML::Twig::gi2index{$CDATA}) | ||||
5485 |
1
|
2
|
{ $elt->cut_children; | ||||
5486 |
1
|
5
|
$elt->insert_new_elt( first_child => $CDATA, @_); | ||||
5487 |
1
|
2
|
return $elt; | ||||
5488 | } | ||||||
5489 |
1
|
2
|
return $elt->_set_cdata( @_); | ||||
5490 | } | ||||||
5491 | |||||||
5492 | sub _set_cdata | ||||||
5493 |
230
|
255
|
{ $_[0]->{cdata}= $_[1]; | ||||
5494 |
230
|
175
|
return $_[0]; | ||||
5495 | } | ||||||
5496 | |||||||
5497 | sub append_cdata | ||||||
5498 |
1
|
2
|
{ $_[0]->{cdata}.= $_[1]; | ||||
5499 |
1
|
2
|
return $_[0]; | ||||
5500 | } | ||||||
5501 |
47
|
130
|
sub cdata { return $_[0]->{cdata}; } | ||||
5502 | |||||||
5503 | |||||||
5504 | sub contains_only_text | ||||||
5505 |
42
|
57
|
{ my $elt= shift; | ||||
5506 |
42
|
29
|
return 0 unless $elt->is_elt; | ||||
5507 |
24
|
21
|
foreach my $child ($elt->_children) | ||||
5508 |
25
|
18
|
{ return 0 if $child->is_elt; } | ||||
5509 |
15
|
17
|
return $elt; | ||||
5510 | } | ||||||
5511 | |||||||
5512 | sub contains_only | ||||||
5513 |
5
|
8
|
{ my( $elt, $exp)= @_; | ||||
5514 |
5
5
5
5
5
7
7
5
|
4
6
6
6
9
8
13
10
|
my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; | ||||
5515 |
5
|
8
|
foreach my $child (@children) | ||||
5516 |
7
|
10
|
{ return 0 unless $child->is( $exp); } | ||||
5517 |
3
|
16
|
return @children || 1; | ||||
5518 | } | ||||||
5519 | |||||||
5520 | sub contains_a_single | ||||||
5521 |
202
|
191
|
{ my( $elt, $exp)= @_; | ||||
5522 |
202
|
839
|
my $child= $elt->{first_child} or return 0; | ||||
5523 |
57
|
67
|
return 0 unless $child->passes( $exp); | ||||
5524 |
53
|
77
|
return 0 if( $child->{next_sibling}); | ||||
5525 |
50
|
143
|
return $child; | ||||
5526 | } | ||||||
5527 | |||||||
5528 | |||||||
5529 | sub root | ||||||
5530 |
29906
|
16682
|
{ my $elt= shift; | ||||
5531 |
29906
19650
|
37511
20935
|
while( $elt->{parent}) { $elt= $elt->{parent}; } | ||||
5532 |
29906
|
25367
|
return $elt; | ||||
5533 | } | ||||||
5534 | |||||||
5535 | sub _root_through_cut | ||||||
5536 |
2
|
3
|
{ my $elt= shift; | ||||
5537 |
2
5
|
13
20
|
while( $elt->{parent} || ($elt->{former} && $elt->{former}->{parent})) { $elt= $elt->{parent} || ($elt->{former} && $elt->{former}->{parent}); } | ||||
5538 |
2
|
4
|
return $elt; | ||||
5539 | } | ||||||
5540 | |||||||
5541 | sub twig | ||||||
5542 |
29884
|
22338
|
{ my $elt= shift; | ||||
5543 |
29884
|
28204
|
my $root= $elt->root; | ||||
5544 |
29884
|
53628
|
return $root->{twig}; | ||||
5545 | } | ||||||
5546 | |||||||
5547 | sub _twig_through_cut | ||||||
5548 |
1
|
1
|
{ my $elt= shift; | ||||
5549 |
1
|
2
|
my $root= $elt->_root_through_cut; | ||||
5550 |
1
|
2
|
return $root->{twig}; | ||||
5551 | } | ||||||
5552 | |||||||
5553 | |||||||
5554 | # used for navigation | ||||||
5555 | # returns undef or the element, depending on whether $elt passes $cond | ||||||
5556 | # $cond can be | ||||||
5557 | # - empty: the element passes the condition | ||||||
5558 | # - ELT ('#ELT'): the element passes the condition if it is a "real" element | ||||||
5559 | # - TEXT ('#TEXT'): the element passes if it is a CDATA or PCDATA element | ||||||
5560 | # - a string with an XPath condition (only a subset of XPath is actually | ||||||
5561 | # supported). | ||||||
5562 | # - a regexp: the element passes if its gi matches the regexp | ||||||
5563 | # - a code ref: the element passes if the code, applied on the element, | ||||||
5564 | # returns true | ||||||
5565 | |||||||
5566 | my %cond_cache; # expression => coderef | ||||||
5567 | |||||||
5568 |
1
|
149
|
sub reset_cond_cache { %cond_cache=(); } | ||||
5569 | |||||||
5570 | { | ||||||
5571 | sub _install_cond | ||||||
5572 |
399
|
338
|
{ my $cond= shift; | ||||
5573 |
399
|
270
|
my $test; | ||||
5574 |
399
|
288
|
my $init=''; | ||||
5575 | |||||||
5576 |
399
|
279
|
my $original_cond= $cond; | ||||
5577 | |||||||
5578 |
399
|
715
|
my $not= ($cond=~ s{^\s*!}{}) ? '!' : ''; | ||||
5579 | |||||||
5580 |
399
1
|
560
2
|
if( ref $cond eq 'CODE') { return $cond; } | ||||
5581 | |||||||
5582 |
398
|
414
|
if( ref $cond eq 'Regexp') | ||||
5583 |
3
|
6
|
{ $test = qq{(\$_[0]->gi=~ /$cond/)}; } | ||||
5584 | else | ||||||
5585 |
395
|
700
|
{ my @tests; | ||||
5586 |
395
|
487
|
while( $cond) | ||||
5587 | { | ||||||
5588 | # the condition is a string | ||||||
5589 |
400
4
4
4
|
16641
13
4
287
|
if( $cond=~ s{$ELT$SEP}{}) | ||||
5590 |
5
|
13
|
{ push @tests, qq{\$_[0]->is_elt}; } | ||||
5591 | elsif( $cond=~ s{$TEXT$SEP}{}) | ||||||
5592 |
14
|
35
|
{ push @tests, qq{\$_[0]->is_text}; } | ||||
5593 | elsif( $cond=~ s{^\s*($REG_TAG_PART)$SEP}{}) | ||||||
5594 |
189
|
25733
|
{ push @tests, _gi_test( $1); } | ||||
5595 | elsif( $cond=~ s{^\s*($REG_REGEXP)$SEP}{}) | ||||||
5596 | { # /regexp/ | ||||||
5597 |
2
|
7
|
push @tests, qq{ \$_[0]->gi=~ $1 }; | ||||
5598 | } | ||||||
5599 | elsif( $cond=~ s{^\s*($REG_TAG_PART)?\s* # $1 | ||||||
5600 | \[\s*(-?)\s*(\d+)\s*\] # [$2] | ||||||
5601 | $SEP}{}xo | ||||||
5602 | ) | ||||||
5603 |
48
|
104
|
{ my( $gi, $neg, $index)= ($1, $2, $3); | ||||
5604 |
48
|
51
|
my $siblings= $neg ? q{$_[0]->_next_siblings} : q{$_[0]->_prev_siblings}; | ||||
5605 |
48
|
111
|
if( $gi && ($gi ne '*')) | ||||
5606 | #{ $test= qq{((\$_[0]->gi eq "$gi") && (scalar( grep { \$_->gi eq "$gi" } $siblings) + 1 == $index))}; } | ||||||
5607 |
31
|
30
|
{ push @tests, _and( _gi_test( $gi), qq{ (scalar( grep { \$_->gi eq "$gi" } $siblings) + 1 == $index)}); } | ||||
5608 | else | ||||||
5609 |
17
|
47
|
{ push @tests, qq{(scalar( $siblings) + 1 == $index)}; } | ||||
5610 | } | ||||||
5611 | elsif( $cond=~ s{^\s*($REG_TAG_PART?)\s*($REG_PREDICATE)$SEP}{}) | ||||||
5612 |
124
|
290
|
{ my( $gi, $predicate)= ( $1, $2); | ||||
5613 |
124
|
215
|
push @tests, _and( _gi_test( $gi), _parse_predicate_in_step( $predicate)); | ||||
5614 | } | ||||||
5615 | elsif( $cond=~ s{^\s*($REG_NAKED_PREDICATE)$SEP}{}) | ||||||
5616 |
14
|
19
|
{ push @tests, _parse_predicate_in_step( $1); } | ||||
5617 | else | ||||||
5618 |
4
|
322
|
{ croak "wrong navigation condition '$original_cond' ($@)"; } | ||||
5619 | } | ||||||
5620 |
391
8
|
741
14
|
$test= @tests > 1 ? '(' . join( '||', map { "($_)" } @tests) . ')' : $tests[0]; | ||||
5621 | } | ||||||
5622 | |||||||
5623 | #warn "init: '$init' - test: '$test'\n"; | ||||||
5624 | |||||||
5625 |
394
|
691
|
my $sub= qq{sub { $NO_WARNINGS; $init; return $not($test) ? \$_[0] : undef; } }; | ||||
5626 |
394
103
103
103
37
37
37
41
41
41
51
51
51
36
36
36
39
39
39
32
32
32
24
24
24
4
4
4
4
4
4
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
|
19693
362
98
8344
125
39
1951
134
44
3002
557
51
3999
119
40
2972
133
41
3615
106
37
2765
87
195
1894
13
77
296
13
5
481
4
1
108
4
1
107
4
1
137
4
0
104
4
1
107
3
1
106
3
1
147
7
2
120
3
1
140
4
1
132
3
1
104
3
1
117
3
1
114
3
1
112
3
1
105
4
1
106
3
1
129
4
1
127
3
1
127
3
1
128
3
2
121
4
1
105
7
2
120
8
3
140
8
3
159
6
2
133
6
2
149
6
2
145
3
1
112
3
1
111
3
1
112
3
2
111
3
1
105
|
my $s= eval $sub; | ||||
5627 | #warn "cond: $cond\n$sub\n"; | ||||||
5628 |
394
|
688
|
if( $@) | ||||
5629 |
5
|
405
|
{ croak "wrong navigation condition '$original_cond' ($@);" } | ||||
5630 |
389
|
1006
|
return $s; | ||||
5631 | } | ||||||
5632 | |||||||
5633 | sub _gi_test | ||||||
5634 |
344
|
483
|
{ my( $full_gi)= @_; | ||||
5635 | |||||||
5636 | # optimize if the gi exists, including the case where the gi includes a dot | ||||||
5637 |
344
|
394
|
my $index= $XML::Twig::gi2index{$full_gi}; | ||||
5638 |
344
264
|
471
925
|
if( $index) { return qq{\$_[0]->{gi} == $index}; } | ||||
5639 | |||||||
5640 |
80
|
285
|
my( $gi, $class, $id)= $full_gi=~ m{^(.*?)(?:[.]([^.]*)|[#](.*))?$}; | ||||
5641 | |||||||
5642 |
80
|
75
|
my $gi_test=''; | ||||
5643 |
80
|
218
|
if( $gi && $gi ne '*' ) | ||||
5644 | { # 2 options, depending on whether the gi exists in gi2index | ||||||
5645 | # start optimization | ||||||
5646 |
21
|
24
|
my $index= $XML::Twig::gi2index{$gi}; | ||||
5647 |
21
|
29
|
if( $index) | ||||
5648 | { # the gi exists, use its index as a faster shortcut | ||||||
5649 |
7
|
11
|
$gi_test = qq{\$_[0]->{gi} == $index}; | ||||
5650 | } | ||||||
5651 | else | ||||||
5652 | # end optimization | ||||||
5653 | { # it does not exist (but might be created later), compare the strings | ||||||
5654 |
14
|
25
|
$gi_test = qq{ \$_[0]->gi eq "$gi"}; | ||||
5655 | } | ||||||
5656 | } | ||||||
5657 | else | ||||||
5658 |
59
|
90
|
{ $gi_test= 1; } | ||||
5659 | |||||||
5660 |
80
|
135
|
my $class_test=''; | ||||
5661 | #warn "class: '$class'"; | ||||||
5662 |
80
|
103
|
if( $class) | ||||
5663 |
13
|
18
|
{ $class_test = qq{ defined( \$_[0]->{att}->{class}) && \$_[0]->{att}->{class}=~ m{\\b$class\\b} }; } | ||||
5664 | |||||||
5665 |
80
|
52
|
my $id_test=''; | ||||
5666 | #warn "id: '$id'"; | ||||||
5667 |
80
|
98
|
if( $id) | ||||
5668 |
4
|
5
|
{ $id_test = qq{ defined( \$_[0]->{att}->{$ID}) && \$_[0]->{att}->{$ID} eq '$id' }; } | ||||
5669 | |||||||
5670 | |||||||
5671 | #warn "gi_test: '$gi_test' - class_test: '$class_test' returning ", _and( $gi_test, $class_test); | ||||||
5672 |
80
|
103
|
return _and( $gi_test, $class_test, $id_test); | ||||
5673 | } | ||||||
5674 | |||||||
5675 | |||||||
5676 | # input: the original predicate | ||||||
5677 | sub _parse_predicate_in_step | ||||||
5678 |
138
|
135
|
{ my $cond= shift; | ||||
5679 |
138
|
466
|
my %PERL_ALPHA_TEST= ( '=' => ' eq ', '!=' => ' ne ', '>' => ' gt ', '>=' => ' ge ', '<' => ' lt ', '<=' => ' le '); | ||||
5680 | |||||||
5681 |
138
|
287
|
$cond=~ s{^\s*\[\s*}{}; | ||||
5682 |
138
|
245
|
$cond=~ s{\s*\]\s*$}{}; | ||||
5683 |
138
381
|
5974
1137
|
$cond=~ s{( ($REG_STRING|$REG_REGEXP) # strings or regexps | ||||
5684 | |\@($REG_TAG_NAME)(?=\s*(?:[><=!]|!~|=~)) # @att (followed by a comparison operator) | ||||||
5685 | |\@($REG_TAG_NAME) # @att (not followed by a comparison operator) | ||||||
5686 |
381
100
|
2428
197
|
|=~|!~ # matching operators | ||||
5687 |
93
|
339
|
|([><]=?|=|!=)(?=\s*[\d+-]) # test before a number | ||||
5688 |
16
|
45
|
|([><]=?|=|!=) # test, other cases | ||||
5689 |
1
|
5
|
|($REG_FUNCTION) # no arg functions | ||||
5690 |
79
|
265
|
# this bit is a mess, but it is the only solution with this half-baked parser | ||||
5691 | |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*$REG_MATCH\s*$REG_REGEXP) # string( child) =~ /regexp/ | ||||||
5692 |
18
|
76
|
|((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*!?=\s*$REG_VALUE) # string( child) = "value" (or !=) | ||||
5693 | |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*[<>]=?\s*$REG_VALUE) # string( child) > "value" | ||||||
5694 |
9
17
|
32
71
|
|(and|or) | ||||
5695 | )} | ||||||
5696 | { my( $token, $string, $att, $bare_att, $num_test, $alpha_test, $func, $string_regexp, $string_eq, $string_test, $and_or) | ||||||
5697 | = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11); | ||||||
5698 | |||||||
5699 |
18
|
112
|
if( defined $string) { $token } | ||||
5700 |
30
|
126
|
elsif( $att) { "( \$_[0]->{att} && exists( \$_[0]->{att}->{'$att'}) && \$_[0]->{att}->{'$att'})"; } | ||||
5701 | elsif( $bare_att) { "(\$_[0]->{att} && defined( \$_[0]->{att}->{'$bare_att'}))"; } | ||||||
5702 | elsif( $num_test && ($num_test eq '=') ) { "==" } # others tests are unchanged | ||||||
5703 | elsif( $alpha_test) { $PERL_ALPHA_TEST{$alpha_test} } | ||||||
5704 | elsif( $func && $func=~ m{^(?:string|text)}) | ||||||
5705 | { "\$_[0]->text"; } | ||||||
5706 | elsif( $string_regexp && $string_regexp =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)}) | ||||||
5707 | { "(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $2 $3) } 1, \$_[0]->_children)"; } | ||||||
5708 | elsif( $string_eq && $string_eq =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*(!?=)\s*($REG_VALUE)}) | ||||||
5709 | {"(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $PERL_ALPHA_TEST{$2} $3) } 1, \$_[0]->_children)"; } | ||||||
5710 | elsif( $string_test && $string_test =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*([<>]=?)\s*($REG_VALUE)}) | ||||||
5711 | { "(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $2 $3) } 1, \$_[0]->_children)"; } | ||||||
5712 | elsif( $and_or) { $and_or eq 'and' ? '&&' : '||' ; } | ||||||
5713 | else { $token; } | ||||||
5714 | }gexs; | ||||||
5715 |
138
|
552
|
return "($cond)"; | ||||
5716 | } | ||||||
5717 | |||||||
5718 | |||||||
5719 | sub _op | ||||||
5720 |
82
|
102
|
{ my $op= shift; | ||||
5721 |
82
78
|
96
63
|
if( $op eq '=') { $op= 'eq'; } | ||||
5722 |
3
|
2
|
elsif( $op eq '!=') { $op= 'ne'; } | ||||
5723 |
82
|
187
|
return $op; | ||||
5724 | } | ||||||
5725 | |||||||
5726 | sub passes | ||||||
5727 |
4008
|
2460
|
{ my( $elt, $cond)= @_; | ||||
5728 |
4008
|
7170
|
return $elt unless $cond; | ||||
5729 |
679
|
1640
|
my $sub= ($cond_cache{$cond} ||= _install_cond( $cond)); | ||||
5730 |
679
|
11133
|
return $sub->( $elt); | ||||
5731 | } | ||||||
5732 | } | ||||||
5733 | |||||||
5734 | sub set_parent | ||||||
5735 |
1
|
2
|
{ $_[0]->{parent}= $_[1]; | ||||
5736 |
1
1
|
2
2
|
if( $XML::Twig::weakrefs) { weaken( $_[0]->{parent}); } | ||||
5737 | } | ||||||
5738 | |||||||
5739 | sub parent | ||||||
5740 |
46
|
91
|
{ my $elt= shift; | ||||
5741 |
46
|
115
|
my $cond= shift || return $elt->{parent}; | ||||
5742 |
26
39
|
16
103
|
do { $elt= $elt->{parent} || return; } until ( $elt->passes( $cond)); | ||||
5743 |
22
|
41
|
return $elt; | ||||
5744 | } | ||||||
5745 | |||||||
5746 | sub set_first_child | ||||||
5747 |
50
|
59
|
{ $_[0]->{'first_child'}= $_[1]; | ||||
5748 | } | ||||||
5749 | |||||||
5750 | sub first_child | ||||||
5751 |
668
|
1782
|
{ my $elt= shift; | ||||
5752 |
668
|
1164
|
my $cond= shift || return $elt->{first_child}; | ||||
5753 |
428
|
453
|
my $child= $elt->{first_child}; | ||||
5754 |
428
|
896
|
my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); | ||||
5755 |
419
|
7929
|
while( $child && !$test_cond->( $child)) | ||||
5756 |
596
|
7861
|
{ $child= $child->{next_sibling}; } | ||||
5757 |
419
|
22712
|
return $child; | ||||
5758 | } | ||||||
5759 | |||||||
5760 |
20
|
111
|
sub _first_child { return $_[0]->{first_child}; } | ||||
5761 |
273
|
545
|
sub _last_child { return $_[0]->{last_child}; } | ||||
5762 |
44
|
634
|
sub _next_sibling { return $_[0]->{next_sibling}; } | ||||
5763 |
76
|
1133
|
sub _prev_sibling { return $_[0]->{prev_sibling}; } | ||||
5764 |
115
|
296
|
sub _parent { return $_[0]->{parent}; } | ||||
5765 |
76
76
76
296
76
|
55
33
105
313
1092
|
sub _next_siblings { my $elt= shift; my @siblings; while( $elt= $elt->{next_sibling}) { push @siblings, $elt; } return @siblings; } | ||||
5766 |
93
93
93
169
93
|
66
54
117
188
1336
|
sub _prev_siblings { my $elt= shift; my @siblings; while( $elt= $elt->{prev_sibling}) { push @siblings, $elt; } return @siblings; } | ||||
5767 | |||||||
5768 | # sets a field | ||||||
5769 | # arguments $record, $cond, @content | ||||||
5770 | sub set_field | ||||||
5771 |
3
|
6
|
{ my $record = shift; | ||||
5772 |
3
|
3
|
my $cond = shift; | ||||
5773 |
3
|
5
|
my $child= $record->first_child( $cond); | ||||
5774 |
3
|
5
|
if( $child) | ||||
5775 |
1
|
2
|
{ $child->set_content( @_); } | ||||
5776 | else | ||||||
5777 |
2
|
89
|
{ if( $cond=~ m{^\s*($REG_TAG_NAME)}) | ||||
5778 |
1
|
2
|
{ my $gi= $1; | ||||
5779 |
1
|
2
|
$child= $record->insert_new_elt( last_child => $gi, @_); | ||||
5780 | } | ||||||
5781 | else | ||||||
5782 |
1
|
79
|
{ croak "can't create a field name from $cond"; } | ||||
5783 | } | ||||||
5784 |
2
|
2
|
return $child; | ||||
5785 | } | ||||||
5786 | |||||||
5787 | sub set_last_child | ||||||
5788 |
36
|
32
|
{ $_[0]->{'last_child'}= $_[1]; | ||||
5789 |
36
36
|
43
51
|
if( $XML::Twig::weakrefs) { weaken( $_[0]->{'last_child'}); } | ||||
5790 | } | ||||||
5791 | |||||||
5792 | sub last_child | ||||||
5793 |
60
|
96
|
{ my $elt= shift; | ||||
5794 |
60
|
528
|
my $cond= shift || return $elt->{last_child}; | ||||
5795 |
26
|
57
|
my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); | ||||
5796 |
26
|
37
|
my $child= $elt->{last_child}; | ||||
5797 |
26
|
438
|
while( $child && !$test_cond->( $child) ) | ||||
5798 |
24
|
369
|
{ $child= $child->{prev_sibling}; } | ||||
5799 |
26
|
83
|
return $child | ||||
5800 | } | ||||||
5801 | |||||||
5802 | |||||||
5803 | sub set_prev_sibling | ||||||
5804 |
1
|
3
|
{ $_[0]->{'prev_sibling'}= $_[1]; | ||||
5805 |
1
1
|
2
2
|
if( $XML::Twig::weakrefs) { weaken( $_[0]->{'prev_sibling'}); } | ||||
5806 | } | ||||||
5807 | |||||||
5808 | sub prev_sibling | ||||||
5809 |
99
|
221
|
{ my $elt= shift; | ||||
5810 |
99
|
183
|
my $cond= shift || return $elt->{prev_sibling}; | ||||
5811 |
57
|
94
|
my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); | ||||
5812 |
57
|
66
|
my $sibling= $elt->{prev_sibling}; | ||||
5813 |
57
|
582
|
while( $sibling && !$test_cond->( $sibling) ) | ||||
5814 |
16
|
141
|
{ $sibling= $sibling->{prev_sibling}; } | ||||
5815 |
57
|
97
|
return $sibling; | ||||
5816 | } | ||||||
5817 | |||||||
5818 |
1
|
3
|
sub set_next_sibling { $_[0]->{'next_sibling'}= $_[1]; } | ||||
5819 | |||||||
5820 | sub next_sibling | ||||||
5821 |
618
|
504
|
{ my $elt= shift; | ||||
5822 |
618
|
1137
|
my $cond= shift || return $elt->{next_sibling}; | ||||
5823 |
266
|
356
|
my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); | ||||
5824 |
266
|
192
|
my $sibling= $elt->{next_sibling}; | ||||
5825 |
266
|
2640
|
while( $sibling && !$test_cond->( $sibling) ) | ||||
5826 |
131
|
1112
|
{ $sibling= $sibling->{next_sibling}; } | ||||
5827 |
266
|
425
|
return $sibling; | ||||
5828 | } | ||||||
5829 | |||||||
5830 | # methods dealing with the class attribute, convenient if you work with xhtml | ||||||
5831 |
53
|
209
|
sub class { $_[0]->{att}->{class}; } | ||||
5832 | # lvalue version of class. separate from class to avoid problem like RT# | ||||||
5833 | sub lclass | ||||||
5834 | :lvalue # > perl 5.5 | ||||||
5835 |
2
|
6
|
{ $_[0]->{att}->{class}; } | ||||
5836 | |||||||
5837 |
18
18
|
17
20
|
sub set_class { my( $elt, $class)= @_; $elt->set_att( class => $class); } | ||||
5838 | |||||||
5839 | # adds a class to an element | ||||||
5840 | sub add_to_class | ||||||
5841 |
6
|
5
|
{ my( $elt, $new_class)= @_; | ||||
5842 |
6
|
8
|
return $elt unless $new_class; | ||||
5843 |
5
|
5
|
my $class= $elt->class; | ||||
5844 |
5
8
|
11
13
|
my %class= $class ? map { $_ => 1 } split /\s+/, $class : (); | ||||
5845 |
5
|
8
|
$class{$new_class}= 1; | ||||
5846 |
5
|
15
|
$elt->set_class( join( ' ', sort keys %class)); | ||||
5847 | } | ||||||
5848 | |||||||
5849 | sub remove_class | ||||||
5850 |
5
|
6
|
{ my( $elt, $class_to_remove)= @_; | ||||
5851 |
5
|
8
|
return $elt unless $class_to_remove; | ||||
5852 |
5
|
7
|
my $class= $elt->class; | ||||
5853 |
5
7
|
9
13
|
my %class= $class ? map { $_ => 1 } split /\s+/, $class : (); | ||||
5854 |
5
|
6
|
delete $class{$class_to_remove}; | ||||
5855 |
5
|
10
|
$elt->set_class( join( ' ', sort keys %class)); | ||||
5856 | } | ||||||
5857 | |||||||
5858 |
2
2
|
1
3
|
sub att_to_class { my( $elt, $att)= @_; $elt->set_class( $elt->{'att'}->{$att}); } | ||||
5859 |
2
2
|
3
4
|
sub add_att_to_class { my( $elt, $att)= @_; $elt->add_to_class( $elt->{'att'}->{$att}); } | ||||
5860 |
1
1
|
1
2
|
sub move_att_to_class { my( $elt, $att)= @_; $elt->add_to_class( $elt->{'att'}->{$att}); | ||||
5861 |
1
|
2
|
$elt->del_att( $att); | ||||
5862 | } | ||||||
5863 |
2
2
|
2
3
|
sub tag_to_class { my( $elt)= @_; $elt->set_class( $elt->tag); } | ||||
5864 |
2
2
|
2
2
|
sub add_tag_to_class { my( $elt)= @_; $elt->add_to_class( $elt->tag); } | ||||
5865 |
1
1
1
|
1
1
2
|
sub set_tag_class { my( $elt, $new_tag)= @_; $elt->add_tag_to_class; $elt->set_tag( $new_tag); } | ||||
5866 | |||||||
5867 | sub tag_to_span | ||||||
5868 |
2
|
4
|
{ my( $elt)= @_; | ||||
5869 |
2
|
3
|
$elt->set_class( $elt->tag) unless( $elt->tag eq 'span' && $elt->class); # set class to span unless it would mean replacing it with span | ||||
5870 |
2
|
3
|
$elt->set_tag( 'span'); | ||||
5871 | } | ||||||
5872 | |||||||
5873 | sub tag_to_div | ||||||
5874 |
2
|
2
|
{ my( $elt)= @_; | ||||
5875 |
2
|
3
|
$elt->set_class( $elt->tag) unless( $elt->tag eq 'div' && $elt->class); # set class to div unless it would mean replacing it with div | ||||
5876 |
2
|
3
|
$elt->set_tag( 'div'); | ||||
5877 | } | ||||||
5878 | |||||||
5879 | sub in_class | ||||||
5880 |
10
|
9
|
{ my( $elt, $class)= @_; | ||||
5881 |
10
|
11
|
my $elt_class= $elt->class; | ||||
5882 |
10
|
12
|
return unless( defined $elt_class); | ||||
5883 |
10
|
8
|
return $elt->class=~ m{(?:^|\s)\Q$class\E(?:\s|$)} ? $elt : 0; | ||||
5884 | } | ||||||
5885 | |||||||
5886 | |||||||
5887 | # get or set all attributes | ||||||
5888 | # argument can be a hash or a hashref | ||||||
5889 | sub set_atts | ||||||
5890 |
43713
|
30133
|
{ my $elt= shift; | ||||
5891 |
43713
|
27788
|
my %atts; | ||||
5892 |
43713
|
39606
|
tie %atts, 'Tie::IxHash' if( keep_atts_order()); | ||||
5893 |
43713
113
|
234373
200
|
%atts= ( (ref( $_[0] || '') eq 'HASH') || isa( $_[0] || '', 'HASH')) ? %{$_[0]} : @_; | ||||
5894 |
43713
|
45031
|
$elt->{att}= \%atts; | ||||
5895 |
43713
18691
|
52681
22649
|
if( exists $atts{$ID}) { $elt->_set_id( $atts{$ID}); } | ||||
5896 |
43713
|
38503
|
return $elt; | ||||
5897 | } | ||||||
5898 | |||||||
5899 |
196
|
284
|
sub atts { return $_[0]->{att}; } | ||||
5900 |
576
576
|
359
1342
|
sub att_names { return (sort keys %{$_[0]->{att}}); } | ||||
5901 |
4
4
|
46
8
|
sub del_atts { $_[0]->{att}={}; return $_[0]; } | ||||
5902 | |||||||
5903 | # get or set a single attribute (set works for several atts) | ||||||
5904 | sub set_att | ||||||
5905 |
155
|
200
|
{ my $elt= shift; | ||||
5906 | |||||||
5907 |
155
|
464
|
if( $_[0] && ref( $_[0]) && !$_[1]) | ||||
5908 |
1
|
105
|
{ croak "improper call to set_att, usage is \$elt->set_att( att1 => 'val1', att2 => 'val2',...)"; } | ||||
5909 | |||||||
5910 |
154
|
215
|
unless( $elt->{att}) | ||||
5911 |
4
|
4
|
{ $elt->{att}={}; | ||||
5912 |
4
2
|
5
6
|
tie %{$elt->{att}}, 'Tie::IxHash' if( keep_atts_order()); | ||||
5913 | } | ||||||
5914 | |||||||
5915 |
154
|
210
|
while(@_) | ||||
5916 |
154
|
156
|
{ my( $att, $val)= (shift, shift); | ||||
5917 |
154
|
221
|
$elt->{att}->{$att}= $val; | ||||
5918 |
154
55
|
295
84
|
if( $att eq $ID) { $elt->_set_id( $val); } | ||||
5919 | } | ||||||
5920 |
154
|
183
|
return $elt; | ||||
5921 | } | ||||||
5922 | |||||||
5923 |
540
|
1496
|
sub att { $_[0]->{att}->{$_[1]}; } | ||||
5924 | # lvalue version of att. separate from class to avoid problem like RT# | ||||||
5925 | sub latt | ||||||
5926 | :lvalue # > perl 5.5 | ||||||
5927 |
5
|
12
|
{ $_[0]->{att}->{$_[1]}; } | ||||
5928 | |||||||
5929 | sub del_att | ||||||
5930 |
85
|
62
|
{ my $elt= shift; | ||||
5931 |
85
85
|
102
146
|
while( @_) { delete $elt->{'att'}->{shift()}; } | ||||
5932 |
85
|
83
|
return $elt; | ||||
5933 | } | ||||||
5934 | |||||||
5935 |
5
|
11
|
sub att_exists { return exists $_[0]->{att}->{$_[1]}; } | ||||
5936 | |||||||
5937 | # delete an attribute from all descendants of an element | ||||||
5938 | sub strip_att | ||||||
5939 |
13
|
11
|
{ my( $elt, $att)= @_; | ||||
5940 |
13
|
23
|
$_->del_att( $att) foreach ($elt->descendants_or_self( qq{*[\@$att]})); | ||||
5941 |
13
|
24
|
return $elt; | ||||
5942 | } | ||||||
5943 | |||||||
5944 | sub change_att_name | ||||||
5945 |
5
|
24
|
{ my( $elt, $old_name, $new_name)= @_; | ||||
5946 |
5
|
6
|
my $value= $elt->{'att'}->{$old_name}; | ||||
5947 |
5
|
8
|
return $elt unless( defined $value); | ||||
5948 |
4
|
6
|
$elt->del_att( $old_name) | ||||
5949 | ->set_att( $new_name => $value); | ||||||
5950 |
4
|
4
|
return $elt; | ||||
5951 | } | ||||||
5952 | |||||||
5953 | sub lc_attnames | ||||||
5954 |
1
|
1
|
{ my $elt= shift; | ||||
5955 |
1
|
1
|
foreach my $att ($elt->att_names) | ||||
5956 |
3
3
|
9
4
|
{ if( $att ne lc $att) { $elt->change_att_name( $att, lc $att); } } | ||||
5957 |
1
|
2
|
return $elt; | ||||
5958 | } | ||||||
5959 | |||||||
5960 |
58
|
72
|
sub set_twig_current { $_[0]->{twig_current}=1; } | ||||
5961 |
1
|
2
|
sub del_twig_current { delete $_[0]->{twig_current}; } | ||||
5962 | |||||||
5963 | |||||||
5964 | # get or set the id attribute | ||||||
5965 | sub set_id | ||||||
5966 |
51
|
57
|
{ my( $elt, $id)= @_; | ||||
5967 |
51
|
82
|
$elt->del_id() if( exists $elt->{att}->{$ID}); | ||||
5968 |
51
|
75
|
$elt->set_att($ID, $id); | ||||
5969 |
51
|
47
|
$elt->_set_id( $id); | ||||
5970 |
51
|
37
|
return $elt; | ||||
5971 | } | ||||||
5972 | |||||||
5973 | # only set id, does not update the attribute value | ||||||
5974 | sub _set_id | ||||||
5975 |
18798
|
13893
|
{ my( $elt, $id)= @_; | ||||
5976 |
18798
|
19613
|
my $t= $elt->twig || $elt; | ||||
5977 |
18798
|
32821
|
$t->{twig_id_list}->{$id}= $elt; | ||||
5978 |
18798
9795
|
22116
14185
|
if( $XML::Twig::weakrefs) { weaken( $t->{twig_id_list}->{$id}); } | ||||
5979 |
18798
|
14074
|
return $elt; | ||||
5980 | } | ||||||
5981 | |||||||
5982 |
830
|
4180
|
sub id { return $_[0]->{att}->{$ID}; } | ||||
5983 | |||||||
5984 | # methods used to add ids to elements that don't have one | ||||||
5985 | BEGIN | ||||||
5986 |
187
|
312
|
{ my $id_nb = "0001"; | ||||
5987 |
187
|
674013
|
my $id_seed = "twig_id_"; | ||||
5988 | |||||||
5989 | sub set_id_seed ## no critic (Subroutines::ProhibitNestedSubs); | ||||||
5990 |
2
2
|
3
3
|
{ $id_seed= $_[1]; $id_nb=1; } | ||||
5991 | |||||||
5992 | sub add_id ## no critic (Subroutines::ProhibitNestedSubs); | ||||||
5993 |
40
|
34
|
{ my $elt= shift; | ||||
5994 |
40
|
53
|
if( defined $elt->{'att'}->{$ID}) | ||||
5995 |
1
|
2
|
{ return $elt->{'att'}->{$ID}; } | ||||
5996 | else | ||||||
5997 |
39
|
90
|
{ my $id= $_[0] && ref( $_[0]) && isa( $_[0], 'CODE') ? $_[0]->( $elt) : $id_seed . $id_nb++; | ||||
5998 |
39
|
43
|
$elt->set_id( $id); | ||||
5999 |
39
|
29
|
return $id; | ||||
6000 | } | ||||||
6001 | } | ||||||
6002 | } | ||||||
6003 | |||||||
6004 | |||||||
6005 | |||||||
6006 | # delete the id attribute and remove the element from the id list | ||||||
6007 | sub del_id | ||||||
6008 |
2515
|
1515
|
{ my $elt= shift; | ||||
6009 |
2515
5
|
2831
9
|
if( ! exists $elt->{att}->{$ID}) { return $elt }; | ||||
6010 |
2510
|
2014
|
my $id= $elt->{att}->{$ID}; | ||||
6011 | |||||||
6012 |
2510
|
1992
|
delete $elt->{att}->{$ID}; | ||||
6013 | |||||||
6014 |
2510
|
2499
|
my $t= shift || $elt->twig; | ||||
6015 |
2510
2
|
2324
5
|
unless( $t) { return $elt; } | ||||
6016 |
2508
2508
|
4344
2303
|
if( exists $t->{twig_id_list}->{$id}) { delete $t->{twig_id_list}->{$id}; } | ||||
6017 | |||||||
6018 |
2508
|
1816
|
return $elt; | ||||
6019 | } | ||||||
6020 | |||||||
6021 | # return the list of children | ||||||
6022 | sub children | ||||||
6023 |
205
|
203
|
{ my $elt= shift; | ||||
6024 |
205
|
141
|
my @children; | ||||
6025 |
205
|
304
|
my $child= $elt->first_child( @_); | ||||
6026 |
205
|
268
|
while( $child) | ||||
6027 |
546
|
431
|
{ push @children, $child; | ||||
6028 |
546
|
650
|
$child= $child->next_sibling( @_); | ||||
6029 | } | ||||||
6030 |
205
|
1102
|
return @children; | ||||
6031 | } | ||||||
6032 | |||||||
6033 | sub _children | ||||||
6034 |
20441
|
11312
|
{ my $elt= shift; | ||||
6035 |
20441
|
12745
|
my @children=(); | ||||
6036 |
20441
|
13097
|
my $child= $elt->{first_child}; | ||||
6037 |
20441
|
20379
|
while( $child) | ||||
6038 |
18358
|
10951
|
{ push @children, $child; | ||||
6039 |
18358
|
21122
|
$child= $child->{next_sibling}; | ||||
6040 | } | ||||||
6041 |
20441
|
27612
|
return @children; | ||||
6042 | } | ||||||
6043 | |||||||
6044 | sub children_copy | ||||||
6045 |
1
|
2
|
{ my $elt= shift; | ||||
6046 |
1
|
2
|
my @children; | ||||
6047 |
1
|
3
|
my $child= $elt->first_child( @_); | ||||
6048 |
1
|
3
|
while( $child) | ||||
6049 |
2
|
5
|
{ push @children, $child->copy; | ||||
6050 |
2
|
5
|
$child= $child->next_sibling( @_); | ||||
6051 | } | ||||||
6052 |
1
|
3
|
return @children; | ||||
6053 | } | ||||||
6054 | |||||||
6055 | |||||||
6056 | sub children_count | ||||||
6057 |
776
|
428
|
{ my $elt= shift; | ||||
6058 |
776
|
476
|
my $cond= shift; | ||||
6059 |
776
|
400
|
my $count=0; | ||||
6060 |
776
|
517
|
my $child= $elt->{first_child}; | ||||
6061 |
776
|
756
|
while( $child) | ||||
6062 |
1139
|
947
|
{ $count++ if( $child->passes( $cond)); | ||||
6063 |
1139
|
1312
|
$child= $child->{next_sibling}; | ||||
6064 | } | ||||||
6065 |
776
|
795
|
return $count; | ||||
6066 | } | ||||||
6067 | |||||||
6068 | sub children_text | ||||||
6069 |
3
|
15
|
{ my $elt= shift; | ||||
6070 |
4
2
|
4
2
|
return wantarray() ? map { $_->text} $elt->children( @_) | ||||
6071 |
3
|
8
|
: join( '', map { $_->text} $elt->children( @_) ) | ||||
6072 | ; | ||||||
6073 | } | ||||||
6074 | |||||||
6075 | sub children_trimmed_text | ||||||
6076 |
2
|
2
|
{ my $elt= shift; | ||||
6077 |
2
2
|
3
3
|
return wantarray() ? map { $_->trimmed_text} $elt->children( @_) | ||||
6078 |
2
|
6
|
: join( '', map { $_->trimmed_text} $elt->children( @_) ) | ||||
6079 | ; | ||||||
6080 | } | ||||||
6081 | |||||||
6082 | sub all_children_are | ||||||
6083 |
3
|
2
|
{ my( $parent, $cond)= @_; | ||||
6084 |
3
|
5
|
foreach my $child ($parent->_children) | ||||
6085 |
4
|
5
|
{ return 0 unless( $child->passes( $cond)); } | ||||
6086 |
1
|
2
|
return $parent; | ||||
6087 | } | ||||||
6088 | |||||||
6089 | |||||||
6090 | sub ancestors | ||||||
6091 |
1109
|
645
|
{ my( $elt, $cond)= @_; | ||||
6092 |
1109
|
595
|
my @ancestors; | ||||
6093 |
1109
|
1550
|
while( $elt->{parent}) | ||||
6094 |
2128
|
1224
|
{ $elt= $elt->{parent}; | ||||
6095 |
2128
|
1733
|
push @ancestors, $elt if( $elt->passes( $cond)); | ||||
6096 | } | ||||||
6097 |
1109
|
1277
|
return @ancestors; | ||||
6098 | } | ||||||
6099 | |||||||
6100 | sub ancestors_or_self | ||||||
6101 |
12
|
12
|
{ my( $elt, $cond)= @_; | ||||
6102 |
12
|
9
|
my @ancestors; | ||||
6103 |
12
|
20
|
while( $elt) | ||||
6104 |
38
|
40
|
{ push @ancestors, $elt if( $elt->passes( $cond)); | ||||
6105 |
38
|
49
|
$elt= $elt->{parent}; | ||||
6106 | } | ||||||
6107 |
12
|
50
|
return @ancestors; | ||||
6108 | } | ||||||
6109 | |||||||
6110 | |||||||
6111 | sub _ancestors | ||||||
6112 |
2
|
2
|
{ my( $elt, $include_self)= @_; | ||||
6113 |
2
|
3
|
my @ancestors= $include_self ? ($elt) : (); | ||||
6114 |
2
4
|
4
6
|
while( $elt= $elt->{parent}) { push @ancestors, $elt; } | ||||
6115 |
2
|
4
|
return @ancestors; | ||||
6116 | } | ||||||
6117 | |||||||
6118 | |||||||
6119 | sub inherit_att | ||||||
6120 |
5571
|
3263
|
{ my $elt= shift; | ||||
6121 |
5571
|
3559
|
my $att= shift; | ||||
6122 |
5571
4
|
5162
7
|
my %tags= map { ($_, 1) } @_; | ||||
6123 | |||||||
6124 | do | ||||||
6125 |
5571
7775
|
3044
15639
|
{ if( (defined $elt->{'att'}->{$att}) | ||||
6126 | && ( !%tags || $tags{$XML::Twig::index2gi[$elt->{'gi'}]}) | ||||||
6127 | ) | ||||||
6128 |
10
|
22
|
{ return $elt->{'att'}->{$att}; } | ||||
6129 | } while( $elt= $elt->{parent}); | ||||||
6130 |
5561
|
13163
|
return undef; | ||||
6131 | } | ||||||
6132 | |||||||
6133 | sub _inherit_att_through_cut | ||||||
6134 |
76
|
50
|
{ my $elt= shift; | ||||
6135 |
76
|
50
|
my $att= shift; | ||||
6136 |
76
1
|
85
5
|
my %tags= map { ($_, 1) } @_; | ||||
6137 | |||||||
6138 | do | ||||||
6139 |
76
156
|
60
478
|
{ if( (defined $elt->{'att'}->{$att}) | ||||
6140 | && ( !%tags || $tags{$XML::Twig::index2gi[$elt->{'gi'}]}) | ||||||
6141 | ) | ||||||
6142 |
47
|
129
|
{ return $elt->{'att'}->{$att}; } | ||||
6143 | } while( $elt= $elt->{parent} || ($elt->{former} && $elt->{former}->{parent})); | ||||||
6144 |
29
|
77
|
return undef; | ||||
6145 | } | ||||||
6146 | |||||||
6147 | |||||||
6148 | sub current_ns_prefixes | ||||||
6149 |
3
|
5
|
{ my $elt= shift; | ||||
6150 |
3
|
4
|
my %prefix; | ||||
6151 |
3
|
7
|
$prefix{''}=1 if( $elt->namespace( '')); | ||||
6152 |
3
|
8
|
while( $elt) | ||||
6153 |
6
3
11
|
17
9
35
|
{ my @ns= grep { !m{^xml} } map { m{^([^:]+):} } ($XML::Twig::index2gi[$elt->{'gi'}], $elt->att_names); | ||||
6154 |
6
|
14
|
$prefix{$_}=1 foreach (@ns); | ||||
6155 |
6
|
15
|
$elt= $elt->{parent}; | ||||
6156 | } | ||||||
6157 | |||||||
6158 |
3
|
17
|
return (sort keys %prefix); | ||||
6159 | } | ||||||
6160 | |||||||
6161 | # kinda counter-intuitive actually: | ||||||
6162 | # the next element is found by looking for the next open tag after from the | ||||||
6163 | # current one, which is the first child, if it exists, or the next sibling | ||||||
6164 | # or the first next sibling of an ancestor | ||||||
6165 | # optional arguments are: | ||||||
6166 | # - $subtree_root: a reference to an element, when the next element is not | ||||||
6167 | # within $subtree_root anymore then next_elt returns undef | ||||||
6168 | # - $cond: a condition, next_elt returns the next element matching the condition | ||||||
6169 | |||||||
6170 | sub next_elt | ||||||
6171 |
494
|
413
|
{ my $elt= shift; | ||||
6172 |
494
|
312
|
my $subtree_root= 0; | ||||
6173 |
494
|
1272
|
$subtree_root= shift if( ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt')); | ||||
6174 |
494
|
339
|
my $cond= shift; | ||||
6175 |
494
|
278
|
my $next_elt; | ||||
6176 | |||||||
6177 |
494
|
247
|
my $ind; # optimization | ||||
6178 |
494
|
273
|
my $test_cond; | ||||
6179 |
494
|
595
|
if( $cond) # optimization | ||||
6180 |
477
|
765
|
{ unless( defined( $ind= $XML::Twig::gi2index{$cond}) ) # optimization | ||||
6181 |
340
|
475
|
{ $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); } # optimization | ||||
6182 | } # optimization | ||||||
6183 | |||||||
6184 | do | ||||||
6185 |
494
802
|
360
1188
|
{ if( $next_elt= $elt->{first_child}) | ||||
6186 | { # simplest case: the elt has a child | ||||||
6187 | } | ||||||
6188 | elsif( $next_elt= $elt->{next_sibling}) | ||||||
6189 | { # no child but a next sibling (just check we stay within the subtree) | ||||||
6190 | |||||||
6191 | # case where elt is subtree_root, is empty and has a sibling | ||||||
6192 |
110
|
245
|
return undef if( $subtree_root && ($elt == $subtree_root)); | ||||
6193 | |||||||
6194 | } | ||||||
6195 | else | ||||||
6196 | { # case where the element has no child and no next sibling: | ||||||
6197 | # get the first next sibling of an ancestor, checking subtree_root | ||||||
6198 | |||||||
6199 | # case where elt is subtree_root, is empty and has no sibling | ||||||
6200 |
223
|
518
|
return undef if( $subtree_root && ($elt == $subtree_root)); | ||||
6201 | |||||||
6202 |
221
|
187
|
$next_elt= $elt->{parent}; | ||||
6203 | |||||||
6204 |
221
|
276
|
until( $next_elt->{next_sibling}) | ||||
6205 |
191
|
522
|
{ return undef if( $subtree_root && ($subtree_root == $next_elt)); | ||||
6206 |
86
|
189
|
$next_elt= $next_elt->{parent} || return undef; | ||||
6207 | } | ||||||
6208 |
105
|
338
|
return undef if( $subtree_root && ($subtree_root == $next_elt)); | ||||
6209 |
101
|
83
|
$next_elt= $next_elt->{next_sibling}; | ||||
6210 | } | ||||||
6211 |
677
|
8810
|
$elt= $next_elt; # just in case we need to loop | ||||
6212 | } until( ! defined $elt | ||||||
6213 | || ! defined $cond | ||||||
6214 | || (defined $ind && ($elt->{gi} eq $ind)) # optimization | ||||||
6215 | || (defined $test_cond && ($test_cond->( $elt))) | ||||||
6216 | ); | ||||||
6217 | |||||||
6218 |
369
|
1683
|
return $elt; | ||||
6219 | } | ||||||
6220 | |||||||
6221 | # return the next_elt within the element | ||||||
6222 | # just call next_elt with the element as first and second argument | ||||||
6223 |
20
|
26
|
sub first_descendant { return $_[0]->next_elt( @_); } | ||||
6224 | |||||||
6225 | # get the last descendant, # then return the element found or call prev_elt with the condition | ||||||
6226 | sub last_descendant | ||||||
6227 |
38
|
32
|
{ my( $elt, $cond)= @_; | ||||
6228 |
38
|
37
|
my $last_descendant= $elt->_last_descendant; | ||||
6229 |
38
|
61
|
if( !$cond || $last_descendant->matches( $cond)) | ||||
6230 |
23
|
26
|
{ return $last_descendant; } | ||||
6231 | else | ||||||
6232 |
15
|
19
|
{ return $last_descendant->prev_elt( $elt, $cond); } | ||||
6233 | } | ||||||
6234 | |||||||
6235 | # no argument allowed here, just go down the last_child recursively | ||||||
6236 | sub _last_descendant | ||||||
6237 |
41
|
22
|
{ my $elt= shift; | ||||
6238 |
41
71
|
61
88
|
while( my $child= $elt->{last_child}) { $elt= $child; } | ||||
6239 |
41
|
41
|
return $elt; | ||||
6240 | } | ||||||
6241 | |||||||
6242 | # counter-intuitive too: | ||||||
6243 | # the previous element is found by looking | ||||||
6244 | # for the first open tag backwards from the current one | ||||||
6245 | # it's the last descendant of the previous sibling | ||||||
6246 | # if it exists, otherwise it's simply the parent | ||||||
6247 | sub prev_elt | ||||||
6248 |
37
|
51
|
{ my $elt= shift; | ||||
6249 |
37
|
29
|
my $subtree_root= 0; | ||||
6250 |
37
|
143
|
if( defined $_[0] and (ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt'))) | ||||
6251 |
17
|
11
|
{ $subtree_root= shift ; | ||||
6252 |
17
|
33
|
return undef if( $elt == $subtree_root); | ||||
6253 | } | ||||||
6254 |
35
|
31
|
my $cond= shift; | ||||
6255 | # get prev elt | ||||||
6256 |
35
|
21
|
my $prev_elt; | ||||
6257 | do | ||||||
6258 |
35
105
|
26
132
|
{ return undef if( $elt == $subtree_root); | ||||
6259 |
102
|
124
|
if( $prev_elt= $elt->{prev_sibling}) | ||||
6260 |
37
|
52
|
{ while( $prev_elt->{last_child}) | ||||
6261 |
22
|
35
|
{ $prev_elt= $prev_elt->{last_child}; } | ||||
6262 | } | ||||||
6263 | else | ||||||
6264 |
65
|
106
|
{ $prev_elt= $elt->{parent} || return undef; } | ||||
6265 |
95
|
135
|
$elt= $prev_elt; # in case we need to loop | ||||
6266 | } until( $elt->passes( $cond)); | ||||||
6267 | |||||||
6268 |
25
|
58
|
return $elt; | ||||
6269 | } | ||||||
6270 | |||||||
6271 | sub _following_elt | ||||||
6272 |
7
|
3
|
{ my( $elt)= @_; | ||||
6273 |
7
|
23
|
while( $elt && !$elt->{next_sibling}) | ||||
6274 |
12
|
27
|
{ $elt= $elt->{parent}; } | ||||
6275 |
7
|
14
|
return $elt ? $elt->{next_sibling} : undef; | ||||
6276 | } | ||||||
6277 | |||||||
6278 | sub following_elt | ||||||
6279 |
7
|
7
|
{ my( $elt, $cond)= @_; | ||||
6280 |
7
|
10
|
$elt= $elt->_following_elt || return undef; | ||||
6281 |
6
|
13
|
return $elt if( !$cond || $elt->matches( $cond)); | ||||
6282 |
2
|
3
|
return $elt->next_elt( $cond); | ||||
6283 | } | ||||||
6284 | |||||||
6285 | sub following_elts | ||||||
6286 |
6
|
6
|
{ my( $elt, $cond)= @_; | ||||
6287 |
6
4
|
8
3
|
if( !$cond) { undef $cond; } | ||||
6288 |
6
|
8
|
my $following= $elt->following_elt( $cond); | ||||
6289 |
6
|
7
|
if( $following) | ||||
6290 |
5
|
5
|
{ my @followings= $following; | ||||
6291 |
5
|
6
|
while( $following= $following->next_elt( $cond)) | ||||
6292 |
6
|
7
|
{ push @followings, $following; } | ||||
6293 |
5
|
34
|
return( @followings); | ||||
6294 | } | ||||||
6295 | else | ||||||
6296 |
1
|
1
|
{ return (); } | ||||
6297 | } | ||||||
6298 | |||||||
6299 | sub _preceding_elt | ||||||
6300 |
7
|
5
|
{ my( $elt)= @_; | ||||
6301 |
7
|
20
|
while( $elt && !$elt->{prev_sibling}) | ||||
6302 |
11
|
19
|
{ $elt= $elt->{parent}; } | ||||
6303 |
7
|
15
|
return $elt ? $elt->{prev_sibling}->_last_descendant : undef; | ||||
6304 | } | ||||||
6305 | |||||||
6306 | sub preceding_elt | ||||||
6307 |
7
|
7
|
{ my( $elt, $cond)= @_; | ||||
6308 |
7
|
7
|
$elt= $elt->_preceding_elt || return undef; | ||||
6309 |
3
|
8
|
return $elt if( !$cond || $elt->matches( $cond)); | ||||
6310 |
2
|
3
|
return $elt->prev_elt( $cond); | ||||
6311 | } | ||||||
6312 | |||||||
6313 | sub preceding_elts | ||||||
6314 |
7
|
7
|
{ my( $elt, $cond)= @_; | ||||
6315 |
7
4
|
8
2
|
if( !$cond) { undef $cond; } | ||||
6316 |
7
|
9
|
my $preceding= $elt->preceding_elt( $cond); | ||||
6317 |
7
|
6
|
if( $preceding) | ||||
6318 |
3
|
4
|
{ my @precedings= $preceding; | ||||
6319 |
3
|
3
|
while( $preceding= $preceding->prev_elt( $cond)) | ||||
6320 |
4
|
4
|
{ push @precedings, $preceding; } | ||||
6321 |
3
|
29
|
return( @precedings); | ||||
6322 | } | ||||||
6323 | else | ||||||
6324 |
4
|
40
|
{ return (); } | ||||
6325 | } | ||||||
6326 | |||||||
6327 | # used in get_xpath | ||||||
6328 | sub _self | ||||||
6329 |
6
|
5
|
{ my( $elt, $cond)= @_; | ||||
6330 |
6
|
30
|
return $cond ? $elt->matches( $cond) : $elt; | ||||
6331 | } | ||||||
6332 | |||||||
6333 | sub next_n_elt | ||||||
6334 |
7
|
7
|
{ my $elt= shift; | ||||
6335 |
7
|
20
|
my $offset= shift || return undef; | ||||
6336 |
5
|
10
|
foreach (1..$offset) | ||||
6337 |
7
|
36
|
{ $elt= $elt->next_elt( @_) || return undef; } | ||||
6338 |
3
|
7
|
return $elt; | ||||
6339 | } | ||||||
6340 | |||||||
6341 | # checks whether $elt is included in $ancestor, returns 1 in that case | ||||||
6342 | sub in | ||||||
6343 |
3222
|
2132
|
{ my ($elt, $ancestor)= @_; | ||||
6344 |
3222
|
7990
|
if( ref( $ancestor) && isa( $ancestor, 'XML::Twig::Elt')) | ||||
6345 | { # element | ||||||
6346 |
3220
3823
|
3873
6765
|
while( $elt= $elt->{parent}) { return $elt if( $elt == $ancestor); } | ||||
6347 | } | ||||||
6348 | else | ||||||
6349 | { # condition | ||||||
6350 |
2
2
|
6
5
|
while( $elt= $elt->{parent}) { return $elt if( $elt->matches( $ancestor)); } | ||||
6351 | } | ||||||
6352 |
2456
|
3339
|
return 0; | ||||
6353 | } | ||||||
6354 | |||||||
6355 | sub first_child_text | ||||||
6356 |
21
|
28
|
{ my $elt= shift; | ||||
6357 |
21
|
32
|
my $dest=$elt->first_child(@_) or return ''; | ||||
6358 |
19
|
39
|
return $dest->text; | ||||
6359 | } | ||||||
6360 | |||||||
6361 | sub fields | ||||||
6362 |
1
|
1
|
{ my $elt= shift; | ||||
6363 |
1
3
|
2
3
|
return map { $elt->field( $_) } @_; | ||||
6364 | } | ||||||
6365 | |||||||
6366 | sub first_child_trimmed_text | ||||||
6367 |
4
|
6
|
{ my $elt= shift; | ||||
6368 |
4
|
6
|
my $dest=$elt->first_child(@_) or return ''; | ||||
6369 |
1
|
2
|
return $dest->trimmed_text; | ||||
6370 | } | ||||||
6371 | |||||||
6372 | sub first_child_matches | ||||||
6373 |
3
|
3
|
{ my $elt= shift; | ||||
6374 |
3
|
10
|
my $dest= $elt->{first_child} or return undef; | ||||
6375 |
2
|
3
|
return $dest->passes( @_); | ||||
6376 | } | ||||||
6377 | |||||||
6378 | sub last_child_text | ||||||
6379 |
3
|
7
|
{ my $elt= shift; | ||||
6380 |
3
|
5
|
my $dest=$elt->last_child(@_) or return ''; | ||||
6381 |
2
|
4
|
return $dest->text; | ||||
6382 | } | ||||||
6383 | |||||||
6384 | sub last_child_trimmed_text | ||||||
6385 |
2
|
2
|
{ my $elt= shift; | ||||
6386 |
2
|
5
|
my $dest=$elt->last_child(@_) or return ''; | ||||
6387 |
1
|
3
|
return $dest->trimmed_text; | ||||
6388 | } | ||||||
6389 | |||||||
6390 | sub last_child_matches | ||||||
6391 |
6
|
7
|
{ my $elt= shift; | ||||
6392 |
6
|
13
|
my $dest= $elt->{last_child} or return undef; | ||||
6393 |
5
|
6
|
return $dest->passes( @_); | ||||
6394 | } | ||||||
6395 | |||||||
6396 | sub child_text | ||||||
6397 |
2
|
3
|
{ my $elt= shift; | ||||
6398 |
2
|
5
|
my $dest=$elt->child(@_) or return ''; | ||||
6399 |
1
|
3
|
return $dest->text; | ||||
6400 | } | ||||||
6401 | |||||||
6402 | sub child_trimmed_text | ||||||
6403 |
2
|
11
|
{ my $elt= shift; | ||||
6404 |
2
|
4
|
my $dest=$elt->child(@_) or return ''; | ||||
6405 |
1
|
2
|
return $dest->trimmed_text; | ||||
6406 | } | ||||||
6407 | |||||||
6408 | sub child_matches | ||||||
6409 |
2
|
4
|
{ my $elt= shift; | ||||
6410 |
2
|
3
|
my $nb= shift; | ||||
6411 |
2
|
4
|
my $dest= $elt->child( $nb) or return undef; | ||||
6412 |
1
|
2
|
return $dest->passes( @_); | ||||
6413 | } | ||||||
6414 | |||||||
6415 | sub prev_sibling_text | ||||||
6416 |
2
|
4
|
{ my $elt= shift; | ||||
6417 |
2
|
3
|
my $dest=$elt->_prev_sibling(@_) or return ''; | ||||
6418 |
1
|
2
|
return $dest->text; | ||||
6419 | } | ||||||
6420 | |||||||
6421 | sub prev_sibling_trimmed_text | ||||||
6422 |
2
|
3
|
{ my $elt= shift; | ||||
6423 |
2
|
4
|
my $dest=$elt->_prev_sibling(@_) or return ''; | ||||
6424 |
1
|
3
|
return $dest->trimmed_text; | ||||
6425 | } | ||||||
6426 | |||||||
6427 | sub prev_sibling_matches | ||||||
6428 |
3
|
6
|
{ my $elt= shift; | ||||
6429 |
3
|
9
|
my $dest= $elt->{prev_sibling} or return undef; | ||||
6430 |
2
|
5
|
return $dest->passes( @_); | ||||
6431 | } | ||||||
6432 | |||||||
6433 | sub next_sibling_text | ||||||
6434 |
3
|
15
|
{ my $elt= shift; | ||||
6435 |
3
|
6
|
my $dest=$elt->next_sibling(@_) or return ''; | ||||
6436 |
2
|
5
|
return $dest->text; | ||||
6437 | } | ||||||
6438 | |||||||
6439 | sub next_sibling_trimmed_text | ||||||
6440 |
2
|
2
|
{ my $elt= shift; | ||||
6441 |
2
|
5
|
my $dest=$elt->next_sibling(@_) or return ''; | ||||
6442 |
1
|
2
|
return $dest->trimmed_text; | ||||
6443 | } | ||||||
6444 | |||||||
6445 | sub next_sibling_matches | ||||||
6446 |
2
|
3
|
{ my $elt= shift; | ||||
6447 |
2
|
8
|
my $dest= $elt->{next_sibling} or return undef; | ||||
6448 |
1
|
3
|
return $dest->passes( @_); | ||||
6449 | } | ||||||
6450 | |||||||
6451 | sub prev_elt_text | ||||||
6452 |
2
|
3
|
{ my $elt= shift; | ||||
6453 |
2
|
8
|
my $dest=$elt->prev_elt(@_) or return ''; | ||||
6454 |
1
|
2
|
return $dest->text; | ||||
6455 | } | ||||||
6456 | |||||||
6457 | sub prev_elt_trimmed_text | ||||||
6458 |
2
|
4
|
{ my $elt= shift; | ||||
6459 |
2
|
5
|
my $dest=$elt->prev_elt(@_) or return ''; | ||||
6460 |
1
|
2
|
return $dest->trimmed_text; | ||||
6461 | } | ||||||
6462 | |||||||
6463 | sub prev_elt_matches | ||||||
6464 |
3
|
4
|
{ my $elt= shift; | ||||
6465 |
3
|
5
|
my $dest= $elt->prev_elt or return undef; | ||||
6466 |
2
|
3
|
return $dest->passes( @_); | ||||
6467 | } | ||||||
6468 | |||||||
6469 | sub next_elt_text | ||||||
6470 |
2
|
2
|
{ my $elt= shift; | ||||
6471 |
2
|
3
|
my $dest=$elt->next_elt(@_) or return ''; | ||||
6472 |
1
|
2
|
return $dest->text; | ||||
6473 | } | ||||||
6474 | |||||||
6475 | sub next_elt_trimmed_text | ||||||
6476 |
2
|
4
|
{ my $elt= shift; | ||||
6477 |
2
|
6
|
my $dest=$elt->next_elt(@_) or return ''; | ||||
6478 |
1
|
1
|
return $dest->trimmed_text; | ||||
6479 | } | ||||||
6480 | |||||||
6481 | sub next_elt_matches | ||||||
6482 |
3
|
5
|
{ my $elt= shift; | ||||
6483 |
3
|
6
|
my $dest= $elt->next_elt or return undef; | ||||
6484 |
2
|
3
|
return $dest->passes( @_); | ||||
6485 | } | ||||||
6486 | |||||||
6487 | sub parent_text | ||||||
6488 |
4
|
7
|
{ my $elt= shift; | ||||
6489 |
4
|
9
|
my $dest=$elt->parent(@_) or return ''; | ||||
6490 |
3
|
8
|
return $dest->text; | ||||
6491 | } | ||||||
6492 | |||||||
6493 | sub parent_trimmed_text | ||||||
6494 |
2
|
3
|
{ my $elt= shift; | ||||
6495 |
2
|
4
|
my $dest=$elt->parent(@_) or return ''; | ||||
6496 |
1
|
2
|
return $dest->trimmed_text; | ||||
6497 | } | ||||||
6498 | |||||||
6499 | sub parent_matches | ||||||
6500 |
8
|
9
|
{ my $elt= shift; | ||||
6501 |
8
|
19
|
my $dest= $elt->{parent} or return undef; | ||||
6502 |
6
|
9
|
return $dest->passes( @_); | ||||
6503 | } | ||||||
6504 | |||||||
6505 | sub is_first_child | ||||||
6506 |
9
|
14
|
{ my $elt= shift; | ||||
6507 |
9
|
20
|
my $parent= $elt->{parent} or return 0; | ||||
6508 |
8
|
13
|
my $first_child= $parent->first_child( @_) or return 0; | ||||
6509 |
7
|
23
|
return ($first_child == $elt) ? $elt : 0; | ||||
6510 | } | ||||||
6511 | |||||||
6512 | sub is_last_child | ||||||
6513 |
9
|
25
|
{ my $elt= shift; | ||||
6514 |
9
|
19
|
my $parent= $elt->{parent} or return 0; | ||||
6515 |
8
|
15
|
my $last_child= $parent->last_child( @_) or return 0; | ||||
6516 |
7
|
31
|
return ($last_child == $elt) ? $elt : 0; | ||||
6517 | } | ||||||
6518 | |||||||
6519 | # returns the depth level of the element | ||||||
6520 | # if 2 parameter are used then counts the 2cd element name in the | ||||||
6521 | # ancestors list | ||||||
6522 | sub level | ||||||
6523 |
726
|
526
|
{ my( $elt, $cond)= @_; | ||||
6524 |
726
|
393
|
my $level=0; | ||||
6525 |
726
|
807
|
my $name=shift || ''; | ||||
6526 |
726
973
|
875
1998
|
while( $elt= $elt->{parent}) { $level++ if( !$cond || $elt->matches( $cond)); } | ||||
6527 |
726
|
762
|
return $level; | ||||
6528 | } | ||||||
6529 | |||||||
6530 | # checks whether $elt has an ancestor that satisfies $cond, returns the ancestor | ||||||
6531 | sub in_context | ||||||
6532 |
18
|
23
|
{ my ($elt, $cond, $level)= @_; | ||||
6533 |
18
|
31
|
$level= -1 unless( $level) ; # $level-- will never hit 0 | ||||
6534 | |||||||
6535 |
18
|
28
|
while( $level) | ||||
6536 |
32
|
70
|
{ $elt= $elt->{parent} or return 0; | ||||
6537 |
24
8
|
34
60
|
if( $elt->matches( $cond)) { return $elt; } | ||||
6538 |
16
|
78
|
$level--; | ||||
6539 | } | ||||||
6540 |
2
|
6
|
return 0; | ||||
6541 | } | ||||||
6542 | |||||||
6543 | sub _descendants | ||||||
6544 |
47
|
119
|
{ my( $subtree_root, $include_self)= @_; | ||||
6545 |
47
|
58
|
my @descendants= $include_self ? ($subtree_root) : (); | ||||
6546 | |||||||
6547 |
47
|
24
|
my $elt= $subtree_root; | ||||
6548 |
47
|
33
|
my $next_elt; | ||||
6549 | |||||||
6550 |
47
|
18
|
MAIN: while( 1) | ||||
6551 |
125
|
155
|
{ if( $next_elt= $elt->{first_child}) | ||||
6552 | { # simplest case: the elt has a child | ||||||
6553 | } | ||||||
6554 | elsif( $next_elt= $elt->{next_sibling}) | ||||||
6555 | { # no child but a next sibling (just check we stay within the subtree) | ||||||
6556 | |||||||
6557 | # case where elt is subtree_root, is empty and has a sibling | ||||||
6558 |
9
|
17
|
last MAIN if( $elt == $subtree_root); | ||||
6559 | } | ||||||
6560 | else | ||||||
6561 | { # case where the element has no child and no next sibling: | ||||||
6562 | # get the first next sibling of an ancestor, checking subtree_root | ||||||
6563 | |||||||
6564 | # case where elt is subtree_root, is empty and has no sibling | ||||||
6565 |
57
|
71
|
last MAIN if( $elt == $subtree_root); | ||||
6566 | |||||||
6567 | # backtrack until we find a parent with a next sibling | ||||||
6568 |
40
|
45
|
$next_elt= $elt->{parent} || last; | ||||
6569 |
40
|
42
|
until( $next_elt->{next_sibling}) | ||||
6570 |
37
|
69
|
{ last MAIN if( $subtree_root == $next_elt); | ||||
6571 |
19
|
35
|
$next_elt= $next_elt->{parent} || last MAIN; | ||||
6572 | } | ||||||
6573 |
22
|
24
|
last MAIN if( $subtree_root == $next_elt); | ||||
6574 |
16
|
12
|
$next_elt= $next_elt->{next_sibling}; | ||||
6575 | } | ||||||
6576 |
78
|
79
|
$elt= $next_elt || last MAIN; | ||||
6577 |
78
|
41
|
push @descendants, $elt; | ||||
6578 | } | ||||||
6579 |
47
|
76
|
return @descendants; | ||||
6580 | } | ||||||
6581 | |||||||
6582 | |||||||
6583 | sub descendants | ||||||
6584 |
266
|
240
|
{ my( $subtree_root, $cond)= @_; | ||||
6585 |
266
|
211
|
my @descendants=(); | ||||
6586 |
266
|
250
|
my $elt= $subtree_root; | ||||
6587 | |||||||
6588 | # this branch is pure optimization for speed: if $cond is a gi replace it | ||||||
6589 | # by the index of the gi and loop here | ||||||
6590 | # start optimization | ||||||
6591 |
266
|
160
|
my $ind; | ||||
6592 |
266
|
631
|
if( !$cond || ( defined ( $ind= $XML::Twig::gi2index{$cond})) ) | ||||
6593 | { | ||||||
6594 |
160
|
101
|
my $next_elt; | ||||
6595 | |||||||
6596 |
160
|
105
|
while( 1) | ||||
6597 |
1794
|
2073
|
{ if( $next_elt= $elt->{first_child}) | ||||
6598 | { # simplest case: the elt has a child | ||||||
6599 | } | ||||||
6600 | elsif( $next_elt= $elt->{next_sibling}) | ||||||
6601 | { # no child but a next sibling (just check we stay within the subtree) | ||||||
6602 | |||||||
6603 | # case where elt is subtree_root, is empty and has a sibling | ||||||
6604 |
234
|
530
|
last if( $subtree_root && ($elt == $subtree_root)); | ||||
6605 | } | ||||||
6606 | else | ||||||
6607 | { # case where the element has no child and no next sibling: | ||||||
6608 | # get the first next sibling of an ancestor, checking subtree_root | ||||||
6609 | |||||||
6610 | # case where elt is subtree_root, is empty and has no sibling | ||||||
6611 |
626
|
1321
|
last if( $subtree_root && ($elt == $subtree_root)); | ||||
6612 | |||||||
6613 | # backtrack until we find a parent with a next sibling | ||||||
6614 |
623
|
794
|
$next_elt= $elt->{parent} || last undef; | ||||
6615 |
623
|
666
|
until( $next_elt->{next_sibling}) | ||||
6616 |
461
|
983
|
{ last if( $subtree_root && ($subtree_root == $next_elt)); | ||||
6617 |
311
|
555
|
$next_elt= $next_elt->{parent} || last; | ||||
6618 | } | ||||||
6619 |
623
|
1633
|
last if( $subtree_root && ($subtree_root == $next_elt)); | ||||
6620 |
466
|
338
|
$next_elt= $next_elt->{next_sibling}; | ||||
6621 | } | ||||||
6622 |
1634
|
1535
|
$elt= $next_elt || last; | ||||
6623 |
1634
|
3042
|
push @descendants, $elt if( !$cond || ($elt->{gi} eq $ind)); | ||||
6624 | } | ||||||
6625 | } | ||||||
6626 | else | ||||||
6627 | # end optimization | ||||||
6628 | { # branch for a complex condition: use the regular (slow but simple) way | ||||||
6629 |
106
|
149
|
while( $elt= $elt->next_elt( $subtree_root, $cond)) | ||||
6630 |
202
|
275
|
{ push @descendants, $elt; } | ||||
6631 | } | ||||||
6632 |
266
|
1771
|
return @descendants; | ||||
6633 | } | ||||||
6634 | |||||||
6635 | |||||||
6636 | sub descendants_or_self | ||||||
6637 |
49
|
47
|
{ my( $elt, $cond)= @_; | ||||
6638 |
49
|
65
|
my @descendants= $elt->passes( $cond) ? ($elt) : (); | ||||
6639 |
49
|
84
|
push @descendants, $elt->descendants( $cond); | ||||
6640 |
49
|
124
|
return @descendants; | ||||
6641 | } | ||||||
6642 | |||||||
6643 | sub sibling | ||||||
6644 |
9
|
16
|
{ my $elt= shift; | ||||
6645 |
9
|
8
|
my $nb= shift; | ||||
6646 |
9
|
21
|
if( $nb > 0) | ||||
6647 |
4
|
9
|
{ foreach( 1..$nb) | ||||
6648 |
4
|
12
|
{ $elt= $elt->next_sibling( @_) or return undef; } | ||||
6649 | } | ||||||
6650 | elsif( $nb < 0) | ||||||
6651 |
3
|
5
|
{ foreach( 1..(-$nb)) | ||||
6652 |
3
|
8
|
{ $elt= $elt->prev_sibling( @_) or return undef; } | ||||
6653 | } | ||||||
6654 | else # $nb == 0 | ||||||
6655 |
2
|
4
|
{ return $elt->passes( $_[0]); } | ||||
6656 |
4
|
8
|
return $elt; | ||||
6657 | } | ||||||
6658 | |||||||
6659 | sub sibling_text | ||||||
6660 |
3
|
7
|
{ my $elt= sibling( @_); | ||||
6661 |
3
|
12
|
return $elt ? $elt->text : undef; | ||||
6662 | } | ||||||
6663 | |||||||
6664 | |||||||
6665 | sub child | ||||||
6666 |
43
|
40
|
{ my $elt= shift; | ||||
6667 |
43
|
32
|
my $nb= shift; | ||||
6668 |
43
|
48
|
if( $nb >= 0) | ||||
6669 |
27
|
39
|
{ $elt= $elt->first_child( @_) or return undef; | ||||
6670 |
20
|
30
|
foreach( 1..$nb) | ||||
6671 |
20
|
22
|
{ $elt= $elt->next_sibling( @_) or return undef; } | ||||
6672 | } | ||||||
6673 | else | ||||||
6674 |
16
|
21
|
{ $elt= $elt->last_child( @_) or return undef; | ||||
6675 |
13
|
19
|
foreach( 2..(-$nb)) | ||||
6676 |
16
|
20
|
{ $elt= $elt->prev_sibling( @_) or return undef; } | ||||
6677 | } | ||||||
6678 |
31
|
359
|
return $elt; | ||||
6679 | } | ||||||
6680 | |||||||
6681 | sub prev_siblings | ||||||
6682 |
24
|
17
|
{ my $elt= shift; | ||||
6683 |
24
|
19
|
my @siblings=(); | ||||
6684 |
24
|
24
|
while( $elt= $elt->prev_sibling( @_)) | ||||
6685 |
12
|
14
|
{ unshift @siblings, $elt; } | ||||
6686 |
24
|
73
|
return @siblings; | ||||
6687 | } | ||||||
6688 | |||||||
6689 | sub siblings | ||||||
6690 |
9
|
12
|
{ my $elt= shift; | ||||
6691 |
9
24
|
13
43
|
return grep { $_ ne $elt } $elt->{parent}->children( @_); | ||||
6692 | } | ||||||
6693 | |||||||
6694 | sub pos | ||||||
6695 |
17
|
26
|
{ my $elt= shift; | ||||
6696 |
17
|
29
|
return 0 if ($_[0] && !$elt->matches( @_)); | ||||
6697 |
9
|
7
|
my $pos=1; | ||||
6698 |
9
|
11
|
$pos++ while( $elt= $elt->prev_sibling( @_)); | ||||
6699 |
9
|
8
|
return $pos; | ||||
6700 | } | ||||||
6701 | |||||||
6702 | |||||||
6703 | sub next_siblings | ||||||
6704 |
6
|
6
|
{ my $elt= shift; | ||||
6705 |
6
|
6
|
my @siblings=(); | ||||
6706 |
6
|
8
|
while( $elt= $elt->next_sibling( @_)) | ||||
6707 |
8
|
10
|
{ push @siblings, $elt; } | ||||
6708 |
6
|
42
|
return @siblings; | ||||
6709 | } | ||||||
6710 | |||||||
6711 | |||||||
6712 | # used by get_xpath: parses the xpath expression and generates a sub that performs the | ||||||
6713 | # search | ||||||
6714 | { my %axis2method; | ||||||
6715 |
187
|
639425
|
BEGIN { %axis2method= ( child => 'children', | ||||
6716 | descendant => 'descendants', | ||||||
6717 | 'descendant-or-self' => 'descendants_or_self', | ||||||
6718 | parent => 'parent_is', | ||||||
6719 | ancestor => 'ancestors', | ||||||
6720 | 'ancestor-or-self' => 'ancestors_or_self', | ||||||
6721 | 'following-sibling' => 'next_siblings', | ||||||
6722 | 'preceding-sibling' => 'prev_siblings', | ||||||
6723 | following => 'following_elts', | ||||||
6724 | preceding => 'preceding_elts', | ||||||
6725 | self => '_self', | ||||||
6726 | ); | ||||||
6727 | } | ||||||
6728 | |||||||
6729 | sub _install_xpath | ||||||
6730 |
189
|
180
|
{ my( $xpath_exp, $type)= @_; | ||||
6731 |
189
|
174
|
my $original_exp= $xpath_exp; | ||||
6732 |
189
|
147
|
my $sub= 'my $elt= shift; my @results;'; | ||||
6733 | |||||||
6734 | # grab the root if expression starts with a / | ||||||
6735 |
189
|
601
|
if( $xpath_exp=~ s{^/}{}) | ||||
6736 |
163
|
169
|
{ $sub .= '@results= ($elt->twig) || croak "cannot use an XPath query starting with a / on a node not attached to a whole twig";'; } | ||||
6737 | elsif( $xpath_exp=~ s{^\./}{}) | ||||||
6738 |
18
|
25
|
{ $sub .= '@results= ($elt);'; } | ||||
6739 | else | ||||||
6740 |
8
|
7
|
{ $sub .= '@results= ($elt);'; } | ||||
6741 | |||||||
6742 | |||||||
6743 | #warn "xpath_exp= '$xpath_exp'\n"; | ||||||
6744 | |||||||
6745 |
189
|
4661
|
while( $xpath_exp && | ||||
6746 | $xpath_exp=~s{^\s*(/?) | ||||||
6747 | # the xxx=~/regexp/ is a pain as it includes / | ||||||
6748 | (\s*(?:(?:($REG_AXIS)::)?(\*|$REG_TAG_PART|\.\.|\.)\s*)?($REG_PREDICATE_ALT*) | ||||||
6749 | ) | ||||||
6750 | (/|$)}{}xo) | ||||||
6751 | |||||||
6752 |
312
|
12784
|
{ my( $wildcard, $sub_exp, $axis, $gi, $predicates)= ($1, $2, $3, $4, $5); | ||||
6753 |
312
|
469
|
if( $axis && ! $gi) | ||||
6754 |
0
|
0
|
{ _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp"); } | ||||
6755 | |||||||
6756 | # grab a parent | ||||||
6757 |
312
|
520
|
if( $sub_exp eq '..') | ||||
6758 |
4
|
9
|
{ _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp") if( $wildcard); | ||||
6759 |
4
|
32
|
$sub .= '@results= map { $_->{parent}} @results;'; | ||||
6760 | } | ||||||
6761 | # test the element itself | ||||||
6762 | elsif( $sub_exp=~ m{^\.(.*)$}s) | ||||||
6763 |
1
|
3
|
{ $sub .= "\@results= grep { \$_->matches( q{$1}) } \@results;" } | ||||
6764 | # grab children | ||||||
6765 | else | ||||||
6766 | { | ||||||
6767 |
307
|
343
|
if( !$axis) | ||||
6768 |
274
|
315
|
{ $axis= $wildcard ? 'descendant' : 'child'; } | ||||
6769 |
307
81
|
731
67
|
if( !$gi or $gi eq '*') { $gi=''; } | ||||
6770 |
307
|
172
|
my $function; | ||||
6771 | |||||||
6772 | # "special" predicates, that return just one element | ||||||
6773 |
307
|
1143
|
if( $predicates && ($predicates =~ m{^\s*\[\s*((-\s*)?\d+)\s*\]\s*$})) | ||||
6774 | { # [<nb>] | ||||||
6775 |
30
|
36
|
my $offset= $1; | ||||
6776 |
30
|
51
|
$offset-- if( $offset > 0); | ||||
6777 |
30
|
75
|
$function= $axis eq 'descendant' ? "next_n_elt( $offset, '$gi')" | ||||
6778 | : $axis eq 'child' ? "child( $offset, '$gi')" | ||||||
6779 | : _croak_and_doublecheck_xpath( $original_exp, "error [$1] not supported along axis '$axis'") | ||||||
6780 | ; | ||||||
6781 |
30
|
138
|
$sub .= "\@results= grep { \$_ } map { \$_->$function } \@results;" | ||||
6782 | } | ||||||
6783 | elsif( $predicates && ($predicates =~ m{^\s*\[\s*last\s*\(\s*\)\s*\]\s*$}) ) | ||||||
6784 | { # last() | ||||||
6785 |
3
|
4
|
_croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp, usage of // and last() not supported") if( $wildcard); | ||||
6786 |
3
|
8
|
$sub .= "\@results= map { \$_->last_child( '$gi') } \@results;"; | ||||
6787 | } | ||||||
6788 | else | ||||||
6789 | { # follow the axis | ||||||
6790 | #warn "axis: '$axis' - method: '$axis2method{$axis}' - gi: '$gi'\n"; | ||||||
6791 | |||||||
6792 |
274
|
408
|
my $follow_axis= " \$_->$axis2method{$axis}( '$gi')"; | ||||
6793 |
274
|
190
|
my $step= $follow_axis; | ||||
6794 | |||||||
6795 | # now filter using the predicate | ||||||
6796 |
274
|
1623
|
while( $predicates=~ s{^\s*($REG_PREDICATE_ALT)\s*}{}o) | ||||
6797 |
143
|
182
|
{ my $pred= $1; | ||||
6798 |
143
|
197
|
$pred=~ s{^\s*\[\s*}{}; | ||||
6799 |
143
|
244
|
$pred=~ s{\s*\]\s*$}{}; | ||||
6800 |
143
|
102
|
my $test=""; | ||||
6801 |
143
|
82
|
my $pos; | ||||
6802 |
143
|
266
|
if( $pred=~ m{^(-?\s*\d+)$}) | ||||
6803 |
18
|
12
|
{ my $pos= $1; | ||||
6804 |
18
|
49
|
if( $step=~ m{^\s*grep(.*) (\$_->\w+\(\s*'[^']*'\s*\))}) | ||||
6805 |
16
|
46
|
{ $step= "XML::Twig::_first_n $1 $pos, $2"; } | ||||
6806 | else | ||||||
6807 |
2
2
|
6
1
|
{ if( $pos > 0) { $pos--; } | ||||
6808 |
2
|
23
|
$step= "($step)[$pos]"; | ||||
6809 | } | ||||||
6810 | #warn "number predicate '$pos' - generated step '$step'\n"; | ||||||
6811 | } | ||||||
6812 | else | ||||||
6813 |
125
|
92
|
{ my $syntax_error=0; | ||||
6814 | do | ||||||
6815 |
125
165
|
89
883
|
{ if( $pred =~ s{^string\(\s*\)\s*=\s*($REG_STRING)\s*}{}o) # string()="string" pred | ||||
6816 |
7
|
16
|
{ $test .= "\$_->text eq $1"; } | ||||
6817 | elsif( $pred =~ s{^string\(\s*\)\s*!=\s*($REG_STRING)\s*}{}o) # string()!="string" pred | ||||||
6818 |
1
|
4
|
{ $test .= "\$_->text ne $1"; } | ||||
6819 |
165
|
3026
|
if( $pred =~ s{^string\(\s*\)\s*=\s*($REG_NUMBER)\s*}{}o) # string()=<number> pred | ||||
6820 |
1
|
7
|
{ $test .= "\$_->text eq $1"; } | ||||
6821 | elsif( $pred =~ s{^string\(\s*\)\s*!=\s*($REG_NUMBER)\s*}{}o) # string()!=<number> pred | ||||||
6822 |
1
|
7
|
{ $test .= "\$_->text ne $1"; } | ||||
6823 | elsif( $pred =~ s{^string\(\s*\)\s*(>|<|>=|<=)\s*($REG_NUMBER)\s*}{}o) # string()!=<number> pred | ||||||
6824 |
4
|
25
|
{ $test .= "\$_->text $1 $2"; } | ||||
6825 | |||||||
6826 | elsif( $pred =~ s{^string\(\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)\s*}{}o) # string()=~/regex/ pred | ||||||
6827 |
10
|
17
|
{ my( $match, $regexp)= ($1, $2); | ||||
6828 |
10
|
36
|
$test .= "\$_->text $match $regexp"; | ||||
6829 | } | ||||||
6830 | elsif( $pred =~ s{^string\(\s*\)\s*}{}o) # string() pred | ||||||
6831 |
1
|
5
|
{ $test .= "\$_->text"; } | ||||
6832 | elsif( $pred=~ s{^@($REG_TAG_NAME)\s*($REG_OP)\s*($REG_STRING|$REG_NUMBER)}{}o) # @att="val" pred | ||||||
6833 |
82
|
94
|
{ my( $att, $oper, $val)= ($1, _op( $2), $3); | ||||
6834 |
82
|
316
|
$test .= qq{((defined \$_->{'att'}->{"$att"}) && (\$_->{'att'}->{"$att"} $oper $val))}; | ||||
6835 | } | ||||||
6836 | elsif( $pred =~ s{^@($REG_TAG_NAME)\s*($REG_MATCH)\s*($REG_REGEXP)\s*}{}o) # @att=~/regex/ pred XXX | ||||||
6837 |
12
|
18
|
{ my( $att, $match, $regexp)= ($1, $2, $3); | ||||
6838 |
12
|
43
|
$test .= qq{((defined \$_->{'att'}->{"$att"}) && (\$_->{'att'}->{"$att"} $match $regexp))};; | ||||
6839 | } | ||||||
6840 | elsif( $pred=~ s{^@($REG_TAG_NAME)\s*}{}o) # @att pred | ||||||
6841 |
17
|
64
|
{ $test .= qq{(defined \$_->{'att'}->{"$1"})}; } | ||||
6842 | elsif( $pred=~ s{^\s*(?:not|!)\s*@($REG_TAG_NAME)\s*}{}o) # not @att pred | ||||||
6843 |
6
|
22
|
{ $test .= qq{((\$_->is_elt) && (not defined \$_->{'att'}->{"$1"}))}; } | ||||
6844 | elsif( $pred=~ s{^\s*([()])}{}) # ( or ) (just add to the test) | ||||||
6845 |
2
|
6
|
{ $test .= qq{$1}; } | ||||
6846 | elsif( $pred=~ s{^\s*(and|or)\s*}{}) | ||||||
6847 |
19
|
76
|
{ $test .= lc " $1 "; } | ||||
6848 | else | ||||||
6849 |
10
|
32
|
{ $syntax_error=1; } | ||||
6850 | |||||||
6851 | } while( !$syntax_error && $pred); | ||||||
6852 |
125
|
136
|
_croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp at $pred") if( $pred); | ||||
6853 |
123
|
291
|
$step= " grep { $test } $step "; | ||||
6854 | } | ||||||
6855 | } | ||||||
6856 | #warn "step: '$step'"; | ||||||
6857 |
272
|
1201
|
$sub .= "\@results= grep { \$_ } map { $step } \@results;"; | ||||
6858 | } | ||||||
6859 | } | ||||||
6860 | } | ||||||
6861 | |||||||
6862 |
187
|
211
|
if( $xpath_exp) | ||||
6863 |
2
|
6
|
{ _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp around $xpath_exp"); } | ||||
6864 | |||||||
6865 |
185
|
140
|
$sub .= q{return XML::Twig::_unique_elts( @results); }; | ||||
6866 | #warn "generated: '$sub'\n"; | ||||||
6867 |
185
4
4
4
7
7
7
7
7
7
9
9
9
7
7
7
6
6
6
3
3
3
3
3
3
3
3
3
3
3
3
2
2
2
2
2
2
|
9229
13
6
520
21
8
548
23
8
638
27
9
659
25
5
674
19
7
659
10
4
489
13
4
463
13
4
450
12
5
445
9
3
348
9
4
353
|
my $s= eval "sub { $NO_WARNINGS; $sub }"; | ||||
6868 |
185
|
342
|
if( $@) | ||||
6869 |
0
|
0
|
{ _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp ($@);") } | ||||
6870 |
185
|
572
|
return( $s); | ||||
6871 | } | ||||||
6872 | } | ||||||
6873 | |||||||
6874 | sub _croak_and_doublecheck_xpath | ||||||
6875 |
4
|
6
|
{ my $xpath_expression= shift; | ||||
6876 |
4
|
6
|
my $mess= join( "\n", @_); | ||||
6877 |
4
|
15
|
if( $XML::Twig::XPath::VERSION || 0) | ||||
6878 |
1
|
3
|
{ my $check_twig= XML::Twig::XPath->new; | ||||
6879 |
1
1
|
2
2
|
if( eval { $check_twig->{twig_xp}->_parse( $xpath_expression) }) | ||||
6880 |
1
|
855
|
{ $mess .= "\nthe expression is a valid XPath statement, and you are using XML::Twig::XPath, but" | ||||
6881 | . "\nyou are using either 'find_nodes' or 'get_xpath' where the method you likely wanted" | ||||||
6882 | . "\nto use is 'findnodes', which is the only one that uses the full XPath engine\n"; | ||||||
6883 | } | ||||||
6884 | } | ||||||
6885 |
4
|
482
|
croak $mess; | ||||
6886 | } | ||||||
6887 | |||||||
6888 | |||||||
6889 | |||||||
6890 | { # extremely elaborate caching mechanism | ||||||
6891 | my %xpath; # xpath_expression => subroutine_code; | ||||||
6892 | sub get_xpath | ||||||
6893 |
198
|
231
|
{ my( $elt, $xpath_exp, $offset)= @_; | ||||
6894 |
198
|
586
|
my $sub= ($xpath{$xpath_exp} ||= _install_xpath( $xpath_exp)); | ||||
6895 |
194
|
3358
|
return $sub->( $elt) unless( defined $offset); | ||||
6896 |
13
|
250
|
my @res= $sub->( $elt); | ||||
6897 |
13
|
31
|
return $res[$offset]; | ||||
6898 | } | ||||||
6899 | } | ||||||
6900 | |||||||
6901 | |||||||
6902 | sub findvalues | ||||||
6903 |
4
|
3
|
{ my $elt= shift; | ||||
6904 |
4
6
|
10
11
|
return map { $_->text } $elt->get_xpath( @_); | ||||
6905 | } | ||||||
6906 | |||||||
6907 | sub findvalue | ||||||
6908 |
17
|
9
|
{ my $elt= shift; | ||||
6909 |
17
17
|
23
23
|
return join '', map { $_->text } $elt->get_xpath( @_); | ||||
6910 | } | ||||||
6911 | |||||||
6912 | |||||||
6913 | # XML::XPath compatibility | ||||||
6914 |
1
|
3
|
sub getElementById { return $_[0]->twig->elt_id( $_[1]); } | ||||
6915 |
1682
1682
1682
1682
1682
1790
1790
1682
1682
|
86064
1118
1114
1124
1786
1096
1975
1675
2910
|
sub getChildNodes { my @children= do { my $elt= $_[0]; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; return wantarray ? @children : \@children; } | ||||
6916 | |||||||
6917 |
3105
|
4327
|
sub _flushed { return $_[0]->{flushed}; } | ||||
6918 |
227
|
405
|
sub _set_flushed { $_[0]->{flushed}=1; } | ||||
6919 |
1
|
3
|
sub _del_flushed { delete $_[0]->{flushed}; } | ||||
6920 | |||||||
6921 | sub cut | ||||||
6922 |
3694
|
2407
|
{ my $elt= shift; | ||||
6923 |
3694
|
2113
|
my( $parent, $prev_sibling, $next_sibling); | ||||
6924 |
3694
|
2762
|
$parent= $elt->{parent}; | ||||
6925 |
3694
|
7789
|
my $a= $elt->{'att'}->{'a'} || 'na'; | ||||
6926 |
3694
|
6364
|
if( ! $parent && $elt->is_elt) | ||||
6927 | { # are we cutting the root? | ||||||
6928 |
3046
|
2221
|
my $t= $elt->{twig}; | ||||
6929 |
3046
|
6325
|
if( $t && ! $t->{twig_parsing}) | ||||
6930 |
2871
|
2956
|
{ delete $t->{twig_root}; | ||||
6931 |
2871
|
2314
|
delete $elt->{twig}; | ||||
6932 |
2871
|
2745
|
return $elt; | ||||
6933 | } # cutt`ing the root | ||||||
6934 | else | ||||||
6935 |
175
|
181
|
{ return; } # cutting an orphan, returning $elt would break backward compatibility | ||||
6936 | } | ||||||
6937 | |||||||
6938 | # save the old links, that'll make it easier for some loops | ||||||
6939 |
648
|
668
|
foreach my $link ( qw(parent prev_sibling next_sibling) ) | ||||
6940 |
1944
|
2638
|
{ $elt->{former}->{$link}= $elt->{$link}; | ||||
6941 |
1944
1938
|
1963
2872
|
if( $XML::Twig::weakrefs) { weaken( $elt->{former}->{$link}); } | ||||
6942 | } | ||||||
6943 | |||||||
6944 | # if we cut the current element then its parent becomes the current elt | ||||||
6945 |
648
|
1331
|
if( $elt->{twig_current}) | ||||
6946 |
57
|
41
|
{ my $twig_current= $elt->{parent}; | ||||
6947 |
57
|
72
|
$elt->twig->{twig_current}= $twig_current; | ||||
6948 |
57
|
60
|
$twig_current->{'twig_current'}=1; | ||||
6949 |
57
|
69
|
delete $elt->{'twig_current'}; | ||||
6950 | } | ||||||
6951 | |||||||
6952 |
648
|
2132
|
if( $parent->{first_child} && $parent->{first_child} == $elt) | ||||
6953 |
478
|
453
|
{ $parent->{first_child}= $elt->{next_sibling}; | ||||
6954 | # cutting can make the parent empty | ||||||
6955 |
478
376
|
647
326
|
if( ! $parent->{first_child}) { $parent->{empty}= 1; } | ||||
6956 | } | ||||||
6957 | |||||||
6958 |
648
|
1700
|
if( $parent->{last_child} && $parent->{last_child} == $elt) | ||||
6959 |
453
453
453
452
|
315
403
499
479
|
{ $parent->{empty}=0; $parent->{last_child}=$elt->{prev_sibling}; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; | ||||
6960 | } | ||||||
6961 | |||||||
6962 |
648
|
802
|
if( $prev_sibling= $elt->{prev_sibling}) | ||||
6963 |
168
|
161
|
{ $prev_sibling->{next_sibling}= $elt->{next_sibling}; } | ||||
6964 |
648
|
761
|
if( $next_sibling= $elt->{next_sibling}) | ||||
6965 |
193
193
192
|
181
221
221
|
{ $next_sibling->{prev_sibling}=$elt->{prev_sibling}; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; } | ||||
6966 | |||||||
6967 | |||||||
6968 |
648
648
646
|
546
625
595
|
$elt->{parent}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; | ||||
6969 |
648
648
646
|
515
645
683
|
$elt->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; | ||||
6970 |
648
|
552
|
$elt->{next_sibling}= undef; | ||||
6971 | |||||||
6972 | # merge 2 (now) consecutive text nodes if they are of the same type | ||||||
6973 | # (type can be PCDATA or CDATA) | ||||||
6974 |
648
|
1154
|
if( $prev_sibling && $next_sibling && $prev_sibling->is_text && ( $XML::Twig::index2gi[$prev_sibling->{'gi'}] eq $XML::Twig::index2gi[$next_sibling->{'gi'}])) | ||||
6975 |
9
|
15
|
{ $prev_sibling->merge_text( $next_sibling); } | ||||
6976 | |||||||
6977 |
648
|
647
|
return $elt; | ||||
6978 | } | ||||||
6979 | |||||||
6980 | |||||||
6981 |
4
|
9
|
sub former_next_sibling { return $_[0]->{former}->{next_sibling}; } | ||||
6982 |
3
|
6
|
sub former_prev_sibling { return $_[0]->{former}->{prev_sibling}; } | ||||
6983 |
3
|
10
|
sub former_parent { return $_[0]->{former}->{parent}; } | ||||
6984 | |||||||
6985 | sub cut_children | ||||||
6986 |
25
|
29
|
{ my( $elt, $exp)= @_; | ||||
6987 |
25
|
43
|
my @children= $elt->children( $exp); | ||||
6988 |
25
45
|
33
47
|
foreach (@children) { $_->cut; } | ||||
6989 |
25
23
|
37
19
|
if( ! $elt->has_children) { $elt->{empty}= 1; } | ||||
6990 |
25
|
72
|
return @children; | ||||
6991 | } | ||||||
6992 | |||||||
6993 | sub cut_descendants | ||||||
6994 |
3
|
5
|
{ my( $elt, $exp)= @_; | ||||
6995 |
3
|
12
|
my @descendants= $elt->descendants( $exp); | ||||
6996 |
3
4
|
4
5
|
foreach ($elt->descendants( $exp)) { $_->cut; } | ||||
6997 |
3
1
|
5
1
|
if( ! $elt->has_children) { $elt->{empty}= 1; } | ||||
6998 |
3
|
12
|
return @descendants; | ||||
6999 | } | ||||||
7000 | |||||||
7001 | |||||||
7002 | |||||||
7003 | sub erase | ||||||
7004 |
93
|
111
|
{ my $elt= shift; | ||||
7005 | #you cannot erase the current element | ||||||
7006 |
93
|
125
|
if( $elt->{twig_current}) | ||||
7007 |
1
|
73
|
{ croak "trying to erase an element before it has been completely parsed"; } | ||||
7008 |
92
|
112
|
unless( $elt->{parent}) | ||||
7009 | { # trying to erase the root (of a twig or of a cut/new element) | ||||||
7010 |
6
6
6
6
6
7
7
6
|
6
6
7
6
12
7
11
9
|
my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; | ||||
7011 |
6
|
15
|
unless( @children == 1) | ||||
7012 |
1
|
78
|
{ croak "can only erase an element with no parent if it has a single child"; } | ||||
7013 |
5
|
8
|
$elt->_move_extra_data_after_erase; | ||||
7014 |
5
|
5
|
my $child= shift @children; | ||||
7015 |
5
5
4
|
196
10
6
|
$child->{parent}=undef; if( $XML::Twig::weakrefs) { weaken( $child->{parent});} ; | ||||
7016 |
5
|
7
|
my $twig= $elt->twig; | ||||
7017 |
5
|
8
|
$twig->set_root( $child); | ||||
7018 | } | ||||||
7019 | else | ||||||
7020 | { # normal case | ||||||
7021 |
86
|
102
|
$elt->_move_extra_data_after_erase; | ||||
7022 |
86
86
86
86
86
106
106
86
|
50
55
67
59
98
63
128
103
|
my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; | ||||
7023 |
86
|
98
|
if( @children) | ||||
7024 | { # elt has children, move them up | ||||||
7025 | |||||||
7026 |
71
|
57
|
my $first_child= $elt->{first_child}; | ||||
7027 |
71
|
50
|
my $prev_sibling=$elt->{prev_sibling}; | ||||
7028 |
71
|
67
|
if( $prev_sibling) | ||||
7029 | { # connect first child to previous sibling | ||||||
7030 |
21
21
20
|
19
28
21
|
$first_child->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $first_child->{prev_sibling});} ; | ||||
7031 |
21
|
18
|
$prev_sibling->{next_sibling}= $first_child; | ||||
7032 | } | ||||||
7033 | else | ||||||
7034 | { # elt was the first child | ||||||
7035 |
50
|
75
|
$elt->{parent}->set_first_child( $first_child); | ||||
7036 | } | ||||||
7037 | |||||||
7038 |
71
|
48
|
my $last_child= $elt->{last_child}; | ||||
7039 |
71
|
50
|
my $next_sibling= $elt->{next_sibling}; | ||||
7040 |
71
|
68
|
if( $next_sibling) | ||||
7041 | { # connect last child to next sibling | ||||||
7042 |
35
|
33
|
$last_child->{next_sibling}= $next_sibling; | ||||
7043 |
35
35
34
|
28
43
46
|
$next_sibling->{prev_sibling}=$last_child; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; | ||||
7044 | } | ||||||
7045 | else | ||||||
7046 | { # elt was the last child | ||||||
7047 |
36
|
45
|
$elt->{parent}->set_last_child( $last_child); | ||||
7048 | } | ||||||
7049 | # update parent for all siblings | ||||||
7050 |
71
|
84
|
foreach my $child (@children) | ||||
7051 |
106
106
104
|
90
104
159
|
{ $child->{parent}=$elt->{parent}; if( $XML::Twig::weakrefs) { weaken( $child->{parent});} ; } | ||||
7052 | |||||||
7053 | # merge consecutive text elements if need be | ||||||
7054 |
71
|
125
|
if( $prev_sibling && $prev_sibling->is_text && ($XML::Twig::index2gi[$first_child->{'gi'}] eq $XML::Twig::index2gi[$prev_sibling->{'gi'}]) ) | ||||
7055 |
12
|
17
|
{ $prev_sibling->merge_text( $first_child); } | ||||
7056 |
71
|
123
|
if( $next_sibling && $next_sibling->is_text && ($XML::Twig::index2gi[$last_child->{'gi'}] eq $XML::Twig::index2gi[$next_sibling->{'gi'}]) ) | ||||
7057 |
10
|
11
|
{ $last_child->merge_text( $next_sibling); } | ||||
7058 | |||||||
7059 | # if parsing and have now a PCDATA text, mark so we can normalize later on if need be | ||||||
7060 |
71
9
|
124
11
|
if( $elt->{parent}->{twig_current} && $elt->{last_child}->is_text) { $elt->{parent}->{twig_to_be_normalized}=1; } | ||||
7061 | |||||||
7062 | # elt is not referenced any more, so it will be DESTROYed | ||||||
7063 | # so we'd better break the links to its children ## FIX | ||||||
7064 |
71
|
53
|
undef $elt->{first_child}; | ||||
7065 |
71
|
48
|
undef $elt->{last_child}; | ||||
7066 |
71
|
44
|
undef $elt->{parent}; | ||||
7067 |
71
|
46
|
undef $elt->{next_sibling}; | ||||
7068 |
71
|
85
|
undef $elt->{prev_sibling}; | ||||
7069 | |||||||
7070 | } | ||||||
7071 | { # elt had no child, delete it | ||||||
7072 |
86
86
|
64
100
|
$elt->delete; | ||||
7073 | } | ||||||
7074 | |||||||
7075 | } | ||||||
7076 |
91
|
97
|
return $elt; | ||||
7077 | |||||||
7078 | } | ||||||
7079 | |||||||
7080 | sub _move_extra_data_after_erase | ||||||
7081 |
91
|
69
|
{ my( $elt)= @_; | ||||
7082 | # extra_data | ||||||
7083 |
91
|
125
|
if( my $extra_data= $elt->{extra_data}) | ||||
7084 |
20
|
29
|
{ my $target= $elt->{first_child} || $elt->{next_sibling}; | ||||
7085 |
20
|
16
|
if( $target) | ||||
7086 | { | ||||||
7087 |
14
|
12
|
if( $target->is( $ELT)) | ||||
7088 |
7
|
10
|
{ $target->set_extra_data( $extra_data . ($target->extra_data || '')); } | ||||
7089 | elsif( $target->is( $TEXT)) | ||||||
7090 |
7
|
9
|
{ $target->_unshift_extra_data_in_pcdata( $extra_data, 0); } # TO CHECK | ||||
7091 | } | ||||||
7092 | else | ||||||
7093 |
6
|
14
|
{ my $parent= $elt->{parent}; # always exists or the erase cannot be performed | ||||
7094 |
6
|
8
|
$parent->_prefix_extra_data_before_end_tag( $extra_data); | ||||
7095 | } | ||||||
7096 | } | ||||||
7097 | |||||||
7098 | # extra_data_before_end_tag | ||||||
7099 |
91
|
112
|
if( my $extra_data= $elt->{extra_data_before_end_tag}) | ||||
7100 |
13
|
17
|
{ if( my $target= $elt->{next_sibling}) | ||||
7101 |
10
|
11
|
{ if( $target->is( $ELT)) | ||||
7102 |
3
|
3
|
{ $target->set_extra_data( $extra_data . ($target->extra_data || '')); } | ||||
7103 | elsif( $target->is( $TEXT)) | ||||||
7104 | { | ||||||
7105 |
7
|
8
|
$target->_unshift_extra_data_in_pcdata( $extra_data, 0); | ||||
7106 | } | ||||||
7107 | } | ||||||
7108 | elsif( my $parent= $elt->{parent}) | ||||||
7109 |
3
|
3
|
{ $parent->_prefix_extra_data_before_end_tag( $extra_data); } | ||||
7110 | } | ||||||
7111 | |||||||
7112 |
91
|
75
|
return $elt; | ||||
7113 | |||||||
7114 | } | ||||||
7115 | BEGIN | ||||||
7116 |
187
|
410826
|
{ my %method= ( before => \&paste_before, | ||||
7117 | after => \&paste_after, | ||||||
7118 | first_child => \&paste_first_child, | ||||||
7119 | last_child => \&paste_last_child, | ||||||
7120 | within => \&paste_within, | ||||||
7121 | ); | ||||||
7122 | |||||||
7123 | # paste elt somewhere around ref | ||||||
7124 | # pos can be first_child (default), last_child, before, after or within | ||||||
7125 | sub paste ## no critic (Subroutines::ProhibitNestedSubs); | ||||||
7126 |
616
|
553
|
{ my $elt= shift; | ||||
7127 |
616
|
748
|
if( $elt->{parent}) | ||||
7128 |
1
|
80
|
{ croak "cannot paste an element that belongs to a tree"; } | ||||
7129 |
615
|
340
|
my $pos; | ||||
7130 |
615
|
328
|
my $ref; | ||||
7131 |
615
|
596
|
if( ref $_[0]) | ||||
7132 |
25
|
21
|
{ $pos= 'first_child'; | ||||
7133 |
25
|
119
|
croak "wrong argument order in paste, should be $_[1] first" if($_[1]); | ||||
7134 | } | ||||||
7135 | else | ||||||
7136 |
590
|
445
|
{ $pos= shift; } | ||||
7137 | |||||||
7138 |
614
|
740
|
if( my $method= $method{$pos}) | ||||
7139 | { | ||||||
7140 |
613
|
1930
|
unless( ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt')) | ||||
7141 |
4
|
8
|
{ if( ! defined( $_[0])) | ||||
7142 |
1
|
69
|
{ croak "missing target in paste"; } | ||||
7143 | elsif( ! ref( $_[0])) | ||||||
7144 |
1
|
71
|
{ croak "wrong target type in paste (not a reference), should be XML::Twig::Elt or a subclass"; } | ||||
7145 | else | ||||||
7146 |
2
|
2
|
{ my $ref= ref $_[0]; | ||||
7147 |
2
|
152
|
croak "wrong target type in paste: '$ref', should be XML::Twig::Elt or a subclass"; | ||||
7148 | } | ||||||
7149 | } | ||||||
7150 |
609
|
377
|
$ref= $_[0]; | ||||
7151 | # check here so error message lists the caller file/line | ||||||
7152 |
609
|
1563
|
if( !$ref->{parent} && ($pos=~ m{^(before|after)$}) && !(exists $elt->{'target'}) && !(exists $elt->{'comment'})) | ||||
7153 |
6
|
482
|
{ croak "cannot paste $1 root"; } | ||||
7154 |
603
|
642
|
$elt->$method( @_); | ||||
7155 | } | ||||||
7156 | else | ||||||
7157 |
1
|
78
|
{ croak "tried to paste in wrong position '$pos', allowed positions " . | ||||
7158 | " are 'first_child', 'last_child', 'before', 'after' and " . | ||||||
7159 | "'within'"; | ||||||
7160 | } | ||||||
7161 |
603
|
900
|
if( (my $ids= $elt->{twig_id_list}) && (my $t= $ref->twig) ) | ||||
7162 |
22
|
39
|
{ $t->{twig_id_list}||={}; | ||||
7163 |
22
|
55
|
foreach my $id (keys %$ids) | ||||
7164 |
24
|
45
|
{ $t->{twig_id_list}->{$id}= $ids->{$id}; | ||||
7165 |
24
24
|
32
66
|
if( $XML::Twig::weakrefs) { weaken( $t->{twig_id_list}->{$id}); } | ||||
7166 | } | ||||||
7167 | } | ||||||
7168 |
603
|
1823
|
return $elt; | ||||
7169 | } | ||||||
7170 | |||||||
7171 | |||||||
7172 | sub paste_before | ||||||
7173 |
16
|
94
|
{ my( $elt, $ref)= @_; | ||||
7174 |
16
|
16
|
my( $parent, $prev_sibling, $next_sibling ); | ||||
7175 | |||||||
7176 | # trying to paste before an orphan (root or detached wlt) | ||||||
7177 |
16
|
34
|
unless( $ref->{parent}) | ||||
7178 |
5
|
15
|
{ if( my $t= $ref->twig) | ||||
7179 |
4
|
17
|
{ if( (exists $elt->{'comment'}) || (exists $elt->{'target'})) # we can still do this | ||||
7180 |
2
2
|
4
2
|
{ $t->_add_cpi_outside_of_root( leading_cpi => $elt); return; } | ||||
7181 | else | ||||||
7182 |
2
|
152
|
{ croak "cannot paste before root"; } | ||||
7183 | } | ||||||
7184 | else | ||||||
7185 |
1
|
251
|
{ croak "cannot paste before an orphan element"; } | ||||
7186 | } | ||||||
7187 |
11
|
12
|
$parent= $ref->{parent}; | ||||
7188 |
11
|
12
|
$prev_sibling= $ref->{prev_sibling}; | ||||
7189 |
11
|
11
|
$next_sibling= $ref; | ||||
7190 | |||||||
7191 |
11
11
11
|
12
19
21
|
$elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; | ||||
7192 |
11
6
|
25
8
|
if( $parent->{first_child} == $ref) { $parent->{first_child}= $elt; } | ||||
7193 | |||||||
7194 |
11
5
|
21
10
|
if( $prev_sibling) { $prev_sibling->{next_sibling}= $elt; } | ||||
7195 |
11
11
11
|
12
15
17
|
$elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; | ||||
7196 | |||||||
7197 |
11
11
11
|
12
16
21
|
$next_sibling->{prev_sibling}=$elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; | ||||
7198 |
11
|
15
|
$elt->{next_sibling}= $ref; | ||||
7199 |
11
|
14
|
return $elt; | ||||
7200 | } | ||||||
7201 | |||||||
7202 | sub paste_after | ||||||
7203 |
283
|
303
|
{ my( $elt, $ref)= @_; | ||||
7204 |
283
|
150
|
my( $parent, $prev_sibling, $next_sibling ); | ||||
7205 | |||||||
7206 | # trying to paste after an orphan (root or detached wlt) | ||||||
7207 |
283
|
312
|
unless( $ref->{parent}) | ||||
7208 |
5
|
8
|
{ if( my $t= $ref->twig) | ||||
7209 |
4
|
17
|
{ if( (exists $elt->{'comment'}) || (exists $elt->{'target'})) # we can still do this | ||||
7210 |
2
2
|
3
2
|
{ $t->_add_cpi_outside_of_root( trailing_cpi => $elt); return; } | ||||
7211 | else | ||||||
7212 |
2
|
202
|
{ croak "cannot paste after root"; } | ||||
7213 | } | ||||||
7214 | else | ||||||
7215 |
1
|
129
|
{ croak "cannot paste after an orphan element"; } | ||||
7216 | } | ||||||
7217 |
278
|
184
|
$parent= $ref->{parent}; | ||||
7218 |
278
|
157
|
$prev_sibling= $ref; | ||||
7219 |
278
|
260
|
$next_sibling= $ref->{next_sibling}; | ||||
7220 | |||||||
7221 |
278
278
278
|
228
305
348
|
$elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; | ||||
7222 |
278
170
170
170
170
|
411
126
123
180
223
|
if( $parent->{last_child}== $ref) { $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } | ||||
7223 | |||||||
7224 |
278
|
239
|
$prev_sibling->{next_sibling}= $elt; | ||||
7225 |
278
278
278
|
218
278
359
|
$elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; | ||||
7226 | |||||||
7227 |
278
108
108
108
|
277
90
111
276
|
if( $next_sibling) { $next_sibling->{prev_sibling}=$elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; } | ||||
7228 |
278
|
305
|
$elt->{next_sibling}= $next_sibling; | ||||
7229 |
278
|
260
|
return $elt; | ||||
7230 | |||||||
7231 | } | ||||||
7232 | |||||||
7233 | sub paste_first_child | ||||||
7234 |
70
|
64
|
{ my( $elt, $ref)= @_; | ||||
7235 |
70
|
41
|
my( $parent, $prev_sibling, $next_sibling ); | ||||
7236 |
70
|
55
|
$parent= $ref; | ||||
7237 |
70
|
58
|
$next_sibling= $ref->{first_child}; | ||||
7238 | |||||||
7239 |
70
70
69
|
60
89
106
|
$elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; | ||||
7240 |
70
|
60
|
$parent->{first_child}= $elt; | ||||
7241 |
70
39
39
39
39
|
164
35
35
66
60
|
unless( $parent->{last_child}) { $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } | ||||
7242 | |||||||
7243 |
70
70
69
|
71
81
76
|
$elt->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; | ||||
7244 | |||||||
7245 |
70
31
31
30
|
91
26
39
46
|
if( $next_sibling) { $next_sibling->{prev_sibling}=$elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; } | ||||
7246 |
70
|
63
|
$elt->{next_sibling}= $next_sibling; | ||||
7247 |
70
|
66
|
return $elt; | ||||
7248 | } | ||||||
7249 | |||||||
7250 | sub paste_last_child | ||||||
7251 |
3537
|
2423
|
{ my( $elt, $ref)= @_; | ||||
7252 |
3537
|
1927
|
my( $parent, $prev_sibling, $next_sibling ); | ||||
7253 |
3537
|
2041
|
$parent= $ref; | ||||
7254 |
3537
|
2313
|
$prev_sibling= $ref->{last_child}; | ||||
7255 | |||||||
7256 |
3537
3537
3531
|
2757
3366
4501
|
$elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; | ||||
7257 |
3537
3537
3537
3531
|
2333
2652
3135
3983
|
$parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; | ||||
7258 |
3537
2060
|
3780
1922
|
unless( $parent->{first_child}) { $parent->{first_child}= $elt; } | ||||
7259 | |||||||
7260 |
3537
3537
3531
|
2539
3245
2832
|
$elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; | ||||
7261 |
3537
1477
|
3184
1018
|
if( $prev_sibling) { $prev_sibling->{next_sibling}= $elt; } | ||||
7262 | |||||||
7263 |
3537
|
2672
|
$elt->{next_sibling}= undef; | ||||
7264 |
3537
|
2556
|
return $elt; | ||||
7265 | } | ||||||
7266 | |||||||
7267 | sub paste_within | ||||||
7268 |
2
|
4
|
{ my( $elt, $ref, $offset)= @_; | ||||
7269 |
2
|
2
|
my $text= $ref->is_text ? $ref : $ref->next_elt( $TEXT, $ref); | ||||
7270 |
2
|
3
|
my $new= $text->split_at( $offset); | ||||
7271 |
2
|
5
|
$elt->paste_before( $new); | ||||
7272 |
2
|
2
|
return $elt; | ||||
7273 | } | ||||||
7274 | } | ||||||
7275 | |||||||
7276 | # load an element into a structure similar to XML::Simple's | ||||||
7277 | sub simplify | ||||||
7278 |
122
|
87
|
{ my $elt= shift; | ||||
7279 | |||||||
7280 | # normalize option names | ||||||
7281 |
122
|
183
|
my %options= @_; | ||||
7282 |
122
118
|
276
141
|
%options= map { my ($key, $val)= ($_, $options{$_}); | ||||
7283 |
118
|
399
|
$key=~ s{(\w)([A-Z])}{$1_\L$2}g; | ||||
7284 |
118
|
349
|
$key => $val | ||||
7285 | } keys %options; | ||||||
7286 | |||||||
7287 | # check options | ||||||
7288 |
122
|
336
|
my @allowed_options= qw( keyattr forcearray noattr content_key | ||||
7289 | var var_regexp variables var_attr | ||||||
7290 | group_tags forcecontent | ||||||
7291 | normalise_space normalize_space | ||||||
7292 | ); | ||||||
7293 |
122
1464
|
128
1539
|
my %allowed_options= map { $_ => 1 } @allowed_options; | ||||
7294 |
122
|
277
|
foreach my $option (keys %options) | ||||
7295 |
118
|
246
|
{ carp "invalid option $option\n" unless( $allowed_options{$option}); } | ||||
7296 | |||||||
7297 |
122
|
447
|
$options{normalise_space} ||= $options{normalize_space} || 0; | ||||
7298 | |||||||
7299 |
122
|
279
|
$options{content_key} ||= 'content'; | ||||
7300 |
122
|
202
|
if( $options{content_key}=~ m{^-}) | ||||
7301 | { # need to remove the - and to activate extra folding | ||||||
7302 |
5
|
8
|
$options{content_key}=~ s{^-}{}; | ||||
7303 |
5
|
5
|
$options{extra_folding}= 1; | ||||
7304 | } | ||||||
7305 | else | ||||||
7306 |
117
|
118
|
{ $options{extra_folding}= 0; } | ||||
7307 | |||||||
7308 |
122
|
214
|
$options{forcearray} ||=0; | ||||
7309 |
122
|
207
|
if( isa( $options{forcearray}, 'ARRAY')) | ||||
7310 |
5
5
5
|
3
10
7
|
{ my %forcearray_tags= map { $_ => 1 } @{$options{forcearray}}; | ||||
7311 |
5
|
8
|
$options{forcearray_tags}= \%forcearray_tags; | ||||
7312 |
5
|
6
|
$options{forcearray}= 0; | ||||
7313 | } | ||||||
7314 | |||||||
7315 |
122
|
310
|
$options{keyattr} ||= ['name', 'key', 'id']; | ||||
7316 |
122
|
194
|
if( ref $options{keyattr} eq 'ARRAY') | ||||
7317 |
107
107
|
62
148
|
{ foreach my $keyattr (@{$options{keyattr}}) | ||||
7318 |
296
|
658
|
{ my( $prefix, $att)= ($keyattr=~ m{^([+-])?(.*)}); | ||||
7319 |
296
|
563
|
$prefix ||= ''; | ||||
7320 |
296
|
372
|
$options{key_for_all}->{$att}= 1; | ||||
7321 |
296
|
462
|
$options{remove_key_for_all}->{$att}=1 unless( $prefix eq '+'); | ||||
7322 |
296
|
428
|
$options{prefix_key_for_all}->{$att}=1 if( $prefix eq '-'); | ||||
7323 | } | ||||||
7324 | } | ||||||
7325 | elsif( ref $options{keyattr} eq 'HASH') | ||||||
7326 |
15
30
|
14
58
|
{ while( my( $elt, $keyattr)= each %{$options{keyattr}}) | ||||
7327 |
15
|
49
|
{ my( $prefix, $att)= ($keyattr=~ m{^([+-])?(.*)}); | ||||
7328 |
15
|
25
|
$prefix ||=''; | ||||
7329 |
15
|
28
|
$options{key_for_elt}->{$elt}= $att; | ||||
7330 |
15
|
24
|
$options{remove_key_for_elt}->{"$elt#$att"}=1 unless( $prefix); | ||||
7331 |
15
|
33
|
$options{prefix_key_for_elt}->{"$elt#$att"}=1 if( $prefix eq '-'); | ||||
7332 | } | ||||||
7333 | } | ||||||
7334 | |||||||
7335 | |||||||
7336 |
122
|
334
|
$options{var}||= $options{var_attr}; # for compat with XML::Simple | ||||
7337 |
122
11
|
124
16
|
if( $options{var}) { $options{var_values}= {}; } | ||||
7338 |
111
|
106
|
else { $options{var}=''; } | ||||
7339 | |||||||
7340 |
122
|
142
|
if( $options{variables}) | ||||
7341 |
7
|
15
|
{ $options{var}||= 1; | ||||
7342 |
7
|
8
|
$options{var_values}= $options{variables}; | ||||
7343 | } | ||||||
7344 | |||||||
7345 |
122
|
194
|
if( $options{var_regexp} and !$options{var}) | ||||
7346 |
0
|
0
|
{ warn "var option not used, var_regexp option ignored\n"; } | ||||
7347 |
122
|
234
|
$options{var_regexp} ||= '\$\{?(\w+)\}?'; | ||||
7348 | |||||||
7349 |
122
|
194
|
$elt->_simplify( \%options); | ||||
7350 | |||||||
7351 | } | ||||||
7352 | |||||||
7353 | sub _simplify | ||||||
7354 |
764
|
536
|
{ my( $elt, $options)= @_; | ||||
7355 | |||||||
7356 |
764
|
410
|
my $data; | ||||
7357 | |||||||
7358 |
764
|
595
|
my $gi= $XML::Twig::index2gi[$elt->{'gi'}]; | ||||
7359 |
764
764
764
764
764
1118
1118
764
|
364
438
495
495
780
761
1297
939
|
my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; | ||||
7360 |
764
732
|
1688
1512
|
my %atts= $options->{noattr} || !$elt->{att} ? () : %{$elt->{att}}; | ||||
7361 |
764
|
636
|
my $nb_atts= keys %atts; | ||||
7362 |
764
|
742
|
my $nb_children= $elt->children_count + $nb_atts; | ||||
7363 | |||||||
7364 |
764
|
452
|
my %nb_children; | ||||
7365 |
764
1118
|
666
1014
|
foreach (@children) { $nb_children{$_->tag}++; } | ||||
7366 |
764
576
|
1028
657
|
foreach (keys %atts) { $nb_children{$_}++; } | ||||
7367 | |||||||
7368 |
764
|
485
|
my $arrays; # tag => array where elements are stored | ||||
7369 | |||||||
7370 | |||||||
7371 | # store children | ||||||
7372 |
764
|
615
|
foreach my $child (@children) | ||||
7373 |
1118
|
1066
|
{ if( $child->is_text) | ||||
7374 | { # generate with a content key | ||||||
7375 |
476
|
498
|
my $text= $elt->_text_with_vars( $options); | ||||
7376 |
476
21
|
650
22
|
if( $options->{normalise_space} >= 2) { $text= _normalize_space( $text); } | ||||
7377 |
476
|
1439
|
if( $options->{force_content} | ||||
7378 | || $nb_atts | ||||||
7379 | || (scalar @children > 1) | ||||||
7380 | ) | ||||||
7381 |
201
|
467
|
{ $data->{$options->{content_key}}= $text; } | ||||
7382 | else | ||||||
7383 |
275
|
392
|
{ $data= $text; } | ||||
7384 | } | ||||||
7385 | else | ||||||
7386 | { # element with sub-elements | ||||||
7387 |
642
|
545
|
my $child_gi= $XML::Twig::index2gi[$child->{'gi'}]; | ||||
7388 | |||||||
7389 |
642
|
682
|
my $child_data= $child->_simplify( $options); | ||||
7390 | |||||||
7391 | # first see if we need to simplify further the child data | ||||||
7392 | # simplify because of grouped tags | ||||||
7393 |
642
|
935
|
if( my $grouped_tag= $options->{group_tags}->{$child_gi}) | ||||
7394 | { # check that the child data is a hash with a single field | ||||||
7395 |
9
|
42
|
unless( (ref( $child_data) eq 'HASH') | ||||
7396 | && (keys %$child_data == 1) | ||||||
7397 | && defined ( my $grouped_child_data= $child_data->{$grouped_tag}) | ||||||
7398 | ) | ||||||
7399 |
3
|
261
|
{ croak "error in grouped tag $child_gi"; } | ||||
7400 | else | ||||||
7401 |
6
|
4
|
{ $child_data= $grouped_child_data; } | ||||
7402 | } | ||||||
7403 | # simplify because of extra folding | ||||||
7404 |
639
|
705
|
if( $options->{extra_folding}) | ||||
7405 |
27
|
88
|
{ if( (ref( $child_data) eq 'HASH') | ||||
7406 | && (keys %$child_data == 1) | ||||||
7407 | && defined( my $content= $child_data->{$options->{content_key}}) | ||||||
7408 | ) | ||||||
7409 |
8
|
8
|
{ $child_data= $content; } | ||||
7410 | } | ||||||
7411 | |||||||
7412 |
639
|
710
|
if( my $keyatt= $child->_key_attr( $options)) | ||||
7413 | { # simplify element with key | ||||||
7414 |
207
|
197
|
my $key= $child->{'att'}->{$keyatt}; | ||||
7415 |
207
22
|
260
16
|
if( $options->{normalise_space} >= 1) { $key= _normalize_space( $key); } | ||||
7416 |
207
|
516
|
$data->{$child_gi}->{$key}= $child_data; | ||||
7417 | } | ||||||
7418 | elsif( $options->{forcearray} | ||||||
7419 | || $options->{forcearray_tags}->{$child_gi} | ||||||
7420 | || ( $nb_children{$child_gi} > 1) | ||||||
7421 | ) | ||||||
7422 | { # simplify element to store in an array | ||||||
7423 |
239
|
458
|
$data->{$child_gi} ||= []; | ||||
7424 |
239
239
|
154
478
|
push @{$data->{$child_gi}}, $child_data; | ||||
7425 | } | ||||||
7426 | else | ||||||
7427 | { # simplify element to store as a hash field | ||||||
7428 |
193
|
360
|
$data->{$child_gi}= $child_data; | ||||
7429 | } | ||||||
7430 | } | ||||||
7431 | } | ||||||
7432 | |||||||
7433 | # store atts | ||||||
7434 | # TODO: deal with att that already have an element by that name | ||||||
7435 |
761
|
1024
|
foreach my $att (keys %atts) | ||||
7436 | { # do not store if the att is a key that needs to be removed | ||||||
7437 |
576
|
1597
|
if( $options->{remove_key_for_all}->{$att} | ||||
7438 | || $options->{remove_key_for_elt}->{"$gi#$att"} | ||||||
7439 | ) | ||||||
7440 |
201
|
210
|
{ next; } | ||||
7441 | |||||||
7442 |
375
|
529
|
my $att_text= $options->{var} ? _replace_vars_in_text( $atts{$att}, $options) : $atts{$att} ; | ||||
7443 |
375
15
|
968
16
|
if( $options->{normalise_space} >= 2) { $att_text= _normalize_space( $att_text); } | ||||
7444 | |||||||
7445 |
375
|
986
|
if( $options->{prefix_key_for_all}->{$att} | ||||
7446 | || $options->{prefix_key_for_elt}->{"$gi#$att"} | ||||||
7447 | ) | ||||||
7448 | { # prefix the att | ||||||
7449 |
3
|
5
|
$data->{"-$att"}= $att_text; | ||||
7450 | } | ||||||
7451 | else | ||||||
7452 | { # normal case | ||||||
7453 |
372
|
588
|
$data->{$att}= $att_text; | ||||
7454 | } | ||||||
7455 | } | ||||||
7456 | |||||||
7457 |
761
|
1665
|
return $data; | ||||
7458 | } | ||||||
7459 | |||||||
7460 | sub _key_attr | ||||||
7461 |
639
|
464
|
{ my( $elt, $options)=@_; | ||||
7462 |
639
|
781
|
return if( $options->{noattr}); | ||||
7463 |
612
|
615
|
if( $options->{key_for_all}) | ||||
7464 |
531
|
515
|
{ foreach my $att ($elt->att_names) | ||||
7465 |
238
|
377
|
{ if( $options->{key_for_all}->{$att}) | ||||
7466 |
198
|
326
|
{ return $att; } | ||||
7467 | } | ||||||
7468 | } | ||||||
7469 | elsif( $options->{key_for_elt}) | ||||||
7470 |
81
|
131
|
{ if( my $key_for_elt= $options->{key_for_elt}->{$XML::Twig::index2gi[$elt->{'gi'}]} ) | ||||
7471 |
12
|
27
|
{ return $key_for_elt if( defined( $elt->{'att'}->{$key_for_elt})); } | ||||
7472 | } | ||||||
7473 |
405
|
1721
|
return; | ||||
7474 | } | ||||||
7475 | |||||||
7476 | sub _text_with_vars | ||||||
7477 |
476
|
322
|
{ my( $elt, $options)= @_; | ||||
7478 |
476
|
243
|
my $text; | ||||
7479 |
476
|
445
|
if( $options->{var}) | ||||
7480 |
68
|
81
|
{ $text= _replace_vars_in_text( $elt->text, $options); | ||||
7481 |
68
|
87
|
$elt->_store_var( $options); | ||||
7482 | } | ||||||
7483 | else | ||||||
7484 |
408
|
453
|
{ $text= $elt->text; } | ||||
7485 |
476
|
522
|
return $text; | ||||
7486 | } | ||||||
7487 | |||||||
7488 | |||||||
7489 | sub _normalize_space | ||||||
7490 |
58
|
39
|
{ my $text= shift; | ||||
7491 |
58
|
103
|
$text=~ s{\s+}{ }sg; | ||||
7492 |
58
|
72
|
$text=~ s{^\s}{}; | ||||
7493 |
58
|
69
|
$text=~ s{\s$}{}; | ||||
7494 |
58
|
59
|
return $text; | ||||
7495 | } | ||||||
7496 | |||||||
7497 | |||||||
7498 | sub att_nb | ||||||
7499 |
16
|
48
|
{ return 0 unless( my $atts= $_[0]->{att}); | ||||
7500 |
14
|
29
|
return scalar keys %$atts; | ||||
7501 | } | ||||||
7502 | |||||||
7503 | sub has_no_atts | ||||||
7504 |
5
|
19
|
{ return 1 unless( my $atts= $_[0]->{att}); | ||||
7505 |
3
|
13
|
return scalar keys %$atts ? 0 : 1; | ||||
7506 | } | ||||||
7507 | |||||||
7508 | sub _replace_vars_in_text | ||||||
7509 |
117
|
94
|
{ my( $text, $options)= @_; | ||||
7510 | |||||||
7511 |
117
14
|
353
44
|
$text=~ s{($options->{var_regexp})} | ||||
7512 |
8
|
15
|
{ if( defined( my $value= $options->{var_values}->{$2})) | ||||
7513 | { $value } | ||||||
7514 |
6
|
67
|
else | ||||
7515 |
6
|
33
|
{ warn "unknown variable $2\n"; | ||||
7516 | $1 | ||||||
7517 | } | ||||||
7518 | }gex; | ||||||
7519 |
117
|
148
|
return $text; | ||||
7520 | } | ||||||
7521 | |||||||
7522 | sub _store_var | ||||||
7523 |
68
|
43
|
{ my( $elt, $options)= @_; | ||||
7524 |
68
|
141
|
if( defined (my $var_name= $elt->{'att'}->{$options->{var}})) | ||||
7525 |
3
|
4
|
{ $options->{var_values}->{$var_name}= $elt->text; | ||||
7526 | } | ||||||
7527 | } | ||||||
7528 | |||||||
7529 | |||||||
7530 | # split a text element at a given offset | ||||||
7531 | sub split_at | ||||||
7532 |
86
|
69
|
{ my( $elt, $offset)= @_; | ||||
7533 |
86
|
92
|
my $text_elt= $elt->is_text ? $elt : $elt->first_child( $TEXT) || return ''; | ||||
7534 |
85
|
114
|
my $string= $text_elt->text; | ||||
7535 |
85
|
110
|
my $left_string= substr( $string, 0, $offset); | ||||
7536 |
85
|
70
|
my $right_string= substr( $string, $offset); | ||||
7537 |
85
|
304
|
$text_elt->{pcdata}= (delete $text_elt->{empty} || 1) && $left_string; | ||||
7538 |
85
|
128
|
my $new_elt= $elt->new( $XML::Twig::index2gi[$elt->{'gi'}], $right_string); | ||||
7539 |
85
|
103
|
$new_elt->paste( after => $elt); | ||||
7540 |
85
|
83
|
return $new_elt; | ||||
7541 | } | ||||||
7542 | |||||||
7543 | |||||||
7544 | # split an element or its text descendants into several, in place | ||||||
7545 | # all elements (new and untouched) are returned | ||||||
7546 | sub split | ||||||
7547 |
12
|
17
|
{ my $elt= shift; | ||||
7548 |
12
|
7
|
my @text_chunks; | ||||
7549 |
12
|
9
|
my @result; | ||||
7550 |
12
1
|
17
2
|
if( $elt->is_text) { @text_chunks= ($elt); } | ||||
7551 |
11
|
16
|
else { @text_chunks= $elt->descendants( $TEXT); } | ||||
7552 |
12
|
15
|
foreach my $text_chunk (@text_chunks) | ||||
7553 |
12
|
18
|
{ push @result, $text_chunk->_split( 1, @_); } | ||||
7554 |
12
|
16
|
return @result; | ||||
7555 | } | ||||||
7556 | |||||||
7557 | # split an element or its text descendants into several, in place | ||||||
7558 | # created elements (those which match the regexp) are returned | ||||||
7559 | sub mark | ||||||
7560 |
26
|
44
|
{ my $elt= shift; | ||||
7561 |
26
|
17
|
my @text_chunks; | ||||
7562 |
26
|
10
|
my @result; | ||||
7563 |
26
1
|
24
2
|
if( $elt->is_text) { @text_chunks= ($elt); } | ||||
7564 |
25
|
33
|
else { @text_chunks= $elt->descendants( $TEXT); } | ||||
7565 |
26
|
30
|
foreach my $text_chunk (@text_chunks) | ||||
7566 |
26
|
35
|
{ push @result, $text_chunk->_split( 0, @_); } | ||||
7567 |
26
|
49
|
return @result; | ||||
7568 | } | ||||||
7569 | |||||||
7570 | # split a single text element | ||||||
7571 | # return_all defines what is returned: if it is true | ||||||
7572 | # only returns the elements created by matches in the split regexp | ||||||
7573 | # otherwise all elements (new and untouched) are returned | ||||||
7574 | |||||||
7575 | |||||||
7576 | { | ||||||
7577 | |||||||
7578 | sub _split | ||||||
7579 |
38
|
27
|
{ my $elt= shift; | ||||
7580 |
38
|
20
|
my $return_all= shift; | ||||
7581 |
38
|
26
|
my $regexp= shift; | ||||
7582 |
38
|
25
|
my @tags; | ||||
7583 | |||||||
7584 |
38
|
42
|
while( @_) | ||||
7585 |
41
|
27
|
{ my $tag= shift(); | ||||
7586 |
41
|
42
|
if( ref $_[0]) | ||||
7587 |
11
|
28
|
{ push @tags, { tag => $tag, atts => shift }; } | ||||
7588 | else | ||||||
7589 |
30
|
72
|
{ push @tags, { tag => $tag }; } | ||||
7590 | } | ||||||
7591 | |||||||
7592 |
38
2
|
43
3
|
unless( @tags) { @tags= { tag => $elt->{parent}->gi }; } | ||||
7593 | |||||||
7594 |
38
|
24
|
my @result; # the returned list of elements | ||||
7595 |
38
|
46
|
my $text= $elt->text; | ||||
7596 |
38
|
38
|
my $gi= $XML::Twig::index2gi[$elt->{'gi'}]; | ||||
7597 | |||||||
7598 | # 2 uses: if split matches then the first substring reuses $elt | ||||||
7599 | # once a split has occurred then the last match needs to be put in | ||||||
7600 | # a new element | ||||||
7601 |
38
|
20
|
my $previous_match= 0; | ||||
7602 | |||||||
7603 |
38
|
514
|
while( my( $pre_match, @matches)= $text=~ /^(.*?)$regexp(.*)$/gcs) | ||||
7604 |
55
|
58
|
{ $text= pop @matches; | ||||
7605 |
55
|
55
|
if( $previous_match) | ||||
7606 | { # match, not the first one, create a new text ($gi) element | ||||||
7607 |
21
|
31
|
_utf8_ify( $pre_match) if( $] < 5.010); | ||||
7608 |
21
|
25
|
$elt= $elt->insert_new_elt( after => $gi, $pre_match); | ||||
7609 |
21
|
28
|
push @result, $elt if( $return_all); | ||||
7610 | } | ||||||
7611 | else | ||||||
7612 | { # first match in $elt, re-use $elt for the first sub-string | ||||||
7613 |
34
|
47
|
_utf8_ify( $pre_match) if( $] < 5.010); | ||||
7614 |
34
|
42
|
$elt->set_text( $pre_match); | ||||
7615 |
34
|
22
|
$previous_match++; # store the fact that there was a match | ||||
7616 |
34
|
42
|
push @result, $elt if( $return_all); | ||||
7617 | } | ||||||
7618 | |||||||
7619 | # now deal with matches captured in the regexp | ||||||
7620 |
55
|
57
|
if( @matches) | ||||
7621 | { # match, with capture | ||||||
7622 |
43
|
27
|
my $i=0; | ||||
7623 |
43
|
36
|
foreach my $match (@matches) | ||||
7624 | { # create new element, text is the match | ||||||
7625 |
53
|
60
|
_utf8_ify( $match) if( $] < 5.010); | ||||
7626 |
53
|
79
|
my $tag = _repl_match( $tags[$i]->{tag}, @matches) || '#PCDATA'; | ||||
7627 |
53
|
36
|
my $atts = \%{$tags[$i]->{atts}} || {}; | ||||
7628 |
53
20
|
80
20
|
my %atts= map { _repl_match( $_, @matches) => _repl_match( $atts->{$_}, @matches) } keys %$atts; | ||||
7629 |
53
|
68
|
$elt= $elt->insert_new_elt( after => $tag, \%atts, $match); | ||||
7630 |
53
|
45
|
push @result, $elt; | ||||
7631 |
53
|
298
|
$i= ($i + 1) % @tags; | ||||
7632 | } | ||||||
7633 | } | ||||||
7634 | else | ||||||
7635 | { # match, no captures | ||||||
7636 |
12
|
9
|
my $tag = $tags[0]->{tag}; | ||||
7637 |
12
|
9
|
my $atts = \%{$tags[0]->{atts}} || {}; | ||||
7638 |
12
|
13
|
$elt= $elt->insert_new_elt( after => $tag, $atts); | ||||
7639 |
12
|
59
|
push @result, $elt; | ||||
7640 | } | ||||||
7641 | } | ||||||
7642 |
38
|
81
|
if( $previous_match && $text) | ||||
7643 | { # there was at least 1 match, and there is text left after the match | ||||||
7644 |
17
|
18
|
$elt= $elt->insert_new_elt( after => $gi, $text); | ||||
7645 | } | ||||||
7646 | |||||||
7647 |
38
|
45
|
push @result, $elt if( $return_all); | ||||
7648 | |||||||
7649 |
38
|
118
|
return @result; # return all elements | ||||
7650 | } | ||||||
7651 | |||||||
7652 | sub _repl_match | ||||||
7653 |
93
|
111
|
{ my( $val, @matches)= @_; | ||||
7654 |
93
|
113
|
$val=~ s{\$(\d+)}{$matches[$1-1]}g; | ||||
7655 |
93
|
176
|
return $val; | ||||
7656 | } | ||||||
7657 | |||||||
7658 | # evil hack needed as sometimes | ||||||
7659 | my $encode_is_loaded=0; # so we only load Encode once | ||||||
7660 | sub _utf8_ify | ||||||
7661 | { | ||||||
7662 |
1
|
76
|
if( $perl_version >= 5.008 and $perl_version < 5.010 and !_keep_encoding()) | ||||
7663 |
0
0
0
0
|
0
0
0
0
|
{ unless( $encode_is_loaded) { require Encode; import Encode; $encode_is_loaded++; } | ||||
7664 |
0
|
0
|
Encode::_utf8_on( $_[0]); # the flag should be set but is not | ||||
7665 | } | ||||||
7666 | } | ||||||
7667 | |||||||
7668 | |||||||
7669 | } | ||||||
7670 | |||||||
7671 | { my %replace_sub; # cache for complex expressions (expression => sub) | ||||||
7672 | |||||||
7673 | sub subs_text | ||||||
7674 |
28
|
32
|
{ my( $elt, $regexp, $replace)= @_; | ||||
7675 | |||||||
7676 |
28
|
14
|
my $replacement_string; | ||||
7677 |
28
|
35
|
my $is_string= _is_string( $replace); | ||||
7678 | |||||||
7679 |
28
|
20
|
my @parents; | ||||
7680 | |||||||
7681 |
28
|
50
|
foreach my $text_elt ($elt->descendants_or_self( $TEXT)) | ||||
7682 | { | ||||||
7683 |
71
|
69
|
if( $is_string) | ||||
7684 |
15
|
19
|
{ my $text= $text_elt->text; | ||||
7685 |
15
27
|
77
30
|
$text=~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, $6, $7, $8, $9)}egx; | ||||
7686 |
15
|
19
|
$text_elt->set_text( $text); | ||||
7687 | } | ||||||
7688 | else | ||||||
7689 | { | ||||||
7690 |
187
187
187
|
894
181
1012
|
no utf8; # = perl 5.6 | ||||
7691 |
56
|
112
|
my $replace_sub= ( $replace_sub{$replace} ||= _install_replace_sub( $replace)); | ||||
7692 |
56
|
81
|
my $text= $text_elt->text; | ||||
7693 |
56
|
41
|
my $pos=0; # used to skip text that was previously matched | ||||
7694 |
56
|
32
|
my $found_hit; | ||||
7695 |
56
|
499
|
while( my( $pre_match_string, $match_string, @var)= ($text=~ m{(.*?)($regexp)}sg)) | ||||
7696 |
52
|
44
|
{ $found_hit=1; | ||||
7697 |
52
|
56
|
my $match_start = length( $pre_match_string); | ||||
7698 |
52
|
90
|
my $match = $match_start ? $text_elt->split_at( $match_start + $pos) : $text_elt; | ||||
7699 |
52
|
41
|
my $match_length = length( $match_string); | ||||
7700 |
52
|
72
|
my $post_match = $match->split_at( $match_length); | ||||
7701 |
52
|
995
|
$replace_sub->( $match, @var); | ||||
7702 | |||||||
7703 | # go to next | ||||||
7704 |
51
|
40
|
$text_elt= $post_match; | ||||
7705 |
51
|
57
|
$text= $post_match->text; | ||||
7706 | |||||||
7707 |
51
51
|
65
533
|
if( $found_hit) { push @parents, $text_elt->{parent} unless $parents[-1] && $parents[-1]== $text_elt->{parent}; } | ||||
7708 | |||||||
7709 | } | ||||||
7710 | } | ||||||
7711 | } | ||||||
7712 | |||||||
7713 |
27
29
|
37
47
|
foreach my $parent (@parents) { $parent->normalize; } | ||||
7714 | |||||||
7715 |
27
|
52
|
return $elt; | ||||
7716 | } | ||||||
7717 | |||||||
7718 | |||||||
7719 | sub _is_string | ||||||
7720 |
28
|
89
|
{ return ($_[0]=~ m{&e[ln]t}) ? 0: 1 } | ||||
7721 | |||||||
7722 | sub _replace_var | ||||||
7723 |
27
|
72
|
{ my( $string, @var)= @_; | ||||
7724 |
27
|
22
|
unshift @var, undef; | ||||
7725 |
27
|
52
|
$string=~ s{\$(\d)}{$var[$1]}g; | ||||
7726 |
27
|
71
|
return $string; | ||||
7727 | } | ||||||
7728 | |||||||
7729 | sub _install_replace_sub | ||||||
7730 |
17
|
19
|
{ my $replace_exp= shift; | ||||
7731 |
17
|
76
|
my @item= split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp; | ||||
7732 |
17
|
21
|
my $sub= q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;}; | ||||
7733 |
17
|
12
|
my( $gi, $exp); | ||||
7734 |
17
|
20
|
foreach my $item (@item) | ||||
7735 |
39
|
67
|
{ next if ! length $item; | ||||
7736 |
25
|
62
|
if( $item=~ m{^&elt\s*\(([^)]*)\)}) | ||||
7737 |
14
|
23
|
{ $exp= $1; } | ||||
7738 | elsif( $item=~ m{^&ent\s*\(\s*([^\s)]*)\s*\)}) | ||||||
7739 |
3
|
7
|
{ $exp= " '#ENT' => $1"; } | ||||
7740 | else | ||||||
7741 |
8
|
11
|
{ $exp= qq{ '#PCDATA' => "$item"}; } | ||||
7742 |
25
15
15
|
52
23
35
|
$exp=~ s{\$(\d)}{my $i= $1-1; "\$var[$i]"}eg; # replace references to matches | ||||
7743 |
25
|
31
|
$sub.= qq{ \$new= \$match->new( $exp); }; | ||||
7744 |
25
|
32
|
$sub .= q{ $new->paste( after => $last_inserted); $last_inserted=$new;}; | ||||
7745 | } | ||||||
7746 |
17
|
23
|
$sub .= q{ $match->delete; }; | ||||
7747 | #$sub=~ s/;/;\n/g; warn "subs: $sub"; | ||||||
7748 |
17
4
4
4
1
1
1
1
1
1
1
1
1
|
852
14
4
299
3
1
131
3
1
130
3
1
107
|
my $coderef= eval "sub { $NO_WARNINGS; $sub }"; | ||||
7749 |
17
0
|
35
0
|
if( $@) { croak( "invalid replacement expression $replace_exp: ",$@); } | ||||
7750 |
17
|
55
|
return $coderef; | ||||
7751 | } | ||||||
7752 | |||||||
7753 | } | ||||||
7754 | |||||||
7755 | |||||||
7756 | sub merge_text | ||||||
7757 |
50
|
68
|
{ my( $e1, $e2)= @_; | ||||
7758 |
50
|
167
|
croak "invalid merge: can only merge 2 elements" | ||||
7759 | unless( isa( $e2, 'XML::Twig::Elt')); | ||||||
7760 |
49
|
53
|
croak "invalid merge: can only merge 2 text elements" | ||||
7761 | unless( $e1->is_text && $e2->is_text && ($e1->gi eq $e2->gi)); | ||||||
7762 | |||||||
7763 |
47
|
71
|
my $t1_length= length( $e1->text); | ||||
7764 | |||||||
7765 |
47
|
63
|
$e1->set_text( $e1->text . $e2->text); | ||||
7766 | |||||||
7767 |
47
|
70
|
if( my $extra_data_in_pcdata= $e2->_extra_data_in_pcdata) | ||||
7768 |
10
16
|
8
23
|
{ foreach my $data (@$extra_data_in_pcdata) { $e1->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $t1_length); } } | ||||
7769 | |||||||
7770 |
47
|
58
|
$e2->delete; | ||||
7771 | |||||||
7772 |
47
|
100
|
return $e1; | ||||
7773 | } | ||||||
7774 | |||||||
7775 | sub merge | ||||||
7776 |
5
|
4
|
{ my( $e1, $e2)= @_; | ||||
7777 |
5
|
8
|
my @e2_children= $e2->_children; | ||||
7778 |
5
|
10
|
if( $e1->_last_child && $e1->_last_child->is_pcdata | ||||
7779 | && @e2_children && $e2_children[0]->is_pcdata | ||||||
7780 | ) | ||||||
7781 |
4
|
5
|
{ my $t1_length= length( $e1->_last_child->{pcdata}); | ||||
7782 |
4
|
5
|
my $child1= $e1->_last_child; | ||||
7783 |
4
|
6
|
my $child2= shift @e2_children; | ||||
7784 |
4
|
6
|
$child1->{pcdata} .= $child2->{pcdata}; | ||||
7785 | |||||||
7786 |
4
|
8
|
my $extra_data= $e1->_extra_data_before_end_tag . $e2->extra_data; | ||||
7787 | |||||||
7788 |
4
|
8
|
if( $extra_data) | ||||
7789 |
1
|
2
|
{ $e1->_del_extra_data_before_end_tag; | ||||
7790 |
1
|
2
|
$child1->_push_extra_data_in_pcdata( $extra_data, $t1_length); | ||||
7791 | } | ||||||
7792 | |||||||
7793 |
4
|
6
|
if( my $extra_data_in_pcdata= $child2->_extra_data_in_pcdata) | ||||
7794 |
1
1
|
1
2
|
{ foreach my $data (@$extra_data_in_pcdata) { $child1->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $t1_length); } } | ||||
7795 | |||||||
7796 |
4
|
5
|
if( my $extra_data_before_end_tag= $e2->_extra_data_before_end_tag) | ||||
7797 |
1
|
2
|
{ $e1->_set_extra_data_before_end_tag( $extra_data_before_end_tag); } | ||||
7798 | } | ||||||
7799 | |||||||
7800 |
5
1
|
8
2
|
foreach my $e (@e2_children) { $e->move( last_child => $e1); } | ||||
7801 | |||||||
7802 |
5
|
8
|
$e2->delete; | ||||
7803 |
5
|
8
|
return $e1; | ||||
7804 | } | ||||||
7805 | |||||||
7806 | |||||||
7807 | # recursively copy an element and returns the copy (can be huge and long) | ||||||
7808 | sub copy | ||||||
7809 |
86
|
57
|
{ my $elt= shift; | ||||
7810 |
86
|
117
|
my $copy= $elt->new( $XML::Twig::index2gi[$elt->{'gi'}]); | ||||
7811 | |||||||
7812 |
86
4
|
101
4
|
if( $elt->extra_data) { $copy->set_extra_data( $elt->extra_data); } | ||||
7813 |
86
7
|
114
9
|
if( $elt->{extra_data_before_end_tag}) { $copy->_set_extra_data_before_end_tag( $elt->{extra_data_before_end_tag}); } | ||||
7814 | |||||||
7815 |
86
2
|
96
3
|
if( $elt->is_asis) { $copy->set_asis; } | ||||
7816 | |||||||
7817 |
86
|
228
|
if( (exists $elt->{'pcdata'})) | ||||
7818 |
36
|
142
|
{ $copy->{pcdata}= (delete $copy->{empty} || 1) && $elt->{pcdata}; | ||||
7819 |
36
7
|
49
10
|
if( $elt->{extra_data_in_pcdata}) { $copy->_set_extra_data_in_pcdata( $elt->{extra_data_in_pcdata}); } | ||||
7820 | } | ||||||
7821 | elsif( (exists $elt->{'cdata'})) | ||||||
7822 |
2
|
3
|
{ $copy->_set_cdata( $elt->{cdata}); | ||||
7823 |
2
0
|
3
0
|
if( $elt->{extra_data_in_pcdata}) { $copy->_set_extra_data_in_pcdata( $elt->{extra_data_in_pcdata}); } | ||||
7824 | } | ||||||
7825 | elsif( (exists $elt->{'target'})) | ||||||
7826 |
1
|
2
|
{ $copy->_set_pi( $elt->{target}, $elt->{data}); } | ||||
7827 | elsif( (exists $elt->{'comment'})) | ||||||
7828 |
1
|
2
|
{ $copy->_set_comment( $elt->{comment}); } | ||||
7829 | elsif( (exists $elt->{'ent'})) | ||||||
7830 |
1
|
2
|
{ $copy->{ent}= $elt->{ent}; } | ||||
7831 | else | ||||||
7832 |
45
45
45
45
45
68
68
45
|
28
32
39
34
60
51
77
74
|
{ my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; | ||||
7833 |
45
|
70
|
if( my $atts= $elt->{att}) | ||||
7834 |
40
|
22
|
{ my %atts; | ||||
7835 |
40
|
40
|
tie %atts, 'Tie::IxHash' if (keep_atts_order()); | ||||
7836 |
40
40
|
46
61
|
%atts= %{$atts}; # we want to do a real copy of the attributes | ||||
7837 |
40
|
110
|
$copy->set_atts( \%atts); | ||||
7838 | } | ||||||
7839 |
45
|
70
|
foreach my $child (@children) | ||||
7840 |
68
|
109
|
{ my $child_copy= $child->copy; | ||||
7841 |
68
|
74
|
$child_copy->paste( 'last_child', $copy); | ||||
7842 | } | ||||||
7843 | } | ||||||
7844 | # save links to the original location, which can be convenient and is used for namespace resolution | ||||||
7845 |
86
|
80
|
foreach my $link ( qw(parent prev_sibling next_sibling) ) | ||||
7846 |
258
|
287
|
{ $copy->{former}->{$link}= $elt->{$link}; | ||||
7847 |
258
258
|
248
321
|
if( $XML::Twig::weakrefs) { weaken( $copy->{former}->{$link}); } | ||||
7848 | } | ||||||
7849 | |||||||
7850 |
86
|
81
|
$copy->{empty}= $elt->{'empty'}; | ||||
7851 | |||||||
7852 |
86
|
94
|
return $copy; | ||||
7853 | } | ||||||
7854 | |||||||
7855 | |||||||
7856 | sub delete | ||||||
7857 |
3460
|
2302
|
{ my $elt= shift; | ||||
7858 |
3460
|
3872
|
$elt->cut; | ||||
7859 |
3460
|
4102
|
$elt->DESTROY unless $XML::Twig::weakrefs; | ||||
7860 |
3460
|
31576
|
return undef; | ||||
7861 | } | ||||||
7862 | |||||||
7863 | sub __destroy | ||||||
7864 |
20368
|
13177
|
{ my $elt= shift; | ||||
7865 |
20368
|
20635
|
return if( $XML::Twig::weakrefs); | ||||
7866 |
20111
|
22617
|
my $t= shift || $elt->twig; # optional argument, passed in recursive calls | ||||
7867 | |||||||
7868 |
20111
20111
18070
|
12077
17548
14685
|
foreach( @{[$elt->_children]}) { $_->DESTROY( $t); } | ||||
7869 | |||||||
7870 | # the id reference needs to be destroyed | ||||||
7871 | # lots of tests to avoid warnings during the cleanup phase | ||||||
7872 |
20111
|
59309
|
$elt->del_id( $t) if( $ID && $t && defined( $elt->{att}) && exists( $elt->{att}->{$ID})); | ||||
7873 |
20111
2
2
6
2
|
19897
2
4
6
3
|
if( $elt->{former}) { foreach (keys %{$elt->{former}}) { delete $elt->{former}->{$_}; } delete $elt->{former}; } | ||||
7874 |
20111
40222
|
14122
33227
|
foreach (qw( keys %$elt)) { delete $elt->{$_}; } | ||||
7875 |
20111
|
21673
|
undef $elt; | ||||
7876 | } | ||||||
7877 | |||||||
7878 | BEGIN | ||||||
7879 |
194
187
7
|
485
259317
22
|
{ sub set_destroy { if( $XML::Twig::weakrefs) { undef *DESTROY } else { *DESTROY= *__destroy; } } | ||||
7880 |
187
|
231526
|
set_destroy(); | ||||
7881 | } | ||||||
7882 | |||||||
7883 | # ignores the element | ||||||
7884 | sub ignore | ||||||
7885 |
30
|
48
|
{ my $elt= shift; | ||||
7886 |
30
|
34
|
my $t= $elt->twig; | ||||
7887 |
30
|
46
|
$t->ignore( $elt, @_); | ||||
7888 | } | ||||||
7889 | |||||||
7890 | BEGIN { | ||||||
7891 |
187
|
303
|
my $pretty = 0; | ||||
7892 |
187
|
238
|
my $quote = '"'; | ||||
7893 |
187
|
180
|
my $INDENT = ' '; | ||||
7894 |
187
|
155
|
my $empty_tag_style = 0; | ||||
7895 |
187
|
192
|
my $remove_cdata = 0; | ||||
7896 |
187
|
177
|
my $keep_encoding = 0; | ||||
7897 |
187
|
171
|
my $expand_external_entities = 0; | ||||
7898 |
187
|
163
|
my $keep_atts_order = 0; | ||||
7899 |
187
|
248
|
my $do_not_escape_amp_in_atts = 0; | ||||
7900 |
187
|
214
|
my $WRAP = '80'; | ||||
7901 |
187
|
194
|
my $REPLACED_ENTS = qq{&<}; | ||||
7902 | |||||||
7903 |
187
|
337
|
my ($NSGMLS, $NICE, $INDENTED, $INDENTEDCT, $INDENTEDC, $WRAPPED, $RECORD1, $RECORD2, $INDENTEDA)= (1..9); | ||||
7904 |
187
935
|
239
1429
|
my %KEEP_TEXT_TAG_ON_ONE_LINE= map { $_ => 1 } ( $INDENTED, $INDENTEDCT, $INDENTEDC, $INDENTEDA, $WRAPPED); | ||||
7905 |
187
561
|
264
646
|
my %WRAPPED = map { $_ => 1 } ( $WRAPPED, $INDENTEDA, $INDENTEDC); | ||||
7906 | |||||||
7907 |
187
|
995
|
my %pretty_print_style= | ||||
7908 | ( none => 0, # no added \n | ||||||
7909 | nsgmls => $NSGMLS, # nsgmls-style, \n in tags | ||||||
7910 | # below this line styles are UNSAFE (the generated XML can be well-formed but invalid) | ||||||
7911 | nice => $NICE, # \n after open/close tags except when the | ||||||
7912 | # element starts with text | ||||||
7913 | indented => $INDENTED, # nice plus idented | ||||||
7914 | indented_close_tag => $INDENTEDCT, # nice plus idented | ||||||
7915 | indented_c => $INDENTEDC, # slightly more compact than indented (closing | ||||||
7916 | # tags are on the same line) | ||||||
7917 | wrapped => $WRAPPED, # text is wrapped at column | ||||||
7918 | record_c => $RECORD1, # for record-like data (compact) | ||||||
7919 | record => $RECORD2, # for record-like data (not so compact) | ||||||
7920 | indented_a => $INDENTEDA, # nice, indented, and with attributes on separate | ||||||
7921 | # lines as the nsgmls style, as well as wrapped | ||||||
7922 | # lines - to make the xml friendly to line-oriented tools | ||||||
7923 | cvs => $INDENTEDA, # alias for indented_a | ||||||
7924 | ); | ||||||
7925 | |||||||
7926 |
187
|
203
|
my ($HTML, $EXPAND)= (1..2); | ||||
7927 |
187
|
410
|
my %empty_tag_style= | ||||
7928 | ( normal => 0, # <tag/> | ||||||
7929 | html => $HTML, # <tag /> | ||||||
7930 | xhtml => $HTML, # <tag /> | ||||||
7931 | expand => $EXPAND, # <tag></tag> | ||||||
7932 | ); | ||||||
7933 | |||||||
7934 |
187
|
354
|
my %quote_style= | ||||
7935 | ( double => '"', | ||||||
7936 | single => "'", | ||||||
7937 | # smart => "smart", | ||||||
7938 | ); | ||||||
7939 | |||||||
7940 |
187
|
168
|
my $xml_space_preserve; # set when an element includes xml:space="preserve" | ||||
7941 | |||||||
7942 |
187
|
143
|
my $output_filter; # filters the entire output (including < and >) | ||||
7943 |
187
|
161
|
my $output_text_filter; # filters only the text part (tag names, attributes, pcdata) | ||||
7944 | |||||||
7945 |
187
|
177
|
my $replaced_ents= $REPLACED_ENTS; | ||||
7946 | |||||||
7947 | |||||||
7948 | # returns those pesky "global" variables so you can switch between twigs | ||||||
7949 | sub global_state ## no critic (Subroutines::ProhibitNestedSubs); | ||||||
7950 | { return | ||||||
7951 |
11
|
97
|
{ pretty => $pretty, | ||||
7952 | quote => $quote, | ||||||
7953 | indent => $INDENT, | ||||||
7954 | empty_tag_style => $empty_tag_style, | ||||||
7955 | remove_cdata => $remove_cdata, | ||||||
7956 | keep_encoding => $keep_encoding, | ||||||
7957 | expand_external_entities => $expand_external_entities, | ||||||
7958 | output_filter => $output_filter, | ||||||
7959 | output_text_filter => $output_text_filter, | ||||||
7960 | keep_atts_order => $keep_atts_order, | ||||||
7961 | do_not_escape_amp_in_atts => $do_not_escape_amp_in_atts, | ||||||
7962 | wrap => $WRAP, | ||||||
7963 | replaced_ents => $replaced_ents, | ||||||
7964 | }; | ||||||
7965 | } | ||||||
7966 | |||||||
7967 | # restores the global variables | ||||||
7968 | sub set_global_state | ||||||
7969 |
23
|
30
|
{ my $state= shift; | ||||
7970 |
23
|
28
|
$pretty = $state->{pretty}; | ||||
7971 |
23
|
23
|
$quote = $state->{quote}; | ||||
7972 |
23
|
28
|
$INDENT = $state->{indent}; | ||||
7973 |
23
|
25
|
$empty_tag_style = $state->{empty_tag_style}; | ||||
7974 |
23
|
25
|
$remove_cdata = $state->{remove_cdata}; | ||||
7975 |
23
|
26
|
$keep_encoding = $state->{keep_encoding}; | ||||
7976 |
23
|
21
|
$expand_external_entities = $state->{expand_external_entities}; | ||||
7977 |
23
|
21
|
$output_filter = $state->{output_filter}; | ||||
7978 |
23
|
37
|
$output_text_filter = $state->{output_text_filter}; | ||||
7979 |
23
|
19
|
$keep_atts_order = $state->{keep_atts_order}; | ||||
7980 |
23
|
20
|
$do_not_escape_amp_in_atts = $state->{do_not_escape_amp_in_atts}; | ||||
7981 |
23
|
21
|
$WRAP = $state->{wrap}; | ||||
7982 |
23
|
40
|
$replaced_ents = $state->{replaced_ents}, | ||||
7983 | } | ||||||
7984 | |||||||
7985 | # sets global state to defaults | ||||||
7986 | sub init_global_state | ||||||
7987 |
6
|
127
|
{ set_global_state( | ||||
7988 | { pretty => 0, | ||||||
7989 | quote => '"', | ||||||
7990 | indent => $INDENT, | ||||||
7991 | empty_tag_style => 0, | ||||||
7992 | remove_cdata => 0, | ||||||
7993 | keep_encoding => 0, | ||||||
7994 | expand_external_entities => 0, | ||||||
7995 | output_filter => undef, | ||||||
7996 | output_text_filter => undef, | ||||||
7997 | keep_atts_order => undef, | ||||||
7998 | do_not_escape_amp_in_atts => 0, | ||||||
7999 | wrap => $WRAP, | ||||||
8000 | replaced_ents => $REPLACED_ENTS, | ||||||
8001 | }); | ||||||
8002 | } | ||||||
8003 | |||||||
8004 | |||||||
8005 | # set the pretty_print style (in $pretty) and returns the old one | ||||||
8006 | # can be called from outside the package with 2 arguments (elt, style) | ||||||
8007 | # or from inside with only one argument (style) | ||||||
8008 | # the style can be either a string (one of the keys of %pretty_print_style | ||||||
8009 | # or a number (presumably an old value saved) | ||||||
8010 | sub set_pretty_print | ||||||
8011 |
2067
|
2519
|
{ my $style= lc( defined $_[1] ? $_[1] : $_[0]); # so we cover both cases | ||||
8012 |
2067
|
1379
|
my $old_pretty= $pretty; | ||||
8013 |
2067
|
4080
|
if( $style=~ /^\d+$/) | ||||
8014 |
1945
|
2724
|
{ croak "invalid pretty print style $style" unless( $style < keys %pretty_print_style); | ||||
8015 |
1944
|
1314
|
$pretty= $style; | ||||
8016 | } | ||||||
8017 | else | ||||||
8018 |
122
|
304
|
{ croak "invalid pretty print style '$style'" unless( exists $pretty_print_style{$style}); | ||||
8019 |
121
|
132
|
$pretty= $pretty_print_style{$style}; | ||||
8020 | } | ||||||
8021 |
2065
|
2392
|
if( $WRAPPED{$pretty} ) | ||||
8022 |
7
|
10
|
{ XML::Twig::_use( 'Text::Wrap') or croak( "Text::Wrap not available, cannot use style $style"); } | ||||
8023 |
2065
|
2859
|
return $old_pretty; | ||||
8024 | } | ||||||
8025 | |||||||
8026 |
2
|
4
|
sub _pretty_print { return $pretty; } | ||||
8027 | |||||||
8028 | # set the empty tag style (in $empty_tag_style) and returns the old one | ||||||
8029 | # can be called from outside the package with 2 arguments (elt, style) | ||||||
8030 | # or from inside with only one argument (style) | ||||||
8031 | # the style can be either a string (one of the keys of %empty_tag_style | ||||||
8032 | # or a number (presumably an old value saved) | ||||||
8033 | sub set_empty_tag_style | ||||||
8034 |
141
|
318
|
{ my $style= lc( defined $_[1] ? $_[1] : $_[0]); # so we cover both cases | ||||
8035 |
141
|
138
|
my $old_style= $empty_tag_style; | ||||
8036 |
141
|
445
|
if( $style=~ /^\d+$/) | ||||
8037 |
25
|
119
|
{ croak "invalid empty tag style $style" | ||||
8038 | unless( $style < keys %empty_tag_style); | ||||||
8039 |
24
|
17
|
$empty_tag_style= $style; | ||||
8040 | } | ||||||
8041 | else | ||||||
8042 |
116
|
289
|
{ croak "invalid empty tag style '$style'" | ||||
8043 | unless( exists $empty_tag_style{$style}); | ||||||
8044 |
115
|
163
|
$empty_tag_style= $empty_tag_style{$style}; | ||||
8045 | } | ||||||
8046 |
139
|
187
|
return $old_style; | ||||
8047 | } | ||||||
8048 | |||||||
8049 | sub _pretty_print_styles | ||||||
8050 |
27
718
|
102
872
|
{ return (sort { $pretty_print_style{$a} <=> $pretty_print_style{$b} || $a cmp $b } keys %pretty_print_style); } | ||||
8051 | |||||||
8052 | sub set_quote | ||||||
8053 |
3115
|
3904
|
{ my $style= $_[1] || $_[0]; | ||||
8054 |
3115
|
2003
|
my $old_quote= $quote; | ||||
8055 |
3115
|
3824
|
croak "invalid quote '$style'" unless( exists $quote_style{$style}); | ||||
8056 |
3114
|
2408
|
$quote= $quote_style{$style}; | ||||
8057 |
3114
|
2409
|
return $old_quote; | ||||
8058 | } | ||||||
8059 | |||||||
8060 | sub set_remove_cdata | ||||||
8061 |
3115
|
3054
|
{ my $new_value= defined $_[1] ? $_[1] : $_[0]; | ||||
8062 |
3115
|
1935
|
my $old_value= $remove_cdata; | ||||
8063 |
3115
|
1577
|
$remove_cdata= $new_value; | ||||
8064 |
3115
|
2240
|
return $old_value; | ||||
8065 | } | ||||||
8066 | |||||||
8067 | |||||||
8068 | sub set_indent | ||||||
8069 |
5
|
9
|
{ my $new_value= defined $_[1] ? $_[1] : $_[0]; | ||||
8070 |
5
|
7
|
my $old_value= $INDENT; | ||||
8071 |
5
|
7
|
$INDENT= $new_value; | ||||
8072 |
5
|
6
|
return $old_value; | ||||
8073 | } | ||||||
8074 | |||||||
8075 | sub set_wrap | ||||||
8076 |
9
|
117
|
{ my $new_value= defined $_[1] ? $_[1] : $_[0]; | ||||
8077 |
9
|
5
|
my $old_value= $WRAP; | ||||
8078 |
9
|
5
|
$WRAP= $new_value; | ||||
8079 |
9
|
19
|
return $old_value; | ||||
8080 | } | ||||||
8081 | |||||||
8082 | |||||||
8083 | sub set_keep_encoding | ||||||
8084 |
3152
|
3379
|
{ my $new_value= defined $_[1] ? $_[1] : $_[0]; | ||||
8085 |
3152
|
2016
|
my $old_value= $keep_encoding; | ||||
8086 |
3152
|
1832
|
$keep_encoding= $new_value; | ||||
8087 |
3152
|
2639
|
return $old_value; | ||||
8088 | } | ||||||
8089 | |||||||
8090 | sub set_replaced_ents | ||||||
8091 |
1
|
3
|
{ my $new_value= defined $_[1] ? $_[1] : $_[0]; | ||||
8092 |
1
|
1
|
my $old_value= $replaced_ents; | ||||
8093 |
1
|
1
|
$replaced_ents= $new_value; | ||||
8094 |
1
|
2
|
return $old_value; | ||||
8095 | } | ||||||
8096 | |||||||
8097 | sub do_not_escape_gt | ||||||
8098 |
1
|
1
|
{ my $old_value= $replaced_ents; | ||||
8099 |
1
|
1
|
$replaced_ents= q{&<}; # & needs to be first | ||||
8100 |
1
|
1
|
return $old_value; | ||||
8101 | } | ||||||
8102 | |||||||
8103 | sub escape_gt | ||||||
8104 |
2
|
2
|
{ my $old_value= $replaced_ents; | ||||
8105 |
2
|
1
|
$replaced_ents= qq{&<>}; # & needs to be first | ||||
8106 |
2
|
2
|
return $old_value; | ||||
8107 | } | ||||||
8108 | |||||||
8109 |
4
|
16
|
sub _keep_encoding { return $keep_encoding; } # so I can use elsewhere in the module | ||||
8110 | |||||||
8111 | sub set_do_not_escape_amp_in_atts | ||||||
8112 |
3130
|
3459
|
{ my $new_value= defined $_[1] ? $_[1] : $_[0]; | ||||
8113 |
3130
|
2334
|
my $old_value= $do_not_escape_amp_in_atts; | ||||
8114 |
3130
|
1871
|
$do_not_escape_amp_in_atts= $new_value; | ||||
8115 |
3130
|
2288
|
return $old_value; | ||||
8116 | } | ||||||
8117 | |||||||
8118 |
162
|
215
|
sub output_filter { return $output_filter; } | ||||
8119 |
2
|
2
|
sub output_text_filter { return $output_text_filter; } | ||||
8120 | |||||||
8121 | sub set_output_filter | ||||||
8122 |
3138
|
3197
|
{ my $new_value= defined $_[1] ? $_[1] : $_[0]; # can be called in object/non-object mode | ||||
8123 | # if called in object mode with no argument, the filter is undefined | ||||||
8124 |
3138
1
|
9131
2
|
if( isa( $new_value, 'XML::Twig::Elt') || isa( $new_value, 'XML::Twig')) { undef $new_value; } | ||||
8125 |
3138
|
1855
|
my $old_value= $output_filter; | ||||
8126 |
3138
|
4608
|
if( !$new_value || isa( $new_value, 'CODE') ) | ||||
8127 |
3131
|
1999
|
{ $output_filter= $new_value; } | ||||
8128 | elsif( $new_value eq 'latin1') | ||||||
8129 |
1
|
2
|
{ $output_filter= XML::Twig::latin1(); | ||||
8130 | } | ||||||
8131 | elsif( $XML::Twig::filter{$new_value}) | ||||||
8132 |
5
|
4
|
{ $output_filter= $XML::Twig::filter{$new_value}; } | ||||
8133 | else | ||||||
8134 |
1
|
74
|
{ croak "invalid output filter '$new_value'"; } | ||||
8135 | |||||||
8136 |
3137
|
2712
|
return $old_value; | ||||
8137 | } | ||||||
8138 | |||||||
8139 | sub set_output_text_filter | ||||||
8140 |
3124
|
3107
|
{ my $new_value= defined $_[1] ? $_[1] : $_[0]; # can be called in object/non-object mode | ||||
8141 | # if called in object mode with no argument, the filter is undefined | ||||||
8142 |
3124
1
|
8255
1
|
if( isa( $new_value, 'XML::Twig::Elt') || isa( $new_value, 'XML::Twig')) { undef $new_value; } | ||||
8143 |
3124
|
1840
|
my $old_value= $output_text_filter; | ||||
8144 |
3124
|
4091
|
if( !$new_value || isa( $new_value, 'CODE') ) | ||||
8145 |
3116
|
1922
|
{ $output_text_filter= $new_value; } | ||||
8146 | elsif( $new_value eq 'latin1') | ||||||
8147 |
0
|
0
|
{ $output_text_filter= XML::Twig::latin1(); | ||||
8148 | } | ||||||
8149 | elsif( $XML::Twig::filter{$new_value}) | ||||||
8150 |
7
|
7
|
{ $output_text_filter= $XML::Twig::filter{$new_value}; } | ||||
8151 | else | ||||||
8152 |
1
|
75
|
{ croak "invalid output text filter '$new_value'"; } | ||||
8153 | |||||||
8154 |
3123
|
2363
|
return $old_value; | ||||
8155 | } | ||||||
8156 | |||||||
8157 | sub set_expand_external_entities | ||||||
8158 |
3131
|
3958
|
{ my $new_value= defined $_[1] ? $_[1] : $_[0]; | ||||
8159 |
3131
|
2107
|
my $old_value= $expand_external_entities; | ||||
8160 |
3131
|
2004
|
$expand_external_entities= $new_value; | ||||
8161 |
3131
|
3231
|
return $old_value; | ||||
8162 | } | ||||||
8163 | |||||||
8164 | sub set_keep_atts_order | ||||||
8165 |
3116
|
3102
|
{ my $new_value= defined $_[1] ? $_[1] : $_[0]; | ||||
8166 |
3116
|
1822
|
my $old_value= $keep_atts_order; | ||||
8167 |
3116
|
1848
|
$keep_atts_order= $new_value; | ||||
8168 |
3116
|
2130
|
return $old_value; | ||||
8169 | |||||||
8170 | } | ||||||
8171 | |||||||
8172 |
43759
|
54976
|
sub keep_atts_order { return $keep_atts_order; } # so I can use elsewhere in the module | ||||
8173 | |||||||
8174 |
187
|
179
|
my %html_empty_elt; | ||||
8175 |
187
1870
|
397
573647
|
BEGIN { %html_empty_elt= map { $_ => 1} qw( base meta link hr br param img area input col); } | ||||
8176 | |||||||
8177 | sub start_tag | ||||||
8178 |
5251
|
4063
|
{ my( $elt, $option)= @_; | ||||
8179 | |||||||
8180 | |||||||
8181 |
5251
|
6034
|
return if( $elt->{gi} < $XML::Twig::SPECIAL_GI); | ||||
8182 | |||||||
8183 |
5250
|
8917
|
my $extra_data= $elt->{extra_data} || ''; | ||||
8184 | |||||||
8185 |
5250
|
4469
|
my $gi= $XML::Twig::index2gi[$elt->{'gi'}]; | ||||
8186 |
5250
|
3471
|
my $att= $elt->{att}; # should be $elt->{att}, optimized into a pure hash look-up | ||||
8187 | |||||||
8188 |
5250
|
5371
|
my $ns_map= $att ? $att->{'#original_gi'} : ''; | ||||
8189 |
5250
11
|
5147
12
|
if( $ns_map) { $gi= _restore_original_prefix( $ns_map, $gi); } | ||||
8190 |
5250
|
4005
|
$gi=~ s{^#default:}{}; # remove default prefix | ||||
8191 | |||||||
8192 |
5250
17
|
5405
57
|
if( $output_text_filter) { $gi= $output_text_filter->( $gi); } | ||||
8193 | |||||||
8194 | # get the attribute and their values | ||||||
8195 |
5250
|
168186
|
my $att_sep = $pretty==$NSGMLS ? "\n" | ||||
8196 | : $pretty==$INDENTEDA ? "\n" . $INDENT x ($elt->level+1) . ' ' | ||||||
8197 | : ' ' | ||||||
8198 | ; | ||||||
8199 | |||||||
8200 |
5250
|
4774
|
my $replace_in_att_value= $replaced_ents . "$quote\t\r\n"; | ||||
8201 |
5250
37
|
7453
31
|
if( $option->{escape_gt} && $replaced_ents !~ m{>}) { $replace_in_att_value.= '>'; } | ||||
8202 | |||||||
8203 |
5250
|
2692
|
my $tag; | ||||
8204 |
5250
1338
57
5193
|
4353
3383
136
9479
|
my @att_names= grep { !( $_=~ m{^#(?!default:)} ) } $keep_atts_order ? keys %{$att} : sort keys %{$att}; | ||||
8205 |
5250
|
5570
|
if( @att_names) | ||||
8206 |
787
1319
|
655
1275
|
{ my $atts= join $att_sep, map { my $output_att_name= $ns_map ? _restore_original_prefix( $ns_map, $_) : $_; | ||||
8207 |
1319
|
1382
|
if( $output_text_filter) | ||||
8208 |
4
|
5
|
{ $output_att_name= $output_text_filter->( $output_att_name); } | ||||
8209 |
1319
|
2359
|
$output_att_name . '=' . $quote . _att_xml_string( $att->{$_}, $replace_in_att_value) . $quote | ||||
8210 | |||||||
8211 | } | ||||||
8212 | @att_names | ||||||
8213 | ; | ||||||
8214 |
787
1
|
1183
2
|
if( $pretty==$INDENTEDA && @att_names == 1) { $att_sep= ' '; } | ||||
8215 |
787
|
952
|
$tag= "<$gi$att_sep$atts"; | ||||
8216 | } | ||||||
8217 | else | ||||||
8218 |
4463
|
3522
|
{ $tag= "<$gi"; } | ||||
8219 | |||||||
8220 |
5250
|
5634
|
$tag .= "\n" if($pretty==$NSGMLS); | ||||
8221 | |||||||
8222 | |||||||
8223 | # force empty if suitable HTML tag, otherwise use the value from the input tree | ||||||
8224 |
5250
|
7741
|
if( ($empty_tag_style eq $HTML) && !$elt->{first_child} && !$elt->{extra_data_before_end_tag} && $html_empty_elt{$gi}) | ||||
8225 |
36
|
38
|
{ $elt->{empty}= 1; } | ||||
8226 |
5250
|
6054
|
my $empty= defined $elt->{empty} ? $elt->{empty} | ||||
8227 | : $elt->{first_child} ? 0 | ||||||
8228 | : 1; | ||||||
8229 | |||||||
8230 |
5250
|
13218
|
$tag .= (!$elt->{empty} || $elt->{extra_data_before_end_tag}) ? '>' # element has content | ||||
8231 | : (($empty_tag_style eq $HTML) && $html_empty_elt{$gi}) ? ' />' # html empty element | ||||||
8232 | # cvs-friendly format | ||||||
8233 | : ( $pretty == $INDENTEDA && @att_names > 1) ? "\n" . $INDENT x $elt->level . "/>" | ||||||
8234 | : ( $pretty == $INDENTEDA && @att_names == 1) ? " />" | ||||||
8235 | : $empty_tag_style ? "></" . $XML::Twig::index2gi[$elt->{'gi'}] . ">" # $empty_tag_style is $HTML or $EXPAND | ||||||
8236 | : '/>' | ||||||
8237 | ; | ||||||
8238 | |||||||
8239 |
5250
3
|
10259
3
|
if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )) { $tag= ''; } | ||||
8240 | |||||||
8241 | #warn "TRACE: ", $tag,": ", Encode::is_utf8( $tag) ? "has flag" : "FLAG NOT SET"; | ||||||
8242 | |||||||
8243 |
5250
4726
|
5165
12603
|
unless( $pretty) { return defined( $extra_data) ? $extra_data . $tag : $tag; } | ||||
8244 | |||||||
8245 |
524
|
364
|
my $prefix=''; | ||||
8246 |
524
|
285
|
my $return=''; # '' or \n is to be printed before the tag | ||||
8247 |
524
|
309
|
my $indent=0; # number of indents before the tag | ||||
8248 | |||||||
8249 |
524
|
1140
|
if( $pretty==$RECORD1) | ||||
8250 |
29
|
24
|
{ my $level= $elt->level; | ||||
8251 |
29
|
32
|
$return= "\n" if( $level < 2); | ||||
8252 |
29
|
32
|
$indent= 1 if( $level == 1); | ||||
8253 | } | ||||||
8254 | |||||||
8255 | elsif( $pretty==$RECORD2) | ||||||
8256 |
4
|
2
|
{ $return= "\n"; | ||||
8257 |
4
|
5
|
$indent= $elt->level; | ||||
8258 | } | ||||||
8259 | |||||||
8260 | elsif( $pretty==$NICE) | ||||||
8261 |
3
|
3
|
{ my $parent= $elt->{parent}; | ||||
8262 |
3
|
8
|
unless( !$parent || $parent->{contains_text}) | ||||
8263 |
2
|
1
|
{ $return= "\n"; } | ||||
8264 |
3
|
10
|
$elt->{contains_text}= 1 if( ($parent && $parent->{contains_text}) | ||||
8265 | || $elt->contains_text); | ||||||
8266 | } | ||||||
8267 | |||||||
8268 | elsif( $KEEP_TEXT_TAG_ON_ONE_LINE{$pretty}) | ||||||
8269 |
483
|
466
|
{ my $parent= $elt->{parent}; | ||||
8270 |
483
|
1057
|
unless( !$parent || $parent->{contains_text}) | ||||
8271 |
393
|
235
|
{ $return= "\n"; | ||||
8272 |
393
|
437
|
$indent= $elt->level; | ||||
8273 | } | ||||||
8274 |
483
|
1345
|
$elt->{contains_text}= 1 if( ($parent && $parent->{contains_text}) | ||||
8275 | || $elt->contains_text); | ||||||
8276 | } | ||||||
8277 | |||||||
8278 |
524
|
991
|
if( $return || $indent) | ||||
8279 | { # check for elements in which spaces should be kept | ||||||
8280 |
410
|
442
|
my $t= $elt->twig; | ||||
8281 |
410
|
500
|
return $extra_data . $tag if( $xml_space_preserve); | ||||
8282 |
410
|
885
|
if( $t && $t->{twig_keep_spaces_in}) | ||||
8283 |
3
|
5
|
{ foreach my $ancestor ($elt->ancestors) | ||||
8284 |
3
|
8
|
{ return $extra_data . $tag if( $t->{twig_keep_spaces_in}->{$XML::Twig::index2gi[$ancestor->{'gi'}]}) } | ||||
8285 | } | ||||||
8286 | |||||||
8287 |
410
|
373
|
$prefix= $INDENT x $indent; | ||||
8288 |
410
|
457
|
if( $extra_data) | ||||
8289 |
6
|
31
|
{ $extra_data=~ s{\s+$}{}; | ||||
8290 |
6
|
11
|
$extra_data=~ s{^\s+}{}; | ||||
8291 |
6
|
12
|
$extra_data= $prefix . $extra_data . $return; | ||||
8292 | } | ||||||
8293 | } | ||||||
8294 | |||||||
8295 | |||||||
8296 |
524
|
1270
|
return $return . $extra_data . $prefix . $tag; | ||||
8297 | } | ||||||
8298 | |||||||
8299 | sub end_tag | ||||||
8300 |
5200
|
3300
|
{ my $elt= shift; | ||||
8301 |
5200
|
13270
|
return '' if( ($elt->{gi}<$XML::Twig::SPECIAL_GI) | ||||
8302 | || ($elt->{'empty'} && !$elt->{extra_data_before_end_tag}) | ||||||
8303 | ); | ||||||
8304 |
4048
|
2600
|
my $tag= "<"; | ||||
8305 |
4048
|
3869
|
my $gi= $XML::Twig::index2gi[$elt->{'gi'}]; | ||||
8306 | |||||||
8307 |
4048
11
|
5117
12
|
if( my $map= $elt->{'att'}->{'#original_gi'}) { $gi= _restore_original_prefix( $map, $gi); } | ||||
8308 |
4048
|
2628
|
$gi=~ s{^#default:}{}; # remove default prefix | ||||
8309 | |||||||
8310 |
4048
16
|
3786
53
|
if( $output_text_filter) { $gi= $output_text_filter->( $XML::Twig::index2gi[$elt->{'gi'}]); } | ||||
8311 |
4048
|
4544
|
$tag .= "/$gi>"; | ||||
8312 | |||||||
8313 |
4048
|
8776
|
$tag = ($elt->{extra_data_before_end_tag} || '') . $tag; | ||||
8314 | |||||||
8315 |
4048
3
|
7136
2
|
if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )) { $tag= ''; } | ||||
8316 | |||||||
8317 |
4048
|
7525
|
return $tag unless $pretty; | ||||
8318 | |||||||
8319 |
420
|
278
|
my $prefix=''; | ||||
8320 |
420
|
237
|
my $return=0; # 1 if a \n is to be printed before the tag | ||||
8321 |
420
|
237
|
my $indent=0; # number of indents before the tag | ||||
8322 | |||||||
8323 |
420
|
2036
|
if( $pretty==$RECORD1) | ||||
8324 |
29
|
20
|
{ $return= 1 if( $elt->level == 0); | ||||
8325 | } | ||||||
8326 | |||||||
8327 | elsif( $pretty==$RECORD2) | ||||||
8328 |
4
|
5
|
{ unless( $elt->contains_text) | ||||
8329 |
2
|
3
|
{ $return= 1 ; | ||||
8330 |
2
|
2
|
$indent= $elt->level; | ||||
8331 | } | ||||||
8332 | } | ||||||
8333 | |||||||
8334 | elsif( $pretty==$NICE) | ||||||
8335 |
3
|
4
|
{ my $parent= $elt->{parent}; | ||||
8336 |
3
|
19
|
if( ( ($parent && !$parent->{contains_text}) || !$parent ) | ||||
8337 | && ( !$elt->{contains_text} | ||||||
8338 | && ($elt->{has_flushed_child} || $elt->{first_child}) | ||||||
8339 | ) | ||||||
8340 | ) | ||||||
8341 |
1
|
1
|
{ $return= 1; } | ||||
8342 | } | ||||||
8343 | |||||||
8344 | elsif( $KEEP_TEXT_TAG_ON_ONE_LINE{$pretty}) | ||||||
8345 |
380
|
258
|
{ my $parent= $elt->{parent}; | ||||
8346 |
380
|
6471
|
if( ( ($parent && !$parent->{contains_text}) || !$parent ) | ||||
8347 | && ( !$elt->{contains_text} | ||||||
8348 | && ($elt->{has_flushed_child} || $elt->{first_child}) | ||||||
8349 | ) | ||||||
8350 | ) | ||||||
8351 |
141
|
87
|
{ $return= 1; | ||||
8352 |
141
|
168
|
$indent= $elt->level; | ||||
8353 | } | ||||||
8354 | } | ||||||
8355 | |||||||
8356 |
420
|
892
|
if( $return || $indent) | ||||
8357 | { # check for elements in which spaces should be kept | ||||||
8358 |
146
|
193
|
my $t= $elt->twig; | ||||
8359 |
146
|
177
|
return $tag if( $xml_space_preserve); | ||||
8360 |
146
|
383
|
if( $t && $t->{twig_keep_spaces_in}) | ||||
8361 |
1
|
1
|
{ foreach my $ancestor ($elt, $elt->ancestors) | ||||
8362 |
1
|
3
|
{ return $tag if( $t->{twig_keep_spaces_in}->{$XML::Twig::index2gi[$ancestor->{'gi'}]}) } | ||||
8363 | } | ||||||
8364 | |||||||
8365 |
146
146
|
178
197
|
if( $return) { $prefix= ($pretty== $INDENTEDCT) ? "\n$INDENT" : "\n"; } | ||||
8366 |
146
|
151
|
$prefix.= $INDENT x $indent; | ||||
8367 | } | ||||||
8368 | |||||||
8369 | # add a \n at the end of the document (after the root element) | ||||||
8370 |
420
|
488
|
$tag .= "\n" unless( $elt->{parent}); | ||||
8371 | |||||||
8372 |
420
|
654
|
return $prefix . $tag; | ||||
8373 | } | ||||||
8374 | |||||||
8375 | sub _restore_original_prefix | ||||||
8376 |
33
|
24
|
{ my( $map, $name)= @_; | ||||
8377 |
33
|
25
|
my $prefix= _ns_prefix( $name); | ||||
8378 |
33
|
49
|
if( my $original_prefix= $map->{$prefix}) | ||||
8379 |
26
|
28
|
{ if( $original_prefix eq '#default') | ||||
8380 |
8
|
28
|
{ $name=~ s{^$prefix:}{}; } | ||||
8381 | else | ||||||
8382 |
18
|
114
|
{ $name=~ s{^$prefix(?=:)}{$original_prefix}; } | ||||
8383 | } | ||||||
8384 |
33
|
43
|
return $name; | ||||
8385 | } | ||||||
8386 | |||||||
8387 | # buffer used to hold the text to print/sprint, to avoid passing it back and forth between methods | ||||||
8388 |
187
|
166
|
my @sprint; | ||||
8389 | |||||||
8390 | # $elt is an element to print | ||||||
8391 | # $fh is an optional filehandle to print to | ||||||
8392 | # $pretty is an optional value, if true a \n is printed after the < of the | ||||||
8393 | # opening tag | ||||||
8394 | sub print | ||||||
8395 |
112
|
145
|
{ my $elt= shift; | ||||
8396 | |||||||
8397 |
112
|
530
|
my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef; | ||||
8398 |
112
|
208
|
my $old_select= defined $fh ? select $fh : undef; | ||||
8399 |
112
|
235
|
print $elt->sprint( @_); | ||||
8400 |
112
|
508
|
select $old_select if( defined $old_select); | ||||
8401 | } | ||||||
8402 | |||||||
8403 | |||||||
8404 | # those next 2 methods need to be refactored, they are copies of the same methods in XML::Twig | ||||||
8405 | sub print_to_file | ||||||
8406 |
2
|
2
|
{ my( $elt, $filename)= (shift, shift); | ||||
8407 |
2
|
2
|
my $out_fh; | ||||
8408 | # open( $out_fh, ">$filename") or _croak( "cannot create file $filename: $!"); # < perl 5.8 | ||||||
8409 |
2
|
6
|
my $mode= $keep_encoding ? '>' : '>:utf8'; # >= perl 5.8 | ||||
8410 |
2
|
47
|
open( $out_fh, $mode, $filename) or _croak( "cannot create file $filename: $!"); # >= perl 5.8 | ||||
8411 |
2
|
8
|
$elt->print( $out_fh, @_); | ||||
8412 |
2
|
82
|
close $out_fh; | ||||
8413 |
2
|
12
|
return $elt; | ||||
8414 | } | ||||||
8415 | |||||||
8416 | # probably only works on *nix (at least the chmod bit) | ||||||
8417 | # first print to a temporary file, then rename that file to the desired file name, then change permissions | ||||||
8418 | # to the original file permissions (or to the current umask) | ||||||
8419 | sub safe_print_to_file | ||||||
8420 |
1
|
2
|
{ my( $elt, $filename)= (shift, shift); | ||||
8421 |
1
|
9
|
my $perm= -f $filename ? (stat $filename)[2] & 07777 : ~umask() ; | ||||
8422 |
1
|
2
|
XML::Twig::_use( 'File::Temp') || croak "need File::Temp to use safe_print_to_file\n"; | ||||
8423 |
1
|
2
|
XML::Twig::_use( 'File::Basename') || croak "need File::Basename to use safe_print_to_file\n"; | ||||
8424 |
1
|
107
|
my $tmpdir= File::Basename::dirname( $filename); | ||||
8425 |
1
|
7
|
my( $fh, $tmpfilename) = File::Temp::tempfile( DIR => $tmpdir); | ||||
8426 |
1
|
263
|
$elt->print_to_file( $tmpfilename, @_); | ||||
8427 |
1
|
16
|
rename( $tmpfilename, $filename) or unlink $tmpfilename && _croak( "cannot move temporary file to $filename: $!"); | ||||
8428 |
1
|
4
|
chmod $perm, $filename; | ||||
8429 |
1
|
6
|
return $elt; | ||||
8430 | } | ||||||
8431 | |||||||
8432 | |||||||
8433 | # same as print but does not output the start tag if the element | ||||||
8434 | # is marked as flushed | ||||||
8435 | sub flush | ||||||
8436 |
18
|
42
|
{ my $elt= shift; | ||||
8437 |
18
|
84
|
my $up_to= $_[0] && isa( $_[0], 'XML::Twig::Elt') ? shift : $elt; | ||||
8438 |
18
|
75
|
$elt->twig->flush_up_to( $up_to, @_); | ||||
8439 | } | ||||||
8440 | sub purge | ||||||
8441 |
4
|
5
|
{ my $elt= shift; | ||||
8442 |
4
|
10
|
my $up_to= $_[0] && isa( $_[0], 'XML::Twig::Elt') ? shift : $elt; | ||||
8443 |
4
|
5
|
$elt->twig->purge_up_to( $up_to, @_); | ||||
8444 | } | ||||||
8445 | |||||||
8446 | sub _flush | ||||||
8447 |
2130
|
1366
|
{ my $elt= shift; | ||||
8448 | |||||||
8449 |
2130
|
1132
|
my $pretty; | ||||
8450 |
2130
|
8432
|
my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef; | ||||
8451 |
2130
|
2200
|
my $old_select= defined $fh ? select $fh : undef; | ||||
8452 |
2130
|
2383
|
my $old_pretty= defined ($pretty= shift) ? set_pretty_print( $pretty) : undef; | ||||
8453 | |||||||
8454 |
2130
|
2300
|
$xml_space_preserve= 1 if( ($elt->inherit_att( 'xml:space') || '') eq 'preserve'); | ||||
8455 | |||||||
8456 |
2130
|
2434
|
$elt->__flush(); | ||||
8457 | |||||||
8458 |
2130
|
4950
|
$xml_space_preserve= 0; | ||||
8459 | |||||||
8460 |
2130
|
2112
|
select $old_select if( defined $old_select); | ||||
8461 |
2130
|
2652
|
set_pretty_print( $old_pretty) if( defined $old_pretty); | ||||
8462 | } | ||||||
8463 | |||||||
8464 | sub __flush | ||||||
8465 |
2130
|
1291
|
{ my $elt= shift; | ||||
8466 | |||||||
8467 |
2130
|
2415
|
if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) | ||||
8468 |
1477
|
2842
|
{ my $preserve= ($elt->{'att'}->{'xml:space'} || '') eq 'preserve'; | ||||
8469 |
1477
|
1471
|
$xml_space_preserve++ if $preserve; | ||||
8470 |
1477
|
1319
|
unless( $elt->_flushed) | ||||
8471 |
1383
|
1312
|
{ print $elt->start_tag(); | ||||
8472 | } | ||||||
8473 | |||||||
8474 | # flush the children | ||||||
8475 |
1477
1477
1477
1477
1477
956
956
1477
|
10951
982
1179
1019
1644
730
1180
1589
|
my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; | ||||
8476 |
1477
|
1565
|
foreach my $child (@children) | ||||
8477 |
956
|
1076
|
{ $child->_flush( $pretty); } | ||||
8478 |
1477
1477
|
2640
2649
|
unless( $elt->{end_tag_flushed}) { print $elt->end_tag; } | ||||
8479 |
1477
|
10525
|
$xml_space_preserve-- if $preserve; | ||||
8480 | # used for pretty printing | ||||||
8481 |
1477
575
|
2131
765
|
if( my $parent= $elt->{parent}) { $parent->{has_flushed_child}= 1; } | ||||
8482 | } | ||||||
8483 | else # text or special element | ||||||
8484 |
653
|
413
|
{ my $text; | ||||
8485 |
653
500
|
941
572
|
if( (exists $elt->{'pcdata'})) { $text= $elt->pcdata_xml_string; | ||||
8486 |
500
|
682
|
if( my $parent= $elt->{parent}) | ||||
8487 |
500
|
522
|
{ $parent->{contains_text}= 1; } | ||||
8488 | } | ||||||
8489 |
5
|
12
|
elsif( (exists $elt->{'cdata'})) { $text= $elt->cdata_string; | ||||
8490 |
5
|
12
|
if( my $parent= $elt->{parent}) | ||||
8491 |
5
|
8
|
{ $parent->{contains_text}= 1; } | ||||
8492 | } | ||||||
8493 |
2
|
5
|
elsif( (exists $elt->{'target'})) { $text= $elt->pi_string; } | ||||
8494 |
146
|
169
|
elsif( (exists $elt->{'comment'})) { $text= $elt->comment_string; } | ||||
8495 |
0
|
0
|
elsif( (exists $elt->{'ent'})) { $text= $elt->ent_string; } | ||||
8496 | |||||||
8497 |
653
|
1490
|
print $output_filter ? $output_filter->( $text) : $text; | ||||
8498 | } | ||||||
8499 | } | ||||||
8500 | |||||||
8501 | |||||||
8502 | sub xml_text | ||||||
8503 |
26
|
33
|
{ my( $elt, @options)= @_; | ||||
8504 | |||||||
8505 |
26
1
1
|
42
4
2
|
if( @options && grep { lc( $_) eq 'no_recurse' } @options) { return $elt->xml_text_only; } | ||||
8506 | |||||||
8507 |
25
|
19
|
my $string=''; | ||||
8508 | |||||||
8509 |
25
|
43
|
if( ($elt->{gi} >= $XML::Twig::SPECIAL_GI) ) | ||||
8510 | { # sprint the children | ||||||
8511 |
9
|
16
|
my $child= $elt->{first_child} || ''; | ||||
8512 |
9
|
12
|
while( $child) | ||||
8513 |
14
|
23
|
{ $string.= $child->xml_text; | ||||
8514 |
14
|
23
|
} continue { $child= $child->{next_sibling}; } | ||||
8515 | } | ||||||
8516 |
13
|
21
|
elsif( (exists $elt->{'pcdata'})) { $string .= $output_filter ? $output_filter->($elt->pcdata_xml_string) | ||||
8517 | : $elt->pcdata_xml_string; | ||||||
8518 | } | ||||||
8519 |
1
|
3
|
elsif( (exists $elt->{'cdata'})) { $string .= $output_filter ? $output_filter->($elt->cdata_string) | ||||
8520 | : $elt->cdata_string; | ||||||
8521 | } | ||||||
8522 |
2
|
3
|
elsif( (exists $elt->{'ent'})) { $string .= $elt->ent_string; } | ||||
8523 | |||||||
8524 |
25
|
56
|
return $string; | ||||
8525 | } | ||||||
8526 | |||||||
8527 | sub xml_text_only | ||||||
8528 |
1
5
|
2
5
|
{ return join '', map { $_->xml_text if( $_->is_text || (exists $_->{'ent'})) } $_[0]->_children; } | ||||
8529 | |||||||
8530 | # same as print but except... it does not print but rather returns the string | ||||||
8531 | # if the second parameter is set then only the content is returned, not the | ||||||
8532 | # start and end tags of the element (but the tags of the included elements are | ||||||
8533 | # returned) | ||||||
8534 | |||||||
8535 | sub sprint | ||||||
8536 |
3401
|
2595
|
{ my $elt= shift; | ||||
8537 |
3401
|
1916
|
my( $old_pretty, $old_empty_tag_style); | ||||
8538 | |||||||
8539 |
3401
|
6403
|
if( $_[0] && isa( $_[0], 'HASH')) | ||||
8540 |
7
7
|
8
16
|
{ my %args= XML::Twig::_normalize_args( %{shift()}); | ||||
8541 |
7
4
|
12
7
|
if( defined $args{PrettyPrint}) { $old_pretty = set_pretty_print( $args{PrettyPrint}); } | ||||
8542 |
7
3
|
11
4
|
if( defined $args{EmptyTags}) { $old_empty_tag_style = set_empty_tag_style( $args{EmptyTags}); } | ||||
8543 | } | ||||||
8544 | |||||||
8545 |
3401
|
3821
|
$xml_space_preserve= 1 if( ($elt->inherit_att( 'xml:space') || '') eq 'preserve'); | ||||
8546 | |||||||
8547 |
3401
|
4029
|
@sprint=(); | ||||
8548 |
3401
|
3901
|
$elt->_sprint( @_); | ||||
8549 |
3401
|
4178
|
my $sprint= join( '', @sprint); | ||||
8550 |
3401
23
|
3415
289
|
if( $output_filter) { $sprint= $output_filter->( $sprint); } | ||||
8551 | |||||||
8552 |
3401
|
8381
|
if( ( ($pretty== $WRAPPED) || ($pretty==$INDENTEDC)) && !$xml_space_preserve) | ||||
8553 |
6
|
9
|
{ $sprint= _wrap_text( $sprint); } | ||||
8554 |
3401
|
2242
|
$xml_space_preserve= 0; | ||||
8555 | |||||||
8556 | |||||||
8557 |
3401
4
|
3350
5
|
if( defined $old_pretty) { set_pretty_print( $old_pretty); } | ||||
8558 |
3401
3
|
3123
3
|
if( defined $old_empty_tag_style) { set_empty_tag_style( $old_empty_tag_style); } | ||||
8559 | |||||||
8560 |
3401
|
8494
|
return $sprint; | ||||
8561 | } | ||||||
8562 | |||||||
8563 | sub _wrap_text | ||||||
8564 |
6
|
9
|
{ my( $string)= @_; | ||||
8565 |
6
|
6
|
my $wrapped; | ||||
8566 |
6
|
45
|
foreach my $line (split /\n/, $string) | ||||
8567 |
22
|
57
|
{ my( $initial_indent)= $line=~ m{^(\s*)}; | ||||
8568 |
22
|
52
|
my $wrapped_line= Text::Wrap::wrap( '', $initial_indent . $INDENT, $line) . "\n"; | ||||
8569 | |||||||
8570 | # fix glitch with Text::wrap when the first line is long and does not include spaces | ||||||
8571 | # the first line ends up being too short by 2 chars, but we'll have to live with it! | ||||||
8572 |
22
|
71249
|
$wrapped_line=~ s{^ +\n }{}s; # this prefix needs to be removed | ||||
8573 | |||||||
8574 |
22
|
44
|
$wrapped .= $wrapped_line; | ||||
8575 | } | ||||||
8576 | |||||||
8577 |
6
|
12
|
return $wrapped; | ||||
8578 | } | ||||||
8579 | |||||||
8580 | |||||||
8581 | sub _sprint | ||||||
8582 |
10272
|
5939
|
{ my $elt= shift; | ||||
8583 |
10272
|
15575
|
my $no_tag= shift || 0; | ||||
8584 | # in case there's some comments or PI's piggybacking | ||||||
8585 | |||||||
8586 |
10272
|
9843
|
if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) | ||||
8587 | { | ||||||
8588 |
5284
|
9822
|
my $preserve= ($elt->{'att'}->{'xml:space'} || '') eq 'preserve'; | ||||
8589 |
5284
|
5563
|
$xml_space_preserve++ if $preserve; | ||||
8590 | |||||||
8591 |
5284
|
7432
|
push @sprint, $elt->start_tag unless( $no_tag); | ||||
8592 | |||||||
8593 | # sprint the children | ||||||
8594 |
5284
|
4158
|
my $child= $elt->{first_child}; | ||||
8595 |
5284
|
5634
|
while( $child) | ||||
8596 |
6871
|
6734
|
{ $child->_sprint; | ||||
8597 |
6871
|
8339
|
$child= $child->{next_sibling}; | ||||
8598 | } | ||||||
8599 |
5284
|
7014
|
push @sprint, $elt->end_tag unless( $no_tag); | ||||
8600 |
5284
|
6099
|
$xml_space_preserve-- if $preserve; | ||||
8601 | } | ||||||
8602 | else | ||||||
8603 |
4988
|
5441
|
{ push @sprint, $elt->{extra_data} if( $elt->{extra_data}) ; | ||||
8604 |
4988
1779
|
7695
1890
|
if( (exists $elt->{'pcdata'})) { push @sprint, $elt->pcdata_xml_string; } | ||||
8605 |
57
|
78
|
elsif( (exists $elt->{'cdata'})) { push @sprint, $elt->cdata_string; } | ||||
8606 |
1483
1
|
1760
2
|
elsif( (exists $elt->{'target'})) { if( ($pretty >= $INDENTED) && !$elt->{parent}->{contains_text}) { push @sprint, "\n" . $INDENT x $elt->level; } | ||||
8607 |
1483
|
1606
|
push @sprint, $elt->pi_string; | ||||
8608 | } | ||||||
8609 |
1619
1
|
1956
2
|
elsif( (exists $elt->{'comment'})) { if( ($pretty >= $INDENTED) && !$elt->{parent}->{contains_text}) { push @sprint, "\n" . $INDENT x $elt->level; } | ||||
8610 |
1619
|
1573
|
push @sprint, $elt->comment_string; | ||||
8611 | } | ||||||
8612 |
50
|
61
|
elsif( (exists $elt->{'ent'})) { push @sprint, $elt->ent_string; } | ||||
8613 | } | ||||||
8614 | |||||||
8615 |
10272
|
7002
|
return; | ||||
8616 | } | ||||||
8617 | |||||||
8618 | # just a shortcut to $elt->sprint( 1) | ||||||
8619 | sub xml_string | ||||||
8620 |
9
|
11
|
{ my $elt= shift; | ||||
8621 |
9
|
31
|
isa( $_[0], 'HASH') ? $elt->sprint( shift(), 1) : $elt->sprint( 1); | ||||
8622 | } | ||||||
8623 | |||||||
8624 | sub pcdata_xml_string | ||||||
8625 |
2293
|
1446
|
{ my $elt= shift; | ||||
8626 |
2293
|
2618
|
if( defined( my $string= $elt->{pcdata}) ) | ||||
8627 | { | ||||||
8628 |
2292
|
2114
|
if( ! $elt->{extra_data_in_pcdata}) | ||||
8629 | { | ||||||
8630 |
2207
|
10095
|
$string=~ s/([$replaced_ents])/$XML::Twig::base_ent{$1}/g unless( !$replaced_ents || $keep_encoding || $elt->{asis}); | ||||
8631 |
2207
|
2482
|
$string=~ s{\Q]]>}{]]>}g; | ||||
8632 | } | ||||||
8633 | else | ||||||
8634 |
85
|
108
|
{ _gen_mark( $string); # used by _(un)?protect_extra_data | ||||
8635 |
85
85
|
160
109
|
foreach my $data (reverse @{$elt->{extra_data_in_pcdata}}) | ||||
8636 |
122
|
163
|
{ my $substr= substr( $string, $data->{offset}); | ||||
8637 |
122
|
261
|
if( $keep_encoding || $elt->{asis}) | ||||
8638 |
30
|
43
|
{ substr( $string, $data->{offset}, 0, $data->{text}); } | ||||
8639 | else | ||||||
8640 |
92
|
113
|
{ substr( $string, $data->{offset}, 0, _protect_extra_data( $data->{text})); } | ||||
8641 | } | ||||||
8642 |
85
|
210
|
unless( $keep_encoding || $elt->{asis}) | ||||
8643 | { | ||||||
8644 |
65
|
256
|
$string=~ s{([$replaced_ents])}{$XML::Twig::base_ent{$1}}g ; | ||||
8645 |
65
|
92
|
$string=~ s{\Q]]>}{]]>}g; | ||||
8646 |
65
|
68
|
_unprotect_extra_data( $string); | ||||
8647 | } | ||||||
8648 | } | ||||||
8649 |
2292
|
3921
|
return $output_text_filter ? $output_text_filter->( $string) : $string; | ||||
8650 | } | ||||||
8651 | else | ||||||
8652 |
1
|
4
|
{ return ''; } | ||||
8653 | } | ||||||
8654 | |||||||
8655 |
187
187
|
176
167
|
{ my $mark; | ||||
8656 |
187
|
899513
|
my( %char2ent, %ent2char); | ||||
8657 | BEGIN | ||||||
8658 |
187
|
657
|
{ %char2ent= ( '<' => 'lt', '&' => 'amp', '>' => 'gt'); | ||||
8659 |
187
561
|
493
213660
|
%ent2char= map { $char2ent{$_} => $_ } keys %char2ent; | ||||
8660 | } | ||||||
8661 | |||||||
8662 | # generate a unique mark (a string) not found in the string, | ||||||
8663 | # used to mark < and & in the extra data | ||||||
8664 | sub _gen_mark | ||||||
8665 |
85
|
54
|
{ $mark="AAAA"; | ||||
8666 |
85
|
175
|
$mark++ while( index( $_[0], $mark) > -1); | ||||
8667 |
85
|
67
|
return $mark; | ||||
8668 | } | ||||||
8669 | |||||||
8670 | sub _protect_extra_data | ||||||
8671 |
92
|
78
|
{ my( $extra_data)= @_; | ||||
8672 |
92
|
642
|
$extra_data=~ s{([<&>])}{:$mark:$char2ent{$1}:}g; | ||||
8673 |
92
|
268
|
return $extra_data; | ||||
8674 | } | ||||||
8675 | |||||||
8676 | sub _unprotect_extra_data | ||||||
8677 |
65
|
656
|
{ $_[0]=~ s{:$mark:(\w+):}{$ent2char{$1}}g; } | ||||
8678 | |||||||
8679 | } | ||||||
8680 | |||||||
8681 | sub cdata_string | ||||||
8682 |
65
|
77
|
{ my $cdata= $_[0]->{cdata}; | ||||
8683 |
65
1
|
83
3
|
unless( defined $cdata) { return ''; } | ||||
8684 |
64
|
75
|
if( $remove_cdata) | ||||
8685 |
1
|
16
|
{ $cdata=~ s/([$replaced_ents])/$XML::Twig::base_ent{$1}/g; } | ||||
8686 | else | ||||||
8687 |
63
|
94
|
{ $cdata= $CDATA_START . $cdata . $CDATA_END; } | ||||
8688 |
64
|
93
|
return $cdata; | ||||
8689 | } | ||||||
8690 | |||||||
8691 | sub att_xml_string | ||||||
8692 |
1
|
1
|
{ my $elt= shift; | ||||
8693 |
1
|
2
|
my $att= shift; | ||||
8694 | |||||||
8695 |
1
|
3
|
my $replace= $replaced_ents . "$quote\n\r\t"; | ||||
8696 |
1
0
|
5
0
|
if($_[0] && $_[0]->{escape_gt} && ($replace!~ m{>}) ) { $replace .='>'; } | ||||
8697 | |||||||
8698 |
1
|
3
|
if( defined (my $string= $elt->{att}->{$att})) | ||||
8699 |
0
|
0
|
{ return _att_xml_string( $string, $replace); } | ||||
8700 | else | ||||||
8701 |
1
|
5
|
{ return ''; } | ||||
8702 | } | ||||||
8703 | |||||||
8704 | # escaped xml string for an attribute value | ||||||
8705 | sub _att_xml_string | ||||||
8706 |
1319
|
1258
|
{ my( $string, $escape)= @_; | ||||
8707 |
1319
1
|
1978
3
|
if( !defined( $string)) { return ''; } | ||||
8708 |
1318
|
1073
|
if( $keep_encoding) | ||||
8709 |
181
|
220
|
{ $string=~ s{$quote}{$XML::Twig::base_ent{$quote}}g; | ||||
8710 | } | ||||||
8711 | else | ||||||
8712 | { | ||||||
8713 |
1137
|
856
|
if( $do_not_escape_amp_in_atts) | ||||
8714 |
3
|
4
|
{ $escape=~ s{^.}{}; # seems like the most backward compatible way to remove & from the list | ||||
8715 |
3
|
14
|
$string=~ s{([$escape])}{$XML::Twig::base_ent{$1}}g; | ||||
8716 |
3
|
7
|
$string=~ s{&(?!(\w+|#\d+|[xX][0-9a-fA-F]+);)}{&}g; # dodgy: escape & that do not start an entity | ||||
8717 | } | ||||||
8718 | else | ||||||
8719 |
1134
|
2872
|
{ $string=~ s{([$escape])}{$XML::Twig::base_ent{$1}}g; | ||||
8720 |
1134
|
1212
|
$string=~ s{\Q]]>}{]]>}g; | ||||
8721 | } | ||||||
8722 | } | ||||||
8723 | |||||||
8724 |
1318
|
3299
|
return $output_text_filter ? $output_text_filter->( $string) : $string; | ||||
8725 | } | ||||||
8726 | |||||||
8727 | sub ent_string | ||||||
8728 |
52
|
34
|
{ my $ent= shift; | ||||
8729 |
52
|
46
|
my $ent_text= $ent->{ent}; | ||||
8730 |
52
|
35
|
my( $t, $el, $ent_string); | ||||
8731 |
52
|
99
|
if( $expand_external_entities | ||||
8732 | && ($t= $ent->twig) | ||||||
8733 | && ($el= $t->entity_list) | ||||||
8734 | && ($ent_string= $el->{entities}->{$ent->ent_name}->{val}) | ||||||
8735 | ) | ||||||
8736 |
9
|
15
|
{ return $ent_string; } | ||||
8737 | else | ||||||
8738 |
43
|
58
|
{ return $ent_text; } | ||||
8739 | } | ||||||
8740 | |||||||
8741 | # returns just the text, no tags, for an element | ||||||
8742 | sub text | ||||||
8743 |
3157
|
4458
|
{ my( $elt, @options)= @_; | ||||
8744 | |||||||
8745 |
3157
2
2
|
3810
8
3
|
if( @options && grep { lc( $_) eq 'no_recurse' } @options) { return $elt->text_only; } | ||||
8746 | |||||||
8747 |
3155
|
1880
|
my $string; | ||||
8748 | |||||||
8749 |
3155
1777
|
5614
3337
|
if( (exists $elt->{'pcdata'})) { return $elt->{pcdata}; } | ||||
8750 |
12
|
31
|
elsif( (exists $elt->{'cdata'})) { return $elt->{cdata}; } | ||||
8751 |
2
|
3
|
elsif( (exists $elt->{'target'})) { return $elt->pi_string;} | ||||
8752 |
2
|
6
|
elsif( (exists $elt->{'comment'})) { return $elt->{comment}; } | ||||
8753 |
2
|
4
|
elsif( (exists $elt->{'ent'})) { return $elt->{ent} ; } | ||||
8754 | |||||||
8755 |
1360
|
1630
|
my $child= $elt->{first_child} ||''; | ||||
8756 |
1360
|
1506
|
while( $child) | ||||
8757 | { | ||||||
8758 |
1451
|
1435
|
my $child_text= $child->text; | ||||
8759 |
1451
|
1947
|
$string.= defined( $child_text) ? $child_text : ''; | ||||
8760 |
1451
|
1880
|
} continue { $child= $child->{next_sibling}; } | ||||
8761 | |||||||
8762 |
1360
26
|
1420
22
|
unless( defined $string) { $string=''; } | ||||
8763 | |||||||
8764 |
1360
|
6978
|
return $output_text_filter ? $output_text_filter->( $string) : $string; | ||||
8765 | } | ||||||
8766 | |||||||
8767 | sub text_only | ||||||
8768 |
4
12
|
10
13
|
{ return join '', map { $_->text if( $_->is_text || (exists $_->{'ent'})) } $_[0]->_children; } | ||||
8769 | |||||||
8770 | sub trimmed_text | ||||||
8771 |
14
|
12
|
{ my $elt= shift; | ||||
8772 |
14
|
20
|
my $text= $elt->text( @_); | ||||
8773 |
14
|
57
|
$text=~ s{\s+}{ }sg; | ||||
8774 |
14
|
22
|
$text=~ s{^\s*}{}; | ||||
8775 |
14
|
48
|
$text=~ s{\s*$}{}; | ||||
8776 |
14
|
42
|
return $text; | ||||
8777 | } | ||||||
8778 | |||||||
8779 | sub trim | ||||||
8780 |
18
|
14
|
{ my( $elt)= @_; | ||||
8781 |
18
|
19
|
my $pcdata= $elt->first_descendant( $TEXT); | ||||
8782 |
18
|
21
|
(my $pcdata_text= $pcdata->text)=~ s{^\s+}{}s; | ||||
8783 |
18
|
25
|
$pcdata->set_text( $pcdata_text); | ||||
8784 |
18
|
17
|
$pcdata= $elt->last_descendant( $TEXT); | ||||
8785 |
18
|
22
|
($pcdata_text= $pcdata->text)=~ s{\s+$}{}; | ||||
8786 |
18
|
19
|
$pcdata->set_text( $pcdata_text); | ||||
8787 |
18
|
19
|
foreach my $pcdata ($elt->descendants( $TEXT)) | ||||
8788 |
31
|
31
|
{ ($pcdata_text= $pcdata->text)=~ s{\s+}{ }g; | ||||
8789 |
31
|
37
|
$pcdata->set_text( $pcdata_text); | ||||
8790 | } | ||||||
8791 |
18
|
33
|
return $elt; | ||||
8792 | } | ||||||
8793 | |||||||
8794 | |||||||
8795 | # remove cdata sections (turns them into regular pcdata) in an element | ||||||
8796 | sub remove_cdata | ||||||
8797 |
2
|
5
|
{ my $elt= shift; | ||||
8798 |
2
|
5
|
foreach my $cdata ($elt->descendants_or_self( $CDATA)) | ||||
8799 |
2
|
3
|
{ if( $keep_encoding) | ||||
8800 |
1
|
2
|
{ my $data= $cdata->{cdata}; | ||||
8801 |
1
|
6
|
$data=~ s{([&<"'])}{$XML::Twig::base_ent{$1}}g; | ||||
8802 |
1
|
6
|
$cdata->{pcdata}= (delete $cdata->{empty} || 1) && $data; | ||||
8803 | } | ||||||
8804 | else | ||||||
8805 |
1
|
5
|
{ $cdata->{pcdata}= (delete $cdata->{empty} || 1) && $cdata->{cdata}; } | ||||
8806 |
2
|
4
|
$cdata->{gi}=$XML::Twig::gi2index{$PCDATA} or $cdata->set_gi( $PCDATA); | ||||
8807 |
2
|
6
|
undef $cdata->{cdata}; | ||||
8808 | } | ||||||
8809 | } | ||||||
8810 | |||||||
8811 |
2
|
10
|
sub _is_private { return _is_private_name( $_[0]->gi); } | ||||
8812 |
4
|
15
|
sub _is_private_name { return $_[0]=~ m{^#(?!default:)}; } | ||||
8813 | |||||||
8814 | |||||||
8815 | } # end of block containing package globals ($pretty_print, $quotes, keep_encoding...) | ||||||
8816 | |||||||
8817 | # merges consecutive #PCDATAs in am element | ||||||
8818 | sub normalize | ||||||
8819 |
35
|
25
|
{ my( $elt)= @_; | ||||
8820 |
35
|
51
|
my @descendants= $elt->descendants( $PCDATA); | ||||
8821 |
35
|
63
|
while( my $desc= shift @descendants) | ||||
8822 |
149
18
18
|
200
21
53
|
{ if( ! length $desc->{pcdata}) { $desc->delete; next; } | ||||
8823 |
131
|
527
|
while( @descendants && $desc->{next_sibling} && $desc->{next_sibling}== $descendants[0]) | ||||
8824 |
15
|
17
|
{ my $to_merge= shift @descendants; | ||||
8825 |
15
|
22
|
$desc->merge_text( $to_merge); | ||||
8826 | } | ||||||
8827 | } | ||||||
8828 |
35
|
57
|
return $elt; | ||||
8829 | } | ||||||
8830 | |||||||
8831 | # SAX export methods | ||||||
8832 | sub toSAX1 | ||||||
8833 |
2
|
6
|
{ _toSAX(@_, \&_start_tag_data_SAX1, \&_end_tag_data_SAX1); } | ||||
8834 | |||||||
8835 | sub toSAX2 | ||||||
8836 |
2
|
6
|
{ _toSAX(@_, \&_start_tag_data_SAX2, \&_end_tag_data_SAX2); } | ||||
8837 | |||||||
8838 | sub _toSAX | ||||||
8839 |
72
|
54
|
{ my( $elt, $handler, $start_tag_data, $end_tag_data)= @_; | ||||
8840 |
72
|
88
|
if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) | ||||
8841 |
43
|
46
|
{ my $data= $start_tag_data->( $elt); | ||||
8842 |
43
|
123
|
_start_prefix_mapping( $elt, $handler, $data); | ||||
8843 |
43
|
135
|
if( $data && (my $start_element = $handler->can( 'start_element'))) | ||||
8844 |
39
35
|
46
79
|
{ unless( $elt->_flushed) { $start_element->( $handler, $data); } } | ||||
8845 | |||||||
8846 |
43
|
1194
|
foreach my $child ($elt->_children) | ||||
8847 |
51
|
304
|
{ $child->_toSAX( $handler, $start_tag_data, $end_tag_data); } | ||||
8848 | |||||||
8849 |
43
|
474
|
if( (my $data= $end_tag_data->( $elt)) && (my $end_element = $handler->can( 'end_element')) ) | ||||
8850 |
39
|
73
|
{ $end_element->( $handler, $data); } | ||||
8851 |
43
|
1220
|
_end_prefix_mapping( $elt, $handler); | ||||
8852 | } | ||||||
8853 | else # text or special element | ||||||
8854 |
29
|
126
|
{ if( (exists $elt->{'pcdata'}) && (my $characters= $handler->can( 'characters'))) | ||||
8855 |
20
|
49
|
{ $characters->( $handler, { Data => $elt->{pcdata} }); } | ||||
8856 | elsif( (exists $elt->{'cdata'})) | ||||||
8857 |
2
|
8
|
{ if( my $start_cdata= $handler->can( 'start_cdata')) | ||||
8858 |
2
|
5
|
{ $start_cdata->( $handler); } | ||||
8859 |
2
|
80
|
if( my $characters= $handler->can( 'characters')) | ||||
8860 |
2
|
6
|
{ $characters->( $handler, {Data => $elt->{cdata} }); } | ||||
8861 |
2
|
12
|
if( my $end_cdata= $handler->can( 'end_cdata')) | ||||
8862 |
2
|
5
|
{ $end_cdata->( $handler); } | ||||
8863 | } | ||||||
8864 | elsif( ((exists $elt->{'target'})) && (my $pi= $handler->can( 'processing_instruction'))) | ||||||
8865 |
2
|
9
|
{ $pi->( $handler, { Target =>$elt->{target}, Data => $elt->{data} }); } | ||||
8866 | elsif( ((exists $elt->{'comment'})) && (my $comment= $handler->can( 'comment'))) | ||||||
8867 |
2
|
9
|
{ $comment->( $handler, { Data => $elt->{comment} }); } | ||||
8868 | elsif( ((exists $elt->{'ent'}))) | ||||||
8869 | { | ||||||
8870 |
1
|
6
|
if( my $se= $handler->can( 'skipped_entity')) | ||||
8871 |
1
|
3
|
{ $se->( $handler, { Name => $elt->ent_name }); } | ||||
8872 | elsif( my $characters= $handler->can( 'characters')) | ||||||
8873 |
0
|
0
|
{ if( defined $elt->ent_string) | ||||
8874 |
0
|
0
|
{ $characters->( $handler, {Data => $elt->ent_string}); } | ||||
8875 | else | ||||||
8876 |
0
|
0
|
{ $characters->( $handler, {Data => $elt->ent_name}); } | ||||
8877 | } | ||||||
8878 | } | ||||||
8879 | |||||||
8880 | } | ||||||
8881 | } | ||||||
8882 | |||||||
8883 | sub _start_tag_data_SAX1 | ||||||
8884 |
15
|
9
|
{ my( $elt)= @_; | ||||
8885 |
15
|
16
|
my $name= $XML::Twig::index2gi[$elt->{'gi'}]; | ||||
8886 |
15
|
39
|
return if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )); | ||||
8887 |
13
|
12
|
my $attributes={}; | ||||
8888 |
13
|
8
|
my $atts= $elt->{att}; | ||||
8889 |
13
|
27
|
while( my( $att, $value)= each %$atts) | ||||
8890 |
4
|
19
|
{ $attributes->{$att}= $value unless( ( $att=~ m{^#(?!default:)} )); } | ||||
8891 |
13
|
22
|
my $data= { Name => $name, Attributes => $attributes}; | ||||
8892 |
13
|
16
|
return $data; | ||||
8893 | } | ||||||
8894 | |||||||
8895 | sub _end_tag_data_SAX1 | ||||||
8896 |
13
|
10
|
{ my( $elt)= @_; | ||||
8897 |
13
|
30
|
return if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )); | ||||
8898 |
11
|
50
|
return { Name => $XML::Twig::index2gi[$elt->{'gi'}] }; | ||||
8899 | } | ||||||
8900 | |||||||
8901 | sub _start_tag_data_SAX2 | ||||||
8902 |
32
|
24
|
{ my( $elt)= @_; | ||||
8903 |
32
|
30
|
my $data={}; | ||||
8904 | |||||||
8905 |
32
|
36
|
my $name= $XML::Twig::index2gi[$elt->{'gi'}]; | ||||
8906 |
32
|
72
|
return if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )); | ||||
8907 |
30
|
37
|
$data->{Name} = $name; | ||||
8908 |
30
|
30
|
$data->{Prefix} = $elt->ns_prefix; | ||||
8909 |
30
|
36
|
$data->{LocalName} = $elt->local_name; | ||||
8910 |
30
|
28
|
$data->{NamespaceURI} = $elt->namespace; | ||||
8911 | |||||||
8912 | # save a copy of the data so we can re-use it for the end tag | ||||||
8913 |
30
|
87
|
my %sax2_data= %$data; | ||||
8914 |
30
|
35
|
$elt->{twig_elt_SAX2_data}= \%sax2_data; | ||||
8915 | |||||||
8916 | # add the attributes | ||||||
8917 |
30
|
38
|
$data->{Attributes}= $elt->_atts_to_SAX2; | ||||
8918 | |||||||
8919 |
30
|
28
|
return $data; | ||||
8920 | } | ||||||
8921 | |||||||
8922 | sub _atts_to_SAX2 | ||||||
8923 |
30
|
15
|
{ my $elt= shift; | ||||
8924 |
30
|
27
|
my $SAX2_atts= {}; | ||||
8925 |
30
30
|
20
49
|
foreach my $att (keys %{$elt->{att}}) | ||||
8926 | { | ||||||
8927 |
15
|
32
|
next if( ( $att=~ m{^#(?!default:)} )); | ||||
8928 |
13
|
11
|
my $SAX2_att={}; | ||||
8929 |
13
|
13
|
$SAX2_att->{Name} = $att; | ||||
8930 |
13
|
13
|
$SAX2_att->{Prefix} = _ns_prefix( $att); | ||||
8931 |
13
|
9
|
$SAX2_att->{LocalName} = _local_name( $att); | ||||
8932 |
13
|
48
|
$SAX2_att->{NamespaceURI} = $elt->namespace( $SAX2_att->{Prefix}); | ||||
8933 |
13
|
20
|
$SAX2_att->{Value} = $elt->{'att'}->{$att}; | ||||
8934 |
13
|
17
|
my $SAX2_att_name= "{$SAX2_att->{NamespaceURI}}$SAX2_att->{LocalName}"; | ||||
8935 | |||||||
8936 |
13
|
26
|
$SAX2_atts->{$SAX2_att_name}= $SAX2_att; | ||||
8937 | } | ||||||
8938 |
30
|
37
|
return $SAX2_atts; | ||||
8939 | } | ||||||
8940 | |||||||
8941 | sub _start_prefix_mapping | ||||||
8942 |
43
|
38
|
{ my( $elt, $handler, $data)= @_; | ||||
8943 |
43
13
|
118
94
|
if( my $start_prefix_mapping= $handler->can( 'start_prefix_mapping') | ||||
8944 |
30
|
76
|
and my @new_prefix_mappings= grep { /^\{[^}]*\}xmlns/ || /^\{$XMLNS_URI\}/ } keys %{$data->{Attributes}} | ||||
8945 | ) | ||||||
8946 |
7
|
7
|
{ foreach my $prefix (@new_prefix_mappings) | ||||
8947 |
7
|
15
|
{ my $prefix_string= $data->{Attributes}->{$prefix}->{LocalName}; | ||||
8948 |
7
3
|
8
3
|
if( $prefix_string eq 'xmlns') { $prefix_string=''; } | ||||
8949 |
7
|
14
|
my $prefix_data= | ||||
8950 | { Prefix => $prefix_string, | ||||||
8951 | NamespaceURI => $data->{Attributes}->{$prefix}->{Value} | ||||||
8952 | }; | ||||||
8953 |
7
|
13
|
$start_prefix_mapping->( $handler, $prefix_data); | ||||
8954 |
7
|
122
|
$elt->{twig_end_prefix_mapping} ||= []; | ||||
8955 |
7
7
|
5
18
|
push @{$elt->{twig_end_prefix_mapping}}, $prefix_string; | ||||
8956 | } | ||||||
8957 | } | ||||||
8958 | } | ||||||
8959 | |||||||
8960 | sub _end_prefix_mapping | ||||||
8961 |
43
|
32
|
{ my( $elt, $handler)= @_; | ||||
8962 |
43
|
111
|
if( my $end_prefix_mapping= $handler->can( 'end_prefix_mapping')) | ||||
8963 |
30
30
|
15
99
|
{ foreach my $prefix (@{$elt->{twig_end_prefix_mapping}}) | ||||
8964 |
9
|
31
|
{ $end_prefix_mapping->( $handler, { Prefix => $prefix} ); } | ||||
8965 | } | ||||||
8966 | } | ||||||
8967 | |||||||
8968 | sub _end_tag_data_SAX2 | ||||||
8969 |
30
|
73
|
{ my( $elt)= @_; | ||||
8970 |
30
|
71
|
return if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )); | ||||
8971 |
28
|
96
|
return $elt->{twig_elt_SAX2_data}; | ||||
8972 | } | ||||||
8973 | |||||||
8974 | sub contains_text | ||||||
8975 |
487
|
333
|
{ my $elt= shift; | ||||
8976 |
487
|
333
|
my $child= $elt->{first_child}; | ||||
8977 |
487
|
518
|
while ($child) | ||||
8978 |
472
|
504
|
{ return 1 if( $child->is_text || (exists $child->{'ent'})); | ||||
8979 |
248
|
332
|
$child= $child->{next_sibling}; | ||||
8980 | } | ||||||
8981 |
263
|
507
|
return 0; | ||||
8982 | } | ||||||
8983 | |||||||
8984 | # creates a single pcdata element containing the text as child of the element | ||||||
8985 | # options: | ||||||
8986 | # - force_pcdata: when set to a true value forces the text to be in a #PCDATA | ||||||
8987 | # even if the original element was a #CDATA | ||||||
8988 | sub set_text | ||||||
8989 |
209
|
383
|
{ my( $elt, $string, %option)= @_; | ||||
8990 | |||||||
8991 |
209
|
368
|
if( $XML::Twig::index2gi[$elt->{'gi'}] eq $PCDATA) | ||||
8992 |
157
|
605
|
{ return $elt->{pcdata}= (delete $elt->{empty} || 1) && $string; } | ||||
8993 | elsif( $XML::Twig::index2gi[$elt->{'gi'}] eq $CDATA) | ||||||
8994 |
12
|
17
|
{ if( $option{force_pcdata}) | ||||
8995 |
2
|
9
|
{ $elt->{gi}=$XML::Twig::gi2index{$PCDATA} or $elt->set_gi( $PCDATA); | ||||
8996 |
2
|
5
|
$elt->_set_cdata(''); | ||||
8997 |
2
|
47
|
return $elt->{pcdata}= (delete $elt->{empty} || 1) && $string; | ||||
8998 | } | ||||||
8999 | else | ||||||
9000 |
10
|
12
|
{ return $elt->_set_cdata( $string); } | ||||
9001 | } | ||||||
9002 | elsif( $elt->contains_a_single( $PCDATA) ) | ||||||
9003 | { # optimized so we have a slight chance of not loosing embedded comments and pi's | ||||||
9004 |
33
|
61
|
$elt->{first_child}->set_pcdata( $string); | ||||
9005 |
33
|
57
|
return $elt; | ||||
9006 | } | ||||||
9007 | |||||||
9008 |
7
7
|
9
19
|
foreach my $child (@{[$elt->_children]}) | ||||
9009 |
4
|
7
|
{ $child->delete; } | ||||
9010 | |||||||
9011 |
7
|
29
|
my $pcdata= $elt->_new_pcdata( $string); | ||||
9012 |
7
|
18
|
$pcdata->paste( $elt); | ||||
9013 | |||||||
9014 |
7
|
6
|
$elt->{empty}=0; | ||||
9015 | |||||||
9016 |
7
|
11
|
return $elt; | ||||
9017 | } | ||||||
9018 | |||||||
9019 | # set the content of an element from a list of strings and elements | ||||||
9020 | sub set_content | ||||||
9021 |
167
|
141
|
{ my $elt= shift; | ||||
9022 | |||||||
9023 |
167
|
234
|
return $elt unless defined $_[0]; | ||||
9024 | |||||||
9025 | # attributes can be given as a hash (passed by ref) | ||||||
9026 |
166
|
206
|
if( ref $_[0] eq 'HASH') | ||||
9027 |
2
|
4
|
{ my $atts= shift; | ||||
9028 |
2
|
4
|
$elt->del_atts; # usually useless but better safe than sorry | ||||
9029 |
2
|
5
|
$elt->set_atts( $atts); | ||||
9030 |
2
|
7
|
return $elt unless defined $_[0]; | ||||
9031 | } | ||||||
9032 | |||||||
9033 | # check next argument for #EMPTY | ||||||
9034 |
165
|
445
|
if( !(ref $_[0]) && ($_[0] eq $EMPTY) ) | ||||
9035 |
6
6
|
4
6
|
{ $elt->{empty}= 1; return $elt; } | ||||
9036 | |||||||
9037 | # case where we really want to do a set_text, the element is '#PCDATA' | ||||||
9038 | # or contains a single PCDATA and we only want to add text in it | ||||||
9039 |
159
|
394
|
if( ($XML::Twig::index2gi[$elt->{'gi'}] eq $PCDATA || $elt->contains_a_single( $PCDATA)) | ||||
9040 | && (@_ == 1) && !( ref $_[0])) | ||||||
9041 |
14
|
21
|
{ $elt->set_text( $_[0]); | ||||
9042 |
14
|
16
|
return $elt; | ||||
9043 | } | ||||||
9044 | elsif( ($XML::Twig::index2gi[$elt->{'gi'}] eq $CDATA) && (@_ == 1) && !( ref $_[0])) | ||||||
9045 |
1
|
2
|
{ $elt->_set_cdata( $_[0]); | ||||
9046 |
1
|
1
|
return $elt; | ||||
9047 | } | ||||||
9048 | |||||||
9049 | # delete the children | ||||||
9050 |
144
144
|
93
192
|
foreach my $child (@{[$elt->_children]}) | ||||
9051 |
7
|
10
|
{ $child->delete; } | ||||
9052 | |||||||
9053 |
144
144
|
212
139
|
if( @_) { $elt->{empty}=0; } | ||||
9054 | |||||||
9055 |
144
|
142
|
foreach my $child (@_) | ||||
9056 |
167
|
313
|
{ if( ref( $child) && isa( $child, 'XML::Twig::Elt')) | ||||
9057 | { # argument is an element | ||||||
9058 |
27
|
34
|
$child->paste( 'last_child', $elt); | ||||
9059 | } | ||||||
9060 | else | ||||||
9061 | { # argument is a string | ||||||
9062 |
140
|
244
|
if( (my $pcdata= $elt->{last_child}) && $elt->{last_child}->is_pcdata) | ||||
9063 | { # previous child is also pcdata: just concatenate | ||||||
9064 |
4
|
29
|
$pcdata->{pcdata}= (delete $pcdata->{empty} || 1) && $pcdata->{pcdata} . $child | ||||
9065 | } | ||||||
9066 | else | ||||||
9067 | { # previous child is not a string: create a new pcdata element | ||||||
9068 |
136
|
176
|
$pcdata= $elt->_new_pcdata( $child); | ||||
9069 |
136
|
189
|
$pcdata->paste( 'last_child', $elt); | ||||
9070 | } | ||||||
9071 | } | ||||||
9072 | } | ||||||
9073 | |||||||
9074 | |||||||
9075 |
144
|
156
|
return $elt; | ||||
9076 | } | ||||||
9077 | |||||||
9078 | # inserts an element (whose gi is given) as child of the element | ||||||
9079 | # all children of the element are now children of the new element | ||||||
9080 | # returns the new element | ||||||
9081 | sub insert | ||||||
9082 |
7
|
16
|
{ my ($elt, @args)= @_; | ||||
9083 | # first cut the children | ||||||
9084 |
7
7
7
7
7
4
4
7
|
8
5
8
8
16
5
9
10
|
my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; | ||||
9085 |
7
|
10
|
foreach my $child (@children) | ||||
9086 |
4
|
13
|
{ $child->cut; } | ||||
9087 | # insert elements | ||||||
9088 |
7
|
19
|
while( my $gi= shift @args) | ||||
9089 |
7
|
15
|
{ my $new_elt= $elt->new( $gi); | ||||
9090 | # add attributes if needed | ||||||
9091 |
7
|
23
|
if( defined( $args[0]) && ( isa( $args[0], 'HASH')) ) | ||||
9092 |
2
|
5
|
{ $new_elt->set_atts( shift @args); } | ||||
9093 | # paste the element | ||||||
9094 |
7
|
12
|
$new_elt->paste( $elt); | ||||
9095 |
7
|
6
|
$elt->{empty}=0; | ||||
9096 |
7
|
15
|
$elt= $new_elt; | ||||
9097 | } | ||||||
9098 | # paste back the children | ||||||
9099 |
7
|
8
|
foreach my $child (@children) | ||||
9100 |
4
|
6
|
{ $child->paste( 'last_child', $elt); } | ||||
9101 |
7
|
12
|
return $elt; | ||||
9102 | } | ||||||
9103 | |||||||
9104 | # insert a new element | ||||||
9105 | # $elt->insert_new_element( $opt_position, $gi, $opt_atts_hash, @opt_content); | ||||||
9106 | # the element is created with the same syntax as new | ||||||
9107 | # position is the same as in paste, first_child by default | ||||||
9108 | sub insert_new_elt | ||||||
9109 |
133
|
99
|
{ my $elt= shift; | ||||
9110 |
133
|
99
|
my $position= $_[0]; | ||||
9111 |
133
|
457
|
if( ($position eq 'before') || ($position eq 'after') | ||||
9112 | || ($position eq 'first_child') || ($position eq 'last_child')) | ||||||
9113 |
129
|
81
|
{ shift; } | ||||
9114 | else | ||||||
9115 |
4
|
4
|
{ $position= 'first_child'; } | ||||
9116 | |||||||
9117 |
133
|
198
|
my $new_elt= $elt->new( @_); | ||||
9118 |
133
|
142
|
$new_elt->paste( $position, $elt); | ||||
9119 | |||||||
9120 | #if( defined $new_elt->{'att'}->{$ID}) { $new_elt->set_id( $new_elt->{'att'}->{$ID}); } | ||||||
9121 | |||||||
9122 |
133
|
107
|
return $new_elt; | ||||
9123 | } | ||||||
9124 | |||||||
9125 | # wraps an element in elements which gi's are given as arguments | ||||||
9126 | # $elt->wrap_in( 'td', 'tr', 'table') wraps the element as a single | ||||||
9127 | # cell in a table for example | ||||||
9128 | # returns the new element | ||||||
9129 | sub wrap_in | ||||||
9130 |
18
|
26
|
{ my $elt= shift; | ||||
9131 |
18
|
32
|
while( my $gi = shift @_) | ||||
9132 |
18
|
22
|
{ my $new_elt = $elt->new( $gi); | ||||
9133 |
18
|
31
|
if( $elt->{twig_current}) | ||||
9134 |
2
|
4
|
{ my $t= $elt->twig; | ||||
9135 |
2
|
3
|
$t->{twig_current}= $new_elt; | ||||
9136 |
2
|
8
|
delete $elt->{'twig_current'}; | ||||
9137 |
2
|
2
|
$new_elt->{'twig_current'}=1; | ||||
9138 | } | ||||||
9139 | |||||||
9140 |
18
|
28
|
if( my $parent= $elt->{parent}) | ||||
9141 |
15
15
15
|
15
22
24
|
{ $new_elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $new_elt->{parent});} ; | ||||
9142 |
15
11
|
25
11
|
if( $parent->{first_child} == $elt) { $parent->{first_child}= $new_elt; } | ||||
9143 |
15
8
8
8
8
|
23
9
8
10
14
|
if( $parent->{last_child} == $elt) { $parent->{empty}=0; $parent->{last_child}=$new_elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } | ||||
9144 | } | ||||||
9145 | else | ||||||
9146 | { # wrapping the root | ||||||
9147 |
3
|
7
|
my $twig= $elt->twig; | ||||
9148 |
3
|
15
|
if( $twig && $twig->root && ($twig->root eq $elt) ) | ||||
9149 |
3
|
6
|
{ $twig->set_root( $new_elt); | ||||
9150 | } | ||||||
9151 | } | ||||||
9152 | |||||||
9153 |
18
|
29
|
if( my $prev_sibling= $elt->{prev_sibling}) | ||||
9154 |
4
4
4
|
4
8
6
|
{ $new_elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $new_elt->{prev_sibling});} ; | ||||
9155 |
4
|
4
|
$prev_sibling->{next_sibling}= $new_elt; | ||||
9156 | } | ||||||
9157 | |||||||
9158 |
18
|
33
|
if( my $next_sibling= $elt->{next_sibling}) | ||||
9159 |
7
|
7
|
{ $new_elt->{next_sibling}= $next_sibling; | ||||
9160 |
7
7
7
|
7
10
8
|
$next_sibling->{prev_sibling}=$new_elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; | ||||
9161 | } | ||||||
9162 |
18
|
20
|
$new_elt->{first_child}= $elt; | ||||
9163 |
18
18
18
17
|
17
19
19
22
|
$new_elt->{empty}=0; $new_elt->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $new_elt->{last_child});} ; | ||||
9164 | |||||||
9165 |
18
18
17
|
18
1498
25
|
$elt->{parent}=$new_elt; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; | ||||
9166 |
18
18
17
|
22
24
18
|
$elt->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; | ||||
9167 |
18
|
23
|
$elt->{next_sibling}= undef; | ||||
9168 | |||||||
9169 | # add the attributes if the next argument is a hash ref | ||||||
9170 |
18
|
57
|
if( defined( $_[0]) && (isa( $_[0], 'HASH')) ) | ||||
9171 |
10
|
15
|
{ $new_elt->set_atts( shift @_); } | ||||
9172 | |||||||
9173 |
18
|
35
|
$elt= $new_elt; | ||||
9174 | } | ||||||
9175 | |||||||
9176 |
18
|
25
|
return $elt; | ||||
9177 | } | ||||||
9178 | |||||||
9179 | sub replace | ||||||
9180 |
14
|
19
|
{ my( $elt, $ref)= @_; | ||||
9181 | |||||||
9182 |
14
1
|
26
2
|
if( $elt->{parent}) { $elt->cut; } | ||||
9183 | |||||||
9184 |
14
|
25
|
if( my $parent= $ref->{parent}) | ||||
9185 |
13
13
11
|
14
22
19
|
{ $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; | ||||
9186 |
13
7
|
29
7
|
if( $parent->{first_child} == $ref) { $parent->{first_child}= $elt; } | ||||
9187 |
13
7
7
7
6
|
25
8
8
12
8
|
if( $parent->{last_child} == $ref) { $parent->{empty}=0; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } | ||||
9188 | } | ||||||
9189 | elsif( $ref->twig && $ref == $ref->twig->root) | ||||||
9190 |
1
|
1
|
{ $ref->twig->set_root( $elt); } | ||||
9191 | |||||||
9192 |
14
|
28
|
if( my $prev_sibling= $ref->{prev_sibling}) | ||||
9193 |
6
6
5
|
8
10
6
|
{ $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; | ||||
9194 |
6
|
6
|
$prev_sibling->{next_sibling}= $elt; | ||||
9195 | } | ||||||
9196 |
14
|
24
|
if( my $next_sibling= $ref->{next_sibling}) | ||||
9197 |
6
|
7
|
{ $elt->{next_sibling}= $next_sibling; | ||||
9198 |
6
6
5
|
6
13
9
|
$next_sibling->{prev_sibling}=$elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; | ||||
9199 | } | ||||||
9200 | |||||||
9201 |
14
14
11
|
16
18
15
|
$ref->{parent}=undef; if( $XML::Twig::weakrefs) { weaken( $ref->{parent});} ; | ||||
9202 |
14
14
11
|
16
21
14
|
$ref->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $ref->{prev_sibling});} ; | ||||
9203 |
14
|
11
|
$ref->{next_sibling}= undef; | ||||
9204 |
14
|
24
|
return $ref; | ||||
9205 | } | ||||||
9206 | |||||||
9207 | sub replace_with | ||||||
9208 |
2
|
4
|
{ my $ref= shift; | ||||
9209 |
2
|
2
|
my $elt= shift; | ||||
9210 |
2
|
4
|
$elt->replace( $ref); | ||||
9211 |
2
|
6
|
foreach my $new_elt (reverse @_) | ||||
9212 |
2
|
5
|
{ $new_elt->paste( after => $elt); } | ||||
9213 |
2
|
4
|
return $elt; | ||||
9214 | } | ||||||
9215 | |||||||
9216 | |||||||
9217 | # move an element, same syntax as paste, except the element is first cut | ||||||
9218 | sub move | ||||||
9219 |
8
|
11
|
{ my $elt= shift; | ||||
9220 |
8
|
11
|
$elt->cut; | ||||
9221 |
8
|
12
|
$elt->paste( @_); | ||||
9222 |
8
|
10
|
return $elt; | ||||
9223 | } | ||||||
9224 | |||||||
9225 | |||||||
9226 | # adds a prefix to an element, creating a pcdata child if needed | ||||||
9227 | sub prefix | ||||||
9228 |
23
|
47
|
{ my ($elt, $prefix, $option)= @_; | ||||
9229 |
23
|
77
|
my $asis= ($option && ($option eq 'asis')) ? 1 : 0; | ||||
9230 |
23
|
133
|
if( (exists $elt->{'pcdata'}) | ||||
9231 | && (($asis && $elt->{asis}) || (!$asis && ! $elt->{asis})) | ||||||
9232 | ) | ||||||
9233 |
4
|
23
|
{ $elt->{pcdata}= (delete $elt->{empty} || 1) && $prefix . $elt->{pcdata}; } | ||||
9234 | elsif( $elt->{first_child} && $elt->{first_child}->is_pcdata | ||||||
9235 | && ( ($asis && $elt->{first_child}->{asis}) | ||||||
9236 | || (!$asis && ! $elt->{first_child}->{asis})) | ||||||
9237 | ) | ||||||
9238 | { | ||||||
9239 |
6
|
17
|
$elt->{first_child}->set_pcdata( $prefix . $elt->{first_child}->pcdata); | ||||
9240 | } | ||||||
9241 | else | ||||||
9242 |
13
|
23
|
{ my $new_elt= $elt->_new_pcdata( $prefix); | ||||
9243 |
13
|
28
|
my $pos= (exists $elt->{'pcdata'}) ? 'before' : 'first_child'; | ||||
9244 |
13
|
19
|
$new_elt->paste( $pos => $elt); | ||||
9245 |
13
5
|
21
8
|
if( $asis) { $new_elt->set_asis; } | ||||
9246 | } | ||||||
9247 |
23
|
32
|
return $elt; | ||||
9248 | } | ||||||
9249 | |||||||
9250 | # adds a suffix to an element, creating a pcdata child if needed | ||||||
9251 | sub suffix | ||||||
9252 |
19
|
31
|
{ my ($elt, $suffix, $option)= @_; | ||||
9253 |
19
|
58
|
my $asis= ($option && ($option eq 'asis')) ? 1 : 0; | ||||
9254 |
19
|
98
|
if( (exists $elt->{'pcdata'}) | ||||
9255 | && (($asis && $elt->{asis}) || (!$asis && ! $elt->{asis})) | ||||||
9256 | ) | ||||||
9257 |
3
|
15
|
{ $elt->{pcdata}= (delete $elt->{empty} || 1) && $elt->{pcdata} . $suffix; } | ||||
9258 | elsif( $elt->{last_child} && $elt->{last_child}->is_pcdata | ||||||
9259 | && ( ($asis && $elt->{last_child}->{asis}) | ||||||
9260 | || (!$asis && ! $elt->{last_child}->{asis})) | ||||||
9261 | ) | ||||||
9262 |
6
|
14
|
{ $elt->{last_child}->set_pcdata( $elt->{last_child}->pcdata . $suffix); } | ||||
9263 | else | ||||||
9264 |
10
|
17
|
{ my $new_elt= $elt->_new_pcdata( $suffix); | ||||
9265 |
10
|
18
|
my $pos= (exists $elt->{'pcdata'}) ? 'after' : 'last_child'; | ||||
9266 |
10
|
14
|
$new_elt->paste( $pos => $elt); | ||||
9267 |
10
4
|
14
6
|
if( $asis) { $new_elt->set_asis; } | ||||
9268 | } | ||||||
9269 |
19
|
29
|
return $elt; | ||||
9270 | } | ||||||
9271 | |||||||
9272 | # create a path to an element ('/root/.../gi) | ||||||
9273 | sub path | ||||||
9274 |
21
|
92
|
{ my $elt= shift; | ||||
9275 |
21
|
21
|
my @context= ( $elt, $elt->ancestors); | ||||
9276 |
21
55
|
18
40
|
return "/" . join( "/", reverse map {$_->gi} @context); | ||||
9277 | } | ||||||
9278 | |||||||
9279 | sub xpath | ||||||
9280 |
6
|
46
|
{ my $elt= shift; | ||||
9281 |
6
|
4
|
my $xpath; | ||||
9282 |
6
|
8
|
foreach my $ancestor (reverse $elt->ancestors_or_self) | ||||
9283 |
18
|
16
|
{ my $gi= $XML::Twig::index2gi[$ancestor->{'gi'}]; | ||||
9284 |
18
|
12
|
$xpath.= "/$gi"; | ||||
9285 |
18
|
20
|
my $index= $ancestor->prev_siblings( $gi) + 1; | ||||
9286 |
18
|
43
|
unless( ($index == 1) && !$ancestor->next_sibling( $gi)) | ||||
9287 |
10
|
24
|
{ $xpath.= "[$index]"; } | ||||
9288 | } | ||||||
9289 |
6
|
14
|
return $xpath; | ||||
9290 | } | ||||||
9291 | |||||||
9292 | # methods used mainly by wrap_children | ||||||
9293 | |||||||
9294 | # return a string with the | ||||||
9295 | # for an element <foo><elt att="val">...</elt><elt2/><elt>...</elt></foo> | ||||||
9296 | # returns '<elt att="val"><elt2><elt>' | ||||||
9297 | sub _stringify_struct | ||||||
9298 |
13
|
14
|
{ my( $elt, %opt)= @_; | ||||
9299 |
13
|
7
|
my $string=''; | ||||
9300 |
13
|
16
|
my $pretty_print= set_pretty_print( 'none'); | ||||
9301 |
13
|
18
|
foreach my $child ($elt->_children) | ||||
9302 |
37
37
|
43
55
|
{ $child->add_id; $string .= $child->start_tag( { escape_gt => 1 }) ||''; } | ||||
9303 |
13
|
16
|
set_pretty_print( $pretty_print); | ||||
9304 |
13
|
19
|
return $string; | ||||
9305 | } | ||||||
9306 | |||||||
9307 | # wrap a series of elements in a new one | ||||||
9308 | sub _wrap_range | ||||||
9309 |
9
|
9
|
{ my $elt= shift; | ||||
9310 |
9
|
6
|
my $gi= shift; | ||||
9311 |
9
|
25
|
my $atts= isa( $_[0], 'HASH') ? shift : undef; | ||||
9312 |
9
|
16
|
my $range= shift; # the string with the tags to wrap | ||||
9313 | |||||||
9314 |
9
|
12
|
my $t= $elt->twig; | ||||
9315 | |||||||
9316 | # get the tags to wrap | ||||||
9317 |
9
|
7
|
my @to_wrap; | ||||
9318 |
9
|
52
|
while( $range=~ m{<\w+\s+[^>]*id=("[^"]*"|'[^']*')[^>]*>}g) | ||||
9319 |
14
|
23
|
{ push @to_wrap, $t->elt_id( substr( $1, 1, -1)); } | ||||
9320 | |||||||
9321 |
9
|
15
|
return '' unless @to_wrap; | ||||
9322 | |||||||
9323 |
9
|
5
|
my $to_wrap= shift @to_wrap; | ||||
9324 |
9
|
20
|
my %atts= %$atts; | ||||
9325 |
9
|
12
|
my $new_elt= $to_wrap->wrap_in( $gi, \%atts); | ||||
9326 |
9
|
14
|
$_->move( last_child => $new_elt) foreach (@to_wrap); | ||||
9327 | |||||||
9328 |
9
|
45
|
return ''; | ||||
9329 | } | ||||||
9330 | |||||||
9331 | # wrap children matching a regexp in a new element | ||||||
9332 | sub wrap_children | ||||||
9333 |
13
|
21
|
{ my( $elt, $regexp, $gi, $atts)= @_; | ||||
9334 | |||||||
9335 |
13
|
32
|
$atts ||={}; | ||||
9336 | |||||||
9337 |
13
|
16
|
my $elt_as_string= $elt->_stringify_struct; # stringify the elt structure | ||||
9338 |
13
13
|
49
14
|
$regexp=~ s{(<[^>]*>)}{_match_expr( $1)}eg; # in the regexp, replace gi's by the proper regexp | ||||
9339 |
13
9
1
1
1
|
449
21388
3
1
124
|
$elt_as_string=~ s{($regexp)}{$elt->_wrap_range( $gi, $atts, $1)}eg; # then do the actual replace | ||||
9340 | |||||||
9341 |
13
|
38
|
return $elt; | ||||
9342 | } | ||||||
9343 | |||||||
9344 | sub _match_expr | ||||||
9345 |
22
|
159
|
{ my $tag= shift; | ||||
9346 |
22
|
27
|
my( $gi, %atts)= XML::Twig::_parse_start_tag( $tag); | ||||
9347 |
13
|
69
|
return _match_tag( $gi, %atts); | ||||
9348 | } | ||||||
9349 | |||||||
9350 | |||||||
9351 | sub _match_tag | ||||||
9352 |
13
|
16
|
{ my( $elt, %atts)= @_; | ||||
9353 |
13
|
14
|
my $string= "<$elt\\b"; | ||||
9354 |
13
|
23
|
foreach my $key (sort keys %atts) | ||||
9355 |
14
|
10
|
{ my $val= qq{\Q$atts{$key}\E}; | ||||
9356 |
14
|
25
|
$string.= qq{[^>]*$key=(?:"$val"|'$val')}; | ||||
9357 | } | ||||||
9358 |
13
|
12
|
$string.= qq{[^>]*>}; | ||||
9359 |
13
|
35
|
return "(?:$string)"; | ||||
9360 | } | ||||||
9361 | |||||||
9362 | sub field_to_att | ||||||
9363 |
6
|
11
|
{ my( $elt, $cond, $att)= @_; | ||||
9364 |
6
|
20
|
$att ||= $cond; | ||||
9365 |
6
|
11
|
my $child= $elt->first_child( $cond) or return undef; | ||||
9366 |
4
|
9
|
$elt->set_att( $att => $child->text); | ||||
9367 |
4
|
7
|
$child->cut; | ||||
9368 |
4
|
19
|
return $elt; | ||||
9369 | } | ||||||
9370 | |||||||
9371 | sub att_to_field | ||||||
9372 |
5
|
22
|
{ my( $elt, $att, $tag)= @_; | ||||
9373 |
5
|
13
|
$tag ||= $att; | ||||
9374 |
5
|
10
|
my $child= $elt->insert_new_elt( first_child => $tag, $elt->{'att'}->{$att}); | ||||
9375 |
5
|
9
|
$elt->del_att( $att); | ||||
9376 |
5
|
8
|
return $elt; | ||||
9377 | } | ||||||
9378 | |||||||
9379 | # sort children methods | ||||||
9380 | |||||||
9381 | sub sort_children_on_field | ||||||
9382 |
2
|
3
|
{ my $elt = shift; | ||||
9383 |
2
|
2
|
my $field = shift; | ||||
9384 |
2
9
|
8
11
|
my $get_key= sub { return $_[0]->field( $field) }; | ||||
9385 |
2
|
5
|
return $elt->sort_children( $get_key, @_); | ||||
9386 | } | ||||||
9387 | |||||||
9388 | sub sort_children_on_att | ||||||
9389 |
1
|
1
|
{ my $elt = shift; | ||||
9390 |
1
|
2
|
my $att = shift; | ||||
9391 |
1
3
|
4
9
|
my $get_key= sub { return $_[0]->{'att'}->{$att} }; | ||||
9392 |
1
|
3
|
return $elt->sort_children( $get_key, @_); | ||||
9393 | } | ||||||
9394 | |||||||
9395 | sub sort_children_on_value | ||||||
9396 |
1
|
1
|
{ my $elt = shift; | ||||
9397 | #my $get_key= eval qq{ sub { $NO_WARNINGS; return \$_[0]->text } }; | ||||||
9398 |
1
|
2
|
my $get_key= \&text; | ||||
9399 |
1
|
3
|
return $elt->sort_children( $get_key, @_); | ||||
9400 | } | ||||||
9401 | |||||||
9402 | sub sort_children | ||||||
9403 |
5
|
21
|
{ my( $elt, $get_key, %opt)=@_; | ||||
9404 |
5
|
21
|
$opt{order} ||= 'normal'; | ||||
9405 |
5
|
12
|
$opt{type} ||= 'alpha'; | ||||
9406 |
5
|
13
|
my( $par_a, $par_b)= ($opt{order} eq 'reverse') ? qw( b a) : qw ( a b) ; | ||||
9407 |
5
|
11
|
my $op= ($opt{type} eq 'numeric') ? '<=>' : 'cmp' ; | ||||
9408 |
5
|
10
|
my @children= $elt->cut_children; | ||||
9409 |
5
|
13
|
if( $opt{type} eq 'numeric') | ||||
9410 |
9
14
|
9
15
|
{ @children= map { $_->[1] } | ||||
9411 |
9
|
9
|
sort { $a->[0] <=> $b->[0] } | ||||
9412 |
2
|
3
|
map { [ $get_key->( $_), $_] } @children; | ||||
9413 | } | ||||||
9414 | elsif( $opt{type} eq 'alpha') | ||||||
9415 |
6
5
|
16
7
|
{ @children= map { $_->[1] } | ||||
9416 |
6
|
4
|
sort { $a->[0] cmp $b->[0] } | ||||
9417 |
2
|
2
|
map { [ $get_key->( $_), $_] } @children; | ||||
9418 | } | ||||||
9419 | else | ||||||
9420 |
1
|
97
|
{ croak "wrong sort type '$opt{type}', should be either 'alpha' or 'numeric'"; } | ||||
9421 | |||||||
9422 |
4
|
10
|
@children= reverse @children if( $opt{order} eq 'reverse'); | ||||
9423 |
4
|
9
|
$elt->set_content( @children); | ||||
9424 | } | ||||||
9425 | |||||||
9426 | |||||||
9427 | # comparison methods | ||||||
9428 | |||||||
9429 | sub before | ||||||
9430 |
6
|
28
|
{ my( $a, $b)=@_; | ||||
9431 |
6
4
2
|
6
4
2
|
if( $a->cmp( $b) == -1) { return 1; } else { return 0; } | ||||
9432 | } | ||||||
9433 | |||||||
9434 | sub after | ||||||
9435 |
3
|
13
|
{ my( $a, $b)=@_; | ||||
9436 |
3
1
2
|
3
1
3
|
if( $a->cmp( $b) == 1) { return 1; } else { return 0; } | ||||
9437 | } | ||||||
9438 | |||||||
9439 | sub lt | ||||||
9440 |
2
|
2
|
{ my( $a, $b)=@_; | ||||
9441 |
2
|
3
|
return 1 if( $a->cmp( $b) == -1); | ||||
9442 |
1
|
2
|
return 0; | ||||
9443 | } | ||||||
9444 | |||||||
9445 | sub le | ||||||
9446 |
2
|
4
|
{ my( $a, $b)=@_; | ||||
9447 |
2
|
3
|
return 1 unless( $a->cmp( $b) == 1); | ||||
9448 |
1
|
2
|
return 0; | ||||
9449 | } | ||||||
9450 | |||||||
9451 | sub gt | ||||||
9452 |
2
|
2
|
{ my( $a, $b)=@_; | ||||
9453 |
2
|
3
|
return 1 if( $a->cmp( $b) == 1); | ||||
9454 |
1
|
3
|
return 0; | ||||
9455 | } | ||||||
9456 | |||||||
9457 | sub ge | ||||||
9458 |
2
|
2
|
{ my( $a, $b)=@_; | ||||
9459 |
2
|
2
|
return 1 unless( $a->cmp( $b) == -1); | ||||
9460 |
1
|
3
|
return 0; | ||||
9461 | } | ||||||
9462 | |||||||
9463 | |||||||
9464 | sub cmp | ||||||
9465 |
886
|
544
|
{ my( $a, $b)=@_; | ||||
9466 | |||||||
9467 | # easy cases | ||||||
9468 |
886
|
1136
|
return 0 if( $a == $b); | ||||
9469 |
843
|
858
|
return 1 if( $a->in($b)); # a in b => a starts after b | ||||
9470 |
696
|
651
|
return -1 if( $b->in($a)); # b in a => a starts before b | ||||
9471 | |||||||
9472 | # ancestors does not include the element itself | ||||||
9473 |
538
|
573
|
my @a_pile= ($a, $a->ancestors); | ||||
9474 |
538
|
478
|
my @b_pile= ($b, $b->ancestors); | ||||
9475 | |||||||
9476 | # the 2 elements are not in the same twig | ||||||
9477 |
538
|
711
|
return undef unless( $a_pile[-1] == $b_pile[-1]); | ||||
9478 | |||||||
9479 | # find the first non common ancestors (they are siblings) | ||||||
9480 |
536
|
365
|
my $a_anc= pop @a_pile; | ||||
9481 |
536
|
328
|
my $b_anc= pop @b_pile; | ||||
9482 | |||||||
9483 |
536
|
1007
|
while( $a_anc == $b_anc) | ||||
9484 |
810
|
490
|
{ $a_anc= pop @a_pile; | ||||
9485 |
810
|
920
|
$b_anc= pop @b_pile; | ||||
9486 | } | ||||||
9487 | |||||||
9488 | # from there move left and right and figure out the order | ||||||
9489 |
536
|
411
|
my( $a_prev, $a_next, $b_prev, $b_next)= ($a_anc, $a_anc, $b_anc, $b_anc); | ||||
9490 |
536
|
296
|
while() | ||||
9491 |
538
|
882
|
{ $a_prev= $a_prev->{prev_sibling} || return( -1); | ||||
9492 |
323
|
588
|
return 1 if( $a_prev == $b_next); | ||||
9493 |
145
|
227
|
$a_next= $a_next->{next_sibling} || return( 1); | ||||
9494 |
114
|
1214
|
return -1 if( $a_next == $b_prev); | ||||
9495 |
32
|
58
|
$b_prev= $b_prev->{prev_sibling} || return( 1); | ||||
9496 |
25
|
62
|
return -1 if( $b_prev == $a_next); | ||||
9497 |
7
|
13
|
$b_next= $b_next->{next_sibling} || return( -1); | ||||
9498 |
5
|
13
|
return 1 if( $b_next == $a_prev); | ||||
9499 | } | ||||||
9500 | } | ||||||
9501 | |||||||
9502 | sub _dump | ||||||
9503 |
53
|
42
|
{ my( $elt, $option)= @_; | ||||
9504 | |||||||
9505 |
53
|
64
|
my $atts = defined $option->{atts} ? $option->{atts} : 1; | ||||
9506 |
53
|
49
|
my $extra = defined $option->{extra} ? $option->{extra} : 0; | ||||
9507 |
53
|
47
|
my $short_text = defined $option->{short_text} ? $option->{short_text} : 40; | ||||
9508 | |||||||
9509 |
53
|
33
|
my $sp= '| '; | ||||
9510 |
53
|
45
|
my $indent= $sp x $elt->level; | ||||
9511 |
53
|
45
|
my $indent_sp= ' ' x $elt->level; | ||||
9512 | |||||||
9513 |
53
|
34
|
my $dump=''; | ||||
9514 |
53
|
46
|
if( $elt->is_elt) | ||||
9515 | { | ||||||
9516 |
27
|
32
|
$dump .= $indent . '|-' . $XML::Twig::index2gi[$elt->{'gi'}]; | ||||
9517 | |||||||
9518 |
27
|
45
|
if( $atts && (my @atts= $elt->att_names) ) | ||||
9519 |
3
3
|
4
12
|
{ $dump .= ' ' . join( ' ', map { qq{$_="} . $elt->{'att'}->{$_} . qq{"} } @atts); } | ||||
9520 | |||||||
9521 |
27
|
24
|
$dump .= "\n"; | ||||
9522 |
27
10
|
31
13
|
if( $extra) { $dump .= $elt->_dump_extra_data( $indent, $indent_sp, $short_text); } | ||||
9523 |
27
47
27
27
27
27
47
47
27
|
22
74
14
21
21
28
29
51
33
|
$dump .= join( "", map { $_->_dump( $option) } do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }); | ||||
9524 | } | ||||||
9525 | else | ||||||
9526 | { | ||||||
9527 |
26
|
37
|
if( (exists $elt->{'pcdata'})) | ||||
9528 |
20
|
25
|
{ $dump .= "$indent|-PCDATA: '" . _short_text( $elt->{pcdata}, $short_text) . "'\n" } | ||||
9529 | elsif( (exists $elt->{'ent'})) | ||||||
9530 |
0
|
0
|
{ $dump .= "$indent|-ENTITY: '" . _short_text( $elt->{ent}, $short_text) . "'\n" } | ||||
9531 | elsif( (exists $elt->{'cdata'})) | ||||||
9532 |
3
|
9
|
{ $dump .= "$indent|-CDATA: '" . _short_text( $elt->{cdata}, $short_text) . "'\n" } | ||||
9533 | elsif( (exists $elt->{'comment'})) | ||||||
9534 |
1
|
2
|
{ $dump .= "$indent|-COMMENT: '" . _short_text( $elt->comment_string, $short_text) . "'\n" } | ||||
9535 | elsif( (exists $elt->{'target'})) | ||||||
9536 |
2
|
5
|
{ $dump .= "$indent|-PI: '" . $elt->{target} . "' - '" . _short_text( $elt->{data}, $short_text) . "'\n" } | ||||
9537 |
26
8
|
32
9
|
if( $extra) { $dump .= $elt->_dump_extra_data( $indent, $indent_sp, $short_text); } | ||||
9538 | } | ||||||
9539 |
53
|
89
|
return $dump; | ||||
9540 | } | ||||||
9541 | |||||||
9542 | sub _dump_extra_data | ||||||
9543 |
18
|
15
|
{ my( $elt, $indent, $indent_sp, $short_text)= @_; | ||||
9544 |
18
|
11
|
my $dump=''; | ||||
9545 |
18
|
19
|
if( $elt->extra_data) | ||||
9546 |
2
|
3
|
{ my $extra_data = $indent . "|-- (cpi before) '" . _short_text( $elt->extra_data, $short_text) . "'"; | ||||
9547 |
2
|
4
|
$extra_data=~ s{\n}{$indent_sp}g; | ||||
9548 |
2
|
2
|
$dump .= $extra_data . "\n"; | ||||
9549 | } | ||||||
9550 |
18
|
22
|
if( $elt->{extra_data_in_pcdata}) | ||||
9551 |
1
1
|
1
1
|
{ foreach my $data ( @{$elt->{extra_data_in_pcdata}}) | ||||
9552 |
2
|
6
|
{ my $extra_data = $indent . "|-- (cpi offset $data->{offset}) '" . _short_text( $data->{text}, $short_text) . "'"; | ||||
9553 |
2
|
2
|
$extra_data=~ s{\n}{$indent_sp}g; | ||||
9554 |
2
|
3
|
$dump .= $extra_data . "\n"; | ||||
9555 | } | ||||||
9556 | } | ||||||
9557 |
18
|
20
|
if( $elt->{extra_data_before_end_tag}) | ||||
9558 |
1
|
3
|
{ my $extra_data = $indent . "|-- (cpi end) '" . _short_text( $elt->{extra_data_before_end_tag}, $short_text) . "'"; | ||||
9559 |
1
|
2
|
$extra_data=~ s{\n}{$indent_sp}g; | ||||
9560 |
1
|
1
|
$dump .= $extra_data . "\n"; | ||||
9561 | } | ||||||
9562 |
18
|
24
|
return $dump; | ||||
9563 | } | ||||||
9564 | |||||||
9565 | |||||||
9566 | sub _short_text | ||||||
9567 |
32
|
69
|
{ my( $string, $length)= @_; | ||||
9568 |
32
25
|
83
45
|
if( !$length || (length( $string) < $length) ) { return $string; } | ||||
9569 |
7
|
9
|
my $l1= (length( $string) -5) /2; | ||||
9570 |
7
|
6
|
my $l2= length( $string) - ($l1 + 5); | ||||
9571 |
7
|
18
|
return substr( $string, 0, $l1) . ' ... ' . substr( $string, -$l2); | ||||
9572 | } | ||||||
9573 | |||||||
9574 | |||||||
9575 |
235
|
297
|
sub _and { return _join_defined( ' && ', @_); } | ||||
9576 |
235
550
|
243
1173
|
sub _join_defined { return join( shift(), grep { $_ } @_); } | ||||
9577 | |||||||
9578 | 1; |