File Coverage
File: | tools/xml_split/xml_split |
Coverage: | 79.2% |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | #!/usr/bin/perl -w | ||||||
2 | # $Id: /xmltwig/trunk/tools/xml_split/xml_split 17 2007-06-04T11:57:10.366292Z mrodrigu $ | ||||||
3 |
29
29
29
|
76395
29
802
|
use strict; | ||||
4 | |||||||
5 |
29
29
29
|
38864
41
194
|
use XML::Twig; | ||||
6 |
29
29
29
|
7583
16536
2283
|
use FindBin qw( $RealBin $RealScript); | ||||
7 |
29
29
29
|
30433
696
1877
|
use Getopt::Std; | ||||
8 | |||||||
9 |
29
|
2148783
|
import xml_split::state::parser; | ||||
10 |
29
|
165
|
import xml_split::state::twig; | ||||
11 | |||||||
12 |
29
|
58
|
undef $Getopt::Std::STANDARD_HELP_VERSION; | ||||
13 |
29
|
33
|
$Getopt::Std::STANDARD_HELP_VERSION=1; # to stop processing after --help or --version | ||||
14 | |||||||
15 |
29
29
29
|
94
22
61705
|
use vars qw( $VERSION $USAGE); | ||||
16 | |||||||
17 |
29
|
38
|
$VERSION= "0.06"; | ||||
18 |
29
|
37
|
$USAGE= "xml_split [-l <level> [-s <size> | -g <nb_grouped>] | -c <cond>] [-b <base>] [-n <nb>] [-e <ext>] [-p <plugin>] [-I <plugin_dir>] [-i] [-d] [-v] [-h] [-m] [-V] <files>\n"; | ||||
19 | |||||||
20 | { # main block | ||||||
21 | |||||||
22 |
29
29
|
125
39
|
my $opt={}; | ||||
23 |
29
|
72
|
getopts('l:c:b:g:n:e:p:is:dvhmV', $opt); | ||||
24 | |||||||
25 | # defaults | ||||||
26 |
29
|
1463
|
$opt->{n} ||= 2; # number of digits used for creating parts | ||||
27 |
29
|
161
|
$opt->{I} ||= ($ENV{HOME} || '') . "/.xml_split"; | ||||
28 | |||||||
29 |
29
1
|
77
0
|
if( $opt->{h}) { die $USAGE, "\n"; } | ||||
30 |
28
1
|
51
0
|
if( $opt->{m}) { exec "pod2text $RealBin/$RealScript"; } | ||||
31 |
27
1
1
|
51
25
0
|
if( $opt->{V}) { print "xml_split version $VERSION\n"; exit; } | ||||
32 | |||||||
33 |
26
|
75
|
my %factor=( ' ' => 1, K => 1000, M => 1_000_000, G => 1_000_000_000); | ||||
34 |
26
6
1
|
51
12
0
|
if( $opt->{s}) { if( $opt->{c}) { die "cannot use -c and -s at the same time\n"; } | ||||
35 | |||||||
36 |
5
|
28
|
if( $opt->{s}=~ m{^\s*(\d+)\s*(G[bo]?|M[bo]?|K[bo]?\s*)?$}i) | ||||
37 |
4
|
21
|
{ my( $size, $unit)= ($1, uc substr( $2 || ' ', 0, 1)); | ||||
38 |
4
|
12
|
$opt->{s}= $size * $factor{$unit}; | ||||
39 | } | ||||||
40 | else | ||||||
41 |
1
|
0
|
{ die "invalid size (should be in Kb, Mb or Gb): '$opt->{s}'\n"; } | ||||
42 | } | ||||||
43 | |||||||
44 |
24
8
|
76
14
|
if( $opt->{g}) { die "cannot use -g and -s at the same time\n" if( $opt->{s}); | ||||
45 |
7
|
9
|
die "cannot use -g and -c at the same time\n" if( $opt->{c}); | ||||
46 |
6
|
17
|
$opt->{l} ||= 1; | ||||
47 | } | ||||||
48 |
5
|
9
|
elsif( $opt->{c}) { die "cannot use -l and -c at the same time\n" if( $opt->{l}); } | ||||
49 |
11
11
|
30
23
|
else { $opt->{l} ||= 1; $opt->{c}= "level( $opt->{l})"; } | ||||
50 | |||||||
51 | |||||||
52 |
22
|
127
|
my $options= { cond => $opt->{c}, | ||||
53 | base => $opt->{b}, nb_digits => $opt->{n}, ext => $opt->{e}, | ||||||
54 | plugin => $opt->{p}, | ||||||
55 | no_pi => $opt->{d}, | ||||||
56 | verbose => $opt->{v}, | ||||||
57 | xinclude => $opt->{i} ? 1 : 0, | ||||||
58 | }; | ||||||
59 | |||||||
60 | |||||||
61 |
22
|
22
|
my $state; | ||||
62 |
22
|
48
|
if( my $plugin= $opt->{p}) | ||||
63 |
0
0
|
0
0
|
{ if( $plugin!~ m{^[\w:.-]+$}) { die "wrong plugin name '$plugin' (only word characters are allowed in plugin names)\n"; } | ||||
64 |
0
|
0
|
push @INC, $opt->{I}; | ||||
65 |
0
0
|
0
0
|
eval { require $plugin }; | ||||
66 |
0
0
|
0
0
|
if( $@) { die "cannot find plugin '$plugin': $!"; } | ||||
67 |
0
|
0
|
import $plugin; | ||||
68 |
0
|
0
|
$state= $plugin->new( $options); | ||||
69 | } | ||||||
70 | |||||||
71 | |||||||
72 |
22
|
65
|
if( $opt->{s}) | ||||
73 |
3
|
22
|
{ $state||= xml_split::state::parser->new( $options); | ||||
74 |
3
|
7
|
$state->{level} = $opt->{l}; | ||||
75 |
3
|
5
|
$state->{size} = $opt->{s}; | ||||
76 |
3
|
5
|
$state->{current_size}=0; | ||||
77 |
3
|
10
|
$state->{handlers}= { Start => \&parser_start_tag_size, End => \&parser_end_tag_size , Default => \&parser_default_size}; | ||||
78 |
3
|
9
|
warn "using XML::Parser\n" if( $opt->{v}); | ||||
79 |
3
|
6
|
split_with_parser( $state, @ARGV); | ||||
80 | } | ||||||
81 | elsif( $opt->{g}) | ||||||
82 |
6
|
31
|
{ $state||= xml_split::state::parser->new( $options); | ||||
83 |
6
|
7
|
$state->{level}= $opt->{l}; | ||||
84 |
6
|
9
|
$state->{group}= $opt->{g}; | ||||
85 |
6
|
17
|
$state->{handlers}= { Start => \&parser_start_tag_grouped, End => \&parser_end_tag_grouped , Default => \&parser_default_grouped}; | ||||
86 |
6
|
11
|
warn "using XML::Parser\n" if( $opt->{v}); | ||||
87 |
6
|
10
|
split_with_parser( $state, @ARGV); | ||||
88 | } | ||||||
89 | elsif( $opt->{l}) | ||||||
90 |
8
|
39
|
{ $state||= xml_split::state::parser->new( $options); | ||||
91 |
8
|
12
|
$state->{level}= $opt->{l}; | ||||
92 |
8
|
23
|
$state->{handlers}= { Start => \&parser_start_tag_level, End => \&parser_end_tag_level , Default => \&parser_default_level}; | ||||
93 |
8
|
17
|
warn "using XML::Parser\n" if( $opt->{v}); | ||||
94 |
8
|
14
|
split_with_parser( $state, @ARGV); | ||||
95 | } | ||||||
96 | else | ||||||
97 |
5
|
25
|
{ $state||= xml_split::state::twig->new( $options); | ||||
98 |
5
|
8
|
split_with_twig( $state, @ARGV); | ||||
99 | } | ||||||
100 | |||||||
101 |
22
|
0
|
exit; | ||||
102 | } | ||||||
103 | |||||||
104 | sub split_with_twig | ||||||
105 |
5
|
8
|
{ my( $state, @files)= @_; | ||||
106 |
5
|
10
|
if( !@files) | ||||
107 |
0
|
0
|
{ $state->{base} ||= 'out'; | ||||
108 |
0
|
0
|
$state->{ext} ||= '.xml'; | ||||
109 |
0
|
0
|
my $twig_options= twig_options( $state); | ||||
110 |
0
|
0
|
my $t= XML::Twig->new( %$twig_options, $state); | ||||
111 |
0
|
0
|
$state->{twig}= $t; | ||||
112 |
0
|
0
|
$t->parse( \*STDIN); | ||||
113 |
0
|
0
|
end_file( $t, $state); | ||||
114 | } | ||||||
115 | else | ||||||
116 |
5
|
7
|
{ foreach my $file (@files) | ||||
117 | { | ||||||
118 |
5
0
|
10
0
|
unless( $state->{base}) { $state->{seq_nb}=0; } | ||||
119 |
5
|
36
|
my( $base, $ext)= ($file=~ m{^(.*?)(\.\w+)?$}); | ||||
120 |
5
|
15
|
$state->{base} ||= $base; | ||||
121 |
5
|
19
|
$state->{ext} ||= $ext || '.xml'; | ||||
122 |
5
|
8
|
my $twig_options= twig_options( $state); | ||||
123 |
5
|
28
|
my $t= XML::Twig->new( %$twig_options); | ||||
124 |
5
|
7
|
$state->{twig}= $t; | ||||
125 |
5
|
12
|
$t->parsefile( $file); | ||||
126 |
5
|
9
|
end_file( $t, $state); | ||||
127 | } | ||||||
128 | } | ||||||
129 | } | ||||||
130 | |||||||
131 | sub split_with_parser | ||||||
132 |
17
|
23
|
{ my( $state, @files)= @_; | ||||
133 |
17
|
34
|
if( !@files) | ||||
134 |
0
|
0
|
{ $state->{base} ||= 'out'; | ||||
135 |
0
|
0
|
$state->{ext} ||= '.xml'; | ||||
136 |
0
|
0
|
my $parser_options= parser_options( $state); | ||||
137 |
0
|
0
|
my $p= XML::Parser->new( %$parser_options); | ||||
138 |
0
|
0
|
$state->{parser}= $p; | ||||
139 |
0
|
0
|
$p->parse( \*STDIN); | ||||
140 | } | ||||||
141 | else | ||||||
142 |
17
|
23
|
{ foreach my $file (@files) | ||||
143 | { | ||||||
144 |
17
1
|
43
1
|
unless( $state->{base}) { $state->{seq_nb}=0; } | ||||
145 |
17
|
128
|
my( $base, $ext)= ($file=~ m{^(.*?)(\.\w+)?$}); | ||||
146 |
17
|
39
|
$state->{base} ||= $base; | ||||
147 |
17
|
68
|
$state->{ext} ||= $ext || '.xml'; | ||||
148 |
17
|
25
|
my $parser_options= parser_options( $state); | ||||
149 |
17
|
90
|
my $p= XML::Parser->new( %$parser_options); | ||||
150 |
17
|
434
|
$state->{parser}= $p; | ||||
151 |
17
|
44
|
$p->parsefile( $file); | ||||
152 | } | ||||||
153 | } | ||||||
154 | } | ||||||
155 | |||||||
156 | sub parser_options | ||||||
157 |
17
|
18
|
{ my( $state)= @_; | ||||
158 | # prepare output to the main document | ||||||
159 |
17
|
33
|
unless( $state->{no_pi}) | ||||
160 |
17
|
62
|
{ my $file_name= $state->main_file_name(); # main file name | ||||
161 |
17
|
36
|
warn "generating main file $file_name\n" if( $state->{verbose}); | ||||
162 |
17
|
715
|
open( my $out, '>', $file_name) or die "cannot create main file '$file_name': $!"; | ||||
163 |
17
|
26
|
$state->{main_fh}= $out; | ||||
164 |
17
|
31
|
$state->{current_fh}= $out; | ||||
165 | } | ||||||
166 |
189
|
98030
|
my $handlers= { Start => sub { $state->{handlers}->{Start}->( $state, shift( @_)); }, | ||||
167 |
189
|
765
|
End => sub { $state->{handlers}->{End}->( $state, shift( @_)); }, | ||||
168 |
618
|
2882
|
Default => sub { $state->{handlers}->{Default}->( $state, shift( @_)); }, | ||||
169 |
5
|
224248
|
XMLDecl => sub { parser_declaration( $state, @_); }, | ||||
170 |
17
|
117
|
}; | ||||
171 | |||||||
172 |
17
|
38
|
return { Handlers => $handlers }; | ||||
173 | } | ||||||
174 | |||||||
175 | ################################################################################### | ||||||
176 | # # | ||||||
177 | # handlers for the -l option # | ||||||
178 | # # | ||||||
179 | ################################################################################### | ||||||
180 | |||||||
181 | sub parser_start_tag_level | ||||||
182 |
84
|
66
|
{ my( $state, $p)= @_; | ||||
183 | |||||||
184 |
84
|
202
|
if( $p->depth == $state->{level}) | ||||
185 |
34
|
181
|
{ $state->{seq_nb}++; | ||||
186 |
34
|
79
|
my $file_name= $state->file_name; | ||||
187 | # prepare chunk file | ||||||
188 |
34
|
53
|
warn "generating $file_name\n" if( $state->{verbose}); | ||||
189 |
34
|
841
|
open( my $out, '>', $file_name) or die "cannot create output file '$file_name': $!"; | ||||
190 |
34
|
43
|
$state->{current_fh}= $out; | ||||
191 |
34
10
10
|
49
7
46
|
if( $state->{xml_declaration}) { print {$state->{current_fh}} $state->{xml_declaration}, "\n"; } | ||||
192 | # output pi | ||||||
193 |
34
|
51
|
unless( $state->{no_pi}) | ||||
194 |
34
34
|
24
65
|
{ print {$state->{main_fh}} $state->include( $file_name) ; } | ||||
195 | } | ||||||
196 |
84
84
|
277
163
|
print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); | ||||
197 | } | ||||||
198 | |||||||
199 | sub parser_end_tag_level | ||||||
200 |
84
|
59
|
{ my( $state, $p)= @_; | ||||
201 |
84
84
|
101
138
|
print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); | ||||
202 |
84
|
312
|
if( $p->depth == $state->{level}) | ||||
203 |
34
|
162
|
{ unless( $state->{current_fh} == $state->{main_fh}) | ||||
204 |
34
|
485
|
{ close $state->{current_fh}; | ||||
205 |
34
|
197
|
$state->{current_fh}= $state->{main_fh}; | ||||
206 | } | ||||||
207 | } | ||||||
208 | } | ||||||
209 | |||||||
210 | sub parser_default_level | ||||||
211 |
282
|
208
|
{ my( $state, $p)= @_; | ||||
212 |
282
282
|
350
530
|
print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); | ||||
213 | } | ||||||
214 | |||||||
215 | |||||||
216 | ################################################################################### | ||||||
217 | # # | ||||||
218 | # handlers for the -s option # | ||||||
219 | # # | ||||||
220 | ################################################################################### | ||||||
221 | |||||||
222 | sub parser_start_tag_size | ||||||
223 |
39
|
28
|
{ my( $state, $p)= @_; | ||||
224 |
39
|
50
|
if( $p->depth == $state->{level} && !$state->{current_size}) | ||||
225 | { | ||||||
226 |
3
|
18
|
$state->{seq_nb}++; | ||||
227 |
3
|
12
|
my $file_name= $state->file_name; | ||||
228 | # prepare chunk file | ||||||
229 |
3
|
7
|
warn "generating $file_name\n" if( $state->{verbose}); | ||||
230 |
3
|
96
|
open( my $out, '>', $file_name) or die "cannot create output file '$file_name': $!"; | ||||
231 |
3
|
6
|
$state->{current_fh}= $out; | ||||
232 |
3
1
|
6
7
|
print {$state->{current_fh}} qq{$state->{xml_declaration}\n} if $state->{xml_declaration}; | ||||
233 |
3
3
|
11
11
|
print {$state->{current_fh}} qq{<xml_split:root xmlns:xml_split="http://xmltwig.com/xml_split">\n}; | ||||
234 | # output pi | ||||||
235 |
3
|
7
|
unless( $state->{no_pi}) | ||||
236 |
3
3
|
2
8
|
{ print {$state->{main_fh}} $state->include( $file_name) ; } | ||||
237 |
3
|
5
|
$state->{store_size}=1; | ||||
238 | } | ||||||
239 |
39
|
142
|
my $original_string= $p->original_string; | ||||
240 |
39
|
147
|
$state->{current_size} += length( $original_string) if( $state->{store_size}); | ||||
241 |
39
39
|
47
117
|
print {$state->{current_fh}} $original_string if( $state->{current_fh}); | ||||
242 | } | ||||||
243 | |||||||
244 | sub parser_end_tag_size | ||||||
245 |
39
|
28
|
{ my( $state, $p)= @_; | ||||
246 |
39
|
47
|
my $original_string= $p->original_string; | ||||
247 |
39
|
131
|
$state->{current_size} += length( $original_string) if( $state->{store_size}); | ||||
248 |
39
|
45
|
if( $p->depth == $state->{level} && $state->{current_size} > $state->{size}) | ||||
249 |
0
0
|
0
0
|
{ print {$state->{current_fh}} $original_string if( $state->{current_fh}); | ||||
250 |
0
|
0
|
end_file_with_size( $state); | ||||
251 | } | ||||||
252 | else | ||||||
253 |
39
3
|
151
11
|
{ if($p->depth < $state->{level}) { end_file_with_size( $state); } | ||||
254 |
39
39
|
133
61
|
print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); | ||||
255 | } | ||||||
256 | } | ||||||
257 | |||||||
258 | sub end_file_with_size | ||||||
259 |
3
|
3
|
{ my( $state)= @_; | ||||
260 |
3
|
8
|
unless( $state->{current_fh} == $state->{main_fh}) | ||||
261 |
3
3
|
1
5
|
{ print {$state->{current_fh}} qq{\n</xml_split:root>\n}; | ||||
262 |
3
|
2012
|
close $state->{current_fh}; | ||||
263 |
3
|
10
|
$state->{current_size}=0; | ||||
264 |
3
|
3
|
$state->{store_size}=0; | ||||
265 |
3
|
7
|
$state->{current_fh}= $state->{main_fh}; | ||||
266 | } | ||||||
267 | } | ||||||
268 | |||||||
269 | sub parser_default_size | ||||||
270 |
124
|
81
|
{ my( $state, $p)= @_; | ||||
271 |
124
|
152
|
my $string= $p->original_string; | ||||
272 |
124
|
380
|
if( $state->{store_size}) | ||||
273 |
114
|
75
|
{ $state->{current_size} += length( $string); | ||||
274 |
114
0
|
132
0
|
if( $p->depth < $state->{level}) { end_file_with_size( $state); } | ||||
275 | } | ||||||
276 |
124
124
|
368
363
|
print {$state->{current_fh}} $string if( $state->{current_fh}); | ||||
277 | } | ||||||
278 | |||||||
279 | ################################################################################### | ||||||
280 | # # | ||||||
281 | # handlers for the -g option # | ||||||
282 | # # | ||||||
283 | ################################################################################### | ||||||
284 | |||||||
285 | sub parser_start_tag_grouped | ||||||
286 |
66
|
46
|
{ my( $state, $p)= @_; | ||||
287 |
66
|
89
|
if( $p->depth == $state->{level}) | ||||
288 |
32
|
109
|
{ if( !$state->{current_nb}) | ||||
289 |
13
|
11
|
{ $state->{seq_nb}++; | ||||
290 |
13
|
35
|
my $file_name= $state->file_name; | ||||
291 | # prepare chunk file | ||||||
292 |
13
|
19
|
warn "generating $file_name\n" if( $state->{verbose}); | ||||
293 |
13
|
301
|
open( my $out, '>', $file_name) or die "cannot create output file '$file_name': $!"; | ||||
294 |
13
|
17
|
$state->{current_fh}= $out; | ||||
295 |
13
13
26
|
9
26
97
|
print {$state->{current_fh}} join( "\n", grep { $_ } ( $state->{xml_declaration}, | ||||
296 | qq{<xml_split:root xmlns:xml_split="http://xmltwig.com/xml_split">\n } | ||||||
297 | ) | ||||||
298 | ); | ||||||
299 | # output pi | ||||||
300 |
13
|
23
|
unless( $state->{no_pi}) | ||||
301 |
13
13
|
9
24
|
{ print {$state->{main_fh}} $state->include( $file_name) ; } | ||||
302 | } | ||||||
303 | } | ||||||
304 |
66
66
|
175
111
|
print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); | ||||
305 | } | ||||||
306 | |||||||
307 | sub parser_end_tag_grouped | ||||||
308 |
66
|
46
|
{ my( $state, $p)= @_; | ||||
309 |
66
|
79
|
if( $p->depth == $state->{level}) | ||||
310 |
32
32
|
112
48
|
{ print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); | ||||
311 |
32
|
90
|
$state->{current_nb}++; | ||||
312 |
32
10
|
73
16
|
if( $state->{current_nb} == $state->{group}) { end_file_grouped( $state); } | ||||
313 | } | ||||||
314 | else | ||||||
315 |
34
11
|
109
43
|
{ if($p->depth < $state->{level}) { end_file_grouped( $state, { no_nl => 1 }); } | ||||
316 |
34
34
|
90
58
|
print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); | ||||
317 | } | ||||||
318 | } | ||||||
319 | |||||||
320 | sub end_file_grouped | ||||||
321 |
21
|
19
|
{ my( $state, $options)= @_; | ||||
322 |
21
10
|
29
10
|
print {$state->{current_fh}} qq{\n} unless( $options->{no_nl}); | ||||
323 |
21
|
41
|
unless( $state->{current_fh} == $state->{main_fh}) | ||||
324 |
13
13
|
11
10
|
{ print {$state->{current_fh}} qq{</xml_split:root>\n}; | ||||
325 |
13
|
332
|
close $state->{current_fh}; | ||||
326 |
13
|
13
|
$state->{current_nb}=0; | ||||
327 |
13
|
72
|
$state->{current_fh}= $state->{main_fh}; | ||||
328 | } | ||||||
329 | } | ||||||
330 | |||||||
331 | sub parser_default_grouped | ||||||
332 |
212
|
143
|
{ my( $state, $p)= @_; | ||||
333 |
212
212
|
243
289
|
print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); | ||||
334 | } | ||||||
335 | |||||||
336 | sub char_parser | ||||||
337 |
0
|
0
|
{ my( $state, $p)=( shift, shift); | ||||
338 |
0
0
|
0
0
|
print {$state->{current_fh}} $_[0] if( $state->{current_fh}); | ||||
339 | } | ||||||
340 | |||||||
341 | sub parser_declaration | ||||||
342 |
5
|
10
|
{ my( $state, $p, $version, $encoding, $standalone)= @_; | ||||
343 |
5
|
17
|
$state->{xml_declaration}= $p->recognized_string || ''; | ||||
344 |
5
5
|
49
56
|
print {$state->{main_fh}} $state->{xml_declaration}; | ||||
345 | # avoid calling original_string if not needed | ||||||
346 | #if( !$state->{xml_declaration} || $state->{xml_declaration}=~ m{encoding\s*=\s*["']utf-?8["']}i) | ||||||
347 | # { $state->{utf8_encoded}=1; | ||||||
348 | # $p->setHandlers( Char => \&char_parser); | ||||||
349 | # } | ||||||
350 | } | ||||||
351 | |||||||
352 | |||||||
353 | sub twig_options | ||||||
354 |
5
|
5
|
{ my( $state)= @_; | ||||
355 | |||||||
356 | # base options, ensures maximun fidelity to the original document | ||||||
357 |
5
|
12
|
my $twig_options= { keep_encoding => 1, keep_spaces => 1 }; | ||||
358 | |||||||
359 | # prepare output to the main document | ||||||
360 |
5
|
12
|
unless( $state->{no_pi}) | ||||
361 |
5
|
15
|
{ my $file_name= $state->main_file_name(); # main file name | ||||
362 |
5
|
10
|
warn "generating main file $file_name\n" if( $state->{verbose}); | ||||
363 |
5
|
222
|
open( my $out, '>', $file_name) or die "cannot create main file '$file_name': $!"; | ||||
364 |
5
|
8
|
$state->{out}= $out; | ||||
365 |
5
|
7
|
$twig_options->{twig_print_outside_roots}= $out; | ||||
366 |
5
33
|
20
58
|
$twig_options->{start_tag_handlers}= { $state->{cond} => sub { $_->set_att( '#in_fragment' => 1); } }; | ||||
367 | } | ||||||
368 | |||||||
369 |
5
33
|
15
43
|
$twig_options->{twig_roots}= { $state->{cond} => sub { dump_elt( @_, $state); } }; | ||||
370 |
5
|
6
|
return $twig_options; | ||||
371 | } | ||||||
372 | |||||||
373 | sub dump_elt | ||||||
374 |
33
|
24
|
{ my( $t, $elt, $state)= @_; | ||||
375 |
33
|
28
|
$state->{seq_nb}++; | ||||
376 |
33
|
23
|
$state->{elt}= $elt; | ||||
377 | |||||||
378 |
33
|
139
|
my $file_name= $state->file_name; | ||||
379 |
33
|
42
|
warn "generating $file_name\n" if( $state->{verbose}); | ||||
380 | |||||||
381 |
33
|
90
|
my $fragment= XML::Twig->new(); | ||||
382 |
33
|
34
|
$fragment->{twig_xmldecl} = $t->{twig_xmldecl}; | ||||
383 |
33
|
29
|
$fragment->{twig_doctype} = $t->{twig_doctype}; | ||||
384 |
33
|
34
|
$fragment->{twig_dtd} = $t->{twig_dtd}; | ||||
385 | |||||||
386 |
33
|
36
|
if( !$state->{no_pis}) | ||||
387 | { # if we are still within a fragment, just replace the element by the PI | ||||||
388 | # otherwise print it to the main document | ||||||
389 |
33
|
412
|
my $include= $state->include( $file_name); | ||||
390 | |||||||
391 |
33
|
46
|
$elt->del_att( '#in_fragment'); | ||||
392 | |||||||
393 |
33
|
50
|
if( $elt->inherited_att( '#in_fragment')) | ||||
394 |
2
|
6
|
{ $elt->parent( '*[@#in_fragment="1"]')->set_att( '#has_subdocs' => 1); | ||||
395 |
2
|
5
|
$include->replace( $elt); | ||||
396 | } | ||||||
397 | else | ||||||
398 |
31
|
44
|
{ $elt->cut; | ||||
399 |
31
|
45
|
$include->print( $state->{out}); | ||||
400 | } | ||||||
401 | } | ||||||
402 | else | ||||||
403 |
0
|
0
|
{ $elt->cut; } | ||||
404 | |||||||
405 |
33
|
54
|
$fragment->set_root( $elt); | ||||
406 |
33
|
885
|
open( my $out, '>', $file_name) or die "cannot create output file '$file_name': $!"; | ||||
407 | #if( $state->{xml_declaration}) { warn "c1"; print {$out} $state->{xml_declaration}, "\n"; } | ||||||
408 | #if( $fragment->{xml_decl}) { warn "c2"; print {$out} $fragment->xml_decl, "\n"; } | ||||||
409 |
33
|
68
|
$fragment->set_keep_encoding( 1); | ||||
410 |
33
|
48
|
$fragment->print( $out); | ||||
411 |
33
|
49043
|
close $out; | ||||
412 | } | ||||||
413 | |||||||
414 | sub end_file | ||||||
415 |
5
|
7
|
{ my( $t, $state)= @_; | ||||
416 |
5
|
12
|
unless( $state->{no_pi}) | ||||
417 |
5
|
31006
|
{ close $state->{out}; } | ||||
418 | } | ||||||
419 | |||||||
420 | |||||||
421 | # for Getop::Std | ||||||
422 |
0
|
0
|
sub HELP_MESSAGE { return $USAGE; } | ||||
423 |
0
|
0
|
sub VERSION_MESSAGE { return $VERSION; } | ||||
424 | |||||||
425 | package xml_split::state; | ||||||
426 | |||||||
427 | sub new | ||||||
428 |
22
|
31
|
{ my( $ref, $options)= @_; | ||||
429 |
22
|
37
|
my $state= bless $options, $ref; | ||||
430 |
22
|
72
|
$state->{seq_nb}=0; | ||||
431 |
22
|
53
|
return $state; | ||||
432 | } | ||||||
433 | |||||||
434 | sub file_name | ||||||
435 |
83
|
59
|
{ my( $state)= @_; | ||||
436 |
83
|
232
|
my $nb= sprintf( "%0$state->{nb_digits}d", $state->{seq_nb}); | ||||
437 |
83
|
109
|
my $file_name= "$state->{base}-$nb$state->{ext}"; | ||||
438 |
83
|
97
|
$file_name =~ s{\\}{/}g; | ||||
439 |
83
|
105
|
return $file_name; | ||||
440 | } | ||||||
441 | |||||||
442 | sub main_file_name | ||||||
443 |
22
|
24
|
{ my( $state)= @_; | ||||
444 |
22
|
70
|
my $nb= sprintf( "%0$state->{nb_digits}d", 0); | ||||
445 |
22
|
49
|
my $file_name= "$state->{base}-$nb$state->{ext}"; | ||||
446 |
22
|
29
|
return $file_name; | ||||
447 | } | ||||||
448 |
0
|
0
|
1; | ||||
449 | |||||||
450 | ################################################################################### | ||||||
451 | # # | ||||||
452 | # state when using XML::Parser # | ||||||
453 | # # | ||||||
454 | ################################################################################### | ||||||
455 | |||||||
456 | package xml_split::state::parser; | ||||||
457 |
0
|
0
|
import xml_split::state; | ||||
458 |
29
29
29
|
103
23
1714
|
use base 'xml_split::state'; | ||||
459 | |||||||
460 | sub include | ||||||
461 |
50
|
56
|
{ my( $state, $file_name)= @_; | ||||
462 |
50
|
75
|
if( $state->{xinclude}) | ||||
463 |
12
|
35
|
{ return qq{<xi:include href="$file_name" />}; } | ||||
464 | else | ||||||
465 |
38
|
97
|
{ return qq{<?merge subdocs = 0 :$file_name?>}; } | ||||
466 | } | ||||||
467 |
0
|
0
|
1; | ||||
468 | |||||||
469 | ################################################################################### | ||||||
470 | # # | ||||||
471 | # state when using XML::Twig # | ||||||
472 | # # | ||||||
473 | ################################################################################### | ||||||
474 | |||||||
475 | package xml_split::state::twig; | ||||||
476 |
0
|
0
|
import xml_split::state; | ||||
477 |
29
29
29
|
4073
31
1453
|
use base 'xml_split::state'; | ||||
478 | |||||||
479 | sub include | ||||||
480 |
33
|
30
|
{ my( $state, $file_name)= @_; | ||||
481 |
33
|
14
|
my $include; | ||||
482 |
33
|
58
|
my $subdocs= $state->{elt}->att( '#has_subdocs') || 0; | ||||
483 |
33
|
39
|
if( $state->{xinclude}) | ||||
484 |
12
|
33
|
{ $include= XML::Twig::Elt->new( 'xi:include', { href => $file_name }); | ||||
485 |
12
1
|
23
2
|
if( $subdocs) { $include->set_att( subdocs => 1); } | ||||
486 | } | ||||||
487 | else | ||||||
488 | { | ||||||
489 |
21
|
33
|
$include= XML::Twig::Elt->new( '#PI') | ||||
490 | ->set_pi( merge => " subdocs = $subdocs :$file_name"); | ||||||
491 | } | ||||||
492 |
33
|
34
|
return $include; | ||||
493 | } | ||||||
494 | |||||||
495 |
0
|
0
|
1; | ||||
496 | |||||||
497 | package main; | ||||||
498 |