#!/usr/bin/perl
#
# separate-xml.perl -- randomly split the records of an XML-like file into
# two output files.
#
# Usage: separate-xml.perl ratio inputfile outfile1 outfile2
#
#   ratio      fraction of the records that goes to outfile1; the resulting
#              record count is clamped to [0, number of records]
#   inputfile  file to read; it is slurped completely into memory
#   outfile1   receives the header, the first share of the shuffled
#              records, then the footer
#   outfile2   receives the header, all remaining records, then the footer
#
# Progress messages are printed to STDOUT. The script dies with a message
# if the ratio is not numeric or a file cannot be opened or closed.

use strict;
use warnings;

use List::Util   qw(shuffle);
use Scalar::Util qw(looks_like_number);

if (@ARGV < 4) {
    print "separate-xml.perl ratio inputfile outfile1 outfile2\n";
    # No arguments at all is a plain usage request (exit 0, as before);
    # a partial argument list is an error.
    exit(@ARGV ? 1 : 0);
}

my ($ratio, $fin, $f1, $f2) = @ARGV;
die("ratio must be a number, got '$ratio'\n")
    unless looks_like_number($ratio);

# Slurp the whole input. "or" (not "||") is required so that die() applies
# to the result of open() rather than to the file-name argument.
open(my $in, '<', $fin) or die("could not open file $fin: $!");
my $txt = do { local $/; <$in> };
close($in);
$txt = '' unless defined $txt;

# Collect the records.
# NOTE(review): this capture group is empty, so every match yields an empty
# string. It looks as if the element pattern (something like
# "<tag ...>...</tag>") was stripped from the file together with other
# markup -- TODO restore the real record pattern before relying on the
# output.
my @content;
while ($txt =~ /()/isg) {
    push @content, $1;
}

# Everything up to and including the last "</header>" is copied verbatim to
# both outputs. Without a header both strings stay empty.
# NOTE(review): the footer is an empty string here; a closing tag may have
# been lost the same way as the record pattern -- confirm.
my $header = '';
my $footer = '';
if ($txt =~ /^(.*<\/header>)/is) {
    $header = $1;
}

print "shuffle\n";
# List::Util::shuffle copes with empty and single-element lists, which the
# former hand-written loop did not.
@content = shuffle(@content);

print "output $f1 $f2 \n";
open(my $out1, '>', $f1) or die("could not write to file $f1: $!");
open(my $out2, '>', $f2) or die("could not write to file $f2: $!");

# Number of records for the first file. The list is already in random
# order, so taking its front part is an unbiased random selection and
# cannot loop forever the way repeated random probing could.
my $nf1 = int($ratio * @content);
$nf1 = 0                if !($nf1 > 0);      # negative or NaN
$nf1 = scalar(@content) if $nf1 > @content;  # ratio above 1

my @first_part = splice(@content, 0, $nf1);  # @content keeps the rest

print {$out1} $header, @first_part, $footer;
print {$out2} $header, @content,    $footer;

# Buffered write errors only show up at close time, so check it.
close($out1) or die("could not write to file $f1: $!");
close($out2) or die("could not write to file $f2: $!");

print "done.\n";