#! /usr/local/bin/perl
# Main driver.
# Standard input is piped through tr(1) so that carriage returns become
# newlines; the rest of the script then reads the document line by line from
# the `inf` handle.
open(inf,"tr \"\\r\" \"\\n\" |") || die "input file cannot be opened\n";

# For every table opening found in the input, collect its cells and print
# them.  Subroutines are called directly: the old `do name()` call form was
# removed from Perl in 5.20.
while (search_table()) {
    make_table();
    dump_table();
}
close(inf);
# search_table: advance the input to just past the next table opening tag.
#
# Works on the global line buffer $_ and refills it through getnewline().
# Returns 1 when an opening tag was consumed, leaving in $_ whatever followed
# the tag's closing ">" on that line.  Returns 0 when the input ran out first
# ($EOF set by getnewline()).
#
# NOTE(review): the matching patterns of this routine were lost when the file
# was damaged (text between "<" and ">" was stripped).  The surviving
# statements show two scan loops, the first ending with `last` and the second
# with `return 1`; this version rebuilds them as "find <table" followed by
# "find the closing >".  Confirm against a pristine copy if one exists.
sub search_table {
    if (!$_) {
        getnewline();
        return 0 if $EOF;
    }

    # Phase 1: discard everything up to and including "<table"
    # (case-insensitive).  Lines without it are thrown away.
    until ($_ =~ s/^.*?<table\b//is) {
        getnewline();
        return 0 if $EOF;
    }

    # Phase 2: discard the remainder of the opening tag.  Its attributes may
    # continue over several lines, so keep reading until a ">" shows up.
    until ($_ =~ s/^[^>]*>//) {
        getnewline();
        return 0 if $EOF;
    }

    return 1;
}
# make_table: collect the contents of the current table.
#
# Resets the table state with inittable(), then passes input to parsedata()
# line by line until a closing </table> tag is seen or the input ends.  The
# text that follows </table> on its line is left in $_ for the next search.
# Always returns 0.
sub make_table {
    inittable();
    if (!$_) {
        getnewline();
        return 0 if $EOF;
    }
    while (1) {
        if ($_ =~ /<\/table>/i) {
            # Split at the FIRST </table> on the line (non-greedy prefix), so
            # that a later table on the same line is not swallowed into this
            # one.  $data and $dlm stay globals, as in the original code.
            ($data, $dlm, $_) = ($_ =~ /^(.*?)(<\/table>)(.*)$/i);
            parsedata($data);
            return 0;
        }
        parsedata($_);
        getnewline();
        return 0 if $EOF;
    }
}
# inittable: start a fresh table.
#
# Bumps the running table number, moves the row/column cursors and the
# recorded maxima back to -1 (meaning "nothing seen yet") and empties the
# cell store %table.
sub inittable {
    ++$table_id;
    ($rowpointer, $colpointer, $maxrow, $maxcol) = (-1, -1, -1, -1);
    %table = ();
}
# parsedata: split a piece of table markup into tags and text.
#
# Parameter: the text to scan (normally one input line).
# Each leading "<...>" run is handed to parsetag(); every other character is
# handed to putdata() one at a time, except newline and carriage return,
# which are skipped.
sub parsedata {
    my ($data) = @_;
    $data = '' unless defined $data;

    # Each pair of adjacent whitespace characters becomes one space.
    $data =~ s/\s\s/ /g;

    # Test the length, not the truth value: the string "0" is false in Perl,
    # so a truth test would silently drop a trailing "0" character.
    while (length $data) {
        if ($data =~ s/^(<[^>]*>)//) {
            # Copy the capture first so the callee is free to run regexes.
            my $tag_text = $1;
            parsetag($tag_text);
        }
        else {
            my $char = substr($data, 0, 1);
            $data = substr($data, 1);
            putdata($char) unless $char eq "\n" || $char eq "\r";
        }
    }
}
# putdata: append text to the cell under the cursor.
#
# Parameter: the text to append (callers pass single characters).
# Does nothing unless a cell is open ($data_td or $data_th is true).
# Otherwise the text is appended to the cell at ($rowpointer, $colpointer)
# and to the same column in the following $rowspan - 1 rows, so that a cell
# spanning several rows shows its content in each of them.
sub putdata {
    my ($text) = @_;
    return unless $data_td || $data_th;

    $table{"$rowpointer $colpointer"} .= $text;

    # A range is empty when $rowspan is 0, negative or undefined.  The
    # earlier countdown loop (`$n--; while ($n--)`) never reached zero in
    # those cases and ran without end.
    for my $row ($rowpointer + 1 .. $rowpointer + $rowspan - 1) {
        $table{"$row $colpointer"} .= $text;
    }
}
# parsetag: update the table cursor for one HTML tag.
#
# Parameter: the complete tag text, e.g. "<TD ROWSPAN=2>".  It is lowercased
# before matching.
#   <tr ...>  moves to the next row and resets the column cursor to -1
#   <td ...>  opens a cell, records its row span in $rowspan (1 when absent
#             or 0) and advances to the next free column via nextcolumn()
#   </td>     closes the cell
#   <br>      adds a newline to the current cell via putdata()
# All other tags are ignored.
#
# NOTE(review): the <tr> and <td> branches and the "<br>" literal were lost
# when the file was damaged (text between "<" and ">" was stripped).  They
# are rebuilt here from the surviving fragments ("... > $maxrow;",
# "$colpointer=-1;", the nextcolumn() call and putdata("\n")).  Confirm
# against a pristine copy if one exists.
sub parsetag {
    my ($tag) = @_;
    $tag =~ tr/A-Z/a-z/;
    CASETAG: {
        if ($tag =~ /^<tr\b/) {
            $rowpointer++;
            $maxrow = $rowpointer if $rowpointer > $maxrow;
            $colpointer = -1;
            last CASETAG;
        }
        if ($tag =~ /^<td\b/) {
            $data_td = 1;
            ($rowspan) = ($tag =~ /\browspan\s*=\s*["']?(\d+)/);
            $rowspan = 1 unless $rowspan;
            # A cell spanning several rows extends the table downwards.
            my $lastrow = $rowpointer + $rowspan - 1;
            $maxrow = $lastrow if $lastrow > $maxrow;
            nextcolumn();
            last CASETAG;
        }
        if ($tag =~ /^<\/td\s*/) {
            $data_td = 0;
            last CASETAG;
        }
        if ($tag eq "<br>") {
            putdata("\n");
            last CASETAG;
        }
    }
}
# nextcolumn: move the column cursor to the next free cell of the current row.
#
# Advances $colpointer by one, then keeps advancing while the cell at
# ($rowpointer, $colpointer) already has an entry in %table (for example
# because a cell in an earlier row spans down into this one).  Raises $maxcol
# when the cursor passes it.
sub nextcolumn {
    $colpointer++;
    # exists(), not a truth test: a cell whose text is "0" is occupied even
    # though that string is false in Perl.
    while (exists $table{"$rowpointer $colpointer"}) {
        $colpointer++;
    }
    $maxcol = $colpointer if $colpointer > $maxcol;
}
# dump_table: print the collected table to the selected output handle.
#
# Output layout:
#   TB <table id>
#   EB <row> <col>      one group per cell, row by row, covering
#   X<line of text>     0..$maxrow by 0..$maxcol
#   EE <row> <col>
#   TE <table id>
# Every line of cell text is prefixed with "X" after its leading whitespace
# is removed; a cell with no text prints as a single "X".
sub dump_table {
    print "TB $table_id\n";
    for my $row (0 .. $maxrow) {
        for my $col (0 .. $maxcol) {
            print "EB $row $col\n";
            my $cell = $table{"$row $col"};
            # Prefix continuation lines first, then the first line.
            $cell =~ s/\n\s*/\nX/g;
            $cell =~ s/^\s*/X/;
            print "$cell\n";
            print "EE $row $col\n";
        }
    }
    print "TE $table_id\n";
}
# getnewline: load the next input line into the global buffer $_.
#
# Reads one line from the `inf` handle.  When the input is exhausted, $EOF is
# set to 1 and $_ becomes the empty string; every later call leaves $_ empty.
sub getnewline {
    if ($EOF) {
        $_ = "";
        return;
    }
    $_ = <inf>;
    # Test definedness rather than truth: a final line consisting of "0"
    # with no newline is real data, not the end of the input.
    unless (defined $_) {
        $EOF = 1;
        $_ = "";
    }
}
# End of script.