#! /usr/local/bin/perl
#
# Extract every HTML table found on standard input and dump it cell by cell.
#
# Input is piped through tr(1) so that bare carriage returns become line
# feeds.  For each table the output is:
#
#     TB <table number>
#     EB <row> <col>
#     X<cell text, every line prefixed with "X">
#     EE <row> <col>
#     ...
#     TE <table number>
#
# NOTE(review): the copy of this script that was reviewed had been passed
# through an HTML tag stripper: the whole file was collapsed onto two lines
# and every "<tag ...>"-looking span inside the code was deleted.  The
# spots marked "reconstructed" below were rebuilt from the surviving
# fragments; please compare them against a pristine copy if one exists.

use strict;
use warnings;

# Shared parser state.  These stay package globals because every routine
# below communicates through them (and through $_, which holds the
# not-yet-consumed remainder of the current input line).
our $INF;           # read handle on the tr(1) pipe
our $EOF;           # true once the input is exhausted
our $table_id;      # 1-based number of the table being processed
our $rowpointer;    # row index of the current cell (-1 before the first <tr>)
our $colpointer;    # column index of the current cell (-1 before the first cell)
our $maxrow;        # highest row index used by the current table
our $maxcol;        # highest column index used by the current table
our %table;         # "row col" => accumulated cell text
our $data_td;       # true while inside <td> ... </td>
our $data_th;       # true while inside <th> ... </th>
our $rowspan;       # rowspan of the current cell (>= 1)

# Script entry point: open the input filter and process tables until EOF.
sub main {
    # List-form pipe open: no shell involved.  tr itself interprets the
    # two-character sequences \r and \n, exactly as in the shell command
    # the script used originally.
    open($INF, '-|', 'tr', '\r', '\n')
        or die "input file cannot be opened\n";

    while (search_table()) {
        make_table();
        dump_table();
    }
    close($INF);
    return 0;
}

# Advance the input to just past the next opening <table ...> tag,
# skipping over HTML comments on the way.
# Returns 1 when a table was found, 0 at end of input.
sub search_table {
    if (!defined $_ || $_ eq '') {
        getnewline();
        return 0 if $EOF;
    }

    while (1) {
        # Reconstructed: the alternation and both inner patterns were
        # stripped from the reviewed copy.
        if (/(<!--|<table\b)/i) {
            my $opener = $1;
            $_ = substr($_, $+[0]);

            if ($opener eq '<!--') {
                # Skip to the end of the comment, reading more lines as
                # needed.  The substitution result is tested directly:
                # $& keeps its old value after a failed match, so it
                # cannot be used to detect success.
                until (s/\A.*?-->//s) {
                    getnewline();
                    return 0 if $EOF;
                }
                next;    # rescan what follows the comment on this line
            }

            # Opening <table: consume the rest of the tag, which may
            # continue on following lines.
            until (s/\A[^>]*>//) {
                getnewline();
                return 0 if $EOF;
            }
            return 1;
        }

        getnewline();
        return 0 if $EOF;
    }
}

# Read the body of the table whose opening tag has just been consumed and
# fill %table.  Text after the closing </table> is left in $_ for the next
# search.  Always returns 0; the return value carries no meaning.
sub make_table {
    inittable();

    if (!defined $_ || $_ eq '') {
        getnewline();
        return 0 if $EOF;
    }

    while (1) {
        # Non-greedy prefix so that a second table on the same line is not
        # swallowed into this one.
        if (/\A(.*?)<\/table>(.*)\z/is) {
            my ($body, $rest) = ($1, $2);
            $_ = $rest;
            parsedata($body);
            return 0;
        }
        parsedata($_);
        getnewline();
        return 0 if $EOF;
    }
}

# Reset all per-table state and bump the table counter.
sub inittable {
    $table_id++;
    $rowpointer = -1;
    $colpointer = -1;
    $maxrow     = -1;
    $maxcol     = -1;
    %table      = ();

    # Do not let an unterminated cell of the previous table leak into
    # this one.
    $data_td = 0;
    $data_th = 0;
    $rowspan = 1;
    return;
}

# Split one chunk of table markup into tags and text.  Tags go to
# parsetag(); text goes to putdata() one character at a time, with CR and
# LF dropped.
sub parsedata {
    my ($data) = @_;
    return unless defined $data;

    # Kept from the original: each PAIR of whitespace characters becomes a
    # single space (this is deliberately not a full \s+ collapse).
    $data =~ s/\s\s/ /g;

    # length(), not truthiness: a chunk consisting of "0" is still data.
    while (length $data) {
        # A tag must open and close on the same line to count as a tag;
        # otherwise "<" is treated as ordinary text.
        if ($data =~ s/\A(<[^>\n]*>)//) {
            my $tag = $1;
            parsetag($tag);
        }
        else {
            my $char = substr($data, 0, 1, '');
            putdata($char) unless $char eq "\n" || $char eq "\r";
        }
    }
    return;
}

# Append text to the current cell and to every row the cell spans.
# Text outside of a <td>/<th> cell is ignored.
sub putdata {
    my ($text) = @_;
    return unless $data_td || $data_th;

    my $span = (defined $rowspan && $rowspan > 1) ? $rowspan : 1;
    for my $row ($rowpointer .. $rowpointer + $span - 1) {
        $table{"$row $colpointer"} .= $text;
    }
    return;
}

# Update the parser state for a single tag (case-insensitive).
# Tags other than tr, td, th and br are ignored.
sub parsetag {
    my ($tag) = @_;
    $tag = lc $tag;

    if ($tag =~ /^<tr\b/) {
        # Reconstructed from the surviving tail of this branch.
        $rowpointer++;
        $maxrow = $rowpointer if $rowpointer > $maxrow;
        $colpointer = -1;
    }
    elsif ($tag =~ /^<t([dh])\b/) {
        # Reconstructed: only "... > $maxrow; nextcolumn()" survived.
        # NOTE(review): <th> is handled here because putdata() tests
        # $data_th; confirm that header cells are wanted in the output.
        my $kind = $1;
        $data_td = $kind eq 'd' ? 1 : 0;
        $data_th = $kind eq 'h' ? 1 : 0;

        $rowspan = ($tag =~ /\browspan\s*=\s*["']?(\d+)/) ? $1 : 1;
        $rowspan = 1 if $rowspan < 1;

        my $last_row = $rowpointer + $rowspan - 1;
        $maxrow = $last_row if $last_row > $maxrow;
        nextcolumn();

        # Reserve every position the cell covers, so that an empty
        # spanning cell still keeps later rows from reusing its column.
        for my $row ($rowpointer .. $last_row) {
            my $key = "$row $colpointer";
            $table{$key} = '' unless exists $table{$key};
        }
    }
    elsif ($tag =~ m{^</td\b}) {
        $data_td = 0;
    }
    elsif ($tag =~ m{^</th\b}) {
        $data_th = 0;
    }
    elsif ($tag =~ /^<br\b/) {
        # A line break inside a cell becomes a newline in the cell text.
        putdata("\n");
    }
    return;
}

# Move to the next free column of the current row, skipping positions
# already claimed by a cell spanning down from an earlier row.
sub nextcolumn {
    $colpointer++;
    # exists(), not truthiness: a cell whose text is "0" is occupied.
    $colpointer++ while exists $table{"$rowpointer $colpointer"};
    $maxcol = $colpointer if $colpointer > $maxcol;
    return;
}

# Print the current table in the TB/EB/EE/TE format described at the top
# of the file.  Every grid position up to ($maxrow, $maxcol) is printed,
# including empty ones.
sub dump_table {
    print "TB $table_id\n";
    for my $i (0 .. $maxrow) {
        for my $j (0 .. $maxcol) {
            my $data = $table{"$i $j"};
            $data = '' unless defined $data;

            # Prefix every line of the cell with "X" so that cell text can
            # never be mistaken for a record marker.
            $data =~ s/\n\s*/\nX/g;
            $data =~ s/^\s*/X/;

            print "EB $i $j\n";
            print "$data\n";
            print "EE $i $j\n";
        }
    }
    print "TE $table_id\n";
    return;
}

# Load the next input line into $_.  At end of input $_ becomes the empty
# string and $EOF is set.
sub getnewline {
    if ($EOF) {
        $_ = '';
        return;
    }

    $_ = <$INF>;    # reconstructed: the handle was stripped from the source
    # defined(), not truthiness: a final line "0" is not end of file.
    if (!defined $_) {
        $_   = '';
        $EOF = 1;
    }
    return;
}

# Run only when executed as a script, so the routines can also be loaded
# and exercised from a test.
main() unless caller;