#!/usr/bin/perl -w
# Ken Suzuki
# 1.2.2009 created v0.1
# 25.12.2011 modified for CPAN upload v0.2

=head1 NAME

LatexReferenceCheck

=head1 STATUS

version 0.2

=head1 AUTHOR

Ken Suzuki

=head1 DESCRIPTION

Perform trivial editorial checks of papers from a tex source code.
It checks

  .) if there's any citation in the abstract
  .) if all figures/tables are referred
  .) if either PACS code or keyword is given
  .) if all references are cited
  .) if reference appears in sequential order

=head1 USAGE

LatexReferenceCheck [options] texfilename

  [options]
  -v : verbose mode
  -h : print usage()

=pod SCRIPT CATEGORIES

Unix/System_administration
CPAN/Administrative
Educational

=cut

# Initialization ---------------------------------------------
# These are package globals (declared with "our") because the checking code
# further down in this file reads and updates them by their plain names.

# Word indices of the abstract / bibliography markers; 0 means "not seen".
our ( $begin_abstract, $end_abstract, $begin_bib, $end_bib ) = ( 0, 0, 0, 0 );

# Word indices of \begin{figure}, \end{figure} and \label words.  The rest of
# the file pushes onto these as arrays, so they are initialised as arrays.
our @begin_figures = ();
our @end_figures   = ();
our @labels        = ();

# -999 marks "check has not run yet"; the checks overwrite it with a count.
our $citation_in_abstract   = -999;
our $is_citation_order_okay = -999;

our $is_bibtex               = 0;
our $is_any_figure_unlabeled = 0;
our $is_all_figure_referred  = 0;
our $nfigures                = 0;
our $verbose                 = 0;

# Input file name (last non-option argument) and optional result file.
# $outfile stays undefined: the -o option that would set it is disabled.
our ( $filename, $outfile );
#------------------------------------------------------------

# Command line: "-v" switches verbose mode on, "-h" prints the usage text and
# exits, and any other argument is taken as the tex file name.
for my $arg (@ARGV) {
    if ( $arg =~ m/^-v$/ ) {
        $verbose = 1;
    }
    elsif ( $arg =~ m/^-h$/ ) {
        usage();
    }
    else {
        $filename = $arg;
    }
}

# usage()
#   Print the help text to the currently selected output handle and terminate
#   the program with exit status 1.  Takes no arguments and never returns.
sub usage {
    print "
LatexReferenceCheck [options] filename:
   perform basic checks of latex references, e.x.
   .if all figures are referred
   .if all references are cited
   .if citations are in right order

options
 -v  verbose mode
 -h  print out usage (this page)
";
    exit(1);
}

# Disabled option, kept for reference:
# -o filename to which the result is written. If not specified, the result
#             goes to STDOUT.
# ---------------------------------------------------------------------------
# Main checks.
#
# The bare block confines "use strict" / "use warnings" and the helper
# lexicals to this section.  The option parsing earlier in the file stores
# its results in package globals, which are read here through their fully
# qualified names ($main::filename, $main::verbose, $main::outfile).
#
# The tex file is handled as a flat list of whitespace-separated words with
# comments removed; all positions below are indices into that list.
# ---------------------------------------------------------------------------
{
    use strict;
    use warnings;

    # \ref{...} and the \autoref / \cref / \Cref variants; captures the
    # (possibly comma-separated) label list.
    my $REF_COMMAND = qr/\\(?:auto|c|C)?ref\*?\{([^{}]*)\}/;

    # \cite, \citep, \citet with optional [..] arguments; captures the
    # (possibly comma-separated) key list.
    my $CITE_COMMAND = qr/\\cite[pt]?\*?(?:\[[^\]]*\])*\{([^{}]*)\}/;

    # \bibitem{key} or \bibitem[label]{key}; captures the key.
    my $BIBITEM_COMMAND = qr/\\bibitem(?:\[[^\]]*\])?\{([^{}]*)\}/;

    # _split_keys($list)
    #   Split a comma-separated list such as "a, b,c" into its keys, with
    #   surrounding whitespace removed and empty entries dropped.
    sub _split_keys {
        my ($list) = @_;
        my @keys;
        for my $key ( split /,/, $list ) {
            $key =~ s/\A\s+//;
            $key =~ s/\s+\z//;
            push @keys, $key if length $key;
        }
        return @keys;
    }

    # _read_words($path)
    #   Read the tex file and return ( \@words, $nline ): the words of all
    #   lines that do not start with "%", and the number of such lines.
    #   Dies if the file cannot be opened.
    sub _read_words {
        my ($path) = @_;
        open my $in, '<', $path or die "Cannot open file: $path\n";

        my @words;
        my $nline = 0;
        while ( my $line = <$in> ) {
            chomp $line;
            next if $line =~ /^%/;    # whole-line comment, not counted

            # Cut a trailing comment: a "%" preceded by an even number of
            # backslashes (including none).  "\%" is a literal percent sign.
            $line =~ s/(?<!\\)((?:\\\\)*)%.*\z/$1/s;

            push @words, split ' ', $line;
            $nline++;
        }
        close $in;
        return ( \@words, $nline );
    }

    # _find_markers(\@words)
    #   Locate the structural markers.  Returns a hash reference with the
    #   keys begin_abstract, end_abstract, begin_bib, end_bib (word index of
    #   the last occurrence, absent when not found) and figures (array of
    #   [begin, end] index pairs, one per figure environment).
    sub _find_markers {
        my ($words) = @_;
        my %marker = ( figures => [] );
        my $open_figure;

        for my $i ( 0 .. $#{$words} ) {
            my $word = $words->[$i];
            $marker{begin_abstract} = $i if $word =~ /^\\begin\{abstract\}/;
            $marker{end_abstract}   = $i if $word =~ /^\\end\{abstract\}/;
            $marker{begin_bib} = $i if $word =~ /^\\begin\{thebibliography\}/;
            $marker{end_bib}   = $i if $word =~ /^\\end\{thebibliography\}/;

            $open_figure = $i if $word =~ /^\\begin\{figure\*?\}/;

            # Only an \end that closes an open \begin forms a figure, so a
            # stray \begin or \end cannot shift the pairing of later ones.
            if ( defined $open_figure && $word =~ /\\end\{figure\*?\}/ ) {
                push @{ $marker{figures} }, [ $open_figure, $i ];
                $open_figure = undef;
            }
        }
        return \%marker;
    }

    # _relocate_figures(\@words, $first, $last, \@figures)
    #   Return (as an array reference) the words $first .. $last with every
    #   figure that has a {first_ref} position taken out of its original
    #   place and re-inserted right after the word at {first_ref}.  Figures
    #   without {first_ref} stay where they are.
    sub _relocate_figures {
        my ( $words, $first, $last, $figures ) = @_;

        my %is_moved;      # word index => true if it belongs to a moved figure
        my %figures_at;    # word index => figures to insert after that word
        for my $fig ( @{$figures} ) {
            next unless defined $fig->{first_ref};
            $is_moved{$_} = 1 for $fig->{begin} .. $fig->{end};
            push @{ $figures_at{ $fig->{first_ref} } }, $fig;
        }

        my @text;
        for my $i ( $first .. $last ) {
            push @text, $words->[$i] unless $is_moved{$i};
            next unless $figures_at{$i};
            for my $fig ( @{ $figures_at{$i} } ) {
                push @text, @{ $fig->{words} };
            }
        }
        return \@text;
    }

    # _count_misordered_citations(\@citations, \@bibliography, $note)
    #   Walk the citations in order of appearance and return how many of
    #   them cite a bibliography entry before all earlier entries have been
    #   cited.  Citation keys that are not in the bibliography are ignored.
    #   $note is a code reference used for verbose messages.
    sub _count_misordered_citations {
        my ( $citations, $bibliography, $note ) = @_;

        my %index_of;
        for my $i ( 0 .. $#{$bibliography} ) {
            my $key = $bibliography->[$i];
            $index_of{$key} = $i unless exists $index_of{$key};
        }

        my $next_new   = 0;    # bibliography index expected for the next new key
        my $misordered = 0;
        for my $j ( 0 .. $#{$citations} ) {
            my $key    = $citations->[$j];
            my $refnum = $index_of{$key};
            next unless defined $refnum;

            if ( $refnum < $next_new ) {
                $note->("-I- Ref[$refnum] cited once again at citation number $j\n");
            }
            elsif ( $refnum == $next_new ) {
                $next_new++;
            }
            else {
                $note->("-E- Ref[$refnum] ($key) may be cited in wrong order at citation number $j\n");
                $misordered++;
            }
        }
        return $misordered;
    }

    # _run_checks($filename, $verbose, $outfile)
    #   Run all checks on the tex file $filename and print the result
    #   summary.  With a true $verbose, progress details are printed as well.
    #   With a defined $outfile, everything after the "Checking tex file"
    #   line is written to that file instead of the selected handle.
    #   Dies if $filename is undefined or a file cannot be opened.
    sub _run_checks {
        my ( $filename, $verbose, $outfile ) = @_;

        die "Please specify filename to be inspected\n"
          unless defined $filename;

        print "Checking tex file: $filename\n";
        my ( $data, $nline ) = _read_words($filename);

        my ( $out, $previous_handle );
        if ( defined $outfile ) {
            open $out, '>', $outfile or die "Cannot open file: $outfile\n";
            $previous_handle = select $out;
        }

        my $note   = sub { print @_ if $verbose; return; };
        my $nwords = scalar @{$data};
        $note->("$nline lines $nwords words scanned\n");

        $note->("Making markers... \n");
        my $marker         = _find_markers($data);
        my $begin_abstract = $marker->{begin_abstract};
        my $end_abstract   = $marker->{end_abstract};

        # No thebibliography environment: the document probably uses bibtex,
        # and the main text runs to the end of the file.
        my $is_bibtex = defined $marker->{begin_bib} ? 0 : 1;
        my $begin_bib = $is_bibtex ? $nwords : $marker->{begin_bib};
        my $end_bib =
          ( !$is_bibtex && defined $marker->{end_bib} )
          ? $marker->{end_bib}
          : $nwords;

        # Main text: everything between the abstract and the bibliography.
        my $main_first = defined $end_abstract ? $end_abstract + 1 : 0;
        my $main_last  = $begin_bib - 1;
        $note->(" done\n\n");

        $note->("Creating figure database...\n");
        my @figures;
        for my $span ( @{ $marker->{figures} } ) {
            my ( $begin, $end ) = @{$span};
            my @fig_words = @{$data}[ $begin .. $end ];
            my $label;    # the last \label inside the figure wins
            for my $word (@fig_words) {
                $label = $1 if $word =~ /\\label\{(.*?)\}/;
            }
            push @figures,
              {
                begin => $begin,
                end   => $end,
                words => \@fig_words,
                label => $label,
              };
        }
        my $nfigures = scalar @figures;
        $note->(" done\n");
        $note->( $nfigures, " figures are found\n" );

        my $unlabeled_figures = 0;
        for my $i ( 0 .. $#figures ) {
            if ( defined $figures[$i]{label} ) {
                $note->("Figure[$i] key=$figures[$i]{label}\n");
            }
            else {
                $note->("Figure[$i] key= (Not Defined). Reference checking will not work properly.\n");
                $unlabeled_figures++;
            }
        }
        $note->("\n");

        $note->("Creating reference database...\n");
        my %ref_positions;    # label => [ word indices, ascending ]
        my $nreferences = 0;
        for my $i ( $main_first .. $main_last ) {
            while ( $data->[$i] =~ /$REF_COMMAND/g ) {
                for my $label ( _split_keys($1) ) {
                    push @{ $ref_positions{$label} }, $i;
                    $nreferences++;
                    $note->(" $label\n");
                }
            }
        }
        $note->(" done\n");
        $note->("$nreferences references are found\n\n");

        $note->("Creating bibliography database...\n");
        my @bibliography;
        for my $i ( $begin_bib .. $end_bib - 1 ) {
            while ( $data->[$i] =~ /$BIBITEM_COMMAND/g ) {
                my $key = $1;
                push @bibliography, $key;
                $note->(" Bibliography[$#bibliography] key=$key\n");
            }
        }
        $note->(" done\n");
        $note->( scalar(@bibliography), " bibliography are found\n" );
        if ($is_bibtex) {
            $note->("This document possibly uses bibtex. This may cause finding 0 bibliography.\n");
        }
        $note->("\n");

        $note->("Checking if there\'s citation in the abstract... \n");
        my $citation_in_abstract = 0;
        if ( defined $begin_abstract && defined $end_abstract ) {
            $citation_in_abstract = grep { /\\cite/ }
              @{$data}[ $begin_abstract .. $end_abstract - 1 ];
        }
        $note->("done\n");
        $note->("$citation_in_abstract citation(s) in abstract found\n\n");

        $note->("Checking if all figures are referred in the text\n");
        my $unreferenced_figures = 0;
        for my $i ( 0 .. $#figures ) {
            my $label = $figures[$i]{label};
            my $positions = defined $label ? $ref_positions{$label} : undef;
            if ($positions) {
                $figures[$i]{ref_positions} = $positions;
                $note->("Figure[$i]  okay\n");
            }
            else {
                $unreferenced_figures++;
                $note->("Figure[$i]  not okay\n");
            }
        }
        $note->(
            $unreferenced_figures == 0
            ? "All figures are referred\n"
            : "Not all figures are referred\n"
        );
        $note->("\n");

        $note->("Searching the first reference position of figures \n");
        for my $j ( 0 .. $#figures ) {
            my $positions = $figures[$j]{ref_positions} || [];
            $note->("Figure [$j] is referred at .. ");
            $note->(" $_ ") for @{$positions};
            if ( @{$positions} ) {
                $figures[$j]{first_ref} = $positions->[0];
                $note->(". ==> ($positions->[0]) \n");
            }
            else {
                $note->(". ==> (n.A.) \n");
            }
        }
        $note->(" done\n\n");

        $note->("Checking if there\'s citation in the figure captions\n");
        my $citation_infigure_captions = 0;
        for my $idx ( 0 .. $#figures ) {
            $note->(" in the figure $idx ....");
            my $hits = grep { /\\cite/ } @{ $figures[$idx]{words} };
            $citation_infigure_captions += $hits;
            $note->( $hits > 0 ? " found\n" : " not found\n" );
        }
        $note->("\n");

        # A figure is numbered where it is first referred, so its caption
        # citations must be counted at that position, not where the figure
        # environment happens to sit in the source.
        $note->("Relocating figure/table captions at the first referenced position in the main text\n");
        for my $i ( 0 .. $#figures ) {
            next if defined $figures[$i]{first_ref};
            $note->("Figure[$i] is never referred and stays where it is\n");
        }
        my $main_text =
          _relocate_figures( $data, $main_first, $main_last, \@figures );
        $note->(" done\n");

        # Scan the re-joined text rather than single words, so that key lists
        # containing blanks ("\cite{a, b}") are seen as one command.
        $note->("Re-Creating citation database...\n");
        my @citations;
        my $text = join ' ', @{$main_text};
        while ( $text =~ /$CITE_COMMAND/g ) {
            for my $key ( _split_keys($1) ) {
                push @citations, $key;
                $note->( sprintf( " %3i %s\n", $#citations, $key ) );
            }
        }
        $note->(" done\n");
        $note->( scalar(@citations), " citations are found\n\n" );

        $note->("Checking the order of ciation ...\n");
        my $misordered =
          _count_misordered_citations( \@citations, \@bibliography, $note );
        $note->(" done\n");

        #=====
        print "\n\n";
        print "Result Summary\n";

        print "Using bibtex ... ";
        print $is_bibtex ? " Possibly yes\n" : " No\n";

        print "No citation in abstract ...... ";
        print $citation_in_abstract > 0 ? " Not Okay\n" : " Okay\n";

        print "All figures referred ...... ";
        if ( $unreferenced_figures > 0 ) {

            # An unlabeled figure can never be matched to a \ref, so the
            # failure may be an artefact of the missing label.
            print $unlabeled_figures
              ? " Suspicious. Please check manually.\n"
              : " Not Okay\n";
        }
        else {
            print " Okay\n";
        }

        print "Citation in figure caption ...... ";
        print $citation_infigure_captions > 0 ? " Yes\n" : " No\n";

        print "Bibliography cited sequentially .... ";
        if ( $misordered > 0 ) {
            print " Not Okay\n";
        }
        elsif ($is_bibtex) {
            print " Unknown. Please check manually\n";
        }
        else {
            print " Okay\n";
        }

        if ( defined $out ) {
            select $previous_handle;
            close $out or die "Cannot close file: $outfile: $!\n";
        }
        return;
    }

    {
        # $main::outfile may be mentioned nowhere else in the file.
        no warnings 'once';
        _run_checks( $main::filename, $main::verbose, $main::outfile );
    }
}