#!/usr/local/bin/perl

# Julius C. Duque
# v1.1.1  2003 November 12 Wednesday
# v1.2.0  2003 November 14 Friday - Added hyphenation
# v1.3.0  2003 November 16 Sunday - Added hyphenation of overfull lines

use diagnostics;
use strict;
use warnings;
use Getopt::Long;
use TeX::Hyphen;

my ($width, $hyphenate, $left, $centered, $right, $both);
my ($indent, $newline);

# --left is accepted but never consulted: left-justified is what you get
# when none of --centered, --right or --both is given.
# An unknown or malformed option prints the usage text instead of being
# silently ignored.
GetOptions(
    "width=i"   => \$width,
    "hyphenate" => \$hyphenate,
    "left"      => \$left,
    "centered"  => \$centered,
    "right"     => \$right,
    "both"      => \$both,
    "indent:i"  => \$indent,
    "newline"   => \$newline,
) or syntax();

my $hyp = TeX::Hyphen->new;

# A word that may be hyphenated: optional leading non-letters ($1), a run
# of letters, hyphens and apostrophes ($2), optional trailing non-letters.
my $hyphenable_word = qr/^([^a-zA-Z]*)([a-zA-Z-']+)([^a-zA-Z]*)$/;

# The width is mandatory and must be positive; with a width below 1 the
# line-filling loop in printpar() could never consume a word.
syntax() unless (defined $width and $width > 0);

$indent = 0 if (!$indent);

# The first line of each paragraph must keep at least one usable column.
die "$0: --indent must be between 0 and one less than --width\n"
    if ($indent < 0 or $indent >= $width);

# Paragraph mode: every read returns one blank-line-separated paragraph.
local $/ = "";

while (my $paragraph = <>) {
    printpar(split ' ', $paragraph);
    print "\n" if ($newline);
}

# printpar(@words)
#
# Prints the words of one paragraph as lines of at most $width characters
# (the first line is shortened by, and prefixed with, $indent spaces),
# justified according to $centered, $right or $both.  When $hyphenate is
# set, a word that does not fit is split at a hyphenation point.  A single
# word that is longer than the line and cannot be split is printed whole,
# overflowing the line.
sub printpar {
    my (@par) = @_;
    my $firstline = 0;

    while (@par) {
        $firstline++;
        my ($buffer, $word) = ("", "");
        my ($charcount, $wordlen) = (0, 0);
        my $linewidth = $width;

        if ($firstline == 1) {
            $linewidth -= $indent;
            print " " x $indent;
        }

        # Append words, each followed by one space, until the line is full
        # or overfull, or the paragraph runs out.
        while (($charcount < $linewidth) and (@par)) {
            $word     = shift @par;
            $wordlen  = length($word);
            $buffer  .= "$word ";
            $charcount += $wordlen + 1;
        }

        # Drop the trailing space.
        chop $buffer;
        $charcount--;

        # The line consists of one single word that is too long for it.
        # (A lone word that fits keeps the normal line width, so that it
        # is still padded by --right and --centered.)
        if ($charcount == $wordlen and $wordlen > $linewidth) {
            my ($pos, $pre_word_len) = (0, 0);

            if ($hyphenate and $word =~ $hyphenable_word) {
                $pre_word_len = length($1);
                # Leading punctuation, the word fragment and the hyphen
                # together must fit within $linewidth.
                $pos = hyphenate_word($2, $linewidth - $pre_word_len + 1);
            }

            if ($pos) {
                # Keep the head of the word plus a hyphen on this line and
                # push the tail back for the next line.
                $charcount = $pre_word_len + $pos;
                unshift(@par, substr($word, $charcount));
                $buffer = substr($word, 0, $charcount) . "-";
                $charcount++;
            }
            else {
                # Cannot be split: let the word overflow, with no padding.
                $linewidth = $wordlen;
            }
        }

        my $lineout = $buffer;

        # The last word appended made the line overfull.
        if ($charcount > $linewidth) {
            my ($pos, $pre_word_len) = (0, 0);

            if ($hyphenate and $word =~ $hyphenable_word) {
                $pre_word_len = length($1);
                # Room left after the preceding words, less the leading
                # punctuation of the word to be split.
                my $unfilled = $linewidth - $charcount + $wordlen
                             - $pre_word_len + 1;
                $pos = hyphenate_word($2, $unfilled);
            }

            $charcount -= $wordlen;

            if ($pos == 0) {
                # Move the whole word to the next line and drop the space
                # that preceded it.
                $charcount--;
                unshift(@par, $word);
            }
            else {
                unshift(@par, substr($word, $pre_word_len + $pos));
                $charcount += $pre_word_len + $pos;
            }

            $lineout = substr($buffer, 0, $charcount);

            if ($pos) {
                $lineout .= "-";
                $charcount++;
            }
        }

        my $spaces_to_fill = $linewidth - $charcount;

        if ($centered) {
            print " " x int($spaces_to_fill / 2);
        }
        elsif ($right) {
            print " " x $spaces_to_fill;
        }
        elsif ($both) {
            my $tempbuf = $lineout;
            my $replacements_made = 0;

            # The last line of a paragraph is left as it is.
            if (@par) {
                my $reps = 1;

                # Widen one gap at a time.  The buffer is reversed after
                # every insertion so that extra spaces are added
                # alternately from the left and from the right.
                while (length($tempbuf) < $linewidth) {
                    last if ($tempbuf !~ /\s/);

                    if ($tempbuf =~ /(\S+ {$reps})(\S+)/) {
                        $tempbuf =~ s/(\S+ {$reps})(\S+)/$1 $2/;
                        $replacements_made++;
                        $tempbuf = reverse $tempbuf;
                    }
                    else {
                        # No gap of exactly $reps spaces is left.
                        $reps++;
                    }
                }
            }

            # An odd number of reversals leaves the buffer backwards.
            if ($replacements_made % 2 == 0) {
                $lineout = $tempbuf;
            }
            else {
                $lineout = reverse $tempbuf;
            }
        }

        print "$lineout\n";
    }
}

# hyphenate_word($tword, $unfilled)
#
# Returns the largest value reported by $hyp->hyphenate($tword) that is
# smaller than $unfilled - 1, or 0 if there is none.  The callers use the
# value as the number of characters of $tword kept in front of the hyphen
# (presumably how TeX::Hyphen reports break points - confirm against its
# documentation).
sub hyphenate_word {
    my ($tword, $unfilled) = @_;

    foreach my $place (reverse $hyp->hyphenate($tword)) {
        return $place if ($place < $unfilled - 1);
    }

    return 0;
}

# syntax()
#
# Prints the usage summary on standard output and exits with status 0.
# NOTE(review): the column alignment of this text may have been lost at
# some point; the strings are kept exactly as found.
sub syntax {
    print "Usage:\n",
          " $0 --width=n [options] file1 [file2 file3 ...]\n",
          " cat file1 [file2 file3 ...] | $0 --width=n [options]\n\n",
          "Options:\n",
          "--width=n (or -w=n or -w n) Line width is n chars long\n",
          "--left (or -l) Left-justified (default)\n",
          "--right (or -r) Right-justified\n",
          "--centered (or -c) Centered\n",
          "--both (or -b) Both left- and\n",
          " right-justified\n",
          "--indent=n (or -i=n or -i n) Leave n spaces for initial\n",
          " indention (defaults to 0)\n",
          "--newline (or -n) Output an empty line \n",
          " between paragraphs\n",
          "--hyphenate (or -h) Hyphenate word that doesn't\n",
          " fit on a line\n";
    exit 0;
}

=head1 NAME

paradj - a small Perl script that reformats lines of ASCII text so that
the resulting lines are justified in any of the following formats:
left-justified (default), right-justified, centered, or both left- and
right-justified.

=head1 README

Paragraph Adjuster with Hyphenation (PAwH) is a small Perl script that
reformats lines of ASCII text so that the resulting lines are justified
in any of the following formats: left-justified (default),
right-justified, centered, or both left- and right-justified. PAwH has
various switches, most are optional, to control its output. The only
mandatory switch is the line width (--width). For PAwH to work properly,
input paragraphs must be separated by blank lines. PAwH is also capable
of hyphenating a word that cannot be accommodated on a line.

=head1 DESCRIPTION

Paragraph Adjuster with Hyphenation (PAwH) is a small Perl script that
reformats lines of ASCII text so that the resulting lines are justified
in any of the following formats: left-justified (default),
right-justified, centered, or both left- and right-justified. PAwH has
various switches, most are optional, to control its output. The only
mandatory switch is the line width (--width). For PAwH to work properly,
input paragraphs must be separated by blank lines.
PAwH is also capable of hyphenating a word that cannot be accommodated
on a line.

=head1 USAGE

You can use PAwH in either of two ways:

    ./paradj.pl --width=n [options] file1 [file2 file3 ...]

or

    cat file1 [file2 file3 ...] | ./paradj.pl --width=n [options]

where file1, file2, file3, and so on, are the files to be reformatted.
There's only one output, though.

=head1 SWITCHES

The available switches are:

=over 4

=item --width=n (or -w=n or -w n)

Line width is n chars long

=item --left (or -l)

Output is left-justified (default)

=item --right (or -r)

Output is right-justified

=item --centered (or -c)

Output is centered

=item --both (or -b)

Output is both left- and right-justified

=item --indent=n (or -i=n or -i n)

Leave n spaces for initial indention (defaults to 0)

=item --newline (or -n)

Insert blank lines between paragraphs

=item --hyphenate (or -h)

Hyphenate word that doesn't fit on a line

=back

=head1 EXAMPLES

The following command reformats the file, LICENSE, so that the line
width is at most 70 characters, both left- and right-justified, with
blank lines inserted between consecutive paragraphs, and words that
can't fit at the end of lines are hyphenated.

    paradj.pl --width=70 --both --newline --hyphenate LICENSE

You can also use the shortened version:

    paradj.pl -w=70 -b -n -h LICENSE

If you want to indent each paragraph, just use the --indent switch.
Say you want to indent the LICENSE file with 4 leading spaces; type:

    paradj.pl --width=70 --both --newline --hyphenate --indent=4 LICENSE

or

    paradj.pl -w=70 -b -n -h -i=4 LICENSE

=head1 GUI VERSION

There is also a Perl/Tk version of paradj.pl, called paradj-tk.pl.

=head1 PREREQUISITE

You need Jan Pazdziora's Perl module, TeX::Hyphen, available from the
Comprehensive Perl Archive Network (CPAN), to use the hyphenation
feature. The latest is version 0.140.

Windows users can install TeX::Hyphen by following these steps:

1. Uncompress the TeX::Hyphen module, TeX-Hyphen-0.140.tar.gz.

2. Descend (cd) into the TeX-Hyphen-0.140/lib directory and copy the TeX
directory into the \lib directory of your Perl installation.
For example, if your Perl binaries are installed in E:\Perl, copy the
TeX directory into E:\Perl\lib.

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2003 Julius C. Duque <{jcduque}{at}{lycos}{dot}{com}>

This library is free software; you can redistribute it and/or modify
it under the same terms as the GNU General Public License.

=pod SCRIPT CATEGORIES

CPAN/Administrative
Fun/Educational

=cut