#!/usr/bin/perl
# grog - guess options for groff command
# Inspired by doctype script in Kernighan & Pike, Unix Programming
# Environment, pp 306-8.
# Copyright (C) 1993-2021 Free Software Foundation, Inc.
# Written by James Clark.
# Rewritten in Perl by Bernd Warken <groff-bernd.warken-72@web.de>.
# Hacked up by G. Branden Robinson, 2021.
# This file is part of 'grog', which is part of 'groff'.
# 'groff' is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 'groff' is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see
# <http://www.gnu.org/licenses/gpl-2.0.html>.
use warnings;
use strict;
use File::Spec;
my $groff_version = 'DEVELOPMENT';
my @command = (); # the constructed groff command
my @requested_package = (); # arguments to '-m' grog options
my @inferred_preprocessor = (); # preprocessors the document uses
my @inferred_main_package = (); # full-service package(s) detected
my $main_package; # full-service package we go with
my $do_run = 0; # run generated 'groff' command
my $use_compatibility_mode = 0; # is -C being passed to groff?
my %preprocessor_for_macro = (
'EQ', 'eqn',
'G1', 'grap',
'GS', 'grn',
'PS', 'pic',
'[', 'refer',
#'so', 'soelim', # Can't be inferred this way; see grog man page.
'TS', 'tbl',
'cstart', 'chem',
'lilypond', 'glilypond',
'Perl', 'gperl',
'pinyin', 'gpinyin',
);
my $program_name = $0;
{
my ($v, $d, $f) = File::Spec->splitpath($program_name);
$program_name = $f;
}
my %user_macro;
my %score = ();
my @input_file;
# .TH is both a man(7) macro and often used with tbl(1). We expect to
# find .TH in ms(7) documents only between .TS and .TE calls, and in
# man(7) documents only as the first macro call.
my $have_seen_first_macro_call = 0;
# man(7) and ms(7) use many of the same macro names; do extra checking.
my $man_score = 0;
my $ms_score = 0;
my $had_inference_problem = 0;
my $had_processing_problem = 0;
my $have_any_valid_arguments = 0;
sub fail {
my $text = shift;
print STDERR "$program_name: error: $text\n";
$had_processing_problem = 1;
}
sub warn {
my $text = shift;
print STDERR "$program_name: warning: $text\n";
}
sub process_arguments {
my $no_more_options = 0;
my $delayed_option = '';
my $was_minus = 0;
my $optarg = 0;
my $pdf_with_ligatures = 0;
foreach my $arg (@ARGV) {
if ( $optarg ) {
push @command, $arg;
$optarg = 0;
next;
}
if ($no_more_options) {
push @input_file, $arg;
next;
}
if ($delayed_option) {
if ($delayed_option eq '-m') {
push @requested_package, $arg;
$arg = '';
} else {
push @command, $delayed_option;
}
push @command, $arg if $arg;
$delayed_option = '';
next;
}
unless ( $arg =~ /^-/ ) { # file name, no opt, no optarg
push @input_file, $arg;
next;
}
# now $arg starts with '-'
if ($arg eq '-') {
unless ($was_minus) {
push @input_file, $arg;
$was_minus = 1;
}
next;
}
if ($arg eq '--') {
$no_more_options = 1;
next;
}
# Handle options that cause an early exit.
&version() if ($arg eq '-v' || $arg eq '--version');
&usage(0) if ($arg eq '-h' || $arg eq '--help');
if ($arg =~ '^--.') {
if ($arg =~ '^--(run|with-ligatures)$') {
$do_run = 1 if ($arg eq '--run');
$pdf_with_ligatures = 1 if ($arg eq '--with-ligatures');
} else {
&fail("unrecognized grog option '$arg'; ignored");
&usage(1);
}
next;
}
# Handle groff options that take an argument.
# Handle the option argument being separated by whitespace.
if ($arg =~ /^-[dfFIKLmMnoPrTwW]$/) {
$delayed_option = $arg;
next;
}
# Handle '-m' option without subsequent whitespace.
if ($arg =~ /^-m/) {
my $package = $arg;
$package =~ s/-m//;
push @requested_package, $package;
next;
}
# Treat anything else as (possibly clustered) groff options that
# take no arguments.
# Our do_line() needs to know if it should do compatibility parsing.
$use_compatibility_mode = 1 if ($arg =~ /C/);
push @command, $arg;
}
if ($pdf_with_ligatures) {
push @command, '-P-y';
push @command, '-PU';
}
@input_file = ('-') unless (@input_file);
} # process_arguments()
sub process_input {
foreach my $file (@input_file) {
unless ( open(FILE, $file eq "-" ? $file : "< $file") ) {
&fail("cannot open '$file': $!");
next;
}
$have_any_valid_arguments = 1;
while (my $line = <FILE>) {
chomp $line;
&do_line($line);
}
close(FILE);
} # end foreach
} # process_input()
# Push item onto inferred full-service list only if not already present.
sub push_main_package {
my $pkg = shift;
if (!grep(/^$pkg/, @inferred_main_package)) {
push @inferred_main_package, $pkg;
}
} # push_main_package()
sub do_line {
my $command; # request or macro name
my $args; # request or macro arguments
my $line = shift;
# Check for a Perl Pod::Man comment.
#
# An alternative to this kludge is noted below: if a "standard" macro
# is redefined, we could delete it from the relevant lists and
# hashes.
if ($line =~ /\\\" Automatically generated by Pod::Man/) {
$man_score += 100;
}
# Strip comments.
$line =~ s/\\".*//;
$line =~ s/\\#.*// unless $use_compatibility_mode;
return unless ($line =~ /^[.']/); # Ignore text lines.
# Perform preprocessor checks; they scan their inputs using a rump
# interpretation of roff(7) syntax that requires the default control
# character and no space between it and the macro name. In AT&T
# compatibility mode, no space (or newline!) is required after the
# macro name, either. We mimic the preprocessors themselves; eqn(1),
# for instance, does not recognize '.EN' if '.EQ' has not been seen.
my $boundary = '\\b';
$boundary = '' if ($use_compatibility_mode);
if ($line =~ /^\.(\S\S)$boundary/ || $line =~ /^\.(\[)/) {
my $macro = $1;
# groff identifiers can have extremely weird characters in them.
# The ones we care about are conventionally named, but me(7)
# documents can call macros like '+c', so quote carefully.
if (grep(/^\Q$macro\E$/, keys %preprocessor_for_macro)) {
my $preproc = $preprocessor_for_macro{$macro};
if (!grep(/$preproc/, @inferred_preprocessor)) {
push @inferred_preprocessor, $preproc;
}
}
}
# Normalize control lines; convert no-break control character to the
# regular one and remove unnecessary whitespace.
$line =~ s/^['.]\s*/./;
$line =~ s/\s+$//;
return if ($line =~ /^\.$/); # Ignore empty request.
return if ($line =~ /^\.\\?\.$/); # Ignore macro definition ends.
# Split control line into a request or macro call and its arguments.
# Handle single-letter macro names.
if ($line =~ /^\.(\S)(\s+(.*))?$/) {
$command = $1;
$args = $2;
# Handle two-letter macro/request names in compatibility mode.
} elsif ($use_compatibility_mode) {
$line =~ /^\.(\S\S)\s*(.*)$/;
$command = $1;
$args = $2;
# Handle multi-letter macro/request names in groff mode.
} else {
$line =~ /^\.(\S+)(\s+(.*))?$/;
$command = $1;
$args = $3;
}
$command = '' unless ($command);
$args = '' unless ($args);
######################################################################
# user-defined macros
# If the line calls a user-defined macro, skip it.
return if (exists $user_macro{$command});
# These are all requests supported by groff 1.23.0.
my @request = ('ab', 'ad', 'af', 'aln', 'als', 'am', 'am1', 'ami',
'ami1', 'as', 'as1', 'asciify', 'backtrace', 'bd',
'blm', 'box', 'boxa', 'bp', 'br', 'brp', 'break', 'c2',
'cc', 'ce', 'cf', 'cflags', 'ch', 'char', 'chop',
'class', 'close', 'color', 'composite', 'continue',
'cp', 'cs', 'cu', 'da', 'de', 'de1', 'defcolor', 'dei',
'dei1', 'device', 'devicem', 'di', 'do', 'ds', 'ds1',
'dt', 'ec', 'ecr', 'ecs', 'el', 'em', 'eo', 'ev',
'evc', 'ex', 'fam', 'fc', 'fchar', 'fcolor', 'fi',
'fp', 'fschar', 'fspecial', 'ft', 'ftr', 'fzoom',
'gcolor', 'hc', 'hcode', 'hla', 'hlm', 'hpf', 'hpfa',
'hpfcode', 'hw', 'hy', 'hym', 'hys', 'ie', 'if', 'ig',
'in', 'it', 'itc', 'kern', 'lc', 'length', 'linetabs',
'lf', 'lg', 'll', 'lsm', 'ls', 'lt', 'mc', 'mk', 'mso',
'msoquiet', 'na', 'ne', 'nf', 'nh', 'nm', 'nn', 'nop',
'nr', 'nroff', 'ns', 'nx', 'open', 'opena', 'os',
'output', 'pc', 'pev', 'pi', 'pl', 'pm', 'pn', 'pnr',
'po', 'ps', 'psbb', 'pso', 'ptr', 'pvs', 'rchar', 'rd',
'return', 'rfschar', 'rj', 'rm', 'rn', 'rnn', 'rr',
'rs', 'rt', 'schar', 'shc', 'shift', 'sizes', 'so',
'soquiet', 'sp', 'special', 'spreadwarn', 'ss',
'stringdown', 'stringup', 'sty', 'substring', 'sv',
'sy', 'ta', 'tc', 'ti', 'tkf', 'tl', 'tm', 'tm1',
'tmc', 'tr', 'trf', 'trin', 'trnt', 'troff', 'uf',
'ul', 'unformat', 'vpt', 'vs', 'warn', 'warnscale',
'wh', 'while', 'write', 'writec', 'writem');
# Add user-defined macro names to %user_macro.
#
# Macros can also be defined with .dei{,1}, ami{,1}, but supporting
# that would be a heavy lift for the benefit of users that probably
# don't require grog's help. --GBR
if ($command =~ /^(de|am)1?$/) {
my $name = $args;
# Strip off any end macro.
$name =~ s/\s+.*$//;
# Handle special cases of macros starting with '[' or ']'.
if ($name =~ /^[][]/) {
delete $preprocessor_for_macro{'['};
}
# XXX: If the macro name shadows a standard macro name, maybe we
# should delete the latter from our lists and hashes. This might
# depend on whether the document is trying to remain compatible
# with an existing interface, or simply colliding with names they
# don't care about (consider a raw roff document that defines 'PP').
# --GBR
$user_macro{$name} = 0 unless (exists $user_macro{$name});
return;
}
# XXX: Handle .rm as well?
# Ignore all other requests. Again, macro names can contain Perl
# regex metacharacters, so be careful.
return if (grep(/^\Q$command\E$/, @request));
# What remains must be a macro name.
my $macro = $command;
$have_seen_first_macro_call = 1;
$score{$macro}++;
######################################################################
# macro package (tmac)
######################################################################
# man and ms share too many macro names for the following approach to
# be fruitful for many documents; see &infer_man_or_ms_package.
#
# We can put one thumb on the scale, however.
if ((!$have_seen_first_macro_call) && ($macro eq 'TH')) {
# TH as the first call in a document screams man(7).
$man_score += 100;
}
##########
# mdoc
if ($macro =~ /^Dd$/) {
&push_main_package('doc');
return;
}
##########
# old mdoc
if ($macro =~ /^(Tp|Dp|De|Cx|Cl)$/) {
&push_main_package('doc-old');
return;
}
##########
# me
if ($macro =~ /^(
[ilnp]p|
n[12]|
sh
)$/x) {
&push_main_package('e');
return;
}
#############
# mm and mmse
if ($macro =~ /^(
H|
MULB|
LO|
LT|
NCOL|
PH|
SA
)$/x) {
if ($macro =~ /^LO$/) {
if ( $args =~ /^(DNAMN|MDAT|BIL|KOMP|DBET|BET|SIDOR)/ ) {
&push_main_package('mse');
return;
}
} elsif ($macro =~ /^LT$/) {
if ( $args =~ /^(SVV|SVH)/ ) {
&push_main_package('mse');
return;
}
}
&push_main_package('m');
return;
}
##########
# mom
if ($macro =~ /^(
ALD|
AUTHOR|
CHAPTER_TITLE|
CHAPTER|
COLLATE|
DOCHEADER|
DOCTITLE|
DOCTYPE|
DOC_COVER|
FAMILY|
FAM|
FT|
LEFT|
LL|
LS|
NEWPAGE|
NO_TOC_ENTRY|
PAGENUMBER|
PAGE|
PAGINATION|
PAPER|
PRINTSTYLE|
PT_SIZE|
START|
TITLE|
TOC_AFTER_HERE
TOC|
T_MARGIN|
)$/x) {
&push_main_package('om');
return;
}
} # do_line()
my @preprocessor = ();
sub infer_preprocessors {
my %option_for_preprocessor = (
'eqn', '-e',
'grap', '-G',
'grn', '-g',
'pic', '-p',
'refer', '-R',
#'soelim', '-s', # Can't be inferred this way; see grog man page.
'tbl', '-t',
'chem', '-j'
);
# Use a temporary list we can sort later. We want the options to show
# up in a stable order for testing purposes instead of the order their
# macros turn up in the input. groff doesn't care about the order.
my @opt = ();
foreach my $preproc (@inferred_preprocessor) {
my $preproc_option = $option_for_preprocessor{$preproc};
if ($preproc_option) {
push @opt, $preproc_option;
} else {
push @preprocessor, $preproc;
}
}
push @command, sort @opt;
} # infer_preprocessors()
# Return true (1) if either the man or ms package is inferred.
sub infer_man_or_ms_package {
my @macro_ms = ('RP', 'TL', 'AU', 'AI', 'DA', 'ND', 'AB', 'AE',
'QP', 'QS', 'QE', 'XP',
'NH',
'R',
'CW',
'BX', 'UL', 'LG', 'NL',
'KS', 'KF', 'KE', 'B1', 'B2',
'DS', 'DE', 'LD', 'ID', 'BD', 'CD', 'RD',
'FS', 'FE',
'OH', 'OF', 'EH', 'EF', 'P1',
'TA', '1C', '2C', 'MC',
'XS', 'XE', 'XA', 'TC', 'PX',
'IX', 'SG');
my @macro_man = ('BR', 'IB', 'IR', 'RB', 'RI', 'P', 'TH', 'TP', 'SS',
'HP', 'PD',
'AT', 'UC',
'SB',
'EE', 'EX',
'OP',
'MT', 'ME', 'SY', 'YS', 'TQ', 'UR', 'UE');
my @macro_man_or_ms = ('B', 'I', 'BI',
'DT',
'RS', 'RE',
'SH',
'SM',
'IP', 'LP', 'PP');
for my $key (@macro_man_or_ms, @macro_man, @macro_ms) {
$score{$key} = 0 unless exists $score{$key};
}
# Compute a score for each package by counting occurrences of their
# characteristic macros.
foreach my $key (@macro_man_or_ms) {
$man_score += $score{$key};
$ms_score += $score{$key};
}
foreach my $key (@macro_man) {
$man_score += $score{$key};
}
foreach my $key (@macro_ms) {
$ms_score += $score{$key};
}
if (!$ms_score && !$man_score) {
# The input may be a "raw" roff document; this is not a problem,
# but it does mean no package was inferred.
return 0;
} elsif ($ms_score == $man_score) {
# If there was no TH call, it's not a (valid) man(7) document.
if (!$score{'TH'}) {
&push_main_package('s');
} else {
&warn("document ambiguous; disambiguate with -man or -ms option");
$had_inference_problem = 1;
}
return 0;
} elsif ($ms_score > $man_score) {
&push_main_package('s');
} else {
&push_main_package('an');
}
return 1;
} # infer_man_or_ms_package()
sub construct_command {
my @main_package = ('an', 'doc', 'doc-old', 'e', 'm', 'om', 's');
my $file_args_included; # file args now only at 1st preproc
unshift @command, 'groff';
if (@preprocessor) {
my @progs;
$progs[0] = shift @preprocessor;
push(@progs, @input_file);
for (@preprocessor) {
push @progs, '|';
push @progs, $_;
}
push @progs, '|';
unshift @command, @progs;
$file_args_included = 1;
} else {
$file_args_included = 0;
}
foreach (@command) {
next unless /\s/;
# when one argument has several words, use accents
$_ = "'" . $_ . "'";
}
my $have_ambiguous_main_package = 0;
my $inferred_main_package_count = scalar @inferred_main_package;
# Did we infer multiple full-service packages?
if ($inferred_main_package_count > 1) {
$have_ambiguous_main_package = 1;
# For each one the user explicitly requested...
for my $pkg (@requested_package) {
# ...did it resolve the ambiguity for us?
if (grep(/$pkg/, @inferred_main_package)) {
@inferred_main_package = ($pkg);
$have_ambiguous_main_package = 0;
last;
}
}
} elsif ($inferred_main_package_count == 1) {
$main_package = shift @inferred_main_package;
}
if ($have_ambiguous_main_package) {
# TODO: Alphabetical is probably not the best ordering here. We
# should tally up scores on a per-package basis generally, not just
# for an and s.
for my $pkg (@main_package) {
if (grep(/$pkg/, @inferred_main_package)) {
$main_package = $pkg;
&warn("document ambiguous (choosing '$main_package'"
. " from '@inferred_main_package'); disambiguate with -m"
. " option");
$had_inference_problem = 1;
last;
}
}
}
# If a full-service package was explicitly requested, warn if the
# inference differs from the request. This also ensures that all -m
# arguments are placed in the same order that the user gave them;
# caveat dictator.
my @auxiliary_package_argument = ();
for my $pkg (@requested_package) {
my $is_auxiliary_package = 1;
if (grep(/$pkg/, @main_package)) {
$is_auxiliary_package = 0;
if ($pkg ne $main_package) {
&warn("overriding inferred package '$main_package'"
. " with requested package '$pkg'");
$main_package = $pkg;
}
}
if ($is_auxiliary_package) {
push @auxiliary_package_argument, "-m" . $pkg;
}
}
push @command, '-m' . $main_package if ($main_package);
push @command, @auxiliary_package_argument;
push @command, @input_file unless ($file_args_included);
#########
# execute the 'groff' command here with option '--run'
if ( $do_run ) { # with --run
print STDERR "@command\n";
my $cmd = join ' ', @command;
system($cmd);
} else {
print "@command\n";
}
} # construct_command()
sub usage {
my $stream = *STDOUT;
my $had_error = shift;
$stream = *STDERR if $had_error;
my $grog = $program_name;
print $stream "usage: $grog [--ligatures] [--run]" .
" [groff-option ...] [--] [file ...]\n" .
"usage: $grog {-v | --version}\n" .
"usage: $grog {-h | --help}\n";
unless ($had_error) {
print $stream "\n" .
"Read each roff(7) input FILE and attempt to infer an appropriate\n" .
"groff(1) command to format it. See the grog(1) manual page.\n";
}
exit $had_error;
}
sub version {
print "GNU $program_name (groff) $groff_version\n";
exit 0;
} # version()
# initialize
my $in_unbuilt_source_tree = 0;
{
my $at = '@';
$in_unbuilt_source_tree = 1 if ('1.23.0' eq "${at}VERSION${at}");
}
$groff_version = '1.23.0' unless ($in_unbuilt_source_tree);
&process_arguments();
&process_input();
if ($have_any_valid_arguments) {
&infer_preprocessors();
&infer_man_or_ms_package() if (scalar @inferred_main_package != 1);
&construct_command();
}
exit 2 if ($had_processing_problem);
exit 1 if ($had_inference_problem);
exit 0;
# Local Variables:
# fill-column: 72
# mode: CPerl
# End:
# vim: set cindent noexpandtab shiftwidth=2 softtabstop=2 textwidth=72:
|