head	1.1;
access;
symbols
	REL9_0_0:1.1
	REL9_1_ALPHA1:1.1
	REL9_0_RC1:1.1
	REL9_0_BETA4:1.1
	REL9_0_STABLE:1.1.0.18
	REL9_0_BETA3:1.1
	REL9_0_BETA2:1.1
	REL8_2_17:1.1
	REL8_3_11:1.1
	REL8_4_4:1.1
	REL9_0_BETA1:1.1
	REL9_0_ALPHA5_BRANCH:1.1.0.16
	REL9_0_ALPHA5:1.1
	REL8_2_16:1.1
	REL8_3_10:1.1
	REL8_4_3:1.1
	REL9_0_ALPHA4:1.1
	REL9_0_ALPHA4_BRANCH:1.1.0.14
	REL8_5_ALPHA3:1.1
	REL8_5_ALPHA3_BRANCH:1.1.0.12
	REL8_2_15:1.1
	REL8_3_9:1.1
	REL8_4_2:1.1
	REL8_5_ALPHA2:1.1
	REL8_5_ALPHA2_BRANCH:1.1.0.10
	REL8_2_14:1.1
	REL8_3_8:1.1
	REL8_4_1:1.1
	REL8_5_ALPHA1:1.1
	REL8_5_ALPHA1_BRANCH:1.1.0.8
	REL8_4_STABLE:1.1.0.6
	REL8_4_0:1.1
	REL8_4_RC2:1.1
	REL8_4_RC1:1.1
	REL8_4_BETA2:1.1
	REL8_4_BETA1:1.1
	REL8_2_13:1.1
	REL8_3_7:1.1
	REL8_2_12:1.1
	REL8_3_6:1.1
	REL8_2_11:1.1
	REL8_3_5:1.1
	REL8_2_10:1.1
	REL8_3_4:1.1
	REL8_2_9:1.1
	REL8_3_3:1.1
	REL8_2_8:1.1
	REL8_3_2:1.1
	REL8_2_7:1.1
	REL8_3_1:1.1
	REL8_3_STABLE:1.1.0.4
	REL8_3_0:1.1
	REL8_3_RC2:1.1
	REL8_2_6:1.1
	REL8_3_RC1:1.1
	REL8_3_BETA4:1.1
	REL8_3_BETA3:1.1
	REL8_3_BETA2:1.1
	REL8_3_BETA1:1.1
	REL8_2_5:1.1
	REL8_2_4:1.1
	REL8_2_3:1.1
	REL8_2_2:1.1
	REL8_2_1:1.1
	REL8_2_STABLE:1.1.0.2
	REL8_2_0:1.1
	REL8_2_RC1:1.1
	REL8_2_BETA3:1.1
	REL8_2_BETA2:1.1
	REL8_2_BETA1:1.1;
locks; strict;
comment	@# @;


1.1
date	2006.07.15.03.27.42;	author tgl;	state Exp;
branches;
next	;


desc
@@


1.1
log
@Create a tool to catch #include omissions that might not result in any
compiler warning, specifically #ifdef or #if defined tests on symbols
that are defined in a file not included.  The results are a bit noisy
and require care to interpret, but it's a lot better than no tool at all.
@
text
@#! /usr/bin/perl -w

#
# This script looks for symbols that are referenced in #ifdef or defined()
# tests without having #include'd the file that defines them.  Since this
# situation won't necessarily lead to any compiler message, it seems worth
# having an automated check for it.  In particular, use this to audit the
# results of pgrminclude!
#
# Usage: configure and build a PG source tree (non-VPATH), then start this
# script at the top level.  It's best to enable as many configure options
# as you can, especially --enable-cassert which is known to affect include
# requirements.  NB: you MUST use gcc, unless you have another compiler that
# can be persuaded to spit out the names of referenced include files.
#
# The results are necessarily platform-dependent, so use care in interpreting
# them.  We try to process all .c files, even those not intended for the
# current platform, so there will be some phony failures.
#
# $PostgreSQL$
#

use Cwd;
use File::Basename;

$topdir = cwd();

# Programs to use
$FIND = "find";
$MAKE = "make";

#
# Build arrays of all the .c and .h files in the tree
#
# We ignore .h files under src/include/port/, since only the one exposed as
# src/include/port.h is interesting.  (XXX Windows ports have additional
# files there?)  Ditto for .h files in src/backend/port/ subdirectories.
# Including these .h files would clutter the list of define'd symbols and
# cause a lot of false-positive results.
#
# The "*" is expanded by the shell, so the names find prints are relative
# to the top of the tree with no leading "./"; the path patterns below and
# the comparisons made later in the script rely on that.
#
# When closing a pipe, $! is only meaningful if a system call failed.  If
# the child merely exited with nonzero status, $! is 0 and the reason is in
# $?, so report whichever one actually carries information.
#
open PIPE, "$FIND * -type f -name '*.c' |"
    or die "can't fork: $!";
while (<PIPE>) {
    chomp;
    push @@cfiles, $_;
}
close PIPE
    or die "$FIND failed: " . ($! ? "$!" : "child wait status $?");

open PIPE, "$FIND * -type f -name '*.h' |"
    or die "can't fork: $!";
while (<PIPE>) {
    chomp;
    push @@hfiles, $_ unless
	m|^src/include/port/| ||
	m|^src/backend/port/\w+/|;
}
close PIPE
    or die "$FIND failed: " . ($! ? "$!" : "child wait status $?");

#
# For each .h file, extract all the symbols it #define's, and add them to
# a hash table.  To cover the possibility of multiple .h files defining
# the same symbol, we make each hash entry a hash of filenames.
#
for my $header (@@hfiles) {
    # Record, per symbol, every header in which a #define of it appears:
    # $defines{SYMBOL}{HEADER} = 1.
    open HFILE, $header
	or die "can't open $header: $!";
    while (<HFILE>) {
	# Only lines of the form "# define NAME ..." are of interest.
	next unless m/^\s*#\s*define\s+(\w+)/;
	$defines{$1}->{$header} = 1;
    }
    close HFILE;
}

#
# For each file (both .h and .c), run the compiler to get a list of what
# files it #include's.  Then extract all the symbols it tests for defined-ness,
# and check each one against the previously built hashtable.
#
foreach $file (@@hfiles, @@cfiles) {
    # Run make and the compiler from the file's own directory; we chdir
    # back to $topdir at the bottom of the loop.
    ($fname, $fpath) = fileparse($file);
    chdir $fpath or die "can't chdir to $fpath: $!";
    #
    # Ask 'make' to parse the makefile so we can get the correct flags to
    # use.  CPPFLAGS in particular varies for each subdirectory.  If we are
    # processing a .h file, we might be in a subdirectory that has no
    # Makefile, in which case we have to fake it.  Note that there seems
    # no easy way to prevent make from recursing into subdirectories and
    # hence printing multiple definitions --- we keep the last one, which
    # should come from the current Makefile.
    #
    if (-f "Makefile" || -f "GNUmakefile") {
	$MAKECMD = "$MAKE -qp";
    } else {
	# No makefile here: feed make the global makefile directly, telling
	# it which subdirectory we are in.  $subdir is $fpath minus its last
	# character, and top_builddir gets one ".." per directory level.
	$subdir = $fpath;
	chop $subdir;
	$top_builddir = "..";
	$tmp = $fpath;
	while (($tmp = dirname($tmp)) ne '.') {
	    $top_builddir = $top_builddir . "/..";
	}
	$MAKECMD = "$MAKE -qp 'subdir=$subdir' 'top_builddir=$top_builddir' -f '$top_builddir/src/Makefile.global'";
    }
    open PIPE, "$MAKECMD |"
	or die "can't fork: $!";
    while (<PIPE>) {
	if (m/^CPPFLAGS :?= (.*)/) {
	    $CPPFLAGS = $1;
	} elsif (m/^CFLAGS :?= (.*)/) {
	    $CFLAGS = $1;
	} elsif (m/^CFLAGS_SL :?= (.*)/) {
	    $CFLAGS_SL = $1;
	} elsif (m/^PTHREAD_CFLAGS :?= (.*)/) {
	    $PTHREAD_CFLAGS = $1;
	} elsif (m/^CC :?= (.*)/) {
	    $CC = $1;
	}
    }
    # If make exits with status 1, it's not an error, it just means make
    # thinks some files may not be up-to-date.  Only complain on status 2.
    # ($? holds the raw wait status, so exit status 1 shows up as 256.)
    close PIPE;
    die "$MAKE failed in $fpath\n" if $? != 0 && $? != 256;

    # Expand out stuff that might be referenced in CFLAGS
    $CFLAGS =~ s/\$\(CFLAGS_SL\)/$CFLAGS_SL/;
    $CFLAGS =~ s/\$\(PTHREAD_CFLAGS\)/$PTHREAD_CFLAGS/;

    #
    # Run the compiler (which had better be gcc) to get the inclusions.
    # "gcc -H" reports inclusions on stderr as "... filename" where the
    # number of dots varies according to nesting depth.
    #
    # The redirection order matters: stderr is pointed at the pipe first,
    # then stdout (the preprocessed source) is thrown away.
    #
    @@includes = ();
    $COMPILE = "$CC $CPPFLAGS $CFLAGS -H -E $fname";
    open PIPE, "$COMPILE 2>&1 >/dev/null |"
	or die "can't fork: $!";
    while (<PIPE>) {
	if (m/^\.+ (.*)/) {
	    $include = $1;
	    # Ignore system headers (absolute paths); but complain if a
	    # .c file includes a system header before any PG header.
	    if ($include =~ m|^/|) {
		warn "$file includes $include before any Postgres inclusion\n"
		    if $#includes == -1 && $file =~ m/\.c$/;
		next;
	    }
	    # Strip any "./" (assume this appears only at front)
	    $include =~ s|^\./||;
	    # Make path relative to top of tree: each leading "../" in the
	    # reported name moves one directory up from the file's own path.
	    $ipath = $fpath;
	    while ($include =~ s|^\.\./||) {
		$ipath = dirname($ipath) . "/";
	    }
	    $ipath =~ s|^\./||;
	    push @@includes, $ipath . $include;
	} else {
	    # Anything else on stderr is passed through as a diagnostic.
	    warn "$CC: $_";
	}
    }
    # The compiler might fail, particularly if we are checking a file that's
    # not supposed to be compiled at all on the current platform, so don't
    # quit on nonzero status.
    close PIPE or warn "$COMPILE failed in $fpath\n";

    #
    # Scan the file to find #ifdef, #ifndef, and #if/#elif defined()
    # constructs.  We assume #ifdef isn't continued across lines, and that
    # defined(foo) isn't split across lines either
    #
    open FILE, $fname
	or die "can't open $file: $!";
    $inif = 0;
    while (<FILE>) {
	$line = $_;
	if ($line =~ m/^\s*#\s*ifdef\s+(\w+)/) {
	    $symbol = $1;
	    &checkit;
	}
	if ($line =~ m/^\s*#\s*ifndef\s+(\w+)/) {
	    $symbol = $1;
	    &checkit;
	}
	# Both #if and #elif can carry defined() tests.  The \b keeps
	# #ifdef/#ifndef out of this branch while still accepting forms
	# with no whitespace after the directive, e.g. "#if(defined(X))"
	# or "#if!defined(X)".
	if ($line =~ m/^\s*#\s*(?:el)?if\b/) {
	    $inif = 1;
	}
	if ($inif) {
	    # Strip each defined-test from the working copy of the line as
	    # it is checked, so the loop ends once none are left.
	    while ($line =~ s/\bdefined(\s+|\s*\(\s*)(\w+)//) {
		$symbol = $2;
		&checkit;
	    }
	    # Stay in the conditional only while the line is continued
	    # with a trailing backslash.
	    if (!($line =~ m/\\$/)) {
		$inif = 0;
	    }
	}
    }
    close FILE;

    chdir $topdir or die "can't chdir to $topdir: $!";
}

exit 0;

# Check an is-defined reference
sub checkit {
    # Works on the caller's globals: $symbol is the tested name, $file the
    # file being scanned, and the includes array the files it pulled in.

    # A symbol that no scanned PG header #define's is not our concern.
    return unless defined $defines{$symbol};

    # Every file that defines the symbol; fetched once and reused below.
    my @@origins = keys %{ $defines{$symbol} };

    #
    # The reference is acceptable if at least one defining file is either
    # the current file itself or something it includes.
    #
    # Note: this is not airtight; the inclusion could in principle come
    # after the point where the symbol is tested.  With our usual file
    # layout that risk is small.
    #
    my %visible = map { ($_ => 1) } ($file, @@includes);
    foreach my $origin (@@origins) {
	return if $visible{$origin};
    }

    #
    # A .h file is allowed to assume that one of the base headers
    # (postgres.h or postgres_fe.h) was included ahead of it, so symbols
    # coming from those and the headers listed with them are fine too.
    #
    if ($file =~ m/\.h$/) {
	my %base_header = map { ($_ => 1) } (
	    'src/include/c.h',
	    'src/include/postgres.h',
	    'src/include/postgres_fe.h',
	    'src/include/pg_config.h',
	    'src/include/pg_config_manual.h',
	);
	foreach my $origin (@@origins) {
	    return if $base_header{$origin};
	}
    }

    # No acceptable source was found: report the reference.
    @@places = @@origins;
    print "$file references $symbol, defined in @@places\n";
    # print "includes: @@includes\n";
}
@
