#!/bin/bash -e
#
# Applies the Undertaker tool to a Linux source tree
#
# Copyright (C) 2009-2012 Reinhard Tartler <tartler@informatik.uni-erlangen.de>
# Copyright (C) 2011 Christian Dietrich <christian.dietrich@informatik.uni-erlangen.de>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# This script is intended to run the undertaker on a whole Linux
# tree. It determines which files to process and how many
# threads to start according to the number of processors your
# machine has. It assumes that you have run undertaker-kconfigdump
# beforehand in order to create the models.
#

# Command line handling.
#
# Sets MODE (which analysis to run), MODELS (model directory),
# DEFAULT_ARCH (main architecture) and PROCESSORS (worker count).
# Dead block scanning is the default mode.
MODE="scan-deads"

# The leading ':' puts getopts into silent mode: a missing option
# argument is reported as OPT=':' and an unknown option as OPT='?',
# with the offending option letter in OPTARG. Both are handled below.
while getopts :t:m:a:csivh OPT; do
    case $OPT in
        m)
            MODELS="$OPTARG"
            ;;
        a)
            DEFAULT_ARCH="$OPTARG"
            ;;
        t)
            PROCESSORS="$OPTARG"
            ;;
        c)
            MODE="calc-coverage"
            ;;
        s)
            MODE="feature-statistics"
            ;;
        i)
            MODE="inference"
            ;;
        v)
            echo "undertaker-linux-tree"
            exit 0
            ;;
        h)
            echo "\`undertaker-linux-tree' drives the undertaker over a whole linux-tree"
            echo
            echo "Usage: ${0##*/} [-m DIR] [-a ARCH] [-t PROCS] [-c|-s|-i] [-v] [-h]"
            echo " -m <modeldir>  Specify the directory for the models"
            echo "           (default: models)"
            echo " -a <arch>  Default architecture to check for"
            echo "        (default: x86)"
            echo " -t <count>   Number of analyzing processes"
            echo "        (default: _NPROCESSORS_ONLN)"
            echo " -c  Do coverage analysis instead of dead block search"
            echo " -s  Do feature statistics instead of dead block search"
            echo " -i  Extract source file implications instead of dead block search"
            echo " -v  Print the program name and exit"
            echo " -h  Print this help and exit"
            exit 0
            ;;
        :)
            echo "${0##*/}: option -$OPTARG requires an argument (try -h)" >&2
            exit 2
            ;;
        \?)
            echo "${0##*/}: unknown option -$OPTARG (try -h)" >&2
            exit 2
            ;;
    esac
done
shift $(( OPTIND - 1 ))
OPTIND=1

# Fall back to the documented defaults for everything not given explicitly.
MODELS=${MODELS:-models}
DEFAULT_ARCH=${DEFAULT_ARCH:-x86}
PROCESSORS=${PROCESSORS:-$(getconf _NPROCESSORS_ONLN)}

# The process count is handed to undertaker via -t; reject anything that
# is not a plain non-negative integer right away.
case $PROCESSORS in
    ''|*[!0-9]*)
        echo "${0##*/}: invalid number of processes: '$PROCESSORS'" >&2
        exit 2
        ;;
esac

# Preflight checks: refuse to start unless we are inside a Linux tree,
# the undertaker binary is available and models have been generated.
# Every failed check terminates the script with a non-zero status.
if [ ! -f arch/x86/Kconfig ]; then
    echo "Not run in an linux tree. Please run inside an linux tree without arguments"
    exit 1
fi
# Purely informational; trees that are not git checkouts are fine.
echo "Running on Linux Version $(git describe || echo '(no git)')"

# `command -v' is the shell builtin way to look a command up in PATH.
if ! command -v undertaker > /dev/null 2>&1; then
    echo "No undertaker binary found."
    exit 1
fi

if ! ls "$MODELS"/*.model >/dev/null 2>&1; then
    echo "No models found, please call undertaker-kconfigdump"
    # A bare `exit' would return the status of the echo above (0) and
    # report this failure as success.
    exit 1
fi

# Mode "calc-coverage" (-c): compute partial configurations for all C
# files that contain an #else and run undertaker-calc-coverage on the
# most variable ones.
#
# Reads:  MODE, PROCESSORS, MODELS, DEFAULT_ARCH
# Writes: undertaker-coverage-worklist, coverage.txt, coverage.stats,
#         undertaker-calc-coverage-worklist, undertaker-calc-coverage.output
#         and, if anything was written to stderr, undertaker-calc-coverage.error
if [ "$MODE" = "calc-coverage" ]; then
    # Only files with an #else are considered; tools/, Documentation/ and
    # scripts/ are skipped. The list is shuffled before it is handed out.
    find . -type f -name '*.c' \
        ! -regex '^./tools.*' ! -regex '^./Documentation.*' ! -regex '^./scripts.*' \
        -exec grep -q -E '^#else' {} \; -print | shuf > undertaker-coverage-worklist

    files=$(wc -l <undertaker-coverage-worklist)
    echo "Calculating partial configurations (greedy variant) on $files files"

    # grep exits non-zero when no line matches. Under `bash -e' that would
    # abort the script silently right here, so the status is ignored and the
    # explicit emptiness check below reports the failure instead.
    undertaker -v -j coverage -C min -t "$PROCESSORS" -b undertaker-coverage-worklist \
        -m "$MODELS" -M "$DEFAULT_ARCH" 2>&1 |
            grep '^I: ./' > coverage.txt || true

    if [ ! -s coverage.txt ]; then
        echo "Coverage analysis failed!"
        exit 1
    fi

    echo "TOP 50 variable files:"
    # Strip the "I: " prefix, then print the second comma-separated field in
    # front of the first for "Found Solutions" lines so that a numeric sort
    # ranks them.
    awk -F'I: ' '/^I: / { print $2 }' < coverage.txt |
            awk -F, '/Found Solutions/ { printf "%s %s\n", $2, $1 }' |
            sort -n -r |
            head -n 50 | tee coverage.stats

    awk '/c$/ { print $4 }' coverage.stats > undertaker-calc-coverage-worklist
    # Use the model of the selected architecture from the selected model
    # directory, matching the -m/-M arguments given to undertaker above.
    if ! undertaker-calc-coverage -m "$MODELS/$DEFAULT_ARCH.model" undertaker-calc-coverage-worklist \
              --run-sparse 2> undertaker-calc-coverage.error >undertaker-calc-coverage.output; then
        echo "undertaker-calc-coverage failed, error messages follow:"
        cat undertaker-calc-coverage.error
        exit 1
    fi
    # Do not leave an empty error log behind.
    if ! test -s undertaker-calc-coverage.error; then
        rm -f undertaker-calc-coverage.error
    fi
fi

# Mode "scan-deads" (the default): run undertaker over every .h/.c/.S
# file that contains an #else and report how many global defects it
# left behind. Terminates the script when done.
case "$MODE" in
    scan-deads)
        # Build the shuffled worklist; tools/, Documentation/ and
        # scripts/ are not analyzed.
        find -type f -name '*.[hcS]' \
            ! -regex '^./tools.*' \
            ! -regex '^./Documentation.*' \
            ! -regex '^./scripts.*' \
            -exec grep -q -E '^#else' {} \; -print |
            shuf > undertaker-worklist

        # Results of an earlier run would be counted again below, so
        # remove them before starting.
        find . -type f -name '*dead' -delete

        worklist_size=$(wc -l < undertaker-worklist)
        echo "Analyzing $worklist_size files with $PROCESSORS threads."

        undertaker -v -t "$PROCESSORS" -b undertaker-worklist -m "$MODELS" -M "$DEFAULT_ARCH"

        # Count the result files whose path mentions "globally".
        global_defects=$(find . -name '*dead' | grep globally | wc -l)
        printf "\n\nFound %s global defects\n" "$global_defects"
        exit 0
        ;;
esac

# Collects source file and CPP symbol statistics for one architecture
# under one kernel configuration target.
#
# Arguments:
#   $1 - make configuration target (e.g. allnoconfig)
#   $2 - architecture name
# Globals:
#   PROCESSORS (read), MODELS (read, falls back to "models"),
#   ARCH and CROSS_COMPILE (set and exported for make and golem)
# Outputs:
#   undertaker-stat/<config>-<arch>.list    file list printed by `golem -l'
#   undertaker-stat/<config>-<arch>.failed  stderr of `golem -l'
#   undertaker-stat/<config>-<arch>.cppsym  filtered, sorted, unique symbols
#   undertaker-stat/<config>-<arch>.cppsym-errors  stderr of undertaker,
#                                           removed again when empty
#   Progress and failure messages go to stdout.
# Returns:
#   0, also when the architecture had to be skipped.
do_archstat () {
    local config=$1
    local stem
    ARCH=$2
    CROSS_COMPILE=
    export ARCH CROSS_COMPILE
    stem="undertaker-stat/$config-$ARCH"

    if ! make "$config" >/dev/null 2>&1; then
        # some strange architectures (e.g., h8300) are just utterly broken. skip them
        echo "Skipping $ARCH ($config)"
        return 0
    fi
    if ! golem -l >"$stem.list" 2>"$stem.failed"; then
        echo "golem -l failed on $ARCH with $config, skipping"
        return 0
    fi
    printf '%s%s\n' \
        "Architecture $ARCH uses $(wc -l <"$stem.list") source files for $config" \
        " (plus possibly $(grep -c 'Failed to guess' "$stem.failed") additional files)"

    # The error log must be attached to undertaker itself, and its exit
    # status must be read from PIPESTATUS: the status of the pipeline as a
    # whole is that of the last stage and says nothing about undertaker.
    if undertaker -j cppsym -t "$PROCESSORS" -b "$stem.list" \
            -m "${MODELS:-models}/$ARCH.model" 2>"$stem.cppsym-errors" |
            grep -v -E '(MISSING|NON_KCONFIG)' | sort -u >"$stem.cppsym"
       [ "${PIPESTATUS[0]}" -ne 0 ]; then
        echo "Failed to extract CPP symbols for $ARCH with $config:"
        cat "$stem.cppsym-errors"
    fi
    # Do not leave an empty error log behind.
    if ! test -s "$stem.cppsym-errors"; then
        rm -f "$stem.cppsym-errors"
    fi
}

# Mode "feature-statistics" (-s): collect the configuration variables
# mentioned in Makefiles, the CPP symbols used in the source files, and
# per-architecture statistics for the allno/allyes/allmod configurations.
# Terminates the script when done.
#
# Reads:  MODE, MODELS, PROCESSORS
# Writes: undertaker-kbuild-variables, undertaker-kbuild-variables-x86,
#         undertaker-worklist, undertaker-all-cppsym.raw,
#         undertaker-all-cppsym, undertaker-cppsym.errors (removed again
#         when empty) and whatever do_archstat puts into undertaker-stat/
if [ "$MODE" = "feature-statistics" ]; then
    mkdir -p undertaker-stat

    golem -o | sort -u > undertaker-kbuild-variables
    ARCH=x86 golem -o | sort -u > undertaker-kbuild-variables-x86

    echo "Found $(wc -l < undertaker-kbuild-variables) configuration variables mentioned in Makefiles"

    find . -type f -name "*.[hcS]" \
        ! -regex '^./tools.*' ! -regex '^./Documentation.*' ! -regex '^./scripts.*' \
        -exec grep -q -E '^#else' {} \; -print | shuf > undertaker-worklist

    # The error log must be attached to undertaker itself, and success is
    # judged by undertaker's own status from PIPESTATUS: grep's status only
    # tells whether any line survived the filter.
    if undertaker -j cppsym -t "$PROCESSORS" -b undertaker-worklist 2>undertaker-cppsym.errors |
            grep -v -E '^(E|I|W): ' >undertaker-all-cppsym.raw
       [ "${PIPESTATUS[0]}" -eq 0 ]; then
        # Input lines are "symbol,references,rewrites"; sum both counters
        # per symbol over all lines.
        awk -F, '
            {
                references[$1] = references[$1] + $2;
                rewrites[$1] = rewrites[$1] + $3;
            }

            END {
                for (item in references) {
                    printf "%s, %d, %d\n", item, references[item], rewrites[item]
                }
            }' < undertaker-all-cppsym.raw > undertaker-all-cppsym
        echo "Found $(wc -l < undertaker-all-cppsym) distinct CPP symbols."
    else
        echo "checking for cppsym errors failed, check the error log:"
        cat undertaker-cppsym.errors
    fi
    # Do not leave an empty error log behind.
    if ! test -s undertaker-cppsym.errors; then
        rm -f undertaker-cppsym.errors
    fi

    # One architecture per model file in the selected model directory.
    # Globbing instead of parsing `ls' keeps unusual file names intact.
    for model in "$MODELS"/*.model; do
        [ -e "$model" ] || continue
        arch=$(basename "$model" .model)
        for config in allnoconfig allyesconfig allmodconfig; do
            do_archstat "$config" "$arch"
        done
    done
    exit 0
fi

# Mode "inference" (-i): let golem extract the makefile constraints for
# x86 and store them next to the models.
#
# The output goes into the model directory selected with -m (MODELS)
# rather than a fixed "models" directory, because only MODELS is known
# to exist at this point.
if [ "$MODE" = "inference" ]; then
    ARCH='x86'
    export ARCH
    golem -iv > "$MODELS/x86.makefile-constraints"
    echo "Extracted $(grep -c '^FILE_' "$MODELS/x86.makefile-constraints") source file implications for x86"
fi
