#! /bin/sh

# General style correction and cleanup.
# Scratch file name, removed on exit and on HUP/INT/QUIT/PIPE/TERM. The trap body is single-quoted
# so $t is expanded when the trap fires, and double-quoted inside so that a name containing
# whitespace is removed as one path instead of being split into several.
t=__wt.$$
trap 'rm -f "$t"' 0 1 2 3 13 15

# Probe xargs once: if it accepts -P, remember the flag so files are checked in parallel;
# otherwise xp stays empty.
xp=""
echo date | xargs -P 20 >/dev/null 2>&1 && xp="-P 20"

# By default every matching source file under the listed directories is checked. A single -F
# argument selects fast mode, which only lists files git reports as changed since the fork point
# from develop. Any other single argument is taken as the one file to check.
single_file=0
search_str="find bench examples ext src test -name '*.[ch]' -o -name '*.in' -o -name '*.dox' \
    -o -name '*.cxx' -o -name '*.cpp' -o -name '*.py' -o -name '*.cmake' -o -name '*.i'"
if [ $# -eq 1 ]; then
    case "$1" in
    -F)
        search_str="git diff --name-only $(git merge-base --fork-point develop) | grep -E '\.(c|h|cpp|cxx|i|py|dox|cmake)$'"
        ;;
    *)
        single_file=1
        ;;
    esac
fi

# s_style is re-entrant: when run without a file argument, it calls itself
# again for each file that needs checking.
if [ "$single_file" -ne 1 ]; then
    # The search command and the ./dist/s_style path below are relative to the parent directory;
    # stop rather than scan the wrong tree if we cannot get there.
    cd .. || exit 1
    # Skip auto-generated and third party code, including SWIG generated workgen code.
    # The search command may contain a pipeline, so it has to go through eval; it is quoted so
    # the shell does not glob or split the command text before eval re-parses it. $xp is left
    # unquoted on purpose: it is either empty or the two words "-P 20".
    eval "$search_str" |
    sed -e '/Makefile.in/d' \
        -e '/test\/3rdparty/d' \
        -e '/test\/checksum\/power8/d' \
        -e '/bench\/workgen\/workgen_wrap.cxx/d' \
        -e '/checksum\/zseries/d' |
    xargs $xp -I{} sh ./dist/s_style {}
else
    # General style correction and cleanup for a single file
    f=$1
    fname=$(basename "$f")
    # The scratch file name includes the file's base name and this process's PID.
    t=__wt_s_style.$fname.$$

    # Nothing to check if the named path does not exist.
    if [ ! -e "$f" ]; then
        echo "$0 error $1 does not exist"
        exit 1
    fi

    # Ignore styling errors of external libraries: the naming and return-statement checks only
    # apply to files under src/, excluding the Windows OS layer, docs, tags, and the hash_city,
    # huffman and checksum sources.
    if expr "$f" : 'src/' > /dev/null &&
      ! expr "$f" : 'src/os_win/' > /dev/null &&
      ! expr "$f" : 'src/docs/' > /dev/null &&
      ! expr "$f" : 'src/tags' > /dev/null &&
      ! expr "$f" : 'src/.*/hash_city.*' > /dev/null &&
      ! expr "$f" : 'src/.*/huffman.*' > /dev/null &&
      ! expr "$f" : 'src/checksum.*' > /dev/null; then

        # Camel case style is not allowed. Matching lines are printed by the pipeline itself;
        # lines that look like comment continuations, contain a double quote, or are tagged
        # UNCHECKED_STRING are filtered out.
        if grep -E -r -n '\b[a-z]+[A-Z]' "$f" | grep -E -v ':[     ]+\*|"|UNCHECKED_STRING'; then
            echo "$f: Styling requires variables that use underscores to separate parts of a name instead of camel casing.";
        fi

        # Return values should be wrapped in parentheses.
        grep -E -r -n '^[^*/"]*[[:space:]]*return [^(]' "$f" > "$t"
        if test -s "$t"; then
            echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
            echo 'Add parentheses to return values indicated below in file '"$f"
            echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
            cat "$t"
        fi
    fi

    # Report any file whose final suffix is cxx: C++ sources are expected to end in .cpp.
    case "${fname##*.}" in
    cxx)
        echo "$f: C++ files must use .cpp as an extension."
        ;;
    esac

    # Remove non-ascii characters and substitute some expressions with accepted version.
    # The filtered text goes to the scratch file first so the original is never read and
    # written at the same time.
    tr -cd '[:alnum:][:space:][:punct:]' < "$f" |
    sed -e '/for /!s/;;$/;/' \
        -e 's/(EOPNOTSUPP)/(ENOTSUP)/' \
        -e 's/(unsigned)/(u_int)/' \
        -e 's/hazard reference/hazard pointer/' > "$t"

    # Only rewrite the file when the filtered text differs from what is on disk.
    if ! cmp "$t" "$f" > /dev/null 2>&1; then
        echo "modifying $f" && cp "$t" "$f"
    fi

    # Finding paired typos (the same word twice in a row) in the comments of different file types
    # and excluding invalid edge cases. Python files are searched after a '#', C sources and
    # headers after a comment marker, and everything else on any line.
    case "${fname##*.}" in
    py)
        grep -E '#.*[[:space:]]\b([a-zA-Z]+)[[:space:]]\b\1[[:space:]\.]' "$f" > "$t"
        ;;
    c|h)
        grep -E '/?\*.*[[:space:]]\b([a-zA-Z]+)[[:space:]]\b\1[[:space:]\.]' "$f" |
            grep -E -v -e "@" -e "long long" > "$t"
        ;;
    *)
        grep -E '[[:space:]]\b([a-zA-Z]+)[[:space:]]\b\1[[:space:]\.]' "$f" |
            grep -E -v -e "@" -e "^\(" > "$t"
        ;;
    esac

    if test -s "$t"; then
        echo "paired typo"
        echo "============================"
        echo "$f"
        cat "$t"
    fi

    # Files with a dox suffix stop here; none of the checks below are run for them.
    extension="${fname##*.}"
    case "$extension" in
    dox)
        exit 0
        ;;
    esac

    # Flag lines containing "while (0);" with no closing brace earlier on the line.
    if grep "^[^}]*while (0);" "$f" > "$t"; then
        echo "$f: while (0) has trailing semi-colon"
        cat "$t"
    fi

    # WT_DEADLOCK may only appear on its own #define line.
    if grep WT_DEADLOCK "$f" | grep -v '#define.WT_DEADLOCK' > "$t"; then
        echo "$f: WT_DEADLOCK deprecated in favor of WT_ROLLBACK"
        cat "$t"
    fi

    # A static assert in verify_build.h requires using sizeof explicitly.
    if ! expr "$f" : 'src/include/verify_build.h' > /dev/null &&
        grep 'sizeof(WT_UPDATE)' "$f" > "$t"; then
        echo "$f: Use WT_UPDATE_SIZE rather than sizeof(WT_UPDATE)"
        cat "$t"
    fi

    # WT_TXN_ISO_XXX is only allowed in the C examples, the extensions, and the two extension API
    # sources. The matches are captured in the scratch file so the report shows this check's
    # hits rather than whatever an earlier check left there.
    if ! expr "$f" : 'examples/c/.*' > /dev/null &&
       ! expr "$f" : 'ext/.*' > /dev/null &&
       ! expr "$f" : 'src/include/wiredtiger_ext\.h' > /dev/null &&
       ! expr "$f" : 'src/txn/txn_ext\.c' > /dev/null &&
       grep WT_TXN_ISO_ "$f" > "$t"; then
        echo "$f: WT_TXN_ISO_XXX constants only for the extension API"
        cat "$t"
    fi

    # Outside queue.h only TAILQ lists are allowed. The message is printed first and the
    # matching lines after it, the same order the other checks use.
    if ! expr "$f" : 'src/include/queue\.h' > /dev/null &&
       grep -E 'STAILQ_|SLIST_|\bLIST_' "$f" > "$t"; then
        echo "$f: use TAILQ for all lists"
        cat "$t"
    fi

    # __wt_errno may only be referenced from the extern headers, os.h and the OS layers.
    if ! expr "$f" : 'src/include/extern.h' > /dev/null &&
       ! expr "$f" : 'src/include/extern_posix.h' > /dev/null &&
       ! expr "$f" : 'src/include/extern_win.h' > /dev/null &&
       ! expr "$f" : 'src/include/os.h' > /dev/null &&
       ! expr "$f" : 'src/os_common/.*' > /dev/null &&
       ! expr "$f" : 'src/os_posix/.*' > /dev/null &&
       ! expr "$f" : 'src/os_win/.*' > /dev/null &&
       grep '__wt_errno' "$f" > "$t"; then
        echo "$f: upper-level code should not call __wt_errno"
        cat "$t"
    fi

    # A raw %zu conversion is reported unless the line also mentions SIZET_FMT.
    if ! expr "$f" : 'examples/c/.*' > /dev/null &&
       ! expr "$f" : 'src/include/os.h' > /dev/null &&
       grep -E "%[0-9]*zu" "$f" | grep -v 'SIZET_FMT' > "$t"; then
        echo "$f: %zu needs to be fixed for Windows"
        cat "$t"
    fi

    # off_t as a whole word is reported in every file.
    if grep -E -w 'off_t' "$f" > "$t"; then
        echo "$f: off_t type declaration, use wt_off_t"
        cat "$t"
    fi

    # Direct qsort calls are reported everywhere except misc.h.
    if ! expr "$f" : 'src/include/misc.h' > /dev/null &&
       grep '[[:space:]]qsort(' "$f" > "$t"; then
        echo "$f: qsort call, use WiredTiger __wt_qsort instead"
        cat "$t"
    fi

    # setvbuf as a whole word is reported everywhere except os_setvbuf.c.
    if ! expr "$f" : 'src/.*/os_setvbuf.c' > /dev/null &&
       grep -E -w 'setvbuf' "$f" > "$t"; then
        echo "$f: setvbuf call, use WiredTiger library replacements"
        cat "$t"
    fi

    # Direct snprintf/vsnprintf calls are reported outside the C examples, benchmarks,
    # extensions, C++ sources and os_snprintf.c. expr takes a regular expression anchored at the
    # start of the path, so directory prefixes are spelled "dir/.*" rather than the glob "dir/*".
    if ! expr "$f" : 'examples/c/.*' > /dev/null &&
       ! expr "$f" : 'bench/.*' > /dev/null &&
       ! expr "$f" : '.*cxx' > /dev/null &&
       ! expr "$f" : '.*cpp' > /dev/null &&
       ! expr "$f" : 'ext/.*' > /dev/null &&
       ! expr "$f" : 'src/os_posix/os_snprintf.c' > /dev/null &&
       grep -E '[^a-z_]snprintf\(|[^a-z_]vsnprintf\(' "$f" > "$t"; then
        echo "$f: snprintf call, use WiredTiger library replacements"
        cat "$t"
    fi

    # If we don't have matching pack-begin and pack-end calls, we don't get
    # an error, we just get a Windows performance regression. The matching
    # lines are counted with awk rather than wc so the count carries no
    # padding whitespace; an odd count means a begin or end line is missing.
    grep -E WT_PACKED_STRUCT "$f" > "$t"
    cnt=$(awk 'END { print NR }' < "$t")
    if [ $((cnt % 2)) -ne 0 ]; then
        echo "$f: mismatched WT_PACKED_STRUCT_BEGIN/END lines"
        cat "$t"
    fi

    # Direct calls to functions we're not supposed to use in the library.
    # We don't check for all of them, just a few of the common ones.
    # Benchmarks, examples, extensions and tests are exempt. Each grep only
    # succeeds when it wrote at least one matching line to the scratch file,
    # so its exit status alone decides whether to report.
    if ! expr "$f" : 'bench/.*' > /dev/null &&
       ! expr "$f" : 'examples/.*' > /dev/null &&
       ! expr "$f" : 'ext/.*' > /dev/null &&
       ! expr "$f" : 'test/.*' > /dev/null; then
        # Allocation and string-duplication calls, and sprintf.
        if ! expr "$f" : '.*/os_alloc.c' > /dev/null &&
           ! expr "$f" : '.*/util_misc.c' > /dev/null &&
           grep -E '[[:space:]]free[(]|[[:space:]]strdup[(]|[[:space:]]strndup[(]|[[:space:]]malloc[(]|[[:space:]]calloc[(]|[[:space:]]realloc[(]|[[:space:]]sprintf[(]' "$f" > "$t"; then
            echo "$f: call to illegal function"
            cat "$t"
        fi
        # strtouq, except in os_strtouq.c.
        if ! expr "$f" : '.*/os_strtouq.c' > /dev/null &&
           grep -E '[[:space:]]strtouq[(]' "$f" > "$t"; then
            echo "$f: call to illegal function"
            cat "$t"
        fi
        # exit.
        if grep -E '[[:space:]]exit[(]' "$f" > "$t"; then
            echo "$f: call to illegal function"
            cat "$t"
        fi
    fi

    # Declaration of an integer return variable.
    if ! expr "$f" : 'bench/.*' > /dev/null &&
       ! expr "$f" : 'examples/.*' > /dev/null &&
       ! expr "$f" : 'test/.*' > /dev/null &&
       ! expr "$f" : 'ext/.*' > /dev/null; then
        # This regex can return false positives on functions that take ret as
        # an argument. Filter out matches that contain brackets.
        grep -E -w ret "$f" | grep -E 'int.*[, ]ret[,;]' | grep -E -v '[()]' > "$t"
        if test -s "$t"; then
            echo "$f: explicit declaration of \"ret\""
            cat "$t"
        fi
    fi

    # Use of ctype functions that sign extend their arguments. Reports an
    # include of ctype.h or a call to any is*() classifier, tolower() or
    # toupper(). Benchmarks, csuite tests, examples, extensions, Python and
    # C++ files, and ctype.i itself are exempt.
    if ! expr "$f" : 'bench/.*' > /dev/null &&
       ! expr "$f" : 'test/csuite/.*' > /dev/null &&
       ! expr "$f" : 'examples/.*' > /dev/null &&
       ! expr "$f" : 'ext/.*' > /dev/null &&
       ! expr "$f" : '.*py' > /dev/null &&
       ! expr "$f" : '.*cpp' > /dev/null &&
       ! expr "$f" : 'src/include/ctype.i' > /dev/null; then
        # The alternation after "to" must be lower|upper: writing "toupper"
        # there would only match the non-existent "totoupper(".
        if grep -E '(#include.*["</]ctype.h[">]|\b(is(alnum|alpha|cntrl|digit|graph|lower|print|punct|space|upper|xdigit)|to(lower|upper))\()' "$f" > "$t"; then
            echo "$f: direct use of ctype.h functions, instead of ctype.i equivalents"
            cat "$t"
        fi
    fi
fi

exit 0
