#!/usr/bin/env python

# This script builds message databases for use by grokevt-parselog.
#
# Copyright (C) 2005-2008 Timothy D. Morgan
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation version 2 of the
# License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# vi:set tabsize=4:
# $Id: grokevt-builddb 109 2008-03-20 19:28:07Z tim $


import sys
import os
import re
import popen2
import anydbm
import select
import grokevt

# Version number written to <OUTPUT_DIR>/version by makeOutputDirectories().
CURRENT_DB_VERSION = 2

# Names of the external helper programs launched by this script.
# XXX: should these be changed to absolute paths discovered at install time?
PATH_REGLOOKUP = 'reglookup'
PATH_RIPDLL = 'grokevt-ripdll'

# Registry path of the Eventlog key.  Assigned further down, once the
# control set number is known.
REGPATH_EVENTLOG = None
# Registry value queried to find the control set number when -c is not given.
CONTROL_SET_ID_PATH = '/Select/Current'

# Only referenced from commented-out code in buildShortNamesPathsDicts().
missing_dlls = {}

def usage():
    """Write the command synopsis and a short description to stderr."""
    program = os.path.basename(sys.argv[0])
    text = (
        "USAGE:\n",
        "  %s [-v] [-c CSID] <CONFIG_PROFILE> <OUTPUT_DIR>\n\n" % program,
        "grokevt-builddb builds a database tree based on a\n",
        "single windows system for the purpose of event log\n",
        "conversion.  Please see the man page for more\n",
        "information.\n",
    )
    for line in text:
        sys.stderr.write(line)


def unquoteString(s):
    r"""Return s with every \xHH escape replaced by the character it encodes.

    Only escapes written with two upper-case hexadecimal digits are
    recognised; everything else is copied through unchanged.  The result
    of decoding is not scanned again for further escapes.
    """
    def decode(match):
        return "%c" % int(match.group(1), 16)

    return re.sub("\\\\x([0-9A-F]{2})", decode, s)


def windowsPathToUnixPath(path_list, variables, drives):
    """Convert a ';'-separated list of Windows paths to a list of local paths.

    path_list -- string holding one or more Windows paths separated by ';'.
    variables -- mapping of variable text to its replacement; both sides
                 are lower-cased before the substitution is made.
    drives    -- mapping of a leading drive prefix to the local path that
                 replaces it.  The prefix is matched in lower case; the
                 replacement is inserted exactly as given.

    The whole input is lower-cased and backslashes become forward slashes.
    Returns a list with one converted path per ';'-separated entry.
    """
    expanded = path_list.lower()
    for name, value in variables.items():
        expanded = expanded.replace(name.lower(), value.lower())
    expanded = expanded.replace("\\", "/")

    converted = []
    for entry in expanded.split(';'):
        # Some paths have been found with trailing nulls and garbage.
        entry = entry.split('\x00', 1)[0]
        for letter, mount in drives.items():
            prefix = letter.lower()
            if entry.startswith(prefix):
                entry = mount + entry[len(prefix):]
        converted.append(entry)

    return converted


def regLookupStderrHandler(e, args):
    """Report a line of reglookup's stderr output as a warning on stderr.

    e    -- the text to report; written as given, so it is expected to
            carry its own line ending.
    args -- unused; present so the signature matches what procWrapper
            passes to its stderr handler.
    """
    warning = "WARNING: reglookup reported: %s" % e
    sys.stderr.write(warning)

def ripdllStderrHandler(e, args):
    """Report a line of grokevt-ripdll output as a warning on stderr.

    e    -- the text to report; written as given, so it is expected to
            carry its own line ending.
    args -- sequence whose first item is the file being read and whose
            second item is the file being written.
    """
    source = args[0]
    target = args[1]
    template = ("WARNING: while reading %s and writing %s,"
                " grokevt-ripdll returned: %s")
    sys.stderr.write(template % (source, target, e))


class procWrapper:
    """Run a child process and read its stdout one line at a time.

    While waiting for stdout, anything the child prints on stderr is
    passed, line by line, to a caller-supplied handler.

    Constructor arguments:
      args                -- command to run; args[0] is kept as the
                             program name for use in warnings.
      stderr_handler      -- callable invoked as
                             stderr_handler(line, stderr_handler_args)
                             for each line read from the child's stderr.
      stderr_handler_args -- opaque value handed back to stderr_handler.
    """
    # Class-level defaults; __init__ assigns all of these per instance.
    child = None
    files = None
    stderr_handler = None
    stderr_handler_args = None
    exe_name = None

    def __init__(self, args, stderr_handler, stderr_handler_args=()):
        self.stderr_handler = stderr_handler
        self.stderr_handler_args = stderr_handler_args
        self.exe_name = args[0]
        # Second argument requests a separate pipe for the child's stderr.
        # The child's stdin (tochild) is left open; nothing is written to it.
        self.child = popen2.Popen3(args, 1)
        # Handles still being watched; one is dropped when it reaches EOF.
        self.files = [self.child.childerr, self.child.fromchild]

    def _forget(self, fh):
        """Stop watching fh; harmless if it was already dropped."""
        if fh in self.files:
            self.files.remove(fh)

    def readline(self):
        """Return the next line of the child's stdout.

        Returns '' once both stdout and stderr have reached end of file.
        Lines arriving on stderr in the meantime go to the stderr handler.
        """
        line = ''
        while line == '' and len(self.files) > 0:
            watched = tuple(self.files)
            readable, _, failed = select.select(watched, (), watched)

            # Close and drop anything select flagged as exceptional.
            for fh in failed:
                fh.close()
                self._forget(fh)

            for fh in readable:
                if fh not in self.files:
                    # Closed just above; it must not be read from.
                    continue
                text = fh.readline()
                if text == '':
                    # End of file on this handle.
                    self._forget(fh)
                elif fh is self.child.childerr:
                    self.stderr_handler(text, self.stderr_handler_args)
                else:
                    line = text
        return line

    def wait(self):
        """Discard any remaining output, then wait for the child to finish.

        Returns whatever the child object's wait() returns, and writes a
        warning to stderr when that value is greater than zero.
        """
        while self.readline() != '':
            pass
        child_ret = self.child.wait()
        if child_ret > 0:
            # NOTE(review): the Python 2 popen2 docs describe this value as
            # an os.wait()-style status rather than a plain exit code --
            # confirm before decoding it with os.WEXITSTATUS().
            sys.stderr.write("WARNING: %s returned code: %d. Is it in your path?\n"
                             % (self.exe_name, child_ret))
        return child_ret


def buildShortNamesPathsDicts(paths):
    """Assign a unique short database name to every path that is a file.

    paths -- mapping of file path to a description of the services that
             use it (only used in the warning text).

    The short name is the path's base name with ':' replaced by '_' and
    '.db' appended.  When that name is already taken, '_2.db', '_3.db',
    ... are tried in turn.  Paths that are not existing files are skipped
    with a warning on stderr.

    Returns a tuple (names_to_paths, paths_to_names) of two dicts.
    """
    names_to_paths = {}
    paths_to_names = {}
    for p in paths:
        if not os.path.isfile(p):
            # XXX: recording the path in missing_dlls might be handy later
            sys.stderr.write("WARNING: %s doesn't exist.\n" % p
                             + "WARNING:    (This may affect log output "
                             + "for the following services: %s)\n" % paths[p])
            continue

        base = os.path.basename(p).replace(':', '_')
        name = base + '.db'
        suffix = 2
        while name in names_to_paths:
            name = base + '_%d.db' % suffix
            suffix += 1

        names_to_paths[name] = p
        paths_to_names[p] = name

    return (names_to_paths, paths_to_names)


def makeOutputDirectories(topdir, logs):
    """Create the output directory tree and write the version file.

    topdir -- root of the output tree.
    logs   -- iterable of log names; each gets <topdir>/services/<name>.

    <topdir>/messages and <topdir>/logs are created as well.  A path that
    already exists must be a writable directory; otherwise an error is
    written to stderr and the process exits with os.EX_CANTCREAT.
    Finally CURRENT_DB_VERSION is written to <topdir>/version.
    """
    dirs = ['%s/services/%s' % (topdir, log) for log in logs]
    dirs.append('%s/messages' % topdir)
    dirs.append('%s/logs' % topdir)

    # rwxr-xr-x, spelled so that it parses on every Python version.
    dir_mode = int('755', 8)

    for p in dirs:
        if os.path.exists(p):
            if not os.path.isdir(p) or not os.access(p, os.W_OK):
                sys.stderr.write("ERROR: Access denied to '%s'.\n" % p)
                sys.exit(os.EX_CANTCREAT)
        else:
            os.makedirs(p, dir_mode)

    # Write DB version; the handle is closed even if the write fails.
    vf = open('%s/version' % topdir, 'w')
    try:
        vf.write('%d\n' % CURRENT_DB_VERSION)
    finally:
        vf.close()


def writeServiceMapping(topdir, maps):
    """Write one database file per log type and message type.

    topdir -- root of the output tree.
    maps   -- nested mapping: log type -> message type -> {key: value}.

    Each innermost mapping is stored in a freshly created database at
    <topdir>/services/<log type>/<message type>.db.
    """
    db_mode = int('644', 8)  # rw-r--r--
    for log_type, by_message_type in maps.items():
        for message_type, entries in by_message_type.items():
            db_file = "%s/services/%s/%s.db" % (topdir, log_type, message_type)
            # "n": always start from a new, empty database.
            db = anydbm.open(db_file, "n", db_mode)
            for key, value in entries.items():
                db[key] = value
            db.sync()
            db.close()


def writeDBFiles(topdir, names2paths):
    """Run the ripdll helper once for every message file.

    topdir      -- root of the output tree.
    names2paths -- mapping of database file name to the source file path.

    The helper is started as PATH_RIPDLL <source> <topdir>/messages/<name>.
    Every line it prints, on stdout as well as stderr, is reported through
    ripdllStderrHandler.
    """
    for db_name, source_path in names2paths.items():
        outdb = "%s/messages/%s" % (topdir, db_name)
        report_args = (source_path, outdb)
        ripdll_proc = procWrapper((PATH_RIPDLL, source_path, outdb),
                                  ripdllStderrHandler, report_args)

        # readline() returns '' once the helper has closed its output.
        for line in iter(ripdll_proc.readline, ''):
            ripdllStderrHandler(line, report_args)
        ripdll_proc.wait()

        

# Parse command line arguments.
#
# The last two arguments are always taken as <CONFIG_PROFILE> and
# <OUTPUT_DIR>; everything before them is treated as an option.
CONTROL_SET_ID = None
print_verbose = 0
next_is_cid = 0   # set while the value following -c is still expected
argv_len = len(sys.argv)
if argv_len < 3:
    usage()
    sys.stderr.write("ERROR: Requires at least 2 arguments.\n")
    sys.exit(os.EX_USAGE)
else:
    for option in sys.argv[1:argv_len-2]:
        if next_is_cid:
            # A non-numeric value is a usage error, not a traceback.
            try:
                CONTROL_SET_ID = int(option)
            except ValueError:
                usage()
                sys.stderr.write("ERROR: CONTROL_SET_ID must be an integer,"
                                 + " not '%s'.\n" % option)
                sys.exit(os.EX_USAGE)
            if CONTROL_SET_ID < 1:
                usage()
                sys.stderr.write("ERROR: CONTROL_SET_ID must be positive.\n")
                sys.exit(os.EX_USAGE)
            next_is_cid = 0
        elif option == '-v':
            print_verbose = 1
        elif option == '-c':
            next_is_cid = 1
        else:
            usage()
            sys.stderr.write("ERROR: Unrecognized option '%s'.\n" % option)
            sys.exit(os.EX_USAGE)

    # -c was the last option, so its value is missing.
    if next_is_cid:
        usage()
        sys.stderr.write("ERROR: -c requires parameter.\n")
        sys.exit(os.EX_USAGE)


CONFIG_PROFILE = sys.argv[argv_len-2]
PATH_OUTPUT = sys.argv[argv_len-1]


if print_verbose:
    sys.stderr.write("INFO: Loading configuration...\n")
config = grokevt.grokevtConfig(grokevt.PATH_CONFIG, CONFIG_PROFILE)

if print_verbose:
    sys.stderr.write("INFO: Reading system registry for service information...\n")

# When no control set number was given with -c, look it up in the
# registry: query the DWORD value at CONTROL_SET_ID_PATH.
if not CONTROL_SET_ID:
    logs_query = [PATH_REGLOOKUP, "-H", "-t", "DWORD"]
    logs_query.extend(["-p", CONTROL_SET_ID_PATH, config.registry_path])
    reg_proc = procWrapper(logs_query, regLookupStderrHandler)

    l = reg_proc.readline()
    if not l:
        sys.stderr.write("ERROR: Could not automatically "
                         "determine CONTROL_SET_ID\n")
        sys.exit(os.EX_IOERR)

    reg_proc.wait()
    # The third comma-separated field is parsed as a hexadecimal number.
    value_field = l.split(',')[2]
    CONTROL_SET_ID = int(value_field, 16)


CONTROL_SET_PATH = '/ControlSet%.3d' % CONTROL_SET_ID
REGPATH_EVENTLOG = CONTROL_SET_PATH + '/Services/Eventlog'

if print_verbose:
    info = "INFO: Using '%s' as CurrentControlSet path.\n" % CONTROL_SET_PATH
    sys.stderr.write(info)

# Next, identify all log types by grabbing all keys under the eventlog key.
# Only the first path component below REGPATH_EVENTLOG is kept, so nested
# keys collapse onto the log type they belong to.
logs_query = [PATH_REGLOOKUP, "-H", "-t", "KEY"]
logs_query.extend(["-p", REGPATH_EVENTLOG, config.registry_path])
reg_proc = procWrapper(logs_query, regLookupStderrHandler)

log_types = {}
prefix_len = len(REGPATH_EVENTLOG)

# readline() returns '' once reglookup has closed its output.
for l in iter(reg_proc.readline, ''):
    key_path = l.split(',')[0]
    below_eventlog = key_path[prefix_len:].lstrip('/')
    log_name = below_eventlog.split('/')[0].strip()
    if log_name:
        # XXX: do log names need to be case-squashed?
        log_types[unquoteString(log_name)] = None
reg_proc.wait()


# Now, for each log type, grab the log file path and the list of all
# services in the Sources MULTI_SZ
files = {}       # message file path -> comma-separated services using it
evt_files = {}   # log type -> local path of its event log file
# Iterate over a snapshot of the keys: log types without a log file are
# deleted from log_types inside the loop.
for lt in list(log_types.keys()):
    evt_query = [PATH_REGLOOKUP, "-H", "-t", "EXPAND_SZ",
                 "-p", "%s/%s/File" % (REGPATH_EVENTLOG,lt),
                 config.registry_path]
    reg_proc = procWrapper(evt_query, regLookupStderrHandler)

    l = reg_proc.readline()
    if l == '':
        # Collect the child here too, so that its exit status is checked
        # (and reported by wait()) on this path like on every other one.
        reg_proc.wait()
        if print_verbose:
            sys.stderr.write('WARNING: Event log file path not found'
                             +' for log type "%s".  Removing this type.\n' % lt)
        del log_types[lt]
        continue
    l = l.split(',')[2].strip()
    # Only the first path of the converted list is used for the log file.
    evt_files[lt] = windowsPathToUnixPath(unquoteString(l),
                                          config.path_vars,
                                          config.drive_mapping)[0]
    reg_proc.wait()

    logs_query = [PATH_REGLOOKUP, "-H", "-t", "MULTI_SZ",
                  "-p", "%s/%s/Sources" % (REGPATH_EVENTLOG,lt),
                  config.registry_path]
    reg_proc = procWrapper(logs_query, regLookupStderrHandler)

    log_types[lt] = {}
    log_types[lt]["event"] = {}
    log_types[lt]["category"] = {}
    log_types[lt]["parameter"] = {}

    # For each service under this log type, grab the associated event,
    # category, and parameter message file lists.
    l = reg_proc.readline()
    reg_proc.wait()
    if l != '':
        # The value field lists the services separated by '|'.
        services = l.split(',')[2].split('|')
        for s in services:
            s = s.strip().lower()
            log_types[lt]["event"][s] = None
            log_types[lt]["category"][s] = None
            log_types[lt]["parameter"][s] = None
    elif print_verbose:
        sys.stderr.write('WARNING: No sources found for log type "%s".\n' % lt)

    # Now grab whole tree under this log type and parse it
    lt_path = "%s/%s" % (REGPATH_EVENTLOG,lt)
    lt_path_len = len(lt_path)
    dlls_query = [PATH_REGLOOKUP, "-H", "-p", lt_path, config.registry_path]
    reg_proc = procWrapper(dlls_query, regLookupStderrHandler)

    l = reg_proc.readline()
    while l != '':
        fields = l.split(',')
        # trim unneeded prefix from path
        fields[0] = fields[0][lt_path_len:].lstrip('/')

        # First remaining path component is the service name.  Services
        # that were not listed in Sources are ignored.
        s = fields[0].split('/')[0].lower()
        if fields[1] == 'EXPAND_SZ' and s in log_types[lt]['event']:
            t = None
            if "EventMessageFile" in fields[0]:
                t = "event"
            elif "CategoryMessageFile" in fields[0]:
                t = "category"
            elif "ParameterMessageFile" in fields[0]:
                t = "parameter"
            if t:
                log_types[lt][t][s]\
                  = windowsPathToUnixPath(unquoteString(fields[2].strip()),
                                          config.path_vars,
                                          config.drive_mapping)
                # Retain unique list of file names and their associated services
                for f in log_types[lt][t][s]:
                    if f in files:
                        files[f] = files[f]+","+s
                    else:
                        files[f] = s
        l = reg_proc.readline()
    reg_proc.wait()

(names2paths, paths2names) = buildShortNamesPathsDicts(files)

# Convert path lists to named path strings.  Filter out any unusable resources
# and any services that have no remaining resources.
#
# Each surviving entry becomes a ':'-separated string of database names.
# The key lists are copied first because entries are deleted on the way.
for lt in list(log_types.keys()):
    for t in list(log_types[lt].keys()):
        svc_map = log_types[lt][t]
        for s in list(svc_map.keys()):
            path_list = svc_map[s]
            if not path_list:
                # No message file was found for this service at all.
                del svc_map[s]
                continue

            db_names = [paths2names[p] for p in path_list if p in paths2names]
            svc_map[s] = ':'.join(db_names)

            if svc_map[s] == '':
                del svc_map[s]
                sys.stderr.write("WARNING: service '%s' removed" % s
                                 + " due to missing message sources.\n")

if print_verbose:
    sys.stderr.write("INFO: Writing service mappings...\n")
makeOutputDirectories(PATH_OUTPUT, log_types.keys())
writeServiceMapping(PATH_OUTPUT, log_types)

if print_verbose:
    sys.stderr.write("INFO: Writing DLL databases...\n")
writeDBFiles(PATH_OUTPUT, names2paths)

if print_verbose:
    sys.stderr.write("INFO: Copying log files...\n")
try:
    for ei in evt_files.items():
        # Event logs are binary data, so both ends are opened in binary
        # mode.  The source is opened first so that an unreadable log does
        # not leave an empty copy behind, and both handles are closed even
        # when the copy fails part-way.
        i = open(ei[1], "rb")
        try:
            o = open("%s/logs/%s.evt" % (PATH_OUTPUT,ei[0]), "wb")
            try:
                buf = i.read(1024)
                while buf:
                    o.write(buf)
                    buf = i.read(1024)
            finally:
                o.close()
        finally:
            i.close()
except Exception:
    # Fetched this way so the handler parses on every Python version.
    inst = sys.exc_info()[1]
    sys.stderr.write("ERROR: %s\n" % inst)
    sys.stderr.write("ERROR: could not copy all log files.\n")
    sys.exit(os.EX_IOERR)

if print_verbose:
    sys.stderr.write("INFO: Done.\n")
