#!/usr/bin/python
# Copyright 2012  Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


'''Check lock file usage from log files.

This program reads a number of Obnam log files, produced with tracing
for obnamlib, and analyses them for bugs when using lock files. Each
log file is assumed to be produced by a separate Obnam instance.

* Have any instances held the same lock during overlapping periods?

'''


import cliapp
import logging
import os
import re
import time
import ttystatus

timestamp_pat = \
    r'^\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d (?P<timestamp>\d+\.\d+) .*'

lock_pat = re.compile(
    timestamp_pat +
    r'vfs_local.py:[0-9]*:lock: got lockname=(?P<lock>.*)')
unlock_pat = re.compile(
    timestamp_pat +
    r'vfs_local.py:[0-9]*:unlock: lockname=(?P<lock>.*)')

writefile_pat = re.compile(
    timestamp_pat +
    r'vfs_local.py:[0-9]*:write_file: write_file (?P<filename>.*)$')
overwritefile_pat = re.compile(
    timestamp_pat +
    r'vfs_local.py:[0-9]*:overwrite_file: overwrite_file (?P<filename>.*)$')

node_open_pat = re.compile(
    timestamp_pat +
    r'nodestore_disk.py:\d+:get_node: reading node \d+ from file '
    r'(?P<nodeid>.*)$')
node_remove_pat = re.compile(
    timestamp_pat +
    r'vfs_local.py:\d+:remove: remove (?P<nodeid>.*/nodes/.*)$')

rename_pat = re.compile(
    timestamp_pat +
    r'vfs_local.py:\d+:rename: rename (?P<old>\S+) (?P<new>\S+)$')


class LogEvent(object):

    def __init__(self, logfile, lineno, timestamp):
        self.logfile = logfile
        self.lineno = lineno
        self.timestamp = timestamp

    def sortkey(self):
        return self.timestamp


class LockEvent(LogEvent):

    def __init__(self, logfile, lineno, timestamp, lockname):
        LogEvent.__init__(self, logfile, lineno, timestamp)
        self.lockname = lockname

    def __str__(self):
        return 'Lock(%s)' % self.lockname


class UnlockEvent(LockEvent):

    def __str__(self):
        return 'Unlock(%s)' % self.lockname


class WriteFileEvent(LogEvent):

    def __init__(self, logfile, lineno, timestamp, filename):
        LogEvent.__init__(self, logfile, lineno, timestamp)
        self.filename = filename

    def __str__(self):
        return 'WriteFile(%s)' % self.filename


class OverwriteFileEvent(WriteFileEvent):

    def __str__(self):
        return 'OverwriteFile(%s)' % self.filename


class NodeCreateEvent(LogEvent):

    def __init__(self, logfile, lineno, timestamp, node_id):
        LogEvent.__init__(self, logfile, lineno, timestamp)
        self.node_id = node_id

    def __str__(self):
        return 'NodeCreate(%s)' % self.node_id


class NodeDestroyEvent(NodeCreateEvent):

    def __str__(self):
        return 'NodeDestroy(%s)' % self.node_id


class NodeReadEvent(NodeCreateEvent):

    def __str__(self):
        return 'NodeOpen(%s)' % self.node_id


class RenameEvent(LogEvent):

    def __init__(self, logfile, lineno, timestamp, old, new):
        LogEvent.__init__(self, logfile, lineno, timestamp)
        self.old = old
        self.new = new

    def __str__(self):
        return 'Rename(%s -> %s)' % (self.old, self.new)


class CheckLocks(cliapp.Application):

    def setup(self):
        self.events = []
        self.errors = 0
        self.latest_opened_node = None

        self.patterns = [
            (lock_pat, self.lock_event),
            (unlock_pat, self.unlock_event),
            (writefile_pat, self.writefile_event),
            (overwritefile_pat, self.overwritefile_event),
            (node_open_pat, self.read_node_event),
            (node_remove_pat, self.node_remove_event),
            (rename_pat, self.rename_event),
        ]

        self.ts = ttystatus.TerminalStatus()
        self.ts.format(
            'Reading %ElapsedTime() %Integer(lines): %Pathname(filename)')
        self.ts['lines'] = 0

    def cleanup(self):
        self.ts.clear()

        self.analyse_phase_1()

        self.ts.finish()
        if self.errors:
            raise cliapp.AppException('There were %d errors' % self.errors)

    def error(self, msg):
        logging.error(msg)
        self.ts.error(msg)
        self.errors += 1

    def analyse_phase_1(self):
        self.events.sort(key=lambda e: e.sortkey())
        self.events = self.create_node_events(self.events)
        self.ts.format('Phase 1: %Index(event,events)')
        self.ts['events'] = self.events
        self.ts.flush()

        current_locks = set()
        current_nodes = set()

        for e in self.events:
            self.ts['event'] = e
            logging.debug(
                'analysing: %s:%s: %s: %s' %
                    (e.logfile, e.lineno, repr(e.sortkey()), str(e)))
            if type(e) is LockEvent:
                if e.lockname in current_locks:
                    self.error(
                        'Re-locking %s: %s:%s:%s' %
                            (e.lockname, e.logfile, e.lineno,
                             e.timestamp))
                else:
                    current_locks.add(e.lockname)
            elif type(e) is UnlockEvent:
                if e.lockname not in current_locks:
                    self.error(
                        'Unlocking %s which was not locked: %s:%s:%s' %
                            (e.lockname, e.logfile, e.lineno,
                             e.timestamp))
                else:
                    current_locks.remove(e.lockname)
            elif type(e) in (WriteFileEvent, OverwriteFileEvent):
                lockname = self.determine_lockfile(e.filename)
                if lockname and lockname not in current_locks:
                    self.error(
                        '%s:%s: '
                        'Write to file %s despite lock %s not existing' %
                            (e.logfile, e.lineno, e.filename, lockname))
            elif type(e) is NodeCreateEvent:
                if e.node_id in current_nodes:
                    self.error(
                        '%s:%s: Node %s already exists' %
                            (e.logfile, e.lineno, e.node_id))
                else:
                    current_nodes.add(e.node_id)
            elif type(e) is NodeDestroyEvent:
                if e.node_id not in current_nodes:
                    self.error(
                        '%s:%s: Node %s does not exist' %
                            (e.logfile, e.lineno, e.node_id))
                else:
                    current_nodes.remove(e.node_id)
            elif type(e) is NodeReadEvent:
                if e.node_id not in current_nodes:
                    self.error(
                        '%s:%s: Node %s does not exist' %
                            (e.logfile, e.lineno, e.node_id))
            elif type(e) is RenameEvent:
                if e.old in current_nodes:
                    current_nodes.remove(e.old)
                    current_nodes.add(e.new)
            else:
                raise NotImplementedError()

    def create_node_events(self, events):
        new = []
        for e in events:
            new.append(e)
            if type(e) in (WriteFileEvent, OverwriteFileEvent):
                if '/nodes/' in e.filename:
                    new_e = NodeCreateEvent(
                        e.logfile, e.lineno, e.timestamp, e.filename)
                    new_e.timestamp = e.timestamp
                    new.append(new_e)
        return new

    def determine_lockfile(self, filename):
        if filename.endswith('/lock'):
            return None
        toplevel = filename.split('/')[0]
        if toplevel == 'chunks':
            return None
        if toplevel in ('metadata', 'clientlist'):
            return './lock'
        return toplevel + '/lock'

    def process_input(self, name):
        self.ts['filename'] = name
        return cliapp.Application.process_input(self, name)

    def process_input_line(self, filename, line):
        self.ts['lines'] = self.global_lineno
        for pat, func in self.patterns:
            m = pat.search(line)
            if m:
                event = func(filename, line, m)
                if event is not None:
                    self.events.append(event)

    def lock_event(self, filename, line, match):
        return LockEvent(
            filename, self.lineno, float(match.group('timestamp')),
            match.group('lock'))

    def unlock_event(self, filename, line, match):
        return UnlockEvent(
            filename, self.lineno, float(match.group('timestamp')),
            match.group('lock'))

    def writefile_event(self, filename, line, match):
        return WriteFileEvent(
            filename, self.lineno, float(match.group('timestamp')),
            match.group('filename'))

    def overwritefile_event(self, filename, line, match):
        return OverwriteFileEvent(
            filename, self.lineno, float(match.group('timestamp')),
            match.group('filename'))

    def read_node_event(self, filename, line, match):
        node_id = match.group('nodeid')
        if not os.path.basename(node_id).startswith('tmp'):
            return NodeReadEvent(
                filename, self.lineno, float(match.group('timestamp')),
                node_id)

    def node_remove_event(self, filename, line, match):
        return NodeDestroyEvent(
            filename, self.lineno, float(match.group('timestamp')),
            match.group('nodeid'))

    def rename_event(self, filename, line, match):
        return RenameEvent(
            filename, self.lineno, float(match.group('timestamp')),
            match.group('old'), match.group('new'))


CheckLocks().run()

