#!/usr/bin/python3
#emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 noet:
"""Script to help maintaining package definitions within Debian blends
task files.

It often becomes necessary to duplicate the same information about a
package within Debian packaging and multiple blends task files,
possibly of different blends.  This script automates:

- construction of entries
- injection into task files
- modification of existing entries if things changed
- removal of previously injected entries

Possible TODOs:
---------------

* For every package the same task file might be re-read/written (if
  entry changed/added) from disk.
  That allows to replace easily original entry for 'source' package
  (listed as Suggests:) with actual first listed binary package.
  This should be taken into consideration if current per-package
  handling gets changed

"""

"""
Configuration
-------------

Paths to the blends top directories, containing tasks directories are
specified in ~/.blends-inject.cfg file, e.g.::

 [debian-med]
 path = ~/deb/debian-med/

 [debian-science]
 path = ~/deb/debian-science/

By default, definitions of the fields for task files are looked up
in debian/blends, or in the files provided on the command line.  For the
"-a" mode of operation you should also define a list of globs matching your
debian/blends files::

 [paths]
 all=~/deb/gits/pkg-exppsy/neurodebian/future/blends/*
     ~/deb/gits/*/debian/blends
     ~/deb/gits/pkg-exppsy/*/debian/blends
 # Python regular expression on which files to skip
 # Default is listed below
 #skip=.*[~#]$


Format of debian/blends
-----------------------

Example::

 ; If originally filed using project source name, and it is different
 ; from the primary (first) binary package name, keep 'Source' to be
 ; able to adopt previously included tasks entry
Source: brian

 ; Define the format on how entries should be handled.
 ; Possible values:
 ;   extended -- whenever package is not in Debian and additional
 ;               fields should be obtained from debian/*:
 ;               * License
 ;               * WNPP
 ;               * Pkg-Description
 ;               * Responsible
 ;               * Homepage
 ;               * Vcs-*
 ;   plain [default] -- only fields listed here should be mentioned.
 ;               Common use -- whenever package is already known to UDD.
 ;
 ;  By default, all fields specified previously propagate into following
 ;  packages as well.  If that is not desired, add suffix '-clean' to
 ;  the Format
Format: extended

Tasks: debian-science/neuroscience-modeling

 ; Could have Depends/Recommends/Suggests and Ignore
 ; All those define Pkg-Name field which is not included
 ; in the final "rendering" but is available as Pkg-Name item
Depends: python-brian
Pkg-URL: http://neuro.debian.net/pkgs/%(Pkg-Name)s.html
Language: Python, C
Published-Authors: Goodman D.F. and Brette R.
Published-Title: Brian: a simulator for spiking neural networks in Python
Published-In: Front. Neuroinform
Published-Year: 2008
Published-DOI: 10.3389/neuro.11.005.2008

 ; If some previous entry should be removed, say so
Remove: python-brian-doc

 ;Tasks: debian-med/imaging-dev
 ;Why: Allows interactive development/scripting

 ; ; It should be possible to switch between formats,
 ; ; e.g. if some component is not yet in Debian
 ;Format: extended
 ;
 ; ; Now some bogus one but with customizations
 ;Tasks: debian-med/documentation
 ;Recommends: python-brian-doc
 ;Language:
 ;Remark: some remark
 ;

"""


from past.builtins import cmp
from future import standard_library
from functools import reduce
standard_library.install_aliases()
from builtins import str
from builtins import object
import re, os, sys, tempfile, glob
from os.path import join, exists, expanduser, dirname, basename

from configparser import ConfigParser
from optparse import OptionParser, Option

from copy import deepcopy
#from debian_bundle import deb822
from debian import deb822
#import deb822
from debian.changelog import Changelog

# all files we are dealing with should be UTF8, thus
# lets override
import codecs

def open(f, *args):
    """Open `f` via `codecs.open`, always using the UTF-8 encoding.

    Intentionally shadows the builtin `open` for the rest of this module.
    Any extra positional arguments (e.g. the mode) are forwarded as is.
    """
    forced = {'encoding': 'utf-8'}
    return codecs.open(f, *args, **forced)

__author__ = 'Yaroslav Halchenko'
# Name under which the script was invoked; also used for the default
# configuration file name and in the marker comment of injected entries
__prog__ = os.path.basename(sys.argv[0])
__version__ = '0.0.7'
__copyright__ = 'Copyright (c) 2010 Yaroslav Halchenko'
__license__ = 'GPL'

# Fields which initiate a new package description
PKG_FIELDS = ('depends', 'recommends', 'suggests', 'ignore', 'remove')

# Fields may need to be re-sorted to ensure a somewhat canonical order.
# Prefixes for "standard" blends/tasks fields.  Others do not get embedded
# into tasks files
BLENDS_FIELDS_PREFIXES = ('depends', 'recommends', 'suggests', 'ignore',
                'why', 'homepage', 'language', 'wnpp', 'responsible', 'license',
                'vcs-', 'pkg-url', 'pkg-description',
                'published-', 'x-', 'registration', 'remark')
# Additional fields which might come in useful (e.g. for filing wnpp bugs)
# but are not "standard", thus should be in the trailer
CUSTOM_FIELDS_PREFIXES = ('author', 'pkg-name', 'pkg-source',
                          'version', 'remove')
# Other fields should cause Error for consistency

# Complete ordering of field prefixes: standard ones first, custom ones last
FIELDS_ORDER = BLENDS_FIELDS_PREFIXES + CUSTOM_FIELDS_PREFIXES

# Noise level; assigned in main() from the -v/--verbosity option
verbosity = None

def error(msg, exit_code=1):
    """Print `msg` (newline-terminated) to stderr and terminate.

    The process exits via SystemExit carrying `exit_code`.
    """
    stream = sys.stderr
    stream.write(msg + '\n')
    raise SystemExit(exit_code)

def verbose(level, msg):
    """Write `msg` to stderr if `level` does not exceed the noise level.

    The message is indented by `level` spaces.  While the module-level
    `verbosity` is still None (i.e. it was not configured yet) nothing is
    printed: comparing an int against None is an error under Python 3,
    whereas Python 2 quietly treated None as smaller than any number.
    """
    if verbosity is None or level > verbosity:
        return
    sys.stderr.write(" "*level + msg + '\n')


def parse_debian_blends(f='debian/blends'):
    """Parses debian/blends file

    All paragraphs of the file are flattened into a single sequence of
    (field, value) items which is then traversed in order:

    - 'Source' and 'Format' set the state used for following packages
      (a '-clean' suffix of the format disables propagation of fields
      from the previous package);
    - 'Tasks' selects the tasks for following packages or, if a package
      is already being described, starts per-task customizations of it;
    - any of PKG_FIELDS starts a new package;
    - any other field is stored in the current package, or in the per-task
      customizations of it.

    Returns unprocessed list of customized Deb822 entries.  Each entry
    carries two additional attributes: `.tasks` (dict mapping a task name
    to a Deb822Dict with per-task customizations) and `.format`.
    """
    # Linearize all the paragraphs since we are not using them
    items = []
    # Explicitly close the file as soon as everything is read in
    with open(f) as blends_fh:
        for paragraph in deb822.Deb822.iter_paragraphs(blends_fh):
            items += list(paragraph.items())

    verbose(6, "Got items %s" % items)
    # Traverse and collect things
    format_ = 'plain'
    format_clean = False # do not propagate fields into a new pkg if True
    pkg, source = None, None
    pkgs = []
    tasks = []
    newtasks = False    # True while fields are per-task customizations

    def new_pkg(prev_pkg, field, bname, sname, tasks):
        """Helper function to create a new package

        `field` is the name of the dependency field (one of PKG_FIELDS,
        in its original spelling) under which `bname` gets stored.
        """
        if format_clean or prev_pkg is None:
            pkg = deb822.Deb822()
        else:
            pkg = deepcopy(prev_pkg)
            for k_ in PKG_FIELDS:   # prune older depends
                pkg.pop(k_, None)
        pkg['Pkg-Name'] = pkg[field] = bname.lower()
        if sname is not None:
            sname = sname.lower()
        pkg['Pkg-Source'] = sname
        pkgs.append(pkg)
        pkg.tasks = dict( (t.strip(), deb822.Deb822Dict()) for t in tasks )
        pkg.format = format_
        return pkg

    for k, v in items:

        kl = k.lower()

        if kl == 'source':
            source = v.strip()
        elif kl == 'format':
            format_ = v.strip()
            format_clean = format_.endswith('-clean')
            if format_clean:
                format_ = format_[:-len('-clean')]
        elif kl == 'tasks':
            tasks = [x.strip() for x in v.split(',')]
            # If a package is already known, following fields are
            # tune-ups of it for the tasks just listed
            newtasks = pkg is not None
        elif kl in PKG_FIELDS: # new package
            if source is None and format_ not in ['extended']:
                source = v
            pkg = new_pkg(pkg, k, v, source, tasks)
            newtasks = False
        else:
            if pkg is None:
                # So we had just source?
                if source is None:
                    error("No package or source is known where to add %s" % (k,), 1)
                    # TODO: just deduce source from DebianMaterials
                # Since only source is available, it should be only Suggest:-ed
                pkg = new_pkg(pkg, 'Suggests', source, source, tasks)
                newtasks = False

            if newtasks:
                # Add customization
                for t in tasks:
                    if t not in pkg.tasks:
                        pkg.tasks[t] = deb822.Deb822Dict()
                    pkg.tasks[t][k] = v
            else:
                # just store the key in the pkg itself
                pkg[k] = v

    return pkgs


def expand_pkgs(pkgs, topdir='.'):
    """Complete, in place, packages declared with the 'extended' format.

    For every such package the fields License, WNPP, Pkg-Description,
    Responsible, Homepage and Pkg-source which have no (or an empty) value
    are filled in from the Debian materials found under `topdir`, if a
    value could be obtained there.  Vcs fields provided by the Debian
    materials are always copied into the package.  Packages of any other
    format are left untouched.
    """
    verbose(4, "Expanding content for %d packages" % len(pkgs))
    materials = None

    for pkg in pkgs:
        if pkg.format != 'extended':
            continue

        # debian/control and friends are needed only from now on, so
        # instantiate lazily and share among all extended packages
        if materials is None:
            materials = DebianMaterials(topdir)

        # (field, callable providing a value for it), in lookup order
        providers = [
            ('License', lambda: materials.get_license(pkg['Pkg-Name'])),
            ('WNPP', materials.get_wnpp),
            ('Pkg-Description',
             lambda: materials.get_description(pkg['Pkg-Name'])),
            ('Responsible', materials.get_responsible),
            ('Homepage', lambda: materials.source.get('Homepage', None)),
            ('Pkg-source', lambda: materials.source.get('Source', None)),
        ]
        for field, provider in providers:
            if not pkg.get(field, None):
                value = provider()
                if value:
                    pkg[field] = value

        # VCS fields
        vcs_fields = materials.get_vcsfields()
        pkg.update(vcs_fields)


def prefix_index(x, entries, strict=True, case=False, default=10000):
    """Return the position of the first item of `entries` prefixing `x`.

    Unless `case` is set, `x` is lower-cased before matching (`entries`
    are used as given).  If nothing matches, IndexError is raised when
    `strict`, otherwise `default` is returned.
    """
    needle = x if case else x.lower()

    position = next(
        (idx for idx, prefix in enumerate(entries)
         if needle.startswith(prefix)),
        None)
    if position is not None:
        return position

    if not strict:
        return default
    raise IndexError(
        "Could not find location for %s as specified by %s" %
        (needle, entries))


def key_prefix_compare(x, y, order, strict=True, case=False):
    """cmp-style comparison of two (key, value) items by key prefix

    Items are ranked by the position of the first prefix in `order`
    which their key (first element) starts with.  If `strict`, a key
    matching no prefix results in an IndexError (raised by
    `prefix_index`); otherwise such keys get the least priority.  Items
    of equal rank are compared as a whole.
    """
    prefixes = order if case else [v.lower() for v in order]

    rank_x = prefix_index(x[0], prefixes, strict, case)
    rank_y = prefix_index(y[0], prefixes, strict, case)
    by_rank = cmp(rank_x, rank_y)
    # equal rank does not decide yet -- fall back to the items themselves
    return by_rank if by_rank else cmp(x, y)


def group_packages_into_tasks(pkgs):
    """Given a list of packages (with .tasks) group them per each
    task and perform necessary customizations stored in .tasks

    For every task of every package an independent copy of the package is
    created, updated with the customizations for that task, its values are
    %-interpolated against the package itself, empty fields are pruned,
    and fields are sorted according to FIELDS_ORDER (a field matching no
    known prefix raises IndexError).

    Returns a dict mapping the task name to the list of such copies.
    Each copy has 'Pkg-Source' and 'Pkg-Name' moved into the `.source`
    and `.name` attributes and carries `.action` -- the first of
    PKG_FIELDS present in it.
    """
    # Local import: the module itself imports only `reduce` from functools
    from functools import cmp_to_key

    # sorted() lost its cmp= argument in Python 3, so adapt the comparator
    fields_order_key = cmp_to_key(
        lambda x, y: key_prefix_compare(x, y, order=FIELDS_ORDER))

    # Time to take care about packages and tasks
    # Unroll pkgs into a collection of pkgs per known task
    tasks = {}
    for pkg in pkgs:
        # Lets just create deepcopies with tune-ups for each task
        for task, fields in pkg.tasks.items():
            pkg_ = deepcopy(pkg)
            pkg_.update(fields)

            # Perform string completions.  Iterate over a snapshot since
            # the mapping gets modified, and postpone removals so empty
            # fields remain available for interpolation into others
            empty = []
            for k, v in list(pkg_.items()):
                if v is None:
                    empty.append(k)
                    continue
                v = v % pkg_
                pkg_[k] = v
                if not v.strip():
                    empty.append(k)
            for k in empty:
                pkg_.pop(k)

            # Sort the fields according to FIELDS_ORDER. Unfortunately
            # silly Deb822* cannot create from list of tuples, so will do
            # manually
            pkg__ = deb822.Deb822()
            for k, v in sorted(pkg_.items(), key=fields_order_key):
                pkg__[k] = v

            # Move Pkg-source/name into attributes.  Source might have
            # been pruned above if it was never figured out
            pkg__.source = pkg__.pop('Pkg-Source', None)
            pkg__.name = pkg__.pop('Pkg-Name')
            # Store the action taken on the package for later on actions
            for f in PKG_FIELDS:
                if f in pkg__:
                    pkg__.action = f
                    break

            tasks.setdefault(task, []).append(pkg__)
    verbose(4, "Grouped %d packages into %d tasks: %s" %
            (len(pkgs), len(tasks), ', '.join(list(tasks.keys()))))
    return tasks

def inject_tasks(tasks, config):
    """Add, update or remove package entries in blends task files

    `tasks` maps 'blend/task' names to lists of packages, each carrying
    the `.name`, `.source` and `.action` attributes.  `config` provides
    option 'path' of the section named after the blend; the task file is
    <path>/tasks/<task>.

    For every package only the fields matching BLENDS_FIELDS_PREFIXES are
    rendered.  An existing entry is looked up by a dependency line
    mentioning the package's name or source:

    - found and action is 'remove': the entry is removed together with
      the marker comment of this tool right above it (if any);
    - found and differs from the rendered one: it is replaced;
    - found and identical: the file is left untouched;
    - not found and action is not 'remove': the entry is appended.

    The task file is re-read and re-written for every changed package.
    """
    # Marker comment placed above every entry we inject
    descr = ' ; Added by %s %s. [Please note here if modified manually]\n' % \
            (__prog__,  __version__)

    # Now go through task files and replace/add entries
    for task, pkgs in tasks.items():
        verbose(2, "Task %s with %d packages" % (task, len(pkgs)))
        blend, puretask = task.split('/')
        taskfile = expanduser(join(config.get(blend, 'path'), 'tasks', puretask))

        for pkg in pkgs:
            msgs = {'Name': pkg.name.strip(), 'Action': None}

            # Create a copy of the pkg with only valid tasks
            # fields:
            # TODO: make it configurable?
            pkg = deepcopy(pkg)
            for k in list(pkg.keys()):  # snapshot: we remove while looping
                if prefix_index(k, BLENDS_FIELDS_PREFIXES,
                                strict=False, default=None) is None:
                    pkg.pop(k) # remove it from becoming present in
                               # the taskfile

            # Find either it is known to the task file already

            # Load entirely so we could simply manipulate
            with open(taskfile) as task_fh:
                entries = task_fh.readlines()

            # We need to search by name and by source
            # We need to search for every possible type of dependency.
            # Names are escaped since they may contain '+' and '.'
            names = [re.escape(n) for n in (pkg.name, pkg.source) if n]
            regexp_str = '^ *(%s) *: *(%s) *$' \
                         % ('|'.join(PKG_FIELDS), '|'.join(names))
            verbose(4, "Searching for presence in %s using regexp: '%s'"
                    % (taskfile, regexp_str))
            regexp = re.compile(regexp_str, re.I)
            istart = None
            for iline, line in enumerate(entries):
                if regexp.search(line):
                    verbose(4, "Found %s in position %i: %s" %
                            (pkg.name, iline, line.rstrip()))
                    istart = iline
                    break

            entry = pkg.dump()
            # Replace existing entry?
            if istart is not None:
                # TODO: Check if previous copy does not have our preceding comment
                # Find the end of the entry: the first empty line or EOF
                iend = istart + 1
                while iend < len(entries) and entries[iend].strip() != '':
                    iend += 1

                # Lets not change file without necessity, if entry is identical --
                # do nothing
                if u''.join(entries[istart:iend]) == entry:
                    # no changes -- just go to the next one
                    continue

                # Rewrite the entry, along with our marker comment if it
                # precedes it (there is nothing above the very first line)
                if istart > 0 and __prog__ in entries[istart-1]:
                    istart -= 1
                if 'remove' != pkg.action:
                    entry = descr + entry
                    msgs['Action'] = 'Changed'
                else:
                    entry = ''
                    msgs['Action'] = 'Removed'
                # Empty lines around the entry are pruned below
                entries_prior = entries[:istart]
                entries_post = entries[iend:]
            elif not 'remove' == pkg.action:  # or Append one
                msgs['Action'] = 'Added'
                entries_prior = entries
                entry = descr + entry
                entries_post = []
                # could be as simple as appending to the file, but lets
                # do 'in full' for consistent handling of empty lines
                # around

            if not msgs['Action']:
                continue

            # Prepare for dumping
            # Prune spaces before
            while len(entries_prior) and entries_prior[-1].strip() == '':
                entries_prior = entries_prior[:-1]
            if len(entries_prior) and not entries_prior[-1].endswith('\n'):
                entries_prior[-1] += '\n' # assure present trailing newline
            # Prune spaces after
            while len(entries_post) and entries_post[0].strip() == '':
                entries_post = entries_post[1:]
            if len(entries_post) and len(entry):
                # only then trailing empty line
                entry += '\n'
            output = ''.join(entries_prior + [ '\n' + entry ] + entries_post)
            with open(taskfile, 'w') as task_fh:    # then only overwrite
                task_fh.write(output)

            verbose(3, "%(Action)s %(Name)s" % msgs)


class DebianMaterials(object):
    """Extract selected information from an existing debian/

    debian/control gets parsed lazily, upon the first access to `source`
    or `binaries`.
    """
    # Changelog line of the initial release closing the WNPP bug
    _WNPP_RE = re.compile(r'^ *\* *Initial release.*closes:? #(?P<bug>[0-9]*).*', re.I)

    def __init__(self, topdir):
        """`topdir` is the directory containing the debian/ directory"""
        #self.topdir = topdir
        self._debiandir = join(topdir, 'debian')
        self._source = None
        self._binaries = None

    def __repr__(self):
        return "<%s for %s>" % (self.__class__.__name__, self._debiandir)

    @property
    def source(self):
        """Paragraph of debian/control describing the source package
        (None if there is no such paragraph)"""
        if self._source is None:
            self._assign_packages()
        return self._source

    @property
    def binaries(self):
        """Dict of debian/control paragraphs of binary packages, by name"""
        if self._binaries is None:
            self._assign_packages()
        return self._binaries

    def fpath(self, name):
        """Return the path of the file `name` within debian/"""
        return join(self._debiandir, name)

    def _assign_packages(self):
        """Parse debian/control into the source and binaries paragraphs

        Raises RuntimeError if the file cannot be read or parsed.
        """
        control_file = self.fpath('control')
        source = None
        binaries = {}
        try:
            # Paragraphs are produced lazily, hence both reading and
            # parsing must happen within the try block
            with open(control_file) as control_fh:
                for v in deb822.Deb822.iter_paragraphs(control_fh):
                    if v.get('Source', None):
                        source = v
                    elif 'Package' in v:
                        # Checked since it might be hash-commented out
                        binaries[v['Package']] = v
        except Exception as e:
            raise RuntimeError(
                  "Cannot parse %s file. Error: %s"
                  % (control_file, str(e)))
        self._binaries = binaries
        self._source = source

    def get_license(self, package=None, first_only=True):
        """Return a license(s). Parsed out from debian/copyright if it is
        in machine readable format

        debian/<package>.copyright is preferred if it exists.  Paragraphs
        without 'Files', or with 'Files' starting with 'debian/', are
        skipped.  Only the first line of the License field is used
        ('custom' if it is empty).  With `first_only` just the first
        license found is returned, otherwise all unique ones joined
        with ', '.

        Returns None if the copyright file could not be processed.
        """
        licenses = []
        # may be package should carry custom copyright file
        copyright_file_ = self.fpath('%s.copyright' % package)
        if package and exists(copyright_file_):
            copyright_file = copyright_file_
        else:
            copyright_file = self.fpath('copyright')

        try:
            with open(copyright_file) as copyright_fh:
                for p in deb822.Deb822.iter_paragraphs(copyright_fh):
                    if 'Files' not in p or p['Files'].strip().startswith('debian/'):
                        continue
                    l = p['License']
                    # Take only the short version of first line
                    l = re.sub('\n.*', '', l).strip()
                    if not len(l):
                        l = 'custom'
                    if l not in licenses:
                        licenses.append(l)
                        if first_only:
                            break
        except Exception:
            # Best effort: the file might be absent or not be in the
            # machine readable format
            return None
        return ', '.join(licenses)

    def get_wnpp(self):
        """Search for a template changelog entry closing "Initial bug

        Returns the bug number (as a string) from the first matching line
        of debian/changelog, or None if no line matches.
        """
        with open(self.fpath('changelog')) as changelog_fh:
            for l in changelog_fh:
                rr = self._WNPP_RE.match(l)
                if rr:
                    return rr.groupdict()['bug']
        return None

    def get_responsible(self):
        """Returns responsible, atm -- maintainer
        """
        return self.source['Maintainer']

    def get_vcsfields(self):
        """Return a Deb822 with all Vcs-* fields of the source paragraph"""
        vcs = deb822.Deb822()
        # Go through the property so debian/control gets parsed if it
        # was not yet
        source = self.source
        if source is None:
            return vcs
        for f,v in source.items():
            if f.lower().startswith('vcs-'):
                vcs[f] = v
        return vcs

    def get_description(self, pkg_name):
        """Some logic to extract description.

           If binary package matching pkg_name is found -- gets it description.
           If no binary package with such name, and name matches source name,
           obtain description of the first binary package.
           Otherwise reports the error and exits.
        """
        if pkg_name in self.binaries:
            pass
        elif pkg_name.lower() == self.source['Source'].lower():
            pkg_name = list(self.binaries.keys())[0]
        else:
            error("Name %s does not match any binary, nor source package in %s"
                  % (pkg_name, self))
        return self.binaries[pkg_name]['Description']

def print_wnpp(pkgs, config, wnpp_type="ITP"):
    """Little helper to spit out formatted entry for WNPP bugreport

    Everything is based on the first package of `pkgs`.  `config` is not
    used.  `wnpp_type` (e.g. "ITP") is upper-cased for the subject.

    Fields absent from the package are rendered as UNKNOWN, so that the
    entry can still be produced and completed by hand.

    The entry is printed to stdout.  Filing it automatically through
    reportbug is not done, since it is not clear how to set the owner
    of the bug that way.
    """
    placeholder = 'UNKNOWN'

    pkg = pkgs[0]                       # everything is based on the 1st one
    opts = dict(list(pkg.items()))
    # Look fields up in the package itself (so that its own rules of
    # matching the keys apply) and fill in for the missing ones
    for field in ('Pkg-Name', 'Version', 'Author', 'Homepage', 'License',
                  'Language', 'Pkg-Description'):
        value = pkg.get(field, None)
        opts[field] = placeholder if value is None else value
    opts['WNPP-Type'] = wnpp_type.upper()
    # The short description is the first line of the full one
    opts['Pkg-Description-Short'] = re.sub('\n.*', '', opts['Pkg-Description'])

    subject = "%(WNPP-Type)s: %(Pkg-Name)s -- %(Pkg-Description-Short)s" % opts
    body = """*** Please type your report below this line ***

* Package name    : %(Pkg-Name)s
  Version         : %(Version)s
  Upstream Author : %(Author)s
* URL             : %(Homepage)s
* License         : %(License)s
  Programming Lang: %(Language)s
  Description     : %(Pkg-Description)s

""" % opts

    print("Subject: %s\n\n%s" % (subject, body))


def is_template(p):
    """Tell whether the pkg definition `p` looks like a mere template.

    A definition is considered a template (a skeleton which should not
    be processed) when none of the fields checked below is present with
    a non-empty value.
    """
    informative_fields = ('vcs-browser', 'pkg-url', 'pkg-description',
                          'published-Title', 'pkg-name', 'homepage',
                          'author')
    has_content = any(name in p and p[name] != ""
                      for name in informative_fields)
    return not has_content


def main():
    """Command line entry point

    Parses the options, loads the configuration file, figures out the
    debian/blends files to process (given in the command line, matching
    globs of option 'all' in section [paths] in -a mode, or debian/blends
    under the top directory), and for each of them either prints a WNPP
    entry or injects the packages into the blends task files.
    """

    p = OptionParser(
                usage="%s [OPTIONS] [blends_files]\n\n" % __prog__ + __doc__,
                version="%prog " + __version__)

    p.add_option(
        Option("-d", "--topdir", action="store",
               dest="topdir", default=None,
               help="Top directory of a Debian package. It is used to locate "
               "'debian/blends' if none is specified, and where to look for "
               "extended information."))

    p.add_option(
        Option("-c", "--config-file", action="store",
               dest="config_file", default=join(expanduser('~'), '.%s.cfg' % __prog__),
               help="Path to the configuration file."))

    p.add_option(
        Option("-v", "--verbosity", action="store", type="int",
               dest="verbosity", default=1, help="Noise level."))

    # We might like to create a separate 'group' of options for commands
    p.add_option(
        Option("-w", action="store_true",
               dest="wnpp", default=False,
               help="Operate in WNPP mode: dumps cut-paste-able entry for WNPP bugreport"))

    p.add_option(
        Option("--wnpp", action="store",
               dest="wnpp_mode", default=None,
               help="Operate in WNPP mode: dumps cut-paste-able entry for WNPP bugreport"))

    p.add_option(
        Option("-a", action="store_true",
               dest="all_mode", default=False,
               help="Process all files listed in paths.all"))


    (options, infiles) = p.parse_args()
    global verbosity; verbosity = options.verbosity

    if options.wnpp and options.wnpp_mode is None:
         options.wnpp_mode = 'ITP'

    # Load configuration
    default_skip = '.*[~#]$'
    config = ConfigParser(defaults={'skip': default_skip})
    config.read(options.config_file)

    if options.all_mode:
        if len(infiles):
            raise ValueError("Do not specify any files in -a mode.  Use configuration file, section paths, option all")
        # fallback must be given by keyword; it also covers the absent
        # section [paths]
        all_globs = config.get('paths', 'all', fallback=None)
        if all_globs is None:
            error("Option 'all' in section [paths] of %s is required for -a mode"
                  % options.config_file, 1)
        infiles = []
        for pattern in all_globs.split():
            infiles.extend(glob.glob(expanduser(pattern)))
        verbose(1, "Found %d files in specified paths" % len(infiles))

    if not len(infiles):
        infiles = [join(options.topdir or './', 'debian/blends')]     #  default one

    # Section [paths] is optional unless in -a mode, hence the fallback
    skip_re = re.compile(config.get('paths', 'skip', fallback=default_skip))

    for blends_file in infiles:
        verbose(1, "Processing %s" % blends_file)
        if not exists(blends_file):
            error("Cannot find a file %s.  Either provide a file or specify top "
                  "debian directory with -d." % blends_file, 1)
        if skip_re.match(blends_file):
            verbose(2, "W: Skipped since matches paths.skip regexp")
            continue
        pkgs = parse_debian_blends(blends_file)
        if options.topdir is None:
            if dirname(blends_file).endswith('/debian'):
                topdir = dirname(dirname(blends_file))
            else:
                topdir = '.'            # and hope for the best ;)
        else:
            topdir = options.topdir

        expand_pkgs(pkgs, topdir=topdir)

        pkgs = [pkg for pkg in pkgs if not is_template(pkg)]
        if not len(pkgs):
            verbose(2, "W: Skipping since seems to contain templates only")
            continue
        if options.wnpp_mode is not None:
            print_wnpp(pkgs, config, options.wnpp_mode)
        else:
            # by default -- operate on blends/tasks files
            tasks = group_packages_into_tasks(pkgs)
            inject_tasks(tasks, config)


# Run the command line interface only when executed as a script
if __name__ == '__main__':
    main()

