##############################################################################
#
# Copyright (c) 2009 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Buildout download infrastructure"""

try:
    from hashlib import md5
except ImportError:
    from md5 import new as md5

try:
    # Python 3
    from urllib.error import HTTPError
    from urllib.request import Request, urlopen
    from urllib.parse import urlparse, urlsplit, quote
except ImportError:
    # Python 2
    from urlparse import urlparse, urlsplit
    from urllib2 import HTTPError, Request, urlopen, quote

from zc.buildout.easy_install import realpath
from base64 import b64encode
from contextlib import closing
import errno
import logging
import netrc
import os
import os.path
import re
import shlex
import shutil
import tempfile
import zc.buildout
from . import bytes2str, str2bytes
from .rmtree import rmtree


class netrc(netrc.netrc):
    """Lazily initialised variant of the standard ``netrc.netrc`` parser.

    Creating an instance reads nothing.  The first call to
    ``authenticators`` switches the instance over to the base class, runs
    the real initialisation and then delegates to the base implementation.
    """

    def __init__(*args):
        # Deliberately a no-op: loading is deferred to ``authenticators``.
        pass

    def authenticators(self, host):
        """Load the netrc data on first use, then look up ``host``.

        Returns whatever the base class ``authenticators`` returns for
        ``host``.  An ``IOError`` raised while loading is propagated unless
        its errno is ENOENT.
        """
        # Rebind the instance to its single base class so that the
        # ``__init__`` and ``authenticators`` calls below resolve to the
        # base implementations instead of the overrides in this class.
        self.__class__, = self.__class__.__bases__
        try:
            # Base-class initialisation without an explicit file argument.
            self.__init__()
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            # The file does not exist: initialise from the null device.
            self.__init__(os.devnull)
        return self.authenticators(host)

class Group(str):
    """String exposing its percent-encoded form as the ``quote`` attribute."""

    @property
    def quote(self):
        """Percent-encode this string with an empty set of extra safe
        characters, so that ``/`` is encoded as well."""
        return quote(self, safe='')

# Module-level instance of the lazy ``netrc`` class defined above.  From this
# point on the name ``netrc`` refers to that instance, no longer to the class
# or to the imported ``netrc`` module.
netrc = netrc()

class ChecksumError(zc.buildout.UserError):
    """Raised when the MD5 checksum of a resource does not match."""

class Download(object):
    """Configurable download utility.

    Handles the download cache and offline mode.

    Download(options=None, cache=-1, namespace=None,
             offline=-1, fallback=False, hash_name=False, logger=None)

    options: mapping of buildout options (e.g. a ``buildout`` config section)
    cache: path to the download cache (excluding namespaces); when left at
           -1, the ``download-cache`` option is used
    namespace: namespace directory to use inside the cache
    offline: whether to operate in offline mode; when left at -1, offline
             mode is on if the ``offline`` or ``install-from-cache`` option
             is 'true'
    fallback: whether to use the cache as a fallback (try downloading first),
              when an MD5 checksum is not given
    hash_name: whether to use a hash of the URL as cache file name
    logger: an optional logger to receive download-related log messages

    """

    def __init__(self, options=None, cache=-1, namespace=None,
                 offline=-1, fallback=False, hash_name=False, logger=None):
        """Store the configuration, filling unset values from ``options``.

        ``cache`` and ``offline`` use -1 to mean "not given".  In that case
        the cache path is read from the ``download-cache`` option, and
        offline mode is enabled when the ``offline`` or the
        ``install-from-cache`` option equals 'true'.
        """
        settings = {} if options is None else options
        self.directory = settings.get('directory', '')
        self.cache = settings.get('download-cache') if cache == -1 else cache
        self.namespace = namespace
        if offline == -1:
            # The second option is only consulted when the first one is not
            # 'true' (short-circuit kept on purpose).
            offline = (settings.get('offline') == 'true'
                       or settings.get('install-from-cache') == 'true')
        self.offline = offline
        self.fallback = fallback
        self.hash_name = hash_name
        self.logger = logger if logger else logging.getLogger('zc.buildout')

    @property
    def download_cache(self):
        """Resolved path of the configured cache, or None without a cache.

        The cache path is joined onto ``self.directory`` and passed through
        ``realpath``.
        """
        if self.cache is None:
            return None
        return realpath(os.path.join(self.directory, self.cache))

    @property
    def cache_dir(self):
        """Cache directory including the namespace, or None without a cache.

        An unset (falsy) namespace leaves the path at the cache root.
        """
        if self.download_cache is None:
            return None
        return os.path.join(self.download_cache, self.namespace or '')

    @property
    def __call__(self):
        """Download a file according to the utility's configuration.

        Being a property, this resolves to ``download_cached`` when a cache
        is configured and to ``download`` otherwise; the selected method is
        what actually receives the call arguments:

        url: URL to download
        md5sum: MD5 checksum to match
        path: where to place the downloaded file

        Returns the path to the downloaded file.

        """
        if self.cache:
            return self.download_cached
        return self.download

    def download_cached(self, url, md5sum=None, path=None, alternate_url=None):
        """Download a file from a URL using the cache.

        This method assumes that the cache has been configured.
        If a cached copy of a file has an MD5 mismatch, download
        and update the cache on success.

        url: URL to download
        md5sum: MD5 checksum to match, or None for no verification
        path: where to place the file; with None the cached (or freshly
              downloaded) file's own path is returned
        alternate_url: passed through to ``download``

        Returns a ``(path, is_temp)`` tuple.  Raises
        ``zc.buildout.UserError`` when the download cache directory does not
        exist.

        """
        if not os.path.exists(self.download_cache):
            raise zc.buildout.UserError(
                'The directory:\n'
                '%r\n'
                "to be used as a download cache doesn't exist.\n"
                % self.download_cache)
        cache_dir = self.cache_dir
        if not os.path.exists(cache_dir):
            os.mkdir(cache_dir)
        cache_key = self.filename(url)
        cached_path = os.path.join(cache_dir, cache_key)

        self.logger.debug('Searching cache at %s', cache_dir)
        if os.path.exists(cached_path):
            if check_md5sum(cached_path, md5sum):
                # Without a checksum the cached copy is only used directly
                # when the cache is not configured as a mere fallback.
                if md5sum or not self.fallback:
                    self.logger.debug('Using cache file %s', cached_path)
                    return locate_at(cached_path, path), False
            else:
                self.logger.warning(
                    'MD5 checksum mismatch for cached download from %r at %r',
                    url, cached_path)
            # Don't download directly to cached_path to minimize
            # the probability to alter old data if download fails.
            try:
                path, is_temp = self.download(url, md5sum, path, alternate_url)
            except ChecksumError:
                raise
            except Exception:
                # With a checksum we only get here after a cache mismatch,
                # so there is no valid cached copy to fall back to.
                if md5sum:
                    raise
                # ``exc_info`` is the keyword the logging API accepts for
                # attaching the current traceback; any other keyword makes
                # the call raise TypeError when DEBUG logging is enabled.
                self.logger.debug("Fallback to cache using %s",
                                  cached_path, exc_info=1)
            else:
                samefile = getattr(os.path, 'samefile', None)
                if not (samefile and samefile(path, cached_path)):
                    # update cache
                    try:
                        os.remove(cached_path)
                    except OSError as e:
                        if e.errno != errno.EISDIR:
                            raise
                        rmtree(cached_path)
                    locate_at(path, cached_path)
                return path, is_temp
        else:
            self.logger.debug('Cache miss; will cache %s as %s',
                              url, cached_path)
            self.download(url, md5sum, cached_path, alternate_url)

        return locate_at(cached_path, path), False

    def download(self, url, md5sum=None, path=None, alternate_url=None):
        """Download a file from a URL to a given or temporary path.

        Local resources (``file`` scheme, which is also the default for
        scheme-less URLs) are checksum-verified and placed at ``path``
        without any download.  For anything else, ``_download`` writes to
        ``path`` when it is given and to a fresh temporary file otherwise;
        in the latter case the client code is responsible for cleaning the
        temporary file up.  The target file is removed again if the
        download fails.

        Returns a ``(path, is_temp)`` tuple.

        """
        # Make sure the drive letter in windows-style file paths isn't
        # interpreted as a URL scheme.
        if re.match(r"^[A-Za-z]:\\", url):
            url = 'file:' + url

        scheme, _, local_path = urlparse(url, 'file')[:3]
        if scheme == 'file':
            self.logger.debug('Using local resource %s' % url)
            if check_md5sum(local_path, md5sum):
                return locate_at(local_path, path), False
            raise ChecksumError(
                'MD5 checksum mismatch for local resource at %r.' %
                local_path)

        if self.offline:
            raise zc.buildout.UserError(
                "Couldn't download %r in offline mode." % url)

        self.logger.info('Downloading %s' % url)
        target = path
        succeeded = False
        try:
            if not path:
                handle, target = tempfile.mkstemp(prefix='buildout-')
                os.close(handle)
            self._download(url, target, md5sum, alternate_url)
            succeeded = True
        finally:
            # Leave no partial file behind when anything above failed.
            if not succeeded and target:
                remove(target)

        return target, not path

    def _download(self, url, path, md5sum=None, alternate_url=None):
        """Retrieve ``url`` into ``path`` and verify the MD5 checksum.

        When the first attempt raises ``HTTPError`` and ``alternate_url`` is
        given, the download is retried once from the alternate URL.

        Raises ``ChecksumError`` on a checksum mismatch, and
        ``zc.buildout.UserError`` in place of any ``IOError``.
        """
        fetched_from = url
        try:
            try:
                self.urlretrieve(url, path)
            except HTTPError:
                if alternate_url:
                    self.logger.info('using alternate URL: %s', alternate_url)
                    fetched_from = alternate_url
                    self.urlretrieve(alternate_url, path)
                else:
                    raise
            if check_md5sum(path, md5sum):
                return
            raise ChecksumError('MD5 checksum mismatch downloading %r'
                                % fetched_from)
        except IOError as error:
            raise zc.buildout.UserError("Error downloading %s: %s"
                                        % (fetched_from, error))

    def filename(self, url):
        """Determine a file name from a URL according to the configuration.

        With ``hash_name`` set, the name is the MD5 hex digest of the URL.
        Otherwise it is the last non-empty component of the URL path.  When
        the path has no such component, ``'<host>:<port>'`` is returned,
        with a missing host or port left empty.

        """
        if self.hash_name:
            return md5(url.encode()).hexdigest()

        # Make sure the drive letter in windows-style file paths isn't
        # interpreted as a URL scheme.
        if re.match(r"^[A-Za-z]:\\", url):
            url = 'file:' + url
        parsed = urlparse(url, 'file')
        url_path = parsed[2]

        if parsed[0] == 'file':
            while True:
                head, name = os.path.split(url_path)
                if name:
                    return name
                # ``os.path.split`` of a bare root returns the root itself;
                # stop as soon as no progress is made instead of spinning.
                if not head or head == url_path:
                    break
                url_path = head
        else:
            for name in reversed(url_path.split('/')):
                if name:
                    return name

        # Host and port are attributes of the parse result; the last two
        # tuple items are the query and the fragment.
        url_host = parsed.hostname or ''
        try:
            url_port = parsed.port
        except ValueError:
            # Malformed port in the URL: treat it as absent.
            url_port = None
        return '%s:%s' % (url_host, '' if url_port is None else url_port)

    def _rewrite_url(self, base_url, path_query, line_list):
        """Apply the first matching pattern/template pair to ``path_query``.

        ``line_list`` is the list of lines of the selected macdef, read as
        alternating pattern and template lines (an unpaired trailing line is
        ignored).  Each stripped pattern, with ``$`` appended, is matched
        against ``path_query``.  For the first match, the template is split
        with ``shlex.split`` and every piece is formatted with ``base_url``
        as field 0 and the match groups, wrapped in ``Group``, as the
        following fields.

        Returns the list of formatted pieces, or None when nothing matches.
        """
        lines = iter(line_list)
        # Zipping an iterator with itself yields consecutive pairs.
        for pattern, template in zip(lines, lines):
            found = re.match(pattern.strip() + '$', # PY3: re.fullmatch
                             path_query)
            if found is None:
                continue
            fields = [base_url] + [Group(text) for text in found.groups()]
            return [piece.format(*fields) for piece in shlex.split(template)]

    def _auth(self, url):
        """Work out credentials and the effective URL for an HTTP(S) URL.

        Returns None for any other scheme, and when the URL carries no
        credentials and the module-level ``netrc`` has no entry for the
        host.  Otherwise returns ``(login, password, url, headers)``:

        - credentials embedded in the URL come back verbatim as ``login``
          with ``password`` None, the URL stripped of them, and no headers;
        - for a netrc entry, the URL is rewritten using the
          ``buildout:<hostname>`` macro when one matches; the first
          resulting item is the URL and the remaining items are the headers.
        """
        parts = urlsplit(url)
        if parts.scheme not in ('http', 'https'):
            return None

        credentials, at_sign, host = parts.netloc.rpartition('@')
        if at_sign:
            bare_url = parts._replace(netloc=host).geturl()
            return credentials, None, bare_url, ()

        auth = netrc.authenticators(parts.hostname)
        if auth is None:
            return None
        rewritten = self._rewrite_url(
            parts._replace(path='', query='', fragment='').geturl(),
            parts._replace(scheme='', netloc='', fragment='').geturl(),
            netrc.macros.get('buildout:' + parts.hostname, ()),
        ) or [url]
        target = rewritten.pop(0)
        return auth[0], auth[2], target, rewritten

    def urlretrieve(self, url, tmp_path):
        """Fetch ``url`` and write the response body to ``tmp_path``.

        When ``_auth`` returns credentials, the request goes to the URL it
        supplies, carries each extra ``name=value`` header it lists, and
        gets a Basic ``Authorization`` header.  Otherwise ``url`` is opened
        as is.

        Returns ``(tmp_path, info)`` where ``info`` is the result of the
        response's ``info()`` call.
        """
        auth = self._auth(url)
        if not auth:
            req = url
        else:
            login, password, real_url, extra_headers = auth
            req = Request(real_url)
            if url != real_url:
                self.logger.info('Downloading from url: %s', real_url)
            for header in extra_headers:
                req.add_header(*header.split('=', 1))
            if password is None:
                # ``login`` is used on its own as the credential string.
                cred = login
            else:
                cred = ':'.join((login, password))
            req.add_header("Authorization",
                           "Basic " + bytes2str(b64encode(str2bytes(cred))))

        with closing(urlopen(req)) as src:
            with open(tmp_path, 'wb') as dst:
                shutil.copyfileobj(src, dst)
            headers = src.info()
        return tmp_path, headers


class Download(Download):
    """Extension of the ``Download`` class defined above whose ``_download``
    consults ``networkcache_client`` before downloading and offers the
    downloaded file to it afterwards.
    """

    def _download(self, url, path, md5sum=None, alternate_url=None):
        """Fetch ``url`` into ``path``, trying the network cache first.

        If the network cache delivers a file that passes ``check_md5sum``,
        the method returns immediately.  In every other case the parent
        ``_download`` is run; afterwards, if the network cache is still in
        play and ``tryUpload`` agrees, the file at ``path`` is uploaded.
        URLs for which ``_auth`` returns a value skip the network cache
        completely, for both download and upload.
        """
        # NOTE(review): imported at call time rather than at module level;
        # presumably because the client is set up after import -- confirm.
        from .buildout import networkcache_client as nc
        # ``while`` is used only so that ``break`` can leave the block early;
        # every path through the body ends in ``break`` or ``return``.
        while nc: # not a loop
            if self._auth(url): # do not cache restricted data
                # Clearing ``nc`` also disables the upload step below.
                nc = None
                break
            # Cache key built from the MD5 hex digest of the URL.
            key = 'file-urlmd5:' + md5(url.encode()).hexdigest()
            # NOTE(review): ``tryDownload`` presumably tells whether a
            # download attempt should be made for this key -- confirm.
            if not nc.tryDownload(key):
                break
            with nc:
                # First entry selected for this key and URL, if any.
                entry = next(nc.select(key, {'url': url}), None)
                if entry is None:
                    err = 'no matching entry'
                else:
                    with closing(nc.download(entry['sha512'])) as src, \
                         open(path, 'wb') as dst:
                        shutil.copyfileobj(src, dst)
                    if check_md5sum(path, md5sum):
                        return
                    err = 'MD5 checksum mismatch'
                self.logger.info('Cannot download from network cache: %s', err)
            break
        # Regular download; it rewrites ``path`` even if the network cache
        # left a file with a wrong checksum there.
        super(Download, self)._download(url, path, md5sum, alternate_url)
        # ``key`` is always bound when ``nc`` is still truthy here.
        if nc and nc.tryUpload(key):
            with nc, open(path, 'rb') as f:
                nc.upload(f, key, url=url)


def check_md5sum(path, md5sum):
    """Tell whether the MD5 checksum of the file at path matches.

    When ``md5sum`` is None the file is not even opened and the result is
    True.  Otherwise the file is read in 64 KiB chunks and its hex digest
    is compared to ``md5sum`` exactly as given.

    """
    if md5sum is None:
        return True

    with open(path, 'rb') as stream:
        digest = md5()
        # Two-argument ``iter`` keeps reading until an empty chunk (EOF).
        for block in iter(lambda: stream.read(2**16), b''):
            digest.update(block)
        return digest.hexdigest() == md5sum


def remove(path):
    """Delete the file at ``path`` if it exists; do nothing otherwise."""
    if not os.path.exists(path):
        return
    os.remove(path)


def locate_at(source, dest):
    """Make ``source`` available at ``dest`` and return the resulting path.

    Nothing is done, and ``source`` is returned, when ``dest`` is None or
    resolves to the same real path as ``source``.  A directory is copied
    with ``shutil.copytree``; a file is hard-linked, falling back to a
    plain copy when linking is unavailable or fails.
    """
    if dest is not None and realpath(dest) != realpath(source):
        if os.path.isdir(source):
            shutil.copytree(source, dest)
            return dest
        try:
            os.link(source, dest)
        except (AttributeError, OSError):
            shutil.copyfile(source, dest)
        return dest
    return source
