# (c) Copyright 2009-2015. CodeWeavers, Inc.

import errno
import ftplib
import gzip
import http.cookiejar as cookielib
import os
import socket
import threading
import traceback
import time

import urllib.request as urllib2
import urllib.parse as urlparse
import urllib.error as urllib_error

import proxyinfo
import cxlog
import cxurlget_base

# Executed as a side effect of importing this module.
# NOTE(review): presumably this installs a proxy-aware opener as urllib's
# default opener, i.e. the one urlopen() uses when a UrlGetter has no opener
# of its own -- confirm in proxyinfo.
proxyinfo.install_default_opener()

def _get_rfc822_value(string):
    """Return the value found at the start of string, RFC 822 style.

    If string starts with a double quote the value is a quoted-string: the
    text between that quote and the next one is returned (no escape
    processing is performed). Otherwise the value is a token which ends at
    the first space, special, control or non-ASCII character.
    """
    if string.startswith('"'):
        # quoted-string
        return string[1:].partition('"')[0]

    # token
    breakers = ' ()<>@,;:\\"/[]?='
    length = 0
    for char in string:
        if char in breakers or ord(char) <= 0x1f or ord(char) >= 0x7f:
            break
        length += 1
    return string[:length]

def wrap_exception(exception):
    """Convert urllib exceptions into their cxurlget_base counterparts.

    An HTTPError becomes a cxurlget_base.HttpError built from its code and
    reason, any other URLError becomes a cxurlget_base.UrlError built from
    its reason, and everything else is returned as is.
    """
    # HTTPError is a subclass of URLError so it must be checked first
    if isinstance(exception, urllib_error.HTTPError):
        wrapped = cxurlget_base.HttpError(exception.code, exception.reason)
    elif isinstance(exception, urllib_error.URLError):
        wrapped = cxurlget_base.UrlError(exception.reason)
    else:
        wrapped = exception
    return wrapped

class UrlGetter(cxurlget_base.UrlGetterBase):
    @classmethod
    def make_cookie_storage(cls):
        """Return a new, empty, cookie jar."""
        jar = cookielib.CookieJar()
        return jar

    def __init__(self, *args, **kwargs):
        """Initialize the getter and hook cookie handling into its opener.

        The arguments are passed on to cxurlget_base.UrlGetterBase, except
        for the optional 'opener' keyword argument which is the urllib opener
        to use for the requests. When cookie storage is in use a cookie
        handler is added to that opener, so the caller's object is modified.
        If no opener was specified one is built just for the cookies.
        """
        self.opener = kwargs.pop('opener', None)
        cxurlget_base.UrlGetterBase.__init__(self, *args, **kwargs)

        # Compare to None instead of testing for truth: CookieJar defines
        # __len__() so a freshly created, still empty, jar is falsy and would
        # otherwise never get hooked up, meaning no cookie would ever be
        # stored in it.
        if self.cookie_storage is not None:
            cookie_processor = urllib2.HTTPCookieProcessor(self.cookie_storage)
            if self.opener:
                self.opener.add_handler(cookie_processor)
            else:
                self.opener = urllib2.build_opener(cookie_processor)

    @staticmethod
    def getfield(headers, name, default=None):
        """Return the value of the first 'name' header, stripped of
        surrounding whitespace. When a header is duplicated the other
        instances are ignored rather than merged. Returns default if there is
        no such header."""
        instances = headers.get_all(name)
        return default if instances is None else instances[0].strip()

    def _gzip_decoding_thread(self, infile, outfile):
        """Thread body: gunzip the data read from infile into outfile.

        infile is read in binary mode and the decompressed bytes are written
        to outfile. Both are closed before returning. Errors are not raised:
        they are logged and stored in self.decoding_exception.
        self.decoding_event is set on every exit path so that whoever waits
        on it cannot block forever.
        """
        try:
            try:
                with gzip.GzipFile(mode='rb', fileobj=infile) as gzipfile:
                    while True:
                        data = gzipfile.read(4096)
                        if not data:
                            break
                        outfile.write(data)
            finally:
                # Closing outfile flushes it and so can fail (e.g. when the
                # disk is full). So close the files inside the outer try
                # block: this way the error is recorded and the event still
                # gets set.
                try:
                    infile.close()
                finally:
                    outfile.close()
        except Exception as e:
            cxlog.log("failed to decode gzip data from %s:\n%s" % (cxlog.to_str(self.url), traceback.format_exc()))
            self.decoding_exception = e
        finally:
            self.decoding_event.set()

    def _setup_gzip_decoding(self):
        """Insert a gzip decoding thread in front of self.outfile.

        self.outfile is replaced with the write end of a pipe. A worker
        thread running _gzip_decoding_thread() reads the other end and writes
        the decompressed data to the original output file.
        self.decoding_event is created for that thread to signal when it is
        done.
        """
        read_fd, write_fd = os.pipe()
        pipe_reader = os.fdopen(read_fd, 'rb')
        pipe_writer = os.fdopen(write_fd, 'wb')

        # The worker gets the real output file, the download gets the pipe
        destination = self.outfile
        self.outfile = pipe_writer
        self.decoding_event = threading.Event()

        threading.Thread(target=self._gzip_decoding_thread,
                         args=(pipe_reader, destination)).start()

    def _get_ftp_size(self, res):
        """Ask the FTP server for the size of the file res points to.

        res is the urlparse() result of an ftp url. Returns the size reported
        by the server or None if it could not be determined. This is a
        best-effort query: errors are logged, never raised.
        """
        try:
            # Use the hostname and port rather than the netloc: the latter
            # also contains the credentials and port, if any, and thus is not
            # usable as a host name.
            host = res.hostname
            if not host:
                return None
            ftp = ftplib.FTP(timeout=self.timeout)
            try:
                ftp.connect(host, res.port or 0)
                # Empty credentials result in an anonymous login
                ftp.login(urlparse.unquote(res.username or ''),
                          urlparse.unquote(res.password or ''))
                # The url path is percent-encoded, the FTP path is not
                return ftp.size(urlparse.unquote(res.path)) or None
            finally:
                ftp.close()
        except Exception:
            cxlog.log("failed to read size of url %s:\n%s" % (cxlog.to_str(self.url), traceback.format_exc()))
            return None

    def _analyze_headers(self, headers):
        """Extract the download properties from the response headers.

        Sets self.bytes_total (-1 if unknown), self.last_modified, self.etag,
        self.content_encoding and self.basename, calls notify_headers() and
        sets up the gzip decoding if the content is gzip-encoded.
        For ftp urls with no known size, the server is queried for it.

        Raises an Exception if the Content-Encoding is neither 'gzip' nor
        'identity', and a ValueError if Content-Length is not an integer.
        """
        self.bytes_total = int(self.getfield(headers, 'content-length', '-1'))
        self.last_modified = self.getfield(headers, 'Last-Modified')
        self.etag = self.getfield(headers, 'ETag')
        self.content_encoding = self.getfield(headers, 'Content-Encoding', 'identity')
        self.notify_headers(self, self.infile.geturl(), headers)

        if self.content_encoding == 'gzip':
            self._setup_gzip_decoding()
        elif self.content_encoding != 'identity':
            raise Exception("Unsupported Content-Encoding: %s" % self.content_encoding)

        if self.bytes_total == -1:
            res = urlparse.urlparse(self.url)
            if res.scheme == 'ftp':
                size = self._get_ftp_size(res)
                if size:
                    self.bytes_total = size

        # calculate the basename
        basename = ''
        disposition = headers.get('content-disposition', '')
        if 'filename=' in disposition:
            # FIXME: This is not entirely correct because filename= could appear in some other value
            filenamevalue = disposition.split('filename=', 1)[1]
            basename = cxurlget_base.url_to_sanitized_basename(_get_rfc822_value(filenamevalue))
        if not basename:
            # recheck the url in case we were redirected
            basename = cxurlget_base.url_to_sanitized_basename(self.infile.geturl())
        self.basename = basename

    def _fetch_chunk(self, start_time, start=0, end=-1):
        """Perform one download attempt, writing the data to self.outfile.

        start_time is the time.time() value at which the whole download
        started; it is only used for logging.
        start and end delimit the byte range to request. The defaults mean
        the whole file, in which case no Range header is sent and the request
        is made conditional on self.last_modified and self.etag if set.

        On the first attempt (self.try_count == 0) the headers are analyzed.
        On later attempts they are only checked against what the first
        attempt found. On success self.finished is set and notify_finished()
        is called.

        Raises an Exception if the size or encoding changed between attempts,
        or if the number of bytes received does not match the expected size.
        Errors from opening or reading the url are not caught here.
        """
        cxlog.log("fetching url %s from %d to %d" % (cxlog.to_str(self.url),
                                                     start, end))
        # FIXME: verb is currently ignored
        req = urllib2.Request(self.url, self.data)
        # Add an Accept header otherwise some websites return an
        # HTTP 403 error !
        req.add_header('Accept', '*/*')
        if self.user_agent is not None:
            req.add_header('User-Agent', self.user_agent)
        if start != 0 or end != -1:
            byte_range = 'bytes=%d-' % start
            if end != -1:
                byte_range += '%d' % end
            req.add_header('Range', byte_range)
            # if we have partial content, what we get next must match its encoding
            if self.content_encoding == 'identity':
                req.add_header('Accept-Encoding', 'identity')
            elif self.content_encoding:
                # Only accept the encoding of the data we already have
                req.add_header('Accept-Encoding', '%s, *;q=0' % self.content_encoding)
        else:
            if self.last_modified:
                req.add_unredirected_header("If-Modified-Since", self.last_modified)
            if self.etag:
                req.add_unredirected_header("If-None-Match", self.etag)
            req.add_header('Accept-Encoding', 'gzip')

        if self.opener:
            self.infile = self.opener.open(req, None, self.timeout)
        else:
            self.infile = urllib2.urlopen(req, None, self.timeout) # pylint: disable=R1732
        headers = self.infile.info()
        if self.try_count == 0:
            self._analyze_headers(headers)
        else:
            # This is a retry: make sure we are still getting the same file
            size = int(self.getfield(headers, 'content-length', '-1'))
            if size != -1 and size + self.bytes_downloaded != self.bytes_total:
                raise Exception('Download size changed from %d to %d' % (self.bytes_total, size + self.bytes_downloaded))
            content_encoding = self.getfield(headers, 'Content-Encoding', 'identity')
            if content_encoding != self.content_encoding:
                raise Exception('Server changed encoding from %s to %s' % (self.content_encoding, content_encoding))

        self.notify_progress(self)

        while True:
            data = self.infile.read(self.blocksize)
            if data == b'':
                break
            self.outfile.write(data)
            # Not += to avoid ambiguity with the static variable
            self.bytes_downloaded = self.bytes_downloaded + len(data)
            self.notify_progress(self)
        if self.bytes_total != -1 and self.bytes_downloaded != self.bytes_total: # pylint: disable=R1714
            raise Exception("expected to download %i bytes, got %i" % (self.bytes_total, self.bytes_downloaded))

        # The download is a success
        self.finished = True
        cxlog.log("fetched url %s (%i bytes) in %0.3fs\n" % (cxlog.to_str(self.url), self.bytes_downloaded, time.time() - start_time))
        self.notify_finished(self)

    def fetch(self):
        """Download self.url to self.outfile. This can only be called once.

        If the connection gets reset (ECONNRESET) after some progress was
        made, the download is resumed from the current position, up to 12
        times.

        Download errors are not raised: they are logged and reported through
        notify_failed(). An HTTP 304 response is only logged and a
        StopDownload exception silently ends the download. In all cases the
        connection and self.outfile are closed, and self.outfile is reset to
        None.

        Raises an Exception if fetch() has already been called, and the gzip
        decoding thread's exception if decoding the data failed.
        """
        if self.infile:
            raise Exception("fetch() has already been called")
        start_time = time.time()
        try:
            try:
                last_position = -1
                while not self.finished:
                    last_position = self.bytes_downloaded
                    try:
                        self._fetch_chunk(start_time, self.bytes_downloaded)
                    except socket.error as sockerr:
                        cxlog.log("caught %s last_position=%s bytes_downloaded=%d" % (repr(sockerr), last_position, self.bytes_downloaded))
                        if sockerr.errno != errno.ECONNRESET or \
                                last_position == self.bytes_downloaded or \
                                self.try_count >= 12:
                            # If it's not an ECONNRESET, or we did not make any
                            # progress, or we've tried hard enough already,
                            # then give up
                            raise
                    self.infile.close()
                    # Not += to avoid ambiguity with the static variable
                    self.try_count = self.try_count + 1
            except cxurlget_base.StopDownload:
                return
            except urllib_error.HTTPError as exception:
                if exception.code == 304:
                    cxlog.log("%s has not changed (%0.3fs)" % (cxlog.to_str(self.url), time.time() - start_time))
                else:
                    cxlog.log("failed to fetch url %s (%0.3fs):\n%s" % (cxlog.to_str(self.url), time.time() - start_time, traceback.format_exc()))
                    self.notify_failed(self, wrap_exception(exception))
                return
            except Exception as exception:
                cxlog.log("failed to fetch url %s (%0.3fs):\n%s" % (cxlog.to_str(self.url), time.time() - start_time, traceback.format_exc()))
                self.notify_failed(self, wrap_exception(exception))
        finally:
            try:
                # The loop above only closes the connection after a
                # successful chunk. Make sure it does not stay open when
                # bailing out.
                if self.infile:
                    self.infile.close()
            finally:
                self.outfile.close()
                self.outfile = None
        if self.content_encoding == 'gzip':
            self.decoding_event.wait()
            # The decoding thread only sets decoding_exception on failure
            decoding_exception = getattr(self, 'decoding_exception', None)
            if decoding_exception:
                raise decoding_exception
