#!/usr/bin/python3

# pylint: disable=superfluous-parens,wrong-import-position
# superfluous-parens: Parentheses are good for clarity and portability

"""
Check for changes in a series of URLs. E-mail an HTML diff for each page that changed.

The URLs to check come from ~/.page-change/urls, one URL per line; text from '#' to end of line is ignored.
"""

import io
import os
import re
import sys
import difflib
import subprocess

sys.path.insert(0, os.path.expanduser('~/lib'))

import mailer as mailer_mod  # noqa: disable=E402


def find_links_binary():
    """Return True iff links binary is on our path.

    Runs ``links --help`` with its output discarded.  An exit status of 0 or 3 is taken to mean the binary
    was found and ran; any other status, or a failure to launch it at all, yields False.
    """
    # This didn't help with a highly dynamic page, but I've added it here as a comment in case I revisit that
    # someday.
    # 'elinks' '-no-numbering' '-dump' 'https://apps.fedoraproject.org/packages/bash'
    try:
        # Run the binary directly (no shell), so the exit status comes back as a plain int.
        exit_code = subprocess.call(
            ['links', '--help'],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except OSError:
        # Not on the path, or not executable.
        return False
    return exit_code in (0, 3)


def get_prior_url_filename(directory, url):
    """Map a URL onto the file under 'directory' that holds its saved text.

    Both arguments are bytes.  Every b'/' in the URL is turned into b'-' so the URL becomes a single
    path component, which is then joined onto 'directory'.
    """
    flattened_url = url.replace(b'/', b'-')
    return os.path.join(directory, flattened_url)


def put_current_url_lines(directory, url, text):
    """Save the URL text.

    'text' is an iterable of bytes lines.  Each line is written with trailing whitespace stripped and a
    single newline appended, to the file that get_prior_url_filename() derives from 'directory' and 'url'.
    Any existing file is overwritten.
    """
    prior_url_filename = get_prior_url_filename(directory, url)
    # The with-block closes the file even if a write fails part-way through.
    with open(prior_url_filename, 'wb') as file_:
        file_.writelines(b'%s\n' % line.rstrip() for line in text)


def get_prior_url_lines(directory, url):
    """Read the previous content of the URL from disk.

    Returns a list of bytes lines with trailing whitespace removed and every run of spaces collapsed to a
    single space.  If the saved file cannot be opened or read (for example because nothing has been saved
    for this URL yet), an empty list is returned instead.
    """
    prior_url_filename = get_prior_url_filename(directory, url)
    try:
        # The with-block guarantees the handle is closed even when reading fails midway.
        with open(prior_url_filename, 'rb') as file_:
            return [re.sub(b'  *', b' ', line.rstrip()) for line in file_]
    except IOError:
        return []


def make_used(variable):
    """Accept 'variable' and do nothing with it, so that pyflakes counts it as used at the call site."""
    # Dropping the local reference has no effect outside this function.
    del variable


def get_current_url_lines(url):
    """Retrieve the current URL text using links, so that we get a plain-text dump of the page.

    This is a generator, so links is not run until the first line is requested.  Each item yielded is one
    line of the dump as bytes, with trailing whitespace removed and every run of spaces collapsed to a
    single space.

    Raises IOError if links exits with a nonzero status (the status is also printed to stdout).  A links
    binary that cannot be launched surfaces as the OSError raised by subprocess.
    """
    command = ['links', '-html-numbered-links', '0', '-dump', url]
    # Only stdout is captured; whatever links writes to stderr goes to our own stderr.
    completed = subprocess.run(command, stdout=subprocess.PIPE, check=False)
    if completed.returncode != 0:
        print(completed.returncode)
        raise IOError('links exited with status %d' % completed.returncode)
    for line in io.BytesIO(completed.stdout):
        yield re.sub(b'  *', b' ', line.rstrip())


def dump(text, filename):
    """Dump a list of byte strings, or a single byte string, to a filename for debugging.

    A list is written as a b'list' header line followed by one element per line, each with trailing
    whitespace stripped.  Anything else is written verbatim after a b'string' header line.  The file is
    opened in binary mode and overwritten.
    """
    # The with-block closes the file even if one of the writes raises.
    with open(filename, 'wb') as file_:
        if isinstance(text, list):
            file_.write(b'list\n')
            file_.writelines(b'%s\n' % line.rstrip() for line in text)
        else:
            file_.write(b'string\n')
            file_.write(text)


def main():
    # pylint: disable=too-many-locals
    """Check list of URL's for changes.

    For each URL listed in ~/.page-change/urls (blank lines are skipped and text from '#' to end of line is
    ignored), fetch the page text, compare it with the copy saved by the previous run and, when they differ,
    e-mail an HTML diff and save the new text.  Exits with status 1 if the urls file cannot be opened.
    """
    verbose = True
    if not find_links_binary():
        # Only a warning: the loop below still runs and reports each fetch that fails.
        sys.stderr.write('%s: Could not find links\n' % sys.argv[0])
    mailer = mailer_mod.Mailer('drs4auto@gmail.com')
    directory = os.path.expanduser(b'~/.page-change')
    urls_path = os.path.join(directory, b'urls')
    try:
        urls_file = open(urls_path, 'rb')
    except IOError:
        # Paths are bytes here; decode for display instead of mixing bytes and str in os.path.join.
        sys.stderr.write('%s: Error opening %s\n' % (sys.argv[0], os.fsdecode(urls_path)))
        sys.exit(1)
    # The with-block closes the urls file even if fetching, mailing or saving raises.
    with urls_file:
        for raw_url in urls_file:
            url = re.sub(b'#.*$', b'', raw_url).strip()
            if not url:
                continue
            # URLs are handled as bytes; this str form is only for log messages and the e-mail subject.
            display_url = url.decode('utf-8', 'replace')
            if verbose:
                sys.stderr.write('getting current for %s\n' % display_url)
            try:
                current_url_lines = list(get_current_url_lines(url))
            except (IOError, OSError):
                sys.stderr.write('Error getting url {}\n'.format(display_url))
                continue
            if verbose:
                sys.stderr.write('getting prior\n')
            prior_url_lines = list(get_prior_url_lines(directory, url))
            if prior_url_lines == current_url_lines:
                sys.stderr.write('no change\n')
                continue
            differ = difflib.HtmlDiff()
            if verbose:
                sys.stderr.write('diffing\n')
            # We're almost completely byte strings, except differ.make_table really wants strings :-S
            decoded_prior_url_lines = [line.decode("utf-8", 'replace') for line in prior_url_lines]
            decoded_current_url_lines = [line.decode("utf-8", 'replace') for line in current_url_lines]
            diffs_in_html = differ.make_table(
                decoded_prior_url_lines,
                decoded_current_url_lines,
                context=True,
                )
            # NOTE(review): as displayed, both arguments are a plain space, which makes this a no-op.
            # Presumably the first was meant to be a non-breaking space - confirm against the original bytes.
            diffs_in_html = diffs_in_html.replace(' ', ' ')
            if verbose:
                sys.stderr.write('emailing\n')

            address = 'strombrg@gmail.com'

            subject = 'Changes in %s' % display_url
            from_address = address
            to_address = address

            mailer.send(from_address, to_address, subject, diffs_in_html, is_html=True)

            # Saved only after the mail went out, so a failed send is retried on the next run.
            put_current_url_lines(directory, url, current_url_lines)


# Script entry point.  NOTE(review): there is no __name__ == '__main__' guard, so importing this file
# also runs main().
main()