"""Code to compare files."""

__copyright__ = "(C) Copyright Aquaveo 2025"
__license__ = "All rights reserved"

# 1. Standard Python modules
import difflib
import filecmp
import functools
import itertools
import math
import operator
from pathlib import Path
import platform
import re
import shutil
import sqlite3
import subprocess
from typing import Iterable

# 2. Third party modules
from PIL import Image, ImageChops

# 3. Aquaveo modules
from xms.executables.h5diff import paths as h5diff_paths

# 4. Local modules
from xms.testing.type_aliases import Pathlike


def null_path(filepath: Pathlike) -> bool:
    """Return True if the path is "null": None, an empty str, or a pathlib.Path equal to '.'.

    Args:
        filepath: Path to file.

    Returns:
        True if the path is None, '' (when a str), or '.' (when a pathlib.Path); otherwise False.
    """
    if filepath is None:
        return True
    if isinstance(filepath, str):
        return filepath == ''
    if isinstance(filepath, Path):
        # Path('') normalizes to '.', so an "empty" Path shows up as '.'
        return str(filepath) == '.'
    return False


def ascii_files_equal(filepath1: Pathlike, filepath2: Pathlike, comments: Iterable | None = None) -> bool:
    """Return True if ascii files are equal, optionally ignoring lines that start with a comment token.

    Args:
        filepath1: Path to first file.
        filepath2: Path to second file.
        comments: Tokens that mark a line as a comment. Differing lines that start with one of these tokens (after
            leading whitespace is stripped) are ignored.

    Returns:
        (bool): True if equal, else False. False is also returned if either path is null or not an existing file.
    """
    if not _check_file_paths(filepath1, filepath2):
        return False

    # Materialize once so a one-shot iterable (e.g. a generator) can be checked against every differing line
    prefixes = tuple(comments) if comments else ()
    if not prefixes:  # Nothing to ignore, just compare files
        # shallow=False forces a content comparison; the default would trust matching size/mtime signatures
        return filecmp.cmp(filepath1, filepath2, shallow=False)

    with open(filepath1) as f:
        f1_lines = f.readlines()
    with open(filepath2) as f:
        f2_lines = f.readlines()

    # Any added/removed line that does not start with one of the comment tokens is a real difference
    for line in difflib.Differ().compare(f1_lines, f2_lines):
        if line.startswith(('+ ', '- ')) and not line[2:].lstrip().startswith(prefixes):
            return False
    return True


def binary_files_equal(filepath1: Pathlike, filepath2: Pathlike, bytes_to_skip: int = 0) -> bool:
    """Return True if two binary files have identical contents, optionally ignoring a leading header.

    Args:
        filepath1: Path to first file.
        filepath2: Path to second file.
        bytes_to_skip: Number of bytes at the start of each file to exclude from the comparison.

    Returns:
        (bool): True if equal, else False. False is also returned if either path is null or not an existing file.
    """
    if not _check_file_paths(filepath1, filepath2):
        return False

    remainders = []
    with open(filepath1, 'rb') as first, open(filepath2, 'rb') as second:
        if bytes_to_skip:
            # Consume the header from both files before reading anything we compare
            first.read(bytes_to_skip)
            second.read(bytes_to_skip)
        # Everything after the skipped header is compared in one shot
        remainders.append(first.read())
        remainders.append(second.read())
    return remainders[0] == remainders[1]


def sqlite_databases_equal(filepath1: Pathlike, filepath2: Pathlike) -> bool:
    """Use the sqldiff utility to compare two sqlite database files and return True if they are equal.

    sqldiff is run with '--summary'; the databases are considered equal when every line of its output reports
    '0 changes, 0 inserts, 0 deletes'.

    Args:
        filepath1: Path to first file.
        filepath2: Path to second file.

    Returns:
        (bool): True if equal, else False. False is also returned if either file is missing, is not a SQLite
        database, or if sqldiff exits with a non-zero return code.

    Raises:
        Exception: Anything raised while launching sqldiff or decoding its output is re-raised after the command
            line has been printed.
    """
    if not _check_file_paths(filepath1, filepath2):
        return False

    if not is_sqlite_database(filepath1) or not is_sqlite_database(filepath2):
        return False

    sqldiff_path = _ensure_sql_exe_exists()
    args = [str(sqldiff_path), '--summary', str(filepath1), str(filepath2)]
    try:
        completed = subprocess.run(args, stdout=subprocess.PIPE)
        if completed.returncode != 0:
            # sqldiff failed; without this check an empty stdout would be mistaken for "no differences"
            return False
        pattern = re.compile(r'.*: 0 changes, 0 inserts, 0 deletes, .* unchanged')
        # Every summary line (one per table) must report no changes
        return all(pattern.match(line.decode('utf-8')) for line in completed.stdout.splitlines())
    except Exception:
        print(f'{args}\n')
        raise


def h5_files_equal(filepath1: Pathlike, filepath2: Pathlike, diffs: list[str] | None = None) -> bool:
    """Compare two H5 files using the h5diff executable.

    Args:
        filepath1: First file path.
        filepath2: Second file path.
        diffs: If not None, any differences found will be appended to the list.

    Returns:
        True if the files are equal according to h5diff.exe. False if either file is missing, h5diff reports
        differences or an error, or h5diff produces output despite exiting successfully.

    Raises:
        Exception: Anything raised while running h5diff or decoding its output is re-raised after the command
            line has been printed.
    """
    if not _check_file_paths(filepath1, filepath2):
        # diffs is optional (defaults to None), so guard before appending
        if diffs is not None:
            diffs.append('FAIL - missing file(s)')
        return False

    h5diff_path = h5diff_paths['h5diff.exe']
    args = [str(h5diff_path), str(filepath1), str(filepath2)]
    try:
        popen = subprocess.Popen(args, stdout=subprocess.PIPE)
        output, _ = popen.communicate()
        returncode = popen.returncode

        if returncode == 0:
            # If we exited successfully, there shouldn't be any output, but just in case...
            out_str = output.decode('utf-8')
            if out_str and diffs is not None:
                diffs.append(out_str)
            return out_str == ''

        if diffs is not None:
            out_str = output.decode('utf-8')
            if returncode == 1:
                diffs.append(f"Files are different.{out_str}")
            elif returncode == 2:
                diffs.append(f"h5diff returned an error: {out_str}")
            else:
                # We shouldn't be able to get a return code other than 0, 1, or 2, but just in case...
                diffs.append(f"Unexpected return code '{returncode}'\n{out_str}")
        return False
    except Exception:
        print(f'{args}\n')
        raise


def dbf_files_equal(filepath1: Pathlike, filepath2: Pathlike) -> bool:
    """Compare two .dbf files, ignoring the first 4 bytes (a version number and a date that changes).

    Args:
        filepath1: Path to first file.
        filepath2: Path to second file.

    Returns:
        (bool): True if equal, else False.
    """
    # The leading version number and date occupy 4 bytes and are excluded from the comparison
    header_bytes = 4
    return binary_files_equal(filepath1, filepath2, header_bytes)


def image_files_equal(base: Pathlike, out: Pathlike, tolerance: float, method: str = 'rms') -> bool:
    """Return True if two image files are equal within tolerance.

    Uses Pillow library. Currently only the 'rms' (root mean squared) method is available. Files that are
    byte-for-byte identical are reported equal without decoding the images.

    Args:
        base: Filepath of first file.
        out: Filepath of second file.
        tolerance: Allowable difference between two image files (to be tested)
        method: Method to use for comparison. Options are 'rms' (root mean squared).

    Returns:
        (bool): True if the image difference is less than or equal to tolerance. False if either path is null or
        not an existing file.

    Raises:
        ValueError: If method is not a known comparison method (only checked when the files are not identical).
    """
    if not _check_file_paths(base, out):
        return False

    if binary_files_equal(base, out):
        return True

    # Context managers make sure both image files are closed, even if the comparison raises
    with Image.open(base) as im1, Image.open(out) as im2:
        if method != 'rms':
            raise ValueError(f"Unknown method '{method}'")

        # See https://stackoverflow.com/a/11818358/5666265
        histogram = ImageChops.difference(im1, im2).histogram()
        # NOTE(review): the referenced answer weights each bin as count * (idx % 256)**2, whereas this computes
        # (idx % 256) * count**2. Kept as-is because existing tolerances depend on it - confirm before changing.
        weighted_sum = sum((idx % 256) * count**2 for idx, count in enumerate(histogram))
        rms = math.sqrt(weighted_sum / (float(im1.size[0]) * im1.size[1]))
    return rms <= tolerance


def is_binary_file(filepath: str | Path) -> bool:
    """Return True if the first 1024 bytes of the file contain any byte not considered text.

    See https://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python

    Args:
        filepath: The file path.

    Returns:
        (bool): True if binary, else False. False is also returned if the path is null or not an existing file.
    """
    if null_path(filepath):
        return False
    if not Path(filepath).is_file():
        return False

    # Byte values treated as text: a handful of control characters plus 0x20-0xff except DEL (0x7f)
    control_chars = {7, 8, 9, 10, 12, 13, 27}
    printable_chars = set(range(0x20, 0x100)) - {0x7f}
    text_bytes = bytearray(control_chars | printable_chars)

    with open(filepath, 'rb') as stream:
        sample = stream.read(1024)
    # Deleting every text byte leaves only the non-text ones; anything left means binary
    leftover = sample.translate(None, text_bytes)
    return bool(leftover)


def is_sqlite_database(filepath: Pathlike) -> bool:
    """Return True if sqlite3 can open the file and execute 'PRAGMA integrity_check' without a database error.

    The result of the integrity check itself is not inspected; only whether executing it raises
    sqlite3.DatabaseError.

    Args:
        filepath: The file.

    Returns:
        True if the pragma executed without sqlite3.DatabaseError; False if it raised one, or if the path is null
        or not an existing file.
    """
    if null_path(filepath) or not Path(filepath).is_file():
        return False

    con = sqlite3.connect(filepath)
    try:
        con.execute("PRAGMA integrity_check")
        return True
    except sqlite3.DatabaseError:
        return False
    finally:
        # Always release the connection, including when an unexpected exception propagates
        con.close()


def is_dbf_file(filepath: Pathlike) -> bool:
    """Return True if the path is an existing file whose suffix is '.dbf' (case-insensitive).

    Args:
        filepath: Path to file.

    Returns:
        True for an existing file with a '.dbf' suffix; False otherwise, including for null paths.
    """
    if null_path(filepath):
        return False
    candidate = Path(filepath)
    return candidate.is_file() and candidate.suffix.lower() == '.dbf'


def are_dir_trees_equal(
    base_dir: Pathlike,
    out_dir: Pathlike,
    skip_extensions: Iterable[str] | None = None,
    comments: Iterable[str] | None = None,
    diffs: list[str] | None = None,
    update_base: bool = False
) -> bool:
    """Return True if two directory trees are equal, recursively.

    See stackoverflow.com/questions/4187564
    Files in each directory are assumed to be equal if their names and contents are equal.

    Args:
        base_dir: First directory.
        out_dir: Second directory.
        skip_extensions: List of extensions of files to skip when comparing.
        comments: List of strings that can appear at the beginning of lines to ignore.
        diffs: List of differences. If differences are found, the list is cleared and filled with them.
        update_base: If True and there are differences, base_dir is replaced by out_dir.

    Return:
        True if the directory trees are the same and there were no errors while accessing the directories or files;
        False otherwise (including when either directory path is null or does not exist).
    """
    if not _check_dir_paths(base_dir, out_dir):
        return False

    base_dir, out_dir = Path(base_dir), Path(out_dir)
    ignore = _make_ignore_list(base_dir, out_dir, skip_extensions)
    my_diffs: list[str] = []
    rv = _dirs_equal(base_dir, out_dir, ignore, comments, my_diffs)  # start recursion

    if my_diffs:
        _print_diffs(my_diffs)
        if diffs is not None:
            diffs.clear()
            diffs.extend(my_diffs)

    # Only overwrite the baseline when the comparison failed, as documented for update_base
    if update_base and not rv:
        shutil.rmtree(base_dir, ignore_errors=True)
        shutil.copytree(out_dir, base_dir)
    return rv


def _dirs_equal(dir1: Path, dir2: Path, ignore: list[str], comments: Iterable[str] | None, diffs: list[str]) -> bool:
    """Return True if two directory trees are equal, recursively.

    Descriptions of any differences found are appended to diffs. Comparison stops at the first directory level
    that has a difference.

    Args:
        dir1: First directory.
        dir2: Second directory.
        ignore: List of files to ignore.
        comments: List of strings that can appear at the beginning of lines to ignore.
        diffs: List of differences found.

    Returns:
        True if the directory trees are the same and there were no errors while accessing the directories or files;
        False otherwise.
    """
    def _contents_match(file_name: str) -> bool:
        """Return True if the file matches in both directories under the comparison suited to its type."""
        first, second = Path(dir1) / file_name, Path(dir2) / file_name
        if not (is_binary_file(first) or is_binary_file(second)):  # neither is binary
            return ascii_files_equal(first, second, comments)
        if is_sqlite_database(first) and is_sqlite_database(second):  # both sqlite
            return sqlite_databases_equal(first, second)
        if is_dbf_file(first) and is_dbf_file(second):  # both .dbf
            return dbf_files_equal(first, second)
        return binary_files_equal(first, second)

    # filecmp.dircmp tells us what exists in one directory but not the other
    comparison = filecmp.dircmp(dir1, dir2, ignore)
    if comparison.left_only or comparison.right_only or comparison.funny_files:
        _append_dir_diffs(dir1, dir2, comparison, diffs)
        return False

    # cmpfiles does a byte-level comparison and hands back mismatches and errors as lists
    _, candidates, errors = filecmp.cmpfiles(dir1, dir2, comparison.common_files, shallow=False)

    if candidates:
        print(candidates)
        # A byte-level mismatch may still be acceptable (sqlite, .dbf header, ignored comment lines), so re-check
        confirmed = [file_name for file_name in candidates if not _contents_match(file_name)]
        if confirmed:
            _append_file_diffs(dir1, dir2, confirmed, 'Mismatches', diffs)
            return False

    if errors:
        _append_file_diffs(dir1, dir2, errors, 'Errors', diffs)
        return False

    # Descend into subdirectories present on both sides, stopping at the first one that differs
    return all(
        _dirs_equal(dir1 / subdir, dir2 / subdir, ignore, comments, diffs) for subdir in comparison.common_dirs
    )


def _append_dir_diffs(dir1: Path, dir2: Path, dcmp: filecmp.dircmp, diffs: list[str]) -> None:
    """Log directory differences to the 'xms.testing' log.

    Args:
        dir1: First directory.
        dir2: Second directory.
        dcmp: Dircmp object.
        diffs: List of differences found.
    """
    diffs.append('FAIL in are_dir_trees_equal():')
    diffs.append(f'  dir1: {dir1}')
    diffs.append(f'  dir2: {dir2}')
    if dcmp.left_only:
        diffs.append(f'  Only in {dir1.as_posix()}:')
        diffs.extend(([f'    {left}' for left in sorted(dcmp.left_only)]))
    if dcmp.right_only:
        diffs.append(f'  Only in {dir2.as_posix()}:')
        diffs.extend(([f'    {right}' for right in sorted(dcmp.right_only)]))
    if dcmp.funny_files:
        diffs.append('  Funny files:')
        diffs.append(f'    {str(dcmp.funny_files)}')


def _append_file_diffs(dir1: Path, dir2: Path, file_list: list[str], name: str, diffs: list[str]) -> None:
    """Log mismatch and/or errors to the 'xms.testing' log.

    Args:
        dir1: First directory.
        dir2: Second directory.
        file_list: List of files that don't match.
        name: Name of the file_list (i.e. 'Mismatches' or 'Errors').
        diffs: List of differences found.
    """
    diffs.append(f'FAIL - {name}:')
    for file in sorted(file_list):
        diffs.append(f'  {(dir1 / file).as_posix()}')
        diffs.append(f'  {(dir2 / file).as_posix()}')
        diffs.append('--')


def _file_names_with_suffix(folder: Pathlike, suffix: str | Iterable[str]) -> set[str]:
    """Return the set of names (without paths) of entries anywhere below folder that end with suffix.

    Args:
        folder: A directory.
        suffix: A suffix or list of suffixes, e.g. '.dbf' (include the '.').

    Returns:
        (set): Unique names found; empty if folder is null or is not an existing directory.
    """
    if null_path(folder) or not Path(folder).is_dir():
        return set()

    wanted = [suffix] if isinstance(suffix, str) else suffix
    # A set keeps one copy of each name even when it appears in several subdirectories
    return {match.name for ending in wanted for match in Path(folder).rglob(f'*{ending}')}


def _sqldiff_exe_paths() -> tuple[Path, Path]:
    """Return path to local sqldiff.exe, and path to network folder containing sqldiff.exe.

    Returns:
        See description.
    """
    system = platform.system()
    if system == 'Windows':
        sqldiff_path = Path('C:/temp') / 'sqldiff' / 'sqldiff.exe'
        network_path = Path('\\\\f\\software\\sqldiff\\bin_windows')
    elif system == 'Linux':
        sqldiff_path = Path.cwd() / 'sqldiff' / 'bin_linux' / 'sqldiff'
        network_path = Path('\\\\f\\software\\sqldiff\\bin_linux')
    else:
        raise ValueError('Unsupported operating system.')
    return sqldiff_path, network_path


def _ensure_sql_exe_exists() -> Path:
    """Copy the sqldiff test utility from the network folder if it isn't already at its local path.

    Returns:
        Path to the sqldiff executable.
    """
    local_exe, network_dir = _sqldiff_exe_paths()
    if local_exe.is_file():
        return local_exe

    # Not present locally yet: pull the whole utility folder from the network location
    target_dir = local_exe.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    shutil.copytree(network_dir, target_dir, dirs_exist_ok=True)
    return local_exe


def _make_ignore_list(dir1: Path, dir2: Path, skip_extensions: Iterable[str] | None = None) -> list[str]:
    """Return the sorted names of files under either directory that have one of the skipped extensions.

    Args:
        dir1: First directory.
        dir2: Second directory.
        skip_extensions: List of extensions of files to skip when comparing.

    Returns:
        Sorted list of file names to ignore; empty if no extensions are given.
    """
    if not skip_extensions:
        return []
    names_in_first: set = _file_names_with_suffix(dir1, skip_extensions)
    names_in_second: set = _file_names_with_suffix(dir2, skip_extensions)
    # Sorted so the result is deterministic
    return sorted(names_in_first | names_in_second)


def _print_diffs(diffs: list[str]) -> None:
    """Print all the diff lines.

    Args:
        diffs: List of differences.
    """
    print('\n')
    for line in diffs:
        print(f'{line}')


def _check_file_paths(fp1: Pathlike, fp2: Pathlike) -> bool:
    """Return True if both paths are non-null and refer to existing files.

    Args:
        fp1: Path to first file.
        fp2: Path to second file.

    Returns:
        True only when each path is non-null and is an existing file.
    """
    for candidate in (fp1, fp2):
        if null_path(candidate) or not Path(candidate).is_file():
            return False
    return True


def _check_dir_paths(dir1: Pathlike, dir2: Pathlike) -> bool:
    """Return True if both paths are non-null and refer to existing directories.

    Args:
        dir1: Path to first directory.
        dir2: Path to second directory.

    Returns:
        True only when each path is non-null and is an existing directory.
    """
    for candidate in (dir1, dir2):
        if null_path(candidate) or not Path(candidate).is_dir():
            return False
    return True
