#!/usr/bin/env python3

import xml.etree.ElementTree as ET
import re
import math

# Matches either a single path command letter (group "cmd") or a number
# (group "num"). The number form follows SVG path syntax: optional sign,
# then digits with an optional fractional part ("10", "10.", "10.5") or a
# bare fraction (".5"), then an optional exponent.
_PATH_TOKEN_RE = re.compile(
    r'(?P<cmd>[MmZzLlHhVvCcSsQqTtAa])'
    r'|(?P<num>[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)'
)


def tokenize_path_data(d_attribute):
    """
    Split an SVG path 'd' attribute string into a flat list of tokens.

    Tokens are either:
      - single-letter command strings (M, m, L, l, C, c, ...), or
      - floats parsed from the numeric arguments, including numbers with no
        leading digit (".5"), a trailing dot ("10.") and scientific notation.

    Separators (whitespace, commas) and any other unrecognized characters
    are skipped.

    Example: 'M10 20L30.5 40.7Z' -> ['M', 10.0, 20.0, 'L', 30.5, 40.7, 'Z'].

    NOTE: arc flags written without separators (e.g. "0 011 1") are not
    split into individual flags; they are read as ordinary numbers.
    """
    tokens = []
    for match in _PATH_TOKEN_RE.finditer(d_attribute):
        command_letter = match.group('cmd')
        if command_letter:
            # A path command such as 'M' or 'L' is kept as a string.
            tokens.append(command_letter)
        else:
            # Otherwise the match is a numeric argument.
            tokens.append(float(match.group('num')))

    return tokens

def extract_paths_from_svg(svg_data):
    """
    Parse SVG markup and tokenize the 'd' attribute of every <path> element.

    svg_data is the SVG document as a string. Only <path> elements in the
    standard SVG namespace (http://www.w3.org/2000/svg) that are descendants
    of the root are considered; paths with a missing or empty 'd' attribute
    are skipped. The result is a list of token lists, one per path, in the
    order the search yields them.

    Documents that do not declare the SVG namespace will yield no paths
    here; for such files the search expression would need to be './/path'.
    """
    document_root = ET.fromstring(svg_data)

    # Collect the raw 'd' strings lazily, then tokenize the non-empty ones.
    path_data_strings = (
        element.get('d')
        for element in document_root.iterfind('.//{http://www.w3.org/2000/svg}path')
    )
    return [
        tokenize_path_data(path_data)
        for path_data in path_data_strings
        if path_data
    ]

def compare_token_lists(tokens_a, tokens_b):
    """
    Compare two path token lists (command strings and floats) position by
    position.

    Returns None when the lists are structurally different: different
    lengths, differing command letters at the same position, or a position
    where the two tokens are not both strings or both floats.

    Otherwise returns the largest absolute difference between corresponding
    numeric tokens (0 when there is no difference or no numeric tokens).
    """
    if len(tokens_a) != len(tokens_b):
        return None  # Length mismatch can never be a structural match

    largest_gap = 0
    for left, right in zip(tokens_a, tokens_b):
        if isinstance(left, str) and isinstance(right, str):
            # Command letters have to be identical
            if left != right:
                return None
            continue

        if not (isinstance(left, float) and isinstance(right, float)):
            # Mixed kinds (command vs. number) at the same position
            return None

        # Both numeric: track the biggest deviation seen so far
        gap = abs(left - right)
        if gap > largest_gap:
            largest_gap = gap

    return largest_gap

def compare_svg_files(svg_file_1, svg_file_2):
    """
    Compare the <path> data of two SVG files.

    svg_file_1 and svg_file_2 are file names, opened in text mode.

    Returns:
      - 0 if the two files have identical contents (no parsing is done);
      - None if the files have a different number of <path> elements, or
        any pair of corresponding paths differs in structure (see
        compare_token_lists);
      - otherwise the maximum absolute difference between corresponding
        numeric values across all paths.
    """
    # Use context managers so the file handles are closed promptly, even
    # when this function is called for many file pairs in one run.
    with open(svg_file_1) as file_1:
        svg_data_1 = file_1.read()
    with open(svg_file_2) as file_2:
        svg_data_2 = file_2.read()

    # If contents match exactly, return 0
    if svg_data_1 == svg_data_2:
        return 0

    paths1 = extract_paths_from_svg(svg_data_1)
    paths2 = extract_paths_from_svg(svg_data_2)

    # Check that we have the same number of <path> elements
    if len(paths1) != len(paths2):
        return None

    # Compare each pair of path token lists, keeping the worst deviation
    max_diff = 0
    for tokens1, tokens2 in zip(paths1, paths2):
        ret = compare_token_lists(tokens1, tokens2)
        if ret is None:
            # A structural mismatch in any path makes the files differ
            return None
        max_diff = max(max_diff, ret)

    return max_diff


if __name__ == "__main__":
    # Command-line driver: reads pairs of SVG file names from stdin (one
    # whitespace-separated pair per line) and reports the pairs whose path
    # data differs by more than the given tolerance.
    import sys

    if '--help' in sys.argv[1:]:
        print("Usage: hb-svg-compare TOLERANCE < file_with_svg_pairs.txt")
        sys.exit(1)

    # Optional first argument: maximum numeric difference that is tolerated
    tolerance = 0
    if len(sys.argv) > 1:
        tolerance = float(sys.argv[1])

    # Read all lines of two SVG file paths from stdin and compare
    for line in sys.stdin:
        fields = line.split()
        if not fields:
            # Skip blank lines (e.g. a trailing newline) instead of
            # failing on the two-value unpack below.
            continue
        svg1, svg2 = fields
        diff = compare_svg_files(svg1, svg2)

        if diff is None:
            # Structural mismatch between the two files
            diff = "DIFF"
        elif diff <= tolerance:
            # Within tolerance: nothing to report
            continue
        else:
            diff = f"{diff:.5f}"

        # Flush on every report so output is immediate
        print(f"{diff}\t{svg1}\t{svg2}", flush=True)
