import logging
import mmap
import os
import struct
import sys
from pathlib import Path
from typing import Optional
from urllib.parse import parse_qs, unquote

logger = logging.getLogger(__name__)


def write_installer_attribution_to_file(attribution_data, output_filepath):
    """Write attribution data to a UTF-8 text file.

    The target path has ``~`` expanded and any missing parent directories are
    created before the file is written (overwriting an existing file).

    Args:
        attribution_data: Text to write to the file.
        output_filepath: Destination path (string or path-like).

    Returns:
        True if the file was written, False if creating the parent directory
        or writing the file failed with an OSError (the error is reported on
        stderr).
    """
    output_filepath = Path(output_filepath).expanduser()
    try:
        # Directory creation is part of the write: a failure here must be
        # reported through the return value just like a failed open/write.
        output_filepath.parent.mkdir(parents=True, exist_ok=True)
        with open(output_filepath, "w", encoding="utf-8") as f:
            f.write(attribution_data)
    except OSError as e:
        print(f"Error writing to file: {e}", file=sys.stderr)
        return False
    return True


def read_installer_attribution_from_env() -> Optional[str]:
    """
    Look up the ANACONDA_ATTR environment variable and return its value.
    The result is logged at debug level: the first 50 characters of the value
    when it is non-empty, or a "not found" message otherwise.
    Returns:
        The value of ANACONDA_ATTR as stored in the environment (an empty
        string if the variable is set but empty), or None if it is not set.
    """
    env_value = os.environ.get("ANACONDA_ATTR")
    if not env_value:
        logger.debug("ANACONDA_ATTR not found in environment")
        return env_value
    # Only a prefix is logged to keep long values out of the debug output.
    logger.debug(f"Found ANACONDA_ATTR in environment: {env_value[:50]}...")
    return env_value


def read_installer_attribution_from_sh_file(filepath) -> Optional[str]:
    """
    Read attribution data from a shell script file.
    Looks for an export statement that sets ANACONDA_ATTR environment variable.
    Format: export ANACONDA_ATTR='<data>' or export ANACONDA_ATTR="<data>"

    The opening quote is whichever quote character appears first after the
    ``=``, and the value runs up to the next occurrence of that same character
    (or the end of the line if it is never closed). This keeps a value such as
    "it's" intact instead of splitting it on the apostrophe.

    Args:
        filepath: Path to the shell script to scan.
    Returns:
        The first non-empty (whitespace-stripped) quoted value found, or None
        if no such line exists or the file could not be read.
    """
    prefix = "export ANACONDA_ATTR="
    quote_chars = ("'", '"')
    try:
        # errors="ignore": the script may carry a binary payload after the
        # shell header, so undecodable bytes are dropped rather than raising.
        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                stripped = line.strip()
                if not stripped.startswith(prefix):
                    continue
                remainder = stripped[len(prefix):]
                # Pick the quote character that opens the value, i.e. the one
                # that occurs earliest in the remainder of the line.
                candidates = [
                    (remainder.find(q), q) for q in quote_chars if q in remainder
                ]
                if not candidates:
                    continue
                start, quote = min(candidates)
                value = remainder[start + 1:].partition(quote)[0].strip()
                if value:
                    return value
    except (OSError, TypeError, ValueError) as e:
        logger.error("Error reading shell script: %s", e)
    return None


def read_installer_attribution_sh(filepath) -> Optional[str]:
    """
    Resolve attribution data for a shell script installer.
    The ANACONDA_ATTR environment variable takes precedence; the script file
    is only parsed when the environment yields nothing (unset or empty).
    Args:
        filepath: Path to the shell script installer
    Returns:
        Attribution data string if found, None otherwise
    """
    # `or` falls through to the file parser for both None and "".
    return (
        read_installer_attribution_from_env()
        or read_installer_attribution_from_sh_file(filepath)
    )


def read_installer_attribution_windows(filepath: str) -> Optional[str]:
    """Read attribution data from a signed PE file.

    This function looks for the "ANACONDA_ATTR" tag in the certificate table
    and returns the data that follows it (up to the next null byte or the end
    of the reserved space, whichever comes first).

    Args:
        filepath: Path to the PE (.exe) installer.

    Returns:
        The attribution data as a string if found. None if the file is too
        small or isn't a valid PE file, has no certificate table, does not
        contain the tag, or could not be read (read errors are logged).
    """
    tag = b"ANACONDA_ATTR"
    try:
        with open(filepath, "rb") as file:
            # Anything shorter than 0x40 bytes cannot hold the PE header
            # pointer. Checking the size up front also avoids the ValueError
            # mmap raises for an empty file.
            if os.fstat(file.fileno()).st_size < 0x40:
                return None

            # The context manager releases the mapping on every exit path,
            # including when parsing raises.
            with mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as mapped:
                # The 32-bit value at 0x3C is the offset of the PE header.
                pe_header_offset = struct.unpack_from("<I", mapped, 0x3C)[0]

                # Validate PE signature
                signature = mapped[pe_header_offset : pe_header_offset + 4]
                if signature != b"PE\x00\x00":
                    return None

                optional_header_offset = pe_header_offset + 24

                # Look up the magic number in the optional header,
                # so we know if we have a 32 or 64-bit executable.
                pe_magic_number = struct.unpack_from(
                    "<H", mapped, optional_header_offset
                )[0]
                if pe_magic_number == 0x10B:
                    # 32-bit
                    cert_dir_entry_offset = optional_header_offset + 128
                elif pe_magic_number == 0x20B:
                    # 64-bit. Certain header fields are wider.
                    cert_dir_entry_offset = optional_header_offset + 144
                else:
                    # Not any known PE format
                    return None

                # The certificate table offset and length give us the valid
                # range to search through for our attribution data.
                cert_table_offset, cert_table_size = struct.unpack_from(
                    "<II", mapped, cert_dir_entry_offset
                )
                if cert_table_offset == 0 or cert_table_size == 0:
                    # The file isn't signed or has no certificate table
                    return None

                cert_table_end = cert_table_offset + cert_table_size
                tag_index = mapped.find(tag, cert_table_offset, cert_table_end)
                if tag_index == -1:
                    return None

                # The data follows the tag and is confined to the reserved
                # space (1024 bytes from tag start) and the certificate table.
                data_start = tag_index + len(tag)
                data_end = min(cert_table_end, tag_index + 1024)
                raw_data = mapped[data_start:data_end]
    except (OSError, ValueError, struct.error) as e:
        # struct.error covers truncated headers; ValueError covers mmap
        # refusing the file (e.g. it was emptied after the size check).
        logger.error("Error reading file: %s", e)
        return None

    # The first null byte marks the end of the actual data.
    raw_data = raw_data.partition(b"\x00")[0]

    try:
        return raw_data.decode("utf-8")
    except UnicodeDecodeError:
        # latin-1 maps every byte to a character, so this fallback cannot fail.
        return raw_data.decode("latin-1")


def read_installer_attribution_macos(filepath: str) -> Optional[str]:
    """
    Read attribution data from a macOS installer.

    Dispatches on the file extension: ``.sh`` installers go to the shell
    script reader and ``.pkg`` installers go to the .pkg reader. Any other
    extension is logged as a warning.

    Args:
        filepath: Path to the installer. Path-like objects are accepted and
            converted to a string before the extension is checked.
    Returns:
        Attribution data string if found, None otherwise (including for
        unknown installer types).
    """
    logger.info("macOS attribution reader")
    # Normalise path-like input (e.g. pathlib.Path) so the suffix checks below
    # do not fail on objects without str methods.
    path_text = os.fsdecode(filepath)
    if path_text.endswith(".sh"):
        return _read_installer_attribution_macos_sh(path_text)
    if path_text.endswith(".pkg"):
        return _read_installer_attribution_macos_pkg(path_text)
    logger.warning("Unknown macOS installer type: %s", path_text)
    return None


def _read_installer_attribution_macos_sh(filepath: str) -> Optional[str]:
    """Return attribution data for a macOS .sh installer via the shared shell-script reader."""
    attribution = read_installer_attribution_sh(filepath)
    return attribution


def _read_installer_attribution_macos_pkg(filepath: str) -> Optional[str]:
    """Placeholder reader for .pkg installers.

    Not implemented yet: logs a warning and always returns None. The
    ``filepath`` argument is currently unused.
    """
    # TODO: Implement reading from .pkg files - https://anaconda.atlassian.net/browse/DATAPROD-416
    # Design notes for a possible implementation (nothing below is implemented here):
    # - Read extended attributes (xattrs) of the .pkg; the installer would pass in $PKG_PATH.
    # - kMDItemWhereFroms is a file metadata attribute that stores the URL or URLs
    #   from which a file was downloaded.
    # - Redirects are tracked: when a download is redirected one or more times to a
    #   different final location, the attribute stores all the URLs in that chain, so
    #   we could redirect to an itoken url (/attribution-service/...?...itoken=abc).
    # - The attribute is set by applications, such as web browsers, that support the
    #   macOS file quarantine system (Gatekeeper).
    # - Formatting: the data for kMDItemWhereFroms is stored in a binary property
    #   list (bplist) format.
    logger.warning(".pkg attribution reading not yet implemented")
    return None


def read_installer_attribution_linux(filepath: str) -> Optional[str]:
    """
    Return attribution data for a Linux installer.
    Delegates to the shared shell-script reader.
    """
    attribution = read_installer_attribution_sh(filepath)
    return attribution


# Dispatch table: platform key -> reader function taking the installer path and
# returning the attribution string (or None).
# NOTE(review): keys look like lowercased platform.system() values - confirm
# against the caller that performs the lookup.
INSTALLER_ATTRIBUTION_READERS = {
    "windows": read_installer_attribution_windows,
    "darwin": read_installer_attribution_macos,
    "linux": read_installer_attribution_linux,
}


def parse_installer_attribution(attribution_data: str) -> dict:
    """Turn URL-encoded attribution data into a dictionary.

    The whole input is percent-decoded first and then parsed as a query
    string, so percent-encoded separators (``%26``, ``%3D``) in the input act
    as real separators. Parameters with blank values are dropped by the query
    string parser.
    Args:
        attribution_data: URL-encoded query string with attribution parameters
    Returns:
        Dictionary of parsed parameters. A key that occurs once maps to its
        single string value; a key that occurs several times maps to the list
        of its values.
    """
    query_string = unquote(attribution_data)
    flattened = {}
    # parse_qs wraps every value in a list; unwrap the single-item ones.
    for key, values in parse_qs(query_string).items():
        if len(values) == 1:
            flattened[key] = values[0]
        else:
            flattened[key] = values
    return flattened
