from pathlib import Path
from yaml_manifest.models import Manifest
import tempfile


def get_local_file(wildcards):
    """Resolve the ``filepath`` wildcard to the local file that is uploaded.

    Args:
        wildcards: Object exposing a ``filepath`` attribute.

    Returns:
        ``Path(tempdir, filepath)`` when ``filepath`` is one of the values of
        ``compress_targets``; otherwise ``Path(filepath)`` when that path is a
        member of ``upload_files``.

    Raises:
        ValueError: If ``filepath`` matches neither collection.
    """
    requested = wildcards.filepath

    # Compressed targets are looked up first and resolved under tempdir.
    if requested in compress_targets.values():
        return Path(tempdir) / f"{requested}"

    candidate = Path(requested)
    if candidate not in upload_files:
        raise ValueError(f"get_local_file did not recognise filepath {requested}")
    return candidate


def write_receipts(receipts, output_jsonl):
    """Concatenate individual receipt files into one JSONL file.

    The content of each receipt file is stripped of leading and trailing
    whitespace and written to ``output_jsonl`` followed by a newline.
    Receipt paths that do not exist, and receipts that are empty after
    stripping, are skipped.

    Args:
        receipts: Iterable of paths to the receipt files to combine.
        output_jsonl: Path of the combined output file; it is opened in
            write mode, so any existing content is replaced.
    """
    with open(output_jsonl, "w") as out:
        for receipt_path in receipts:
            # Path.exists is a method: without the call the bound method is
            # always truthy and a missing receipt raises FileNotFoundError
            # below instead of being skipped.
            if not Path(receipt_path).exists():
                continue
            with open(receipt_path, "rt") as f:
                line = f.read().strip()
            if line:
                out.write(line + "\n")
    print(f"Upload manifest written to {output_jsonl}")


# Scratch directory; compress_file writes its .gz outputs beneath it.
# NOTE(review): nothing visible in this file removes the directory again.
tempdir = tempfile.mkdtemp()
# Promote every key of `config` to a module-level name. `manifest`,
# `result_dir`, `stage`, `receipts_file` and `bucket` are used below but are
# not assigned anywhere else in this file, so presumably they come from here;
# confirm against the workflow's configuration. `config` and `logger` are
# likewise not defined in this file (presumably provided by Snakemake).
# Because this runs after the assignment above, a `tempdir` key in `config`
# would replace the freshly created directory.
globals().update(config)

# Load the manifest and classify files.
# `manifest` is rebound here: it is passed to Path() on the way in and holds
# the object returned by Manifest.from_yaml afterwards.
manifest = Manifest.from_yaml(Path(manifest))

# Allow overriding the result_dir for testing.
if result_dir is not None:
    logger.warning(
        f"Looking for output files in {result_dir}.\nDon't do this in production."
    )
    # Imported only on this override path; note that it is a private helper.
    from yaml_manifest.layout import _collect_upload_files

    classified = _collect_upload_files(stage, result_dir)
else:
    classified = manifest.collect_upload_files(stage)

# `classified` is indexed by the keys "upload" and "compress".
upload_files = classified["upload"]
compress_files_list = classified["compress"]

# Build the list of all files to upload with their remote paths.
# The remote path mirrors the local path.
all_uploads = {str(f): str(f) for f in upload_files}

# Map original path → compressed path for files that need gzipping.
# The compressed path is the original file name with ".gz" appended.
compress_targets = {
    str(f): str(f.with_name(f.name + ".gz")) for f in compress_files_list
}

# Write receipts and logs into the parent directory of receipts_file:
# logs go to <parent>/logs and per-file receipts to <parent>/receipts.
receipts_parent = Path(receipts_file).parent
logdir = Path(receipts_parent, "logs")
receipt_dir = Path(receipts_parent, "receipts")


# Compress one file with pigz, writing the result to
# <tempdir>/<filepath>.gz and leaving the original file in place (pigz -c
# writes to stdout, which is redirected to the output).
rule compress_file:
    input:
        original="{filepath}",
    output:
        gz=Path(tempdir, "{filepath}.gz"),
    # Use all but two of the workflow's cores, but never fewer than one.
    threads: max(workflow.cores - 2, 1)
    shell:
        # The :q specifier shell-quotes the paths so that file names with
        # spaces or shell metacharacters do not break the command.
        "pigz -c "
        "--processes {threads} "
        "{input.original:q} "
        "> {output.gz:q}"


# Upload one local file with result-file-uploader. The command's stdout is
# captured as the per-file receipt and its stderr goes to the log file.
rule upload_file:
    input:
        # Resolves to the compressed copy under tempdir or to the original.
        local_file=get_local_file,
    output:
        receipt=Path(receipt_dir, "{filepath}.receipt.jsonl"),
    log:
        Path(logdir, "{filepath}.upload_file.log"),
    params:
        # Remote path: the entry in all_uploads if there is one, otherwise
        # the compressed target registered for the path without ".gz".
        remote=lambda wildcards: (
            all_uploads.get(wildcards.filepath)
            or compress_targets[wildcards.filepath.removesuffix(".gz")]
        ),
        bucket=bucket,
    retries: 3
    shell:
        # The :q specifier shell-quotes each value so that paths with spaces
        # or shell metacharacters are passed as single arguments.
        "result-file-uploader "
        "{input.local_file:q} "
        "{params.remote:q} "
        "--bucket {params.bucket:q} "
        "> {output.receipt:q} "
        "2> {log:q}"


# Receipt paths for the files that are uploaded as-is.
upload_receipts = expand(rules.upload_file.output, filepath=all_uploads)
# Receipt paths for the gzip-compressed copies.
compressed_file_upload_receipts = expand(
    rules.upload_file.output, filepath=compress_targets.values()
)
# De-duplicated union of both lists, in sorted order.
all_receipts = sorted(
    set(upload_receipts) | set(compressed_file_upload_receipts)
)


# Default target: requests every receipt listed in all_receipts, which in
# turn requires upload_file to run for each of them.
rule target:
    default_target: True
    input:
        all_receipts,


# On success, merge the per-file receipts into the single file named by
# receipts_file.
onsuccess:
    write_receipts(receipts=all_receipts, output_jsonl=receipts_file)
