UNCLASSIFIED

mirror.py 4.16 KB
Newer Older
Ian Dunbar-Hall's avatar
Ian Dunbar-Hall committed
1 2 3 4 5 6 7 8
"""
This module will use environment variables to download images
from DCAR and push to a container registry. Metadata about
which images to mirror is loaded from `dcar.json`.
"""
import sys
import os
import json
Zachary Prebosnyak's avatar
Zachary Prebosnyak committed
9 10
import multiprocessing
import docker
Ian Dunbar-Hall's avatar
Ian Dunbar-Hall committed
11 12

from dcar.request import Session, LoginError
Zachary Prebosnyak's avatar
Zachary Prebosnyak committed
13 14
from dcar.image import mirror_image, process_image
from dcar.utils import check_envs, validate_argument
Ian Dunbar-Hall's avatar
Ian Dunbar-Hall committed
15 16 17 18 19 20 21 22 23 24

def mirror(dcar_json_input: str = "dcar.json",
           failed_image_output: str = "failed_images.json"):
    """
    Copy from DCAR to container registry.

    Top level function that reads the dcar.json file, downloads
    from DCAR, and pushes to a container registry.

    Args:
        dcar_json_input: Path of the JSON file describing the images to
            mirror. It must hold a JSON object; each (key, value) item is
            handed to `process_image`.
        failed_image_output: Path of the JSON file that receives the list
            of images that failed to mirror. It is only written when at
            least one image failed.

    Raises:
        ValueError: If CI_NODE_INDEX / CI_NODE_TOTAL are not integers, or
            do not satisfy 1 <= CI_NODE_INDEX <= CI_NODE_TOTAL.
        SystemExit: If a `LoginError` is raised while mirroring.

    Environment Variables:
    DCAR_USERNAME (required): Username for authenticating with DCAR.
    DCAR_PASSWORD (required): Password for authenticating with DCAR.
    DCAR_TOTP_SEED (required): Time-based One-time Password seed for authenticating with DCAR.
    REGISTRY_IMAGE_TEMPLATE (required): Container registry name to store images in.
    REGISTRY_USER (required): Username for authenticating with container registry.
    REGISTRY_PASSWORD (required): Password for authenticating with container registry.
    CI_NODE_INDEX (Optional): Index of process mirroring used to slice the list
                              of images being mirrored (1-based).
    CI_NODE_TOTAL (Optional): Total number of processes mirroring.
    PUSH_W_DOCKER (Optional): If this environment variable exists, use docker instead of
                              skopeo to push images.
    """

    validate_argument("dcar_json_input", dcar_json_input, str)
    validate_argument("failed_image_output", failed_image_output, str)

    # Make sure envs are set
    check_envs(["DCAR_USERNAME", "DCAR_PASSWORD",
                "DCAR_TOTP_SEED", "REGISTRY_IMAGE_TEMPLATE",
                "REGISTRY_USER", "REGISTRY_PASSWORD"])

    # Both variables must be present to enable slicing; otherwise this
    # process mirrors the complete list on its own.
    if "CI_NODE_INDEX" in os.environ and "CI_NODE_TOTAL" in os.environ:
        node_index = int(os.environ["CI_NODE_INDEX"])
        node_total = int(os.environ["CI_NODE_TOTAL"])
    else:
        node_index = 1
        node_total = 1

    # The slice below assumes a 1-based index. Without this check an index
    # of 0 would silently select only the last image, and an index above
    # the total would duplicate the work of another node.
    if node_total < 1 or not 1 <= node_index <= node_total:
        raise ValueError(
            "CI_NODE_INDEX ({}) must be between 1 and CI_NODE_TOTAL ({})".format(
                node_index, node_total))

    try:
        # JSON is UTF-8 by specification; do not depend on the locale.
        with open(dcar_json_input, encoding="utf-8") as json_file:
            dcar_image_dict = json.load(json_file)

        # Since this is probably going to be run as part of a
        # scheduled job, you can define the two environment variables
        # to split the `dcar.json` dictionary for parallel processing
        # This is done to speed up the mirroring images.
        #
        # If you are using GitLab CI, these two environment variables
        # are automatically created if you use the "parallel" tag.
        # - CI_NODE_INDEX
        # - CI_NODE_TOTAL
        dcar_image_list = list(dcar_image_dict.items())[node_index - 1::node_total]

        print("processing {}/{} ({}/{} images)".format(node_index, node_total,
                                                        len(dcar_image_list),
                                                        len(dcar_image_dict)),
              flush=True)

        # mirror the images
        # Each result is indexed as (image name, flag); a falsy flag marks
        # the image as failed. NOTE(review): exact semantics of the flag
        # are defined by dcar.image.process_image - confirm there.
        with multiprocessing.Pool(processes=5) as pool:
            dcar_failed_image_list = [
                image_info[0]
                for image_info in pool.imap(process_image, dcar_image_list)
                if not image_info[1]
            ]

        # write out list of images that failed
        if dcar_failed_image_list:
            # Save the json object with all of the failed images.
            with open(failed_image_output, 'w', encoding="utf-8") as outfile:
                json.dump(dcar_failed_image_list, outfile)
            print("ERROR: The following images failed to mirror", flush=True, file=sys.stderr)
            for failed_image in dcar_failed_image_list:
                print("- " + failed_image, flush=True, file=sys.stderr)
    except LoginError as exception:
        sys.exit("FATAL: {}".format(exception))