diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..41a221b431fc43496d6e0b5283aeba885c6a3cf1 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,28 @@ +ARG GITLAB_VERSION=v14.2.0-ubi8 + +ARG BASE_REGISTRY=nexus-docker-secure.levelup-nexus.svc.cluster.local:18082 +ARG BASE_IMAGE=gitlab/gitlab/gitlab-rails +ARG BASE_TAG=14.2.0 + +ARG RAILS_IMAGE=${BASE_REGISTRY}/${BASE_IMAGE}:${BASE_TAG} + +FROM ${RAILS_IMAGE} + +ARG GITLAB_VERSION +ARG GITLAB_USER=git +ARG DNF_OPTS +ENV LIBDIR ${LIBDIR:-"/usr/lib64"} + +ADD gitlab-toolbox-ee.tar.gz / +ADD gitlab-python.tar.gz / + +COPY scripts/bin/* /usr/local/bin/ +COPY scripts/lib/* ${LIBDIR}/ruby/vendor_ruby/ + +RUN dnf clean all \ + && rm -r /var/cache/dnf \ + && dnf ${DNF_OPTS} install -by --nodocs ca-certificates openssl + +USER ${GITLAB_USER}:${GITLAB_USER} + +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..4077ae53d2af87617cc2efc31f2f71f3c0d5a870 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2017 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/README.md b/README.md index 5dc6fa6db4361c22da2f35edf0544d83ba6001e2..1ccac53353019fa368caa315e50286893d6451f6 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,41 @@ -# +# gitlab-toolbox-container -Project template for all Iron Bank container repositories. \ No newline at end of file +GitLab Toolbox is an entry point for interaction with other containers in the cluster. It contains scripts for running Rake tasks, backup, restore, and tools to interact with object storage. + +## GitLab Build + +The hardened containers for GitLab require the correctly versioned GitLab assets blob to be available for download. + +There are some included scripts to make this easier for users building images offline on their machine. + + +## Building + +1. Switch to the desired versioned directory +2. Run `./build-scripts/build.sh` + * Runs docker build, and takes care of setting the appropriate build args for users running locally + + +`build-scripts/build.sh` is provided as an example and helpful for building locally. You can also instead call docker build and pass build-args directly. + +## Build Phases + +Some of the GitLab containers are built on top of previous containers, so building the containers in ordered phases is necessary to build all containers. 
+ +- Phase One + * kubectl + * gitlab-ruby + * gitlab-container-registry +- Phase Two + * git-base + * gitlab-exporter + * gitlab-mailroom + * gitlab-shell + * gitlab-rails + * gitlab-workhorse +- Phase 3 + * gitaly +- Phase 4 + * gitlab-sidekiq + * gitlab-toolbox + * gitlab-webservice diff --git a/build-scripts/build.sh b/build-scripts/build.sh new file mode 100755 index 0000000000000000000000000000000000000000..b069fac019ef6679b69bd2676dc21a69ba2d507f --- /dev/null +++ b/build-scripts/build.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# NOTICE: This script requires `docker`. + +set -euxo pipefail + +TAG=${1:-14.2.0} +REPOSITORY=${2:-} +DOCKER_OPTS=${DOCKER_OPTS:-""} + +imageName() { + printf "${REPOSITORY}${1}:${TAG}" +} + +buildImage() { + IMAGE="${1}" + CONTEXT="${IMAGE%*-ee}" + { + docker build \ + -t "$(imageName ${IMAGE})" . \ + ${DOCKER_OPTS:-} | tee ${CONTEXT}.out + } || { + echo "${CONTEXT}" >> failed.log + } +} + +# Cleanup log outputs from previous build +rm -f *.out failed.log + +DOCKER_OPTS="$DOCKER_OPTS --build-arg RAILS_IMAGE=$(imageName gitlab-rails)" +buildImage gitlab-toolbox diff --git a/build-scripts/cleanup.sh b/build-scripts/cleanup.sh new file mode 100755 index 0000000000000000000000000000000000000000..ad291f98ba8b4db3c80d564f753a1288550c8f10 --- /dev/null +++ b/build-scripts/cleanup.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -euxo pipefail + +rm -f *.tar.gz *.out failed.log diff --git a/hardening_manifest.yaml b/hardening_manifest.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89725c0c2c55941d0741a8f908400e1b0d0d4f7d --- /dev/null +++ b/hardening_manifest.yaml @@ -0,0 +1,59 @@ +apiVersion: v1 +# The repository name in registry1, excluding /ironbank/ +name: "gitlab/gitlab/gitlab-toolbox" +# List of tags to push for the repository in registry1 +# The most specific version should be the first tag and will be shown +# on ironbank.dsop.io +tags: + - "14.2.0" + - "latest" +# Build args passed to Dockerfile ARGs +args: + BASE_IMAGE: 
"gitlab/gitlab/gitlab-rails" + BASE_TAG: "14.2.0" +# Docker image labels +labels: + org.opencontainers.image.title: "Gitlab Toolbox" + ## Human-readable description of the software packaged in the image + org.opencontainers.image.description: "GitLab Toolbox is an entry point for interaction with other containers in the cluster. It contains scripts for running Rake tasks, backup, restore, and tools to interact with object storage." + ## License(s) under which contained software is distributed + org.opencontainers.image.licenses: "MIT License" + ## URL to find more information on the image + org.opencontainers.image.url: "https://about.gitlab.com/" + ## Name of the distributing entity, organization or individual + org.opencontainers.image.vendor: "Gitlab" + org.opencontainers.image.version: "14.2.0" + ## Keywords to help with search (ex. "cicd,gitops,golang") + mil.dso.ironbank.image.keywords: "gitlab, git, gitops" + ## This value can be "opensource" or "commercial" + mil.dso.ironbank.image.type: "commercial" + ## Product the image belongs to for grouping multiple images + mil.dso.ironbank.product.name: "gitlab" +# List of project maintainers +# FIXME: Fill in the following details for the current container owner in the whitelist +# FIXME: Include any other vendor information if applicable +maintainers: + - email: "dj@gitlab.com" + # # The name of the current container owner + name: "DJ Mountney" + # # The gitlab username of the current container owner + username: "twk3" + # cht_member: true # FIXME: Uncomment if the maintainer is a member of CHT + - name: "Al Fontaine" + username: "alfontaine" + email: "alan.fontaine@centauricorp.com" + - email: "adam.martin@rancherfederal.com" + name: "Adam Martin" + username: "adam.martin" + cht_member: true +resources: + - url: "https://gitlab-ubi.s3.amazonaws.com/ubi8-build-dependencies-v14.2.0-ubi8/gitlab-toolbox-ee.tar.gz" + filename: "gitlab-toolbox-ee.tar.gz" + validation: + type: "sha256" + value:
"d0b2020e9ba41367ea3bb36e1fabc924ca7fb8241224e386924f2822aa5a44a4" + - url: "https://gitlab-ubi.s3.amazonaws.com/ubi8-build-dependencies-v14.2.0-ubi8/gitlab-python.tar.gz" + filename: "gitlab-python.tar.gz" + validation: + type: "sha256" + value: "c7c279f90687febbfb537c17f43b6db9dc0d1916feb7e99bdd40c17e61fe0064" diff --git a/openshift.metadata b/openshift.metadata new file mode 100644 index 0000000000000000000000000000000000000000..5b94c6382a6f60b8b8cea46d9cd0fce44ddb5d95 --- /dev/null +++ b/openshift.metadata @@ -0,0 +1,7 @@ +io.openshift.tags=gitlab-toolbox +io.openshift.wants=gitlab-webservice +io.k8s.description=Container with tools to diagnose GitLab application +issues. +io.openshift.non-scalable=false +io.openshift-min-memory=350Mi +io.openshift.min-cpu=50m diff --git a/scripts/bin/backup-utility b/scripts/bin/backup-utility new file mode 100755 index 0000000000000000000000000000000000000000..818604bdd1b55a973144f9176fafc87a45f46038 --- /dev/null +++ b/scripts/bin/backup-utility @@ -0,0 +1,368 @@ +#!/bin/bash +set -e + +ACTION="" +export BACKUP_BUCKET_NAME=${BACKUP_BUCKET_NAME-gitlab-backups} +export BACKUP_BACKEND=${BACKUP_BACKEND-s3} +S3_CMD_BACKUP_OPTION="" + +rails_dir=/srv/gitlab +backups_path=$rails_dir/tmp/backups +backup_tars_path=$rails_dir/tmp/backup_tars +object_storage_backends=( registry uploads artifacts lfs packages external_diffs terraform_state pages ) + +skipping_backup_for=() + +function usage() +{ + cat << HEREDOC + + Usage: backup-utility [--restore|--cleanup] [-f URL] [-t TIMESTAMP] [--skip COMPONENT] [--backend BACKEND] [--s3config CONFIG] + + Options: + -h, --help Show this help message and exit. + --restore [-t TIMESTAMP | -f URL] When specified, utility restores from an existing backup specified + as url or timestamp in object storage. + -f URL http(s):/ftp:/file: URL with backup location. Use with --restore. 
+ -t TIMESTAMP Timestamp (part before '_gitlab_backup.tar' in archive name), + can be used to specify backup source or target name. + --rsyncable Pass the '--rsyncable' parameter to gzip for artifact compression. + --skip COMPONENT When specified, utility will skip the backup of COMPONENT. + May be defined multiple times. Valid values for COMPONENT are + db, repositories, and any of the object storages (e.g. 'lfs'). + --backend BACKEND Object storage backend to use for backups. + Can be either 's3' or 'gcs'. + --s3config CONFIG S3 backend configuration to use for backups storage. + Special config file for s3cmd (see: https://s3tools.org/usage) + --storage-class CLASSNAME Pass this storage class to the gcs or s3cmd for more cost-efficient + storage of backups. + --maximum-backups N Only keep the most recent N number of backups, deleting others after success. + Requires s3config credentials to be able to list and delete objects. + --cleanup Run the backup cleanup without creating a new backup. Can be used with the + 'maximum-backups' option to clean old remote backups. 
+HEREDOC +} + +# Checks if provided argument is a url for downloading it +function is_url() { + regex='(https?|ftp|file)://[-A-Za-z0-9\+&@#/%?=~_|!:,.;]*[-A-Za-z0-9\+&@#/%=~_|]' + + [[ $1 =~ $regex ]] +} + +function fetch_remote_backup(){ + mkdir -p $backups_path + output_path=$backups_path/0_gitlab_backup.tar + + if is_url $1; then + >&2 echo "Downloading from $1"; + curl --retry 6 --progress-bar -o $output_path $1 + else # It's a timestamp + file_name="$1_gitlab_backup.tar" + if [ "${BACKUP_BACKEND}" = "s3" ]; then + s3cmd ${S3_CMD_BACKUP_OPTION} get "s3://$BACKUP_BUCKET_NAME/$file_name" $output_path > /dev/null + elif [ "${BACKUP_BACKEND}" = "gcs" ]; then + gsutil cp "gs://$BACKUP_BUCKET_NAME/$file_name" $output_path > /dev/null + else + echo "Unknown backend: ${BACKUP_BACKEND}" + fi + fi + echo $output_path +} + +function unpack_backup(){ + local file_path=$1 + cd $(dirname $file_path) + + echo "Unpacking backup" + + if [ ! -f $file_path ]; then + echo $file_path not found + exit 1 + fi + + tar -xf $file_path +} + +function pack_backup(){ + echo "Packing up backup tar" + local backup_name=$1 + tar -cf ${backup_tars_path}/${backup_name}.tar -C $backups_path . 
+} + +function get_version(){ + cat $rails_dir/VERSION +} + +function get_backup_name(){ + if [ -n "$BACKUP_TIMESTAMP" ]; then + echo ${BACKUP_TIMESTAMP}_gitlab_backup + else + now_timestamp=$(date +%s_%Y_%m_%d) + gitlab_version=$(get_version) + echo ${now_timestamp}_${gitlab_version}_gitlab_backup + fi +} + +function get_existing_backups(){ + # This will only match backups with the same naming convention as backups generated by this script + # Example: TIMESTAMP_YYYY_MM_DD_VERSION_gitlab_backup.tar + case $BACKUP_BACKEND in + s3) + existing_backups=($(s3cmd ${S3_CMD_BACKUP_OPTION} ls s3://$BACKUP_BUCKET_NAME --rinclude '^\d{10}_\d{4}_\d{2}_\d{2}_.+_gitlab_backup.tar$' | awk '{print $4}' | LC_ALL=C sort)) + ;; + gcs) + # Note: gsutil doesn't support regex, so we need to try to match the prefix as best we can with wildcards + # https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames#other-wildcard-characters + existing_backups=($(gsutil ls gs://$BACKUP_BUCKET_NAME/[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_[0-9][0-9][0-9][0-9]_[0-9][0-9]_[0-9][0-9]_\*_gitlab_backup.tar | LC_ALL=C sort)) + ;; + *) + echo "Unknown backend for backup: ${BACKUP_BACKEND}" + exit 1 + ;; + esac +} + +function remove_backup(){ + local backup_to_remove=$1 + if [ "${BACKUP_BACKEND}" = "s3" ]; then + s3cmd ${S3_CMD_BACKUP_OPTION} del ${backup_to_remove} > /dev/null + elif [ "${BACKUP_BACKEND}" = "gcs" ]; then + gsutil rm ${backup_to_remove} > /dev/null + else + echo "Unknown backend for backup: ${BACKUP_BACKEND}" + exit 1 + fi +} + +function cleanup(){ + rm -rf $backups_path/* + rm -rf $backup_tars_path/* + + if [ -n "$MAXIMUM_BACKUPS" ]; then + get_existing_backups + + echo "Found ${#existing_backups[@]} existing backups. 
Maximum allowed is $MAXIMUM_BACKUPS" + if [ ${#existing_backups[@]} -gt $MAXIMUM_BACKUPS ]; then + i=0 + while [ $i -lt $(expr ${#existing_backups[@]} - $MAXIMUM_BACKUPS) ]; do + echo "Deleting old backup ${existing_backups[$i]}" + remove_backup ${existing_backups[$i]} + ((++i)) + done + fi + echo "[DONE] Finished pruning old backups" + fi +} + +function write_backup_info(){ + cat << EOF > $backups_path/backup_information.yml +:db_version: $($rails_dir/bin/rails runner "File.write('/tmp/db_version', ActiveRecord::Migrator.current_version.to_s)" && cat /tmp/db_version) +:backup_created_at: $(date "+%Y-%m-%d %H:%M:%S %z") +:gitlab_version: $(get_version) +:tar_version: $(tar --version | head -n 1) +:installation_type: gitlab-helm-chart +:skipped: $1 +EOF +} + +function get_skipped(){ + all=( builds.tar.gz db repositories pages.tar.gz ) + for storage in ${object_storage_backends[@]}; do + all+=( "${storage}.tar.gz" ); + done; + skipped_string="" + + for backup_item in ${all[@]}; do + if [ ! -e $backups_path/$backup_item ]; then + skipped_string="$skipped_string,${backup_item%.tar.gz}"; + fi; + done; + + echo ${skipped_string#,} +} + +function backup(){ + backup_name=$(get_backup_name) + mkdir -p $backup_tars_path $backups_path + + if ! [[ ${skipping_backup_for[@]} =~ "db" ]]; then + gitlab-rake gitlab:backup:db:create + fi + if ! [[ ${skipping_backup_for[@]} =~ "repositories" ]]; then + gitlab-rake gitlab:backup:repo:create + fi + + for backup_item in ${object_storage_backends[@]}; do + if ! 
[[ ${skipping_backup_for[@]} =~ $backup_item ]]; then + object-storage-backup $backup_item $backups_path/${backup_item}.tar.gz + fi + done + + skipped=$(get_skipped $backup_name) + write_backup_info $skipped + pack_backup $backup_name + if [ "${BACKUP_BACKEND}" = "s3" ]; then + if [ -z "${STORAGE_CLASS}" ]; then + s3cmd ${S3_CMD_BACKUP_OPTION} put ${backup_tars_path}/${backup_name}.tar s3://$BACKUP_BUCKET_NAME > /dev/null + else + s3cmd ${S3_CMD_BACKUP_OPTION} put --storage-class "${STORAGE_CLASS}" ${backup_tars_path}/${backup_name}.tar s3://$BACKUP_BUCKET_NAME > /dev/null + fi + echo "[DONE] Backup can be found at s3://$BACKUP_BUCKET_NAME/${backup_name}.tar" + elif [ "${BACKUP_BACKEND}" = "gcs" ]; then + if [ -z "${STORAGE_CLASS}" ]; then + gsutil cp -n ${backup_tars_path}/${backup_name}.tar gs://$BACKUP_BUCKET_NAME > /dev/null + else + gsutil cp -s "${STORAGE_CLASS}" -n ${backup_tars_path}/${backup_name}.tar gs://$BACKUP_BUCKET_NAME > /dev/null + fi + echo "[DONE] Backup can be found at gs://$BACKUP_BUCKET_NAME/${backup_name}.tar" + else + echo "Unknown backend for backup: ${BACKUP_BACKEND}" + fi + + cleanup +} + +function is_skipped() { + [[ $SKIPPED =~ $1 ]] +} + +function restore(){ + if [ -z "$BACKUP_URL" ] && [ -z "$BACKUP_TIMESTAMP" ]; then + echo "You need to set BACKUP_URL or BACKUP_TIMESTAMP variable" + exit 1 + fi + + BACKUP=${BACKUP_URL-} + if [ -z "$BACKUP" ]; then + BACKUP=$BACKUP_TIMESTAMP + fi + + file=$(fetch_remote_backup $BACKUP) + + dir_name=$(dirname $file) + file_name=$(basename $file) + timestamp="${file_name%%_*}" + export BACKUP=$timestamp + unpack_backup $file + + skipped_line=$(grep skipped $(dirname $file)/backup_information.yml) + export SKIPPED=$(echo ${skipped_line#:skipped:}) + + installation_type_line=$(grep installation_type $(dirname $file)/backup_information.yml || echo ":installation_type: unknown") + export INSTALLATION_TYPE=$(echo ${installation_type_line#:installation_type: }) + + ! 
is_skipped "db" && gitlab-rake gitlab:db:drop_tables + ! is_skipped "db" && gitlab-rake gitlab:backup:db:restore + + # Previous versions of the dump failed to mark the repos as skipped, so we additionally check for the directory + if [ -e $backups_path/repositories ]; then + ! is_skipped "repositories" && gitlab-rake gitlab:backup:repo:restore + fi + + ! is_skipped "builds" && gitlab-rake gitlab:backup:builds:restore + + if [ "$INSTALLATION_TYPE" = "gitlab-helm-chart" ]; then + for restore_item in ${object_storage_backends[@]}; do + if [ -f $backups_path/${restore_item}.tar.gz ]; then + ! is_skipped $restore_item && object-storage-restore $restore_item $backups_path/${restore_item}.tar.gz + fi + done + else + echo "Backup tarball not from a Helm chart based installation. Not processing files in object storage." + fi + + gitlab-rake cache:clear +} + +while [[ $# -gt 0 ]] +do + key="$1" + + case $key in + -h|--help) + usage + ACTION="none" + break + ;; + -f|--file) + BACKUP_URL="$2" + shift + shift + ;; + -t|--timestamp) + BACKUP_TIMESTAMP="$2" + shift + shift + ;; + --backend) + export BACKUP_BACKEND="$2" + shift + shift + ;; + --s3config) + if [ ! -f $2 ]; then + echo "s3cmd file specified does not exist"; + exit 1; + fi + export S3_CMD_BACKUP_OPTION="--config=$2 " + shift + shift + ;; + --restore) + if [ -z "$ACTION" ]; then + ACTION="restore" + else + echo "Only one action at a time is supported" + exit 1 + fi + shift + ;; + --rsyncable) + export GZIP_RSYNCABLE="yes" + shift + ;; + --skip) + skipping_backup_for+=( "$2" ) + shift + shift + ;; + --storage-class) + export STORAGE_CLASS="$2" + shift + shift + ;; + --maximum-backups) + export MAXIMUM_BACKUPS="$2" + if ! [[ $MAXIMUM_BACKUPS =~ ^-?[0-9]+$ ]]; then + echo "Value specified for --maximum-backups must be an integer. 
Got: ${MAXIMUM_BACKUPS}" + exit 1 + fi + shift + shift + ;; + --cleanup) + if [ -z "$ACTION" ]; then + ACTION="cleanup" + else + echo "Only one action at a time is supported" + exit 1 + fi + shift + ;; + *) + usage + echo "Unexpected parameter: $key" + exit 1 + ;; + esac +done + +if [ "$ACTION" = "restore" ]; then + restore +elif [ "$ACTION" = "cleanup" ]; then + cleanup +elif [ -z "$ACTION" ]; then + ACTION="backup" + backup +fi diff --git a/scripts/bin/entrypoint.sh b/scripts/bin/entrypoint.sh new file mode 100755 index 0000000000000000000000000000000000000000..3bc0d9e6d0b16185a1aca86d829bc68e5843f854 --- /dev/null +++ b/scripts/bin/entrypoint.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -e +/scripts/set-config "${CONFIG_TEMPLATE_DIRECTORY}" "${CONFIG_DIRECTORY:=$CONFIG_TEMPLATE_DIRECTORY}" + +cd /srv/gitlab; +echo "Attempting to run '$@' as a main process"; + +exec "$@"; diff --git a/scripts/bin/gitlab-rails b/scripts/bin/gitlab-rails new file mode 100755 index 0000000000000000000000000000000000000000..9a2febb21b8cdb44197c892390f543d3c12d8ccb --- /dev/null +++ b/scripts/bin/gitlab-rails @@ -0,0 +1,5 @@ +#!/bin/bash + +rails_dir=/srv/gitlab +cd $rails_dir +$rails_dir/bin/bundle exec rails "$@" diff --git a/scripts/bin/gitlab-rake b/scripts/bin/gitlab-rake new file mode 100755 index 0000000000000000000000000000000000000000..330d75dd5c5ee55d7831ae29e4bcf0a3b25ab4f4 --- /dev/null +++ b/scripts/bin/gitlab-rake @@ -0,0 +1,5 @@ +#!/bin/bash + +rails_dir=/srv/gitlab +cd $rails_dir +$rails_dir/bin/bundle exec rake -f $rails_dir/Rakefile "$@" diff --git a/scripts/bin/object-storage-backup b/scripts/bin/object-storage-backup new file mode 100755 index 0000000000000000000000000000000000000000..47c2a2e5a62c1c2736c7d6cedcdb093759cf1823 --- /dev/null +++ b/scripts/bin/object-storage-backup @@ -0,0 +1,10 @@ +#!/usr/bin/env ruby + +require 'object_storage_backup' + +abort ("backup_item and output_tar_path arguments needs to be passed to the script") unless ARGV.length == 2 + 
+bucket_name = ENV["#{ARGV[0].upcase}_BUCKET_NAME"] || "gitlab-#{ARGV[0]}" +tmp_bucket = ENV['TMP_BUCKET_NAME'] || 'tmp' +backend_type = ENV['BACKUP_BACKEND'] || 's3' +ObjectStorageBackup.new(ARGV[0], ARGV[1], bucket_name, tmp_bucket, backend_type).backup diff --git a/scripts/bin/object-storage-restore b/scripts/bin/object-storage-restore new file mode 100755 index 0000000000000000000000000000000000000000..e8a5958e76339f8abfe00d9d1dd22c22adb1039d --- /dev/null +++ b/scripts/bin/object-storage-restore @@ -0,0 +1,10 @@ +#!/usr/bin/env ruby + +require 'object_storage_backup' + +abort("restore_item and tar path needs to be passed as arguments to the script") unless ARGV.length == 2 + +bucket_name = ENV["#{ARGV[0].upcase}_BUCKET_NAME"] || "gitlab-#{ARGV[0]}" +tmp_bucket = ENV['TMP_BUCKET_NAME'] || 'tmp' +backend_type = ENV['BACKUP_BACKEND'] || 's3' +ObjectStorageBackup.new(ARGV[0], ARGV[1], bucket_name, tmp_bucket, backend_type).restore diff --git a/scripts/lib/object_storage_backup.rb b/scripts/lib/object_storage_backup.rb new file mode 100644 index 0000000000000000000000000000000000000000..ab2c42e0161803700fc08711e324bd40525f1bac --- /dev/null +++ b/scripts/lib/object_storage_backup.rb @@ -0,0 +1,145 @@ +require 'open3' +require 'fileutils' + +class String + def red; "\e[31m#{self}\e[0m" end + def green; "\e[32m#{self}\e[0m" end + def blue; "\e[34m#{self}\e[0m" end +end + +class ObjectStorageBackup + attr_accessor :name, :local_tar_path, :remote_bucket_name, :tmp_bucket_name, :backend + + def initialize(name, local_tar_path, remote_bucket_name, tmp_bucket_name = 'tmp', backend = 's3') + @name = name + @local_tar_path = local_tar_path + @remote_bucket_name = remote_bucket_name + @tmp_bucket_name = tmp_bucket_name + @backend = backend + end + + def backup + if @backend == "s3" + # Check bucket existence by listing, limit 1 to optimize + check_bucket_cmd = %W(s3cmd --limit=1 ls s3://#{@remote_bucket_name}) + cmd = %W(s3cmd --stop-on-error --delete-removed sync 
s3://#{@remote_bucket_name}/ /srv/gitlab/tmp/#{@name}/) + elsif @backend == "gcs" + check_bucket_cmd = %W(gsutil ls gs://#{@remote_bucket_name}) + cmd = %W(gsutil -m rsync -r gs://#{@remote_bucket_name} /srv/gitlab/tmp/#{@name}) + end + + # Check if the bucket exists + output, status = run_cmd(check_bucket_cmd) + unless status.zero? + puts "Bucket not found: #{@remote_bucket_name}. Skipping backup of #{@name} ...".blue + return + end + + puts "Dumping #{@name} ...".blue + + # create the destination: gsutils requires it to exist, s3cmd does not + FileUtils.mkdir_p("/srv/gitlab/tmp/#{@name}", mode: 0700) + + output, status = run_cmd(cmd) + failure_abort('creation of working directory', output) unless status.zero? + + # check the destiation for contents. Bucket may have been empty. + if Dir.empty? "/srv/gitlab/tmp/#{@name}" + puts "empty".green + return + end + + # build gzip command used for tar compression + gzip_cmd = 'gzip' + (ENV['GZIP_RSYNCABLE'] == 'yes' ? ' --rsyncable' : '') + + cmd = %W(tar -cf #{@local_tar_path} -I #{gzip_cmd} -C /srv/gitlab/tmp/#{@name} . ) + output, status = run_cmd(cmd) + failure_abort('archive', output) unless status.zero? + + puts "done".green + end + + def restore + puts "Restoring #{@name} ...".blue + + backup_existing + cleanup + restore_from_backup + puts "done".green + end + + def failure_abort(action, error_message) + puts "[Error] #{error_message}".red + abort "#{action} of #{@name} failed" + end + + def upload_to_object_storage(source_path) + dir_name = File.basename(source_path) + if @backend == "s3" + cmd = %W(s3cmd --stop-on-error sync #{source_path}/ s3://#{@remote_bucket_name}/#{dir_name}/) + elsif @backend == "gcs" + cmd = %W(gsutil -m rsync -r #{source_path}/ gs://#{@remote_bucket_name}/#{dir_name}) + end + + output, status = run_cmd(cmd) + + failure_abort('upload', output) unless status.zero? 
+ end + + def backup_existing + backup_file_name = "#{@name}.#{Time.now.to_i}" + + if @backend == "s3" + cmd = %W(s3cmd sync s3://#{@remote_bucket_name} s3://#{@tmp_bucket_name}/#{backup_file_name}/) + elsif @backend == "gcs" + cmd = %W(gsutil -m rsync -r gs://#{@remote_bucket_name} gs://#{@tmp_bucket_name}/#{backup_file_name}/) + end + + output, status = run_cmd(cmd) + + failure_abort('sync existing', output) unless status.zero? + end + + def cleanup + if @backend == "s3" + cmd = %W(s3cmd --stop-on-error del --force --recursive s3://#{@remote_bucket_name}) + elsif @backend == "gcs" + # Check if the bucket has any objects + list_objects_cmd = %W(gsutil ls gs://#{@remote_bucket_name}/) + output, status = run_cmd(list_objects_cmd) + failure_abort('GCS ls', output) unless status.zero? + + # There are no objects in the bucket so skip the cleanup + if output.length == 0 + return + end + + cmd = %W(gsutil rm -f -r gs://#{@remote_bucket_name}/*) + end + output, status = run_cmd(cmd) + failure_abort('bucket cleanup', output) unless status.zero? + end + + def restore_from_backup + extracted_tar_path = File.join(File.dirname(@local_tar_path), "/srv/gitlab/tmp/#{@name}") + FileUtils.mkdir_p(extracted_tar_path, mode: 0700) + + failure_abort('restore', "#{@local_tar_path} not found") unless File.exist?(@local_tar_path) + + untar_cmd = %W(tar -xf #{@local_tar_path} -C #{extracted_tar_path}) + + output, status = run_cmd(untar_cmd) + + failure_abort('un-archive', output) unless status.zero? + + Dir.glob("#{extracted_tar_path}/*").each do |file| + upload_to_object_storage(file) + end + end + + def run_cmd(cmd) + _, stdout, wait_thr = Open3.popen2e(*cmd) + return stdout.read, wait_thr.value.exitstatus + end + +end