
Commit 6ffa5cbb authored by Branden Cobb

SKIP UPGRADE Pipeline Refactor

parent 4115ae24
2 merge requests: !1386 "Master", !1057 "SKIP UPGRADE Pipeline Refactor"
Showing 3 additions and 1164 deletions
# Big Bang CI pipeline definitions
# There are 4 different use-cases/pipelines that are supported by this file:
# 1) Build and test changes made from a Merge Request using a docker-in-docker (DIND) K3D cluster deployment inside the
# gitlab runner.
# 2) Build and test a commit to the master branch (default branch) using an AWS-created K3S cluster which is deployed
# using Terraform.
# 3) Build and test a new release and/or tagged commit using a docker-in-docker (DIND) K3D cluster deployment inside the
# gitlab runner. Once a successful build and test is completed, package the build and perform a release operation.
# 4) Periodically, at a scheduled time, build and test the master branch using an AWS-created K3S cluster which is deployed
# using Terraform.
# Global rules for when pipelines run
workflow:
rules:
# run pipeline for manual tag events such as a new release
- if: $CI_COMMIT_TAG
# run pipeline on merge request events
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
# run pipeline on commits to default branch
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
# skip pipeline for branches that start with "docs"
- if: '$CI_COMMIT_REF_NAME =~ /^docs/i'
when: never
# Enable CI pipeline testing when the commit message contains "test-ci"
- if: $CI_PIPELINE_SOURCE == "push" && $CI_COMMIT_MESSAGE =~ /.*test-ci.*/i
# Include templates and cluster creation jobs
# Include templates and cluster creation jobs
include:
- local: '/.gitlab-ci/templates.yml'
# Replaced in this commit by the shared pipeline templates:
# - project: 'platform-one/big-bang/pipeline-templates/pipeline-templates'
#   ref: master
#   file: '/pipelines/bigbang.yaml'
# Pipeline stages
# - Smoke tests are executed for all pipelines except scheduled nightly runs.
stages:
- 🔥 smoke tests
- 🔌 network up
- ⚓ cluster up
- 🌌 bigbang up
- 🤞 test
- 💣 bigbang down
- 💣 cluster down
- 💣 network down
- 📦 package
- 🚀 release
variables:
RELEASE_BUCKET: umbrella-bigbang-releases
IMAGE_LIST: images.txt
IMAGE_PKG: images.tar.gz
REPOS_PKG: repositories.tar.gz
VALUES_FILE: chart/values.yaml
CI_VALUES_FILE: tests/ci/k3d/values.yaml
REGISTRY1_USER: $REGISTRY1_USER
#-----------------------------------------------------------------------------------------------------------------------
# Pre Stage Jobs. These execute before any other job runs.
#
pre vars:
image: registry.dso.mil/platform-one/big-bang/pipeline-templates/pipeline-templates/pre-envs:ubi8.3
stage: .pre
extends:
- .bigbang-gitlab-runner-tags
artifacts:
reports:
dotenv: variables.env
script:
# Create the TF_VAR_env variable
- echo "TF_VAR_env=$(echo $CI_COMMIT_REF_SLUG | cut -c 1-7)-$(echo $CI_COMMIT_SHA | cut -c 1-7)" >> variables.env
- cat variables.env
retry:
max: 2
when:
- unknown_failure
- stuck_or_timeout_failure
- runner_system_failure
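The `TF_VAR_env` value built here becomes the unique environment name used by the Terraform jobs, combining the first 7 characters of the branch slug with the first 7 characters of the commit SHA so concurrent pipelines never collide on AWS resource names. For example (sample values, not from a real pipeline):

```bash
# Sample values only; CI provides these variables automatically
CI_COMMIT_REF_SLUG="feature-add-istio-cni"
CI_COMMIT_SHA="4115ae24c0d3f2a9b8e7d6c5b4a39281706f5e4d"
echo "TF_VAR_env=$(echo $CI_COMMIT_REF_SLUG | cut -c 1-7)-$(echo $CI_COMMIT_SHA | cut -c 1-7)"
# prints: TF_VAR_env=feature-4115ae2
```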
#-----------------------------------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------
# Smoke Tests
#
.chart_changes: &chart_changes
changes:
- chart/**/*
- .gitlab-ci.yml
- .gitlab-ci/jobs/**/*
- scripts/**/*
- tests/**/*
- base/flux/*
.deploy_bigbang: &deploy_bigbang
- |
set -e
for deploy_script in $(find ./tests/deploy -type f -name '*.sh' | sort); do
chmod +x ${deploy_script}
echo -e "\e[0Ksection_start:`date +%s`:${deploy_script##*/}[collapsed=true]\r\e[0K\e[33;1m${deploy_script##*/}\e[37m"
./${deploy_script}
echo -e "\e[0Ksection_end:`date +%s`:${deploy_script##*/}\r\e[0K"
done
.test_bigbang: &test_bigbang
- |
set -e
for test_script in $(find ./tests/tests -type f -name '*.sh' | sort); do
echo -e "\e[0Ksection_start:`date +%s`:${test_script##*/}[collapsed=true]\r\e[0K\e[33;1m${test_script##*/}\e[37m"
chmod +x ${test_script}
echo "Executing ${test_script}..."
./${test_script} && export EXIT_CODE=$? || export EXIT_CODE=$?
if [[ ${EXIT_CODE} -ne 0 ]]; then
if [[ ${EXIT_CODE} -ne 123 ]]; then
echo -e "\e[31m❌ ${test_script} failed, see log output above and cluster debug.\e[0m"
exit ${EXIT_CODE}
fi
# 123 error codes are allowed to continue
echo -e "\e[31m⚠️ ${test_script} failed but was allowed to continue, see log output above and cluster debug.\e[0m"
EXIT_FLAG=1
fi
echo -e "\e[0Ksection_end:`date +%s`:${test_script##*/}\r\e[0K"
done
if [[ -n "$EXIT_FLAG" ]]; then
echo -e "\e[31m⚠️ WARNING: One or more BB tests failed but were allowed to continue. See output of scripts above for details.\e[0m"
fi
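Exit code 123 is this file's soft-failure convention: `.test_bigbang` lets a script that exits 123 keep the suite running, the job re-raises 123 at the end, and the jobs' `allow_failure: exit_codes: 123` setting turns that into "passed with warnings" rather than a hard failure. A minimal sketch of a conforming test script (the file name and the check itself are hypothetical, not part of this repo):

```bash
#!/usr/bin/env bash
# tests/tests/99-example-check.sh (hypothetical): a non-blocking check
set -e

# Soft-fail if any pod has entered the Failed phase; exit 123 lets
# .test_bigbang log a warning and continue with the remaining scripts.
if kubectl get pods -A --field-selector=status.phase=Failed 2>/dev/null | grep -q .; then
  echo "Found failed pods; flagging as a soft failure"
  exit 123
fi
echo "No failed pods found"
```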
clean install:
stage: 🔥 smoke tests
extends:
- .k3d-ci
variables:
CLUSTER_NAME: "clean-${CI_COMMIT_SHORT_SHA}"
rules:
# Always run a clean installation test unless the AWS cluster installation runs instead (scheduled nightly master test or MRs labeled test-ci::infra)
- if: '($CI_PIPELINE_SOURCE == "schedule" && $CI_COMMIT_BRANCH == "master") || $CI_MERGE_REQUEST_LABELS =~ /(^|,)test-ci::infra(,|$)/'
when: never
- *chart_changes
script:
- *deploy_bigbang
# Fetch the list of all images used (retry crictl up to 6x)
- echo -e "\e[0Ksection_start:`date +%s`:images_used[collapsed=true]\r\e[0K\e[33;1mImages Used\e[37m"
- cid=$(docker ps -aqf "name=k3d-${CI_JOB_ID}-server-0")
- images=$(timeout 65 bash -c "until docker exec $cid crictl images -o json; do sleep 10; done;")
- echo $images | jq -r '.images[].repoTags[0] | select(. != null)' | tee images.txt
- echo -e "\e[0Ksection_end:`date +%s`:images_used\r\e[0K"
- *test_bigbang
- |
if [[ $EXIT_FLAG -eq 1 ]]; then
exit 123
fi
artifacts:
paths:
- images.txt
- "test-artifacts/"
expire_in: 3 days
when: always
allow_failure:
exit_codes: 123
retry:
max: 2
when:
- unknown_failure
- stuck_or_timeout_failure
- runner_system_failure
upgrade:
stage: 🔥 smoke tests
dependencies:
- pre vars
extends:
- .k3d-ci
rules:
# skip job for nightly master and "test-ci::infra" labeled pipelines
- if: '($CI_PIPELINE_SOURCE == "schedule" && $CI_COMMIT_BRANCH == "master") || $CI_MERGE_REQUEST_LABELS =~ /(^|,)test-ci::infra(,|$)/'
when: never
# skip job when MR title starts with 'SKIP UPGRADE'
- if: '$CI_MERGE_REQUEST_TITLE =~ /SKIP UPGRADE/'
when: never
# run pipeline on merge request events
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
<<: *chart_changes
variables:
CLUSTER_NAME: "upgrade-${CI_COMMIT_SHORT_SHA}"
script:
- echo "🌌 Install Big Bang from ${CI_MERGE_REQUEST_TARGET_BRANCH_NAME}"
- echo -e "\e[0Ksection_start:`date +%s`:git_master[collapsed=true]\r\e[0K\e[33;1mGit Fetch Master\e[37m"
- git fetch && git checkout ${CI_MERGE_REQUEST_TARGET_BRANCH_NAME}
- echo -e "\e[0Ksection_end:`date +%s`:git_master\r\e[0K"
- *deploy_bigbang
- *test_bigbang
- echo "🌌 Upgrade Big Bang from ${CI_MERGE_REQUEST_SOURCE_BRANCH_NAME}"
- echo -e "\e[0Ksection_start:`date +%s`:git_upgrade[collapsed=true]\r\e[0K\e[33;1mGit Upgrade\e[37m"
- git reset --hard && git clean -fd
- git checkout ${CI_MERGE_REQUEST_SOURCE_BRANCH_NAME}
- echo -e "\e[0Ksection_end:`date +%s`:git_upgrade\r\e[0K"
- *deploy_bigbang
- *test_bigbang
- |
if [[ $EXIT_FLAG -eq 1 ]]; then
exit 123
fi
artifacts:
paths:
- "test-artifacts/"
expire_in: 3 days
when: always
allow_failure:
exit_codes: 123
retry:
max: 2
when:
- unknown_failure
- stuck_or_timeout_failure
- runner_system_failure
#-----------------------------------------------------------------------------------------------------------------------
# Rules for execution of AWS based K3S cluster deployment: Infrastructure jobs
#
# Abstract for the job that kicks off infrastructure builds
.infra fork:
stage: 🔌 network up
rules:
# Run on scheduled jobs OR when the `test-ci::infra` label is assigned
- if: '($CI_PIPELINE_SOURCE == "schedule" && $CI_COMMIT_BRANCH == "master") || $CI_MERGE_REQUEST_LABELS =~ /(^|,)test-ci::infra(,|$)/'
allow_failure: false
# Abstract for jobs responsible for creating infrastructure
.infra create:
rules:
# Run on scheduled jobs OR when the `test-ci::infra` label is assigned
- if: '($CI_PIPELINE_SOURCE == "schedule" && $CI_COMMIT_BRANCH == "master") || $CI_MERGE_REQUEST_LABELS =~ /(^|,)test-ci::infra(,|$)/'
# skip job when branch name starts with "hotfix" or "patch"
- if: '$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME =~ /^(hotfix|patch)/'
when: never
# Abstract for jobs responsible for cleaning up infrastructure
.infra cleanup:
rules:
# Run on scheduled jobs OR when the `test-ci::infra` label is assigned
- if: '($CI_PIPELINE_SOURCE == "schedule" && $CI_COMMIT_BRANCH == "master") || $CI_MERGE_REQUEST_LABELS =~ /(^|,)test-ci::infra(,|$)/'
allow_failure: true
when: always
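The `(^|,)test-ci::infra(,|$)` pattern matches the label only at comma boundaries, since `CI_MERGE_REQUEST_LABELS` is a comma-separated list; this prevents a longer label that merely contains the string from triggering infrastructure runs. A quick local sanity check of the regex (the sample label strings are made up):

```bash
# Exercise the rule's regex against hypothetical label lists
for labels in "test-ci::infra" "bug,test-ci::infra,docs" "my-test-ci::infra-ish"; do
  if [[ "$labels" =~ (^|,)test-ci::infra(,|$) ]]; then
    echo "match:    $labels"
  else
    echo "no match: $labels"
  fi
done
# The first two match; the third does not.
```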
#-----------------------------------------------------------------------------------------------------------------------
# Infrastructure: Networking
#
aws/network up:
extends:
- .bigbang-gitlab-runner-tags
- .infra fork
- .network up
environment:
name: review/aws-${CI_COMMIT_REF_SLUG}-${CI_COMMIT_SHORT_SHA}
auto_stop_in: 1 hour
retry:
max: 2
when:
- unknown_failure
- stuck_or_timeout_failure
- runner_system_failure
aws/network down:
extends:
- .bigbang-gitlab-runner-tags
- .infra cleanup
- .network down
stage: 💣 network down
environment:
name: review/aws-${CI_COMMIT_REF_SLUG}-${CI_COMMIT_SHORT_SHA}
action: stop
retry:
max: 2
when:
- unknown_failure
- stuck_or_timeout_failure
- runner_system_failure
#-----------------------------------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------
# Infrastructure: RKE2
#
# Create RKE2 cluster on AWS
aws/rke2/cluster up:
stage: ⚓ cluster up
extends:
- .bigbang-gitlab-runner-tags
- .infra create
- .rke2 up
needs:
- job: aws/network up
- job: pre vars
artifacts: true
environment:
name: review/aws-${CI_COMMIT_REF_SLUG}-${CI_COMMIT_SHORT_SHA}
retry:
max: 2
when:
- unknown_failure
- stuck_or_timeout_failure
- runner_system_failure
# Install BigBang on RKE2 cluster on AWS
aws/rke2/bigbang up:
stage: 🌌 bigbang up
extends:
- .bigbang-gitlab-runner-tags
- .infra create
- .kubectl-output
needs:
- job: aws/rke2/cluster up
artifacts: true
before_script:
- mkdir -p ~/.kube
- cp ${CI_PROJECT_DIR}/rke2.yaml ~/.kube/config
# Deploy a default storage class for aws
- kubectl apply -f ${CI_PROJECT_DIR}/.gitlab-ci/jobs/rke2/dependencies/k8s-resources/aws/default-ebs-sc.yaml
script:
- *deploy_bigbang
environment:
name: review/aws-${CI_COMMIT_REF_SLUG}-${CI_COMMIT_SHORT_SHA}
retry:
max: 2
when:
- unknown_failure
- stuck_or_timeout_failure
- runner_system_failure
# Run tests on BigBang on RKE2 cluster on AWS
aws/rke2/bigbang test:
stage: 🤞 test
extends:
- .bigbang-gitlab-runner-tags
- .infra create
- .kubectl-output
needs:
- job: aws/rke2/cluster up
artifacts: true
- job: aws/rke2/bigbang up
before_script:
- mkdir -p ~/.kube
- cp ${CI_PROJECT_DIR}/rke2.yaml ~/.kube/config
script:
## TODO: move this yum install into the Dockerfile for the builder;
## putting it here for now as a quick way to install dig
- echo -e "\e[0Ksection_start:`date +%s`:host_setup[collapsed=true]\r\e[0K\e[33;1mHost Setup\e[37m"
- yum install bind-utils -y
- echo -e "\e[0Ksection_end:`date +%s`:host_setup\r\e[0K"
- *test_bigbang
- |
if [[ $EXIT_FLAG -eq 1 ]]; then
exit 123
fi
environment:
name: review/aws-${CI_COMMIT_REF_SLUG}-${CI_COMMIT_SHORT_SHA}
allow_failure:
exit_codes: 123
retry:
max: 2
when:
- unknown_failure
- stuck_or_timeout_failure
- runner_system_failure
artifacts:
paths:
- "test-artifacts/"
expire_in: 3 days
when: always
# Uninstall BigBang on RKE2 cluster on AWS
aws/rke2/bigbang down:
stage: 💣 bigbang down
extends:
- .bigbang-gitlab-runner-tags
- .infra cleanup
- .kubectl-output
needs:
- job: aws/rke2/cluster up
artifacts: true
- job: aws/rke2/bigbang test
before_script:
- mkdir -p ~/.kube
- cp ${CI_PROJECT_DIR}/rke2.yaml ~/.kube/config
script:
- helm un -n bigbang bigbang
# TODO: Smarter wait
- sleep 180
environment:
name: review/aws-${CI_COMMIT_REF_SLUG}-${CI_COMMIT_SHORT_SHA}
retry:
max: 2
when:
- unknown_failure
- stuck_or_timeout_failure
- runner_system_failure
# Destroy RKE2 cluster on AWS
aws/rke2/cluster down:
stage: 💣 cluster down
extends:
- .bigbang-gitlab-runner-tags
- .infra cleanup
- .rke2 down
needs:
- job: aws/rke2/bigbang down
- job: pre vars
artifacts: true
environment:
name: review/aws-${CI_COMMIT_REF_SLUG}-${CI_COMMIT_SHORT_SHA}
retry:
max: 2
when:
- unknown_failure
- stuck_or_timeout_failure
- runner_system_failure
#-----------------------------------------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------
# Release Jobs
#
package:
stage: 📦 package
image: registry.dso.mil/platform-one/big-bang/bigbang/synker:0.0.3
extends:
- .bigbang-gitlab-runner-tags
rules:
# run job for manual tag events or test-ci::release MRs
- if: '$CI_COMMIT_TAG || $CI_MERGE_REQUEST_LABELS =~ /(^|,)test-ci::release(,|$)/'
before_script:
# Set up auth
- mkdir -p /root/.docker
- |
jq -n '{"auths": {"registry.dso.mil": {"auth": $bb_registry_auth}, "registry1.dso.mil": {"auth": $registry1_auth}, "registry.il2.dso.mil": {"auth": $il2_registry_auth}, "docker.io": {"auth": $bb_docker_auth} } }' \
--arg bb_registry_auth ${BB_REGISTRY_AUTH} \
--arg registry1_auth ${REGISTRY1_AUTH} \
--arg il2_registry_auth ${IL2_REGISTRY_AUTH} \
--arg bb_docker_auth ${DOCKER_AUTH} > /root/.docker/config.json
script:
- echo -e "\e[0Ksection_start:`date +%s`:synker_pull[collapsed=true]\r\e[0K\e[33;1mSynker Pull\e[37m"
- cp ./scripts/package/synker.yaml ./synker.yaml
# Populate images list in synker config
- |
for image in $(cat images.txt); do
yq -i e "(.source.images |= . + \"${image}\")" "./synker.yaml"
done
- synker pull -b=1
- echo -e "\e[0Ksection_end:`date +%s`:synker_pull\r\e[0K"
# Create image list from synker, overwrite since ./synker.yaml contains everything at this point
- yq e '.source.images | .[] | ... comments=""' "./synker.yaml" > images.txt
# Tar up synker as well?
- cp /usr/local/bin/synker synker.yaml /var/lib/registry/
# Grab the registry image
- crane pull registry:2 registry.tar
- mv registry.tar /var/lib/registry/
- echo -e "\e[0Ksection_start:`date +%s`:package_synker[collapsed=true]\r\e[0K\e[33;1mPackage Images\e[37m"
- tar -czvf $IMAGE_PKG /var/lib/registry
- echo -e "\e[0Ksection_end:`date +%s`:package_synker\r\e[0K"
# Package dependent repos
- echo -e "\e[0Ksection_start:`date +%s`:package_repos[collapsed=true]\r\e[0K\e[33;1mPackage Repos\e[37m"
- ./scripts/package/gits.sh
- tar -czf $REPOS_PKG repos/
- echo -e "\e[0Ksection_end:`date +%s`:package_repos\r\e[0K"
# Prep release
- mkdir -p release
- mv $IMAGE_LIST $IMAGE_PKG $REPOS_PKG release/
# Publish packages to s3 release
- |
if [ -z $CI_COMMIT_TAG ]; then
aws s3 sync --quiet release/ s3://umbrella-bigbang-releases/tests/${CI_COMMIT_SHA}
else
aws s3 sync --quiet release/ s3://umbrella-bigbang-releases/umbrella/${CI_COMMIT_TAG}
fi
after_script: []
retry:
max: 2
when:
- unknown_failure
- stuck_or_timeout_failure
- runner_system_failure
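The published `images.tar.gz` is a tarball of a Docker registry's storage directory, bundled with the synker binary and a `registry:2` image pulled via crane, so an airgapped consumer can serve every image in `images.txt` without upstream access. A minimal sketch of unpacking and serving the package on the far side (the paths and port here are assumptions, not part of the release process above):

```bash
# Hypothetical airgapped import of a Big Bang release package
tar -xzvf images.tar.gz -C /               # restores /var/lib/registry
docker load < /var/lib/registry/registry.tar
docker run -d --name registry -p 5000:5000 \
  -v /var/lib/registry:/var/lib/registry registry:2
# Images listed in images.txt should now be pullable from localhost:5000
```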
release:
stage: 🚀 release
image: registry.gitlab.com/gitlab-org/release-cli:latest
extends:
- .bigbang-gitlab-runner-tags
rules:
# run job for manual tag events or test-ci::release MRs
- if: '$CI_COMMIT_TAG || $CI_MERGE_REQUEST_LABELS =~ /(^|,)test-ci::release(,|$)/'
variables:
RELEASE_ENDPOINT: https://${RELEASE_BUCKET}.s3-${AWS_DEFAULT_REGION}.amazonaws.com/umbrella/${CI_COMMIT_TAG}
script:
# Use release-cli to cut a release in Gitlab or simulate a dry-run & print asset links
- |
if [ -z $CI_COMMIT_TAG ]; then
RELEASE_ENDPOINT="https://${RELEASE_BUCKET}.s3-${AWS_DEFAULT_REGION}.amazonaws.com/tests/${CI_COMMIT_SHA}"
printf "Release will run: \n\
release-cli create --name \"Big Bang \${CI_COMMIT_TAG}\" --tag-name \${CI_COMMIT_TAG} \n\
--description \"Automated release notes are a WIP.\" \n\
--assets-link \"{\"name\":\"${IMAGE_LIST}\",\"url\":\"${RELEASE_ENDPOINT}/${IMAGE_LIST}\"}\" \n\
--assets-link \"{\"name\":\"${IMAGE_PKG}\",\"url\":\"${RELEASE_ENDPOINT}/${IMAGE_PKG}\"}\" \n\
--assets-link \"{\"name\":\"${REPOS_PKG}\",\"url\":\"${RELEASE_ENDPOINT}/${REPOS_PKG}\"}\"\n"
else
release-cli create --name "Big Bang ${CI_COMMIT_TAG}" --tag-name ${CI_COMMIT_TAG} \
--description "Automated release notes are a WIP." \
--assets-link "{\"name\":\"${IMAGE_LIST}\",\"url\":\"${RELEASE_ENDPOINT}/${IMAGE_LIST}\"}" \
--assets-link "{\"name\":\"${IMAGE_PKG}\",\"url\":\"${RELEASE_ENDPOINT}/${IMAGE_PKG}\"}" \
--assets-link "{\"name\":\"${REPOS_PKG}\",\"url\":\"${RELEASE_ENDPOINT}/${REPOS_PKG}\"}"
fi
retry:
max: 2
when:
- unknown_failure
- stuck_or_timeout_failure
- runner_system_failure
#-----------------------------------------------------------------------------------------------------------------------
{{ template "chart.header" . }}
{{ template "chart.deprecationWarning" . }}
{{ template "chart.badgesSection" . }}
{{ template "chart.description" . }}
{{ template "chart.homepageLine" . }}
> _This is a mirror of a government repo hosted on [Repo1](https://repo1.dso.mil/) by [DoD Platform One](http://p1.dso.mil/). Please direct all code changes, issues and comments to https://repo1.dso.mil/platform-one/big-bang/bigbang_
Big Bang follows a [GitOps](#gitops) approach to configuration management, using [Flux v2](#flux-v2) to reconcile Git with the cluster. Environments (e.g. dev, prod) and packages (e.g. istio) can be fully configured to suit the deployment needs.
## Usage
Big Bang is intended to be used for deploying and maintaining a DoD hardened and approved set of packages into a Kubernetes cluster. Deployment and configuration of ingress/egress, load balancing, policy auditing, logging, monitoring, etc. are handled via Big Bang. Additional packages (e.g. ArgoCD, GitLab) can also be enabled and customized to extend Big Bang's baseline. Once deployed, the customer can use the Kubernetes cluster to add mission specific applications.
Additional information can be found in [Big Bang Overview](./docs/1_overview.md).
## Getting Started
To start using Big Bang, you will need to create your own Big Bang environment tailored to your needs. The [Big Bang customer template](https://repo1.dso.mil/platform-one/big-bang/customers/template/) is provided for you to copy into your own Git repository and begin modifications. Follow the instructions in [Big Bang Getting Started](./docs/2_getting_started.md) to customize and deploy Big Bang.
{{ template "chart.maintainersSection" . }}
{{ template "chart.sourcesSection" . }}
{{ template "chart.requirementsSection" . }}
{{ template "chart.valuesSection" . }}
## Contributing
Please see the [contributing guide](./CONTRIBUTING.md) if you are interested in contributing to Big Bang.
.k8s-util:
image: registry.dso.mil/platform-one/big-bang/pipeline-templates/pipeline-templates/k8s-ci:v1.20.4-bb.3
.dind-runner:
tags:
- bigbang
- umbrella
- privileged
- dogfood
# In-cluster k3s using K3D, with the docker daemon as a sidecar
#
# This connects to a remote docker daemon over TLS TCP (configured when the gitlab runners were installed) and creates
# a k3d cluster in a custom-built docker bridge network.
#
.k3d-ci:
extends:
- .k8s-util
- .dind-runner
# services:
# # Added in through gitlab ci configuration; left in case some poor soul needs to come debug this later
# - name: docker:20.10.5-dind
variables:
DOCKER_HOST: tcp://localhost:2376
DOCKER_TLS_CERTDIR: "/certs"
DOCKER_TLS_VERIFY: 1
DOCKER_CERT_PATH: "$DOCKER_TLS_CERTDIR/client"
DOCKER_DRIVER: overlay2
before_script:
- echo -e "\e[0Ksection_start:`date +%s`:k3d_up[collapsed=true]\r\e[0K\e[33;1mK3D Cluster Create\e[37m"
# Give docker-in-docker time to come alive
- i=0; while [ "$i" -lt 12 ]; do docker info &>/dev/null && break; sleep 5; i=$(( i + 1 )) ; done
- docker network create ${CI_JOB_ID} --driver=bridge -o "com.docker.network.driver.mtu"="1450" --subnet=172.20.0.0/16
- chmod +x tests/ci/k3d/deploy_k3d.sh; echo "Executing tests/ci/k3d/deploy_k3d.sh..."; ./tests/ci/k3d/deploy_k3d.sh
- until kubectl get deployment coredns -n kube-system -o go-template='{{.status.availableReplicas}}' | grep -v -e '<no value>'; do sleep 1s; done
- chmod +x tests/ci/k3d/metallb/install_metallb.sh; echo "Executing tests/ci/k3d/metallb/install_metallb.sh..."; ./tests/ci/k3d/metallb/install_metallb.sh
- kubectl get all -A
- echo -e "\e[0Ksection_end:`date +%s`:k3d_up\r\e[0K"
after_script:
- echo -e "\e[0Ksection_start:`date +%s`:k3d_down[collapsed=true]\r\e[0K\e[33;1mK3D Cluster Delete\e[37m"
- kubectl get all -A
- echo -e "\e[0Ksection_start:`date +%s`:show_event_log[collapsed=true]\r\e[0K\e[33;1mCluster event log:\e[37m"
- kubectl get events -A
- echo -e "\e[0Ksection_end:`date +%s`:show_event_log\r\e[0K"
- kubectl get gitrepository,helmrelease,kustomizations -A
- k3d cluster delete ${CI_JOB_ID}
- docker network rm ${CI_JOB_ID}
- echo -e "\e[0Ksection_end:`date +%s`:k3d_down\r\e[0K"
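The same environment can be approximated outside of CI against a local docker daemon: a dedicated bridge network with the MTU lowered to 1450 (matching the runners' network), a k3d cluster attached to it, then the MetalLB install. A rough local equivalent (the cluster name and the `k3d cluster create` flags are assumptions; CI drives cluster creation through `tests/ci/k3d/deploy_k3d.sh`):

```bash
# Rough local equivalent of the .k3d-ci before_script
docker network create k3d-local --driver=bridge \
  -o "com.docker.network.driver.mtu"="1450" --subnet=172.20.0.0/16
k3d cluster create k3d-local --network k3d-local   # assumed flags
until kubectl get deployment coredns -n kube-system \
  -o go-template='{{.status.availableReplicas}}' | grep -v -e '<no value>'; do
  sleep 1s
done
./tests/ci/k3d/metallb/install_metallb.sh
kubectl get all -A
```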
.calc_unique_cidr: &calc_unique_cidr
- apk add python3 py3-boto3
- echo "Calculating unique cidr range for vpc"
- TF_VAR_vpc_cidr=$(terraform output vpc_cidr | tr -d '\n' | tr -d '\r' | grep 10) || TF_VAR_vpc_cidr=$(python3 ../../../get-vpc.py | tr -d '\n' | tr -d '\r')
- echo "Using VPC CIDR $TF_VAR_vpc_cidr for $CLUSTER_NAME cluster"
- export TF_VAR_vpc_cidr=$TF_VAR_vpc_cidr
.network:
extends: .terraformer
variables:
TF_ROOT: ".gitlab-ci/jobs/networking/aws/dependencies/terraform/env/ci"
.network up:
extends: .network
script:
- echo -e "\e[0Ksection_start:`date +%s`:network_up[collapsed=true]\r\e[0K\e[33;1mNetwork Up\e[37m"
- *calc_unique_cidr
- echo "Creating network with cidr range ${TF_VAR_vpc_cidr}"
# Loop to retry network up terraform apply due to issues locking terraform.state in s3
- |
set -e
attempt_counter=0
max_attempts=2
until [ $(terraform apply -auto-approve >/dev/null; echo $?) -eq 0 ]; do
if [ ${attempt_counter} == ${max_attempts} ];then
echo "Error applying network up terraform"
exit 1
fi
attempt_counter=$(($attempt_counter+1))
echo "Attempt failed to apply will retry in 30 seconds"
sleep 30
done
- echo -e "\e[0Ksection_end:`date +%s`:network_up\r\e[0K"
.network down:
extends:
- .network
- .terraform destroy workspace
script:
- echo -e "\e[0Ksection_start:`date +%s`:network_down[collapsed=true]\r\e[0K\e[33;1mNetwork Down\e[37m"
- *calc_unique_cidr
- echo "Destroying network"
# Loop to retry the network terraform destroy
- |
set -e
attempt_counter=0
max_attempts=2
until [ $(terraform destroy -auto-approve >/dev/null; echo $?) -eq 0 ]; do
if [ ${attempt_counter} == ${max_attempts} ];then
echo "Error destroying network terraform"
exit 1
fi
attempt_counter=$(($attempt_counter+1))
echo "Attempt failed to destroy will retry in 30 seconds"
sleep 30
done
- echo -e "\e[0Ksection_end:`date +%s`:network_down\r\e[0K"
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true
[dev-packages]
[packages]
boto3 = "*"
[requires]
python_version = "3.8"
{
"_meta": {
"hash": {
"sha256": "0ba145c19353da73840755ed85984b6653241c800c6ad2c772805a6089dfb424"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.8"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"boto3": {
"hashes": [
"sha256:b091cf6581dc137f100789240d628a105c989cf8f559b863fd15e18c1a29b714",
"sha256:bd4c26d304abba8d96817bb83917bb2e19123f5ce1a5dd26255f866daeff61c7"
],
"index": "pypi",
"version": "==1.16.17"
},
"botocore": {
"hashes": [
"sha256:33f650b2d63cc1f2d5239947c9ecdadfd8ceeb4ab8bdefa0a711ac175a43bf44",
"sha256:81184afc24d19d730c1ded84513fbfc9e88409c329de5df1151bb45ac30dfce4"
],
"version": "==1.19.17"
},
"jmespath": {
"hashes": [
"sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9",
"sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f"
],
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==0.10.0"
},
"python-dateutil": {
"hashes": [
"sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
"sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.8.1"
},
"s3transfer": {
"hashes": [
"sha256:2482b4259524933a022d59da830f51bd746db62f047d6eb213f2f8855dcb8a13",
"sha256:921a37e2aefc64145e7b73d50c71bb4f26f46e4c9f414dc648c6245ff92cf7db"
],
"version": "==0.3.3"
},
"six": {
"hashes": [
"sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
"sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.15.0"
},
"urllib3": {
"hashes": [
"sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08",
"sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473"
],
"markers": "python_version != '3.4'",
"version": "==1.26.2"
}
},
"develop": {}
}
import ipaddress
import logging
import operator
import sys

import boto3

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Starting candidate; bumped one /16 at a time until it is unique.
initial_cidr = "10.10.0.0/16"

client = boto3.client('ec2', region_name='us-gov-west-1')
res = client.describe_vpcs()
vpc_cidrs = list(map(operator.itemgetter("CidrBlock"), res["Vpcs"]))
vpc_cidrs.sort()

unique_cidr = False
while not unique_cidr:
    try:
        proposed_cidr = ipaddress.IPv4Network(initial_cidr)
    except ValueError:
        logger.error("Couldn't convert cidr of " + str(initial_cidr))
        sys.exit(2)
    found_cidr_overlap = False
    for cidr in vpc_cidrs:
        aws_cidr = ipaddress.IPv4Network(cidr)
        if aws_cidr.overlaps(proposed_cidr):
            found_cidr_overlap = True
            break
    allowed_private_cidr = ipaddress.IPv4Network("10.0.0.0/8")
    if not found_cidr_overlap:
        if allowed_private_cidr.overlaps(proposed_cidr):
            unique_cidr = True
            final_vpc = initial_cidr
        else:
            logger.error("Proposed cidr not in private ip space: " + str(initial_cidr))
            sys.exit(2)
    else:
        # Advance to the next /16 (10.10.0.0 -> 10.11.0.0 -> ...).
        try:
            initial_cidr = str(ipaddress.ip_address(initial_cidr.split("/")[0]) + 65536) + "/16"
        except ValueError:
            logger.error("Couldn't update cidr of " + str(initial_cidr))
            sys.exit(2)

print(final_vpc)
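In CI this script runs only as the fallback in `.calc_unique_cidr` above: when `terraform output vpc_cidr` returns nothing (no network exists yet for this environment), the script scans the account's VPCs and walks upward from 10.10.0.0/16 one /16 at a time (adding 65536 to the network address: 10.10.0.0 → 10.11.0.0 → ...) until it finds a free block still inside 10.0.0.0/8. The call site, condensed from the anchor above:

```bash
# Reuse the existing VPC CIDR if the network is already up,
# otherwise ask get-vpc.py for a free /16.
TF_VAR_vpc_cidr=$(terraform output vpc_cidr | tr -d '\n' | tr -d '\r' | grep 10) \
  || TF_VAR_vpc_cidr=$(python3 ../../../get-vpc.py | tr -d '\n' | tr -d '\r')
export TF_VAR_vpc_cidr
echo "Using VPC CIDR $TF_VAR_vpc_cidr"
```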
terraform {
backend "s3" {
bucket = "umbrella-tf-states"
key = "terraform.tfstate"
region = "us-gov-west-1"
dynamodb_table = "umbrella-tf-states-lock"
workspace_key_prefix = "aws-networking"
}
}
module "ci" {
source = "../../main"
# Set by CI - "TF_VAR_env=$(echo $CI_COMMIT_REF_SLUG | cut -c 1-7)-$(echo $CI_COMMIT_SHA | cut -c 1-7)"
env = var.env
# Set by CI - "TF_VAR_ci_pipeline_url=$ci_pipeline_url"
ci_pipeline_url = var.ci_pipeline_url
# Calculated in CI
vpc_cidr = var.vpc_cidr
}
output "vpc_id" {
value = module.ci.vpc_id
}
output "public_subnets" {
value = module.ci.public_subnet_ids
}
output "private_subnets" {
value = module.ci.private_subnet_ids
}
variable "vpc_cidr" {}
variable "env" {}
variable "ci_pipeline_url" {}
module "dev" {
source = "../../main"
env = "dev"
vpc_cidr = "10.255.0.0/16"
}
output "vpc_id" {
value = module.dev.vpc_id
}
output "public_subnets" {
value = module.dev.public_subnet_ids
}
output "private_subnets" {
value = module.dev.private_subnet_ids
}
## TODO: Revisit the terraform gitlab http backend
# terraform {
# backend "http" {}
# }
provider "aws" {
region = var.aws_region
}
locals {
public_subnet_cidrs = [
cidrsubnet(var.vpc_cidr, ceil(log(6, 2)), 0),
cidrsubnet(var.vpc_cidr, ceil(log(6, 2)), 1),
]
private_subnet_cidrs = [
cidrsubnet(var.vpc_cidr, ceil(log(6, 2)), 2),
cidrsubnet(var.vpc_cidr, ceil(log(6, 2)), 3),
]
intra_subnet_cidrs = [
cidrsubnet(var.vpc_cidr, ceil(log(6, 2)), 4),
cidrsubnet(var.vpc_cidr, ceil(log(6, 2)), 5),
]
name = "umbrella-${var.env}"
tags = {
"terraform" = "true",
"env" = var.env,
"project" = "umbrella",
"ci_pipeline_url" = var.ci_pipeline_url
}
}
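# Worked example of the cidrsubnet() math above: ceil(log(6, 2)) = ceil(2.58) = 3,
# so each call carves out a subnet 3 bits longer than the VPC CIDR (a /19 from a
# /16), giving six equal blocks. With vpc_cidr = "10.10.0.0/16" (the CI starting
# candidate), netnums 0-5 yield:
#   public:  10.10.0.0/19,   10.10.32.0/19
#   private: 10.10.64.0/19,  10.10.96.0/19
#   intra:   10.10.128.0/19, 10.10.160.0/19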
#
# Network
#
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "2.78.0"
name = local.name
cidr = var.vpc_cidr
azs = ["${var.aws_region}a", "${var.aws_region}b", "${var.aws_region}c"]
public_subnets = local.public_subnet_cidrs
private_subnets = local.private_subnet_cidrs
intra_subnets = local.intra_subnet_cidrs
enable_nat_gateway = true
single_nat_gateway = true
enable_dns_hostnames = true
enable_dns_support = true
# Use AWS VPC private endpoints to mirror functionality on airgapped (T)C2S environments
# S3: for some vendors cluster bootstrapping/artifact storage
# STS: for caller identity checks
# EC2: for cloud manager type requests (such as auto ebs provisioning)
# ASG: for cluster autoscaler
# ELB: for auto elb provisioning
enable_s3_endpoint = true
enable_sts_endpoint = true
enable_ec2_endpoint = true
enable_ec2_autoscaling_endpoint = true
enable_elasticloadbalancing_endpoint = true
ec2_endpoint_security_group_ids = [aws_security_group.endpoints.id]
ec2_endpoint_subnet_ids = module.vpc.intra_subnets
ec2_endpoint_private_dns_enabled = true
ec2_autoscaling_endpoint_security_group_ids = [aws_security_group.endpoints.id]
ec2_autoscaling_endpoint_subnet_ids = module.vpc.intra_subnets
ec2_autoscaling_endpoint_private_dns_enabled = true
elasticloadbalancing_endpoint_security_group_ids = [aws_security_group.endpoints.id]
elasticloadbalancing_endpoint_subnet_ids = module.vpc.intra_subnets
elasticloadbalancing_endpoint_private_dns_enabled = true
sts_endpoint_security_group_ids = [aws_security_group.endpoints.id]
sts_endpoint_subnet_ids = module.vpc.intra_subnets
sts_endpoint_private_dns_enabled = true
# Have the module provision new EIPs for the NAT gateways rather than reusing external ones
reuse_nat_ips = false
# Add in required tags for proper AWS CCM integration
public_subnet_tags = merge({
"kubernetes.io/cluster/${local.name}" = "shared"
"kubernetes.io/role/elb" = "1"
}, local.tags)
private_subnet_tags = merge({
"kubernetes.io/cluster/${local.name}" = "shared"
"kubernetes.io/role/internal-elb" = "1"
}, local.tags)
intra_subnet_tags = merge({
"kubernetes.io/cluster/${local.name}" = "shared"
}, local.tags)
tags = merge({
"kubernetes.io/cluster/${local.name}" = "shared"
}, local.tags)
}
# Shared Private Endpoint Security Group
resource "aws_security_group" "endpoints" {
name = "${local.name}-endpoint"
description = "${local.name} endpoint"
vpc_id = module.vpc.vpc_id
ingress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
}
#
# TGW Attachments
# Attaches the management vpc (the hub) to the created vpc (a spoke).
#
module "spoke" {
source = "git::https://repo1.dso.mil/platform-one/big-bang/terraform-modules/spoke-tgw-attachments.git"
name = local.name
hub_vpc_id = var.hub_vpc_id
hub_tgw = var.hub_tgw
hub_tgw_rt = var.hub_tgw_rt
hub_tgwa = var.hub_tgwa
spoke_vpc_id = module.vpc.vpc_id
spoke_subnets = module.vpc.private_subnets
spoke_rt_ids = module.vpc.private_route_table_ids
}
output "vpc_id" {
value = module.vpc.vpc_id
}
output "private_subnet_ids" {
value = module.vpc.private_subnets
}
output "public_subnet_ids" {
value = module.vpc.public_subnets
}
variable "env" {}
variable "vpc_cidr" {
description = "The CIDR block for the VPC. Default value is a valid CIDR"
type = string
}
variable "aws_region" {
type = string
default = "us-gov-west-1"
}
#
# Spoke variables
# We can hardcode these for now... they haven't changed in 8 months
#
variable "hub_vpc_id" {
default = "vpc-5f627a3b"
}
variable "hub_tgw" {
default = "tgw-0c324b57d019790f4"
}
variable "hub_tgwa" {
default = "tgw-attach-0dce16098dd33fd2c"
}
variable "hub_tgw_rt" {
default = "tgw-rtb-04b66987e7d96a3d4"
}
variable "ci_pipeline_url" {
type = string
default = "none"
description = "URL to the pipeline that created this resource"
}
.rke2 tf:
extends: .terraformer
variables:
TF_ROOT: ".gitlab-ci/jobs/rke2/dependencies/terraform/env/ci"
.rke2 up:
extends: .rke2 tf
script:
- echo -e "\e[0Ksection_start:`date +%s`:rke2_up[collapsed=true]\r\e[0K\e[33;1mRKE2 Up\e[37m"
# Fetch dependencies
- apk add bash aws-cli
# Loop to retry rke2 terraform apply
- |
set -e
attempt_counter=0
max_attempts=2
until [ $(terraform apply -input=false -auto-approve >/dev/null; echo $?) -eq 0 ]; do
if [ ${attempt_counter} == ${max_attempts} ];then
echo "Error applying rke2 cluster up terraform"
exit 1
fi
attempt_counter=$(($attempt_counter+1))
echo "Attempt failed to apply will retry in 30 seconds"
sleep 30
done
- mv rke2.yaml ${CI_PROJECT_DIR}/rke2.yaml
- echo -e "\e[0Ksection_end:`date +%s`:rke2_up\r\e[0K"
artifacts:
paths:
- ${CI_PROJECT_DIR}/rke2.yaml
.rke2 down:
extends:
- .rke2 tf
- .terraform destroy workspace
script:
- echo -e "\e[0Ksection_start:`date +%s`:rke2_down[collapsed=true]\r\e[0K\e[33;1mRKE2 Down\e[37m"
# Loop to retry the rke2 terraform destroy
- |
set -e
attempt_counter=0
max_attempts=2
until [ $(terraform destroy -input=false -auto-approve >/dev/null; echo $?) -eq 0 ]; do
if [ ${attempt_counter} == ${max_attempts} ];then
echo "Error destroying rke2 cluster terraform"
exit 1
fi
attempt_counter=$(($attempt_counter+1))
echo "Attempt failed to destroy will retry in 30 seconds"
sleep 30
done
- echo -e "\e[0Ksection_end:`date +%s`:rke2_down\r\e[0K"
# rke2
This folder contains _one example_ of deploying `rke2`, tuned specifically to run BigBang CI. While it can serve as a reference for other deployments, please take your own needs into consideration.
## What's deployed
* `rke2` cluster
  * sized according to BigBang CI needs as non-HA
    * if HA is desired, simply change `servers = 3` in the installation or upgrade
  * AWS GovCloud (`us-gov-west-1`)
  * STIG'd RHEL 8 (90-95%, depending on user configuration)
  * airgap
* single autoscaling generic agent nodepool
  * sized according to BigBang CI needs as 2 `m5a.4xlarge` instances
  * if additional nodes are needed, simply add more nodepools
## How's it deployed
The `rke2` terraform modules used can be found on repo1 [here](https://repo1.dso.mil/platform-one/distros/rancher-federal/rke2/rke2-aws-terraform).
Both `ci` and `dev` setups exist; the example below can be run locally for development workflows where local clusters may not suffice:
```bash
# ensure BigBang's CI network exists
cd .gitlab-ci/jobs/networking/aws/dependencies/terraform/env/dev
terraform init
terraform apply
# deploy rke2
cd .gitlab-ci/jobs/rke2/dependencies/terraform/env/dev
terraform init
terraform apply
# kubeconfig will be copied locally after terraform completes in ~5m
kubectl --kubeconfig rke2.yaml get no,all -A
```
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: ebs
annotations:
storageclass.kubernetes.io/is-default-class: "true"
provisioner: kubernetes.io/aws-ebs
parameters:
type: gp2
reclaimPolicy: Delete
allowVolumeExpansion: true
mountOptions:
- debug
volumeBindingMode: WaitForFirstConsumer
terraform {
backend "s3" {
bucket = "umbrella-tf-states"
key = "terraform.tfstate"
region = "us-gov-west-1"
dynamodb_table = "umbrella-tf-states-lock"
workspace_key_prefix = "rke2"
}
}
data "terraform_remote_state" "networking" {
backend = "s3"
config = {
bucket = "umbrella-tf-states"
key = "terraform.tfstate"
region = "us-gov-west-1"
workspace_key_prefix = "aws-networking"
}
workspace = var.env
}
module "ci" {
source = "../../main"
env = var.env
ci_pipeline_url = var.ci_pipeline_url
vpc_id = data.terraform_remote_state.networking.outputs.vpc_id
subnets = data.terraform_remote_state.networking.outputs.intra_subnets
}
variable "aws_region" {
default = "us-gov-west-1"
}
variable "env" {}
variable "ci_pipeline_url" {}