UNCLASSIFIED - NO CUI

Skip to content
Snippets Groups Projects
Unverified commit c6d9aece, authored by zamaz and committed by GitHub
Browse files

feat: add monitoring and logging (#33)


* Add promtail, fix netpols, add prometheus to standard-package, add grafana

* remove service monitor so individual test passes, add empty validate.yaml

* add datasources cm, fix netpols

* commit missed file

* add wait for grafana test

* Fix loki labels, remove extra prometheus annotations now handled via pepr

* bump version variable in deploy.yaml

* fix messed up zarf package name

* update prometheus values, reorder packages in standard packages, remove grafana validate

* revert version in deploy.yaml

* fix deploy.yaml version, add file to release please

* remove extra peer auth definition in grafana

* fix package versions

* update readme

Signed-off-by: Zachariah Miller <zachariah.miller@defenseunicorns.com>

* Remediate review comments, bump chart and image versions

* update loki chart and images

* update prometheus values, fix images, job timeout

* chore: update loki nps, use gateway

* fix: netpols

* remove loki gateway for now

* fix loki replication factor, add set -e on tasks, update promtail to point to loki-gw

* add validate.yaml checks for grafana, promtail, loki and prometheus

* revert neuvector change

* remove redundant peer-authentication.yaml from neuvector config chart

* fix: netpols, loki required, quote consistency

* template peerauth for scalable and bump loki config chart. Bump promtail limits

* revert limits change

* bump to test all to next runner size

* add debug log for CI

---------

Signed-off-by: Zachariah Miller <zachariah.miller@defenseunicorns.com>
Co-authored-by: Megamind <882485+jeff-mccoy@users.noreply.github.com>
Co-authored-by: Micah Nagel <micah.nagel@defenseunicorns.com>
Co-authored-by: Jeff McCoy <code@jeffm.us>
parent 7ad5b675
No related branches found
No related tags found
No related merge requests found
Showing changes with 111 additions and 75 deletions
# Composite action: makes the zarf/uds debug logs under /tmp readable by the
# current user and uploads them as a single workflow artifact.
name: save-logs
description: "Save debug logs"
runs:
  using: composite
  steps:
    - name: Fix log permissions
      # Best effort: `|| echo ""` keeps the step successful when chown fails,
      # e.g. because no matching log file exists.
      # NOTE(review): presumably the logs are written by a privileged process,
      # hence the sudo chown - confirm.
      run: |
        sudo chown $USER /tmp/zarf-*.log || echo ""
        sudo chown $USER /tmp/uds-*.log || echo ""
      shell: bash
    # Pinned to a full commit SHA; the trailing comment records the release tag.
    - uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32  # v3.1.3
      with:
        name: debug-log
        path: |
          /tmp/zarf-*.log
          /tmp/uds-*.log
......@@ -49,3 +49,7 @@ jobs:
- name: Publish UDS Example Bundles
run: uds run -f tasks/publish.yaml bundles
- name: Save logs
if: always()
uses: ./.github/actions/save-logs
......@@ -26,7 +26,7 @@ permissions:
jobs:
test:
runs-on: "${{ inputs.capability == 'all' && 'uds-ubuntu-big-boy-4-core' || 'ubuntu-latest'}}"
runs-on: "${{ inputs.capability == 'all' && 'uds-ubuntu-big-boy-8-core' || 'ubuntu-latest'}}"
timeout-minutes: 30
name: Test
env:
......@@ -46,3 +46,7 @@ jobs:
- name: Test UDS Core
if: ${{ inputs.capability == 'all' }}
run: uds run -f tasks/test.yaml uds-core
- name: Save logs
if: always()
uses: ./.github/actions/save-logs
......@@ -8,7 +8,7 @@ The UDS Core Bundle groups foundational Unicorn Delivery Service applications th
The core applications are:
- [ ] Authservice
- [ ] Grafana
- [x] Grafana
- [x] Istio
- [ ] KeyCloak
- [ ] Kiali
......@@ -16,8 +16,8 @@ The core applications are:
- [x] Loki
- [x] Metrics Server
- [x] Neuvector
- [ ] Prometheus
- [ ] Promtail
- [x] Prometheus
- [x] Promtail
- [ ] Tempo
- [ ] Velero
......
# ConfigMap carrying the Grafana datasource provisioning file.
# It declares two datasources: Prometheus and Loki (via the loki-gateway service).
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-datasources
  namespace: {{ .Release.Namespace }}
  labels:
    # Quoted so the label value stays a string rather than an integer.
    grafana_datasource: "1"
data:
  # Literal block scalar: the content below is stored verbatim as the file body.
  datasources.yaml: |
    apiVersion: 1
    datasources:
      - access: proxy
        editable: true
        name: Prometheus
        type: prometheus
        url: http://kube-prometheus-stack-prometheus.monitoring.svc.cluster.local:9090
      - access: proxy
        editable: true
        name: Loki
        type: loki
        url: http://loki-gateway.loki.svc.cluster.local:80
# PeerAuthentication for the release namespace. No selector is set, so the
# STRICT mTLS mode is not narrowed to specific workloads.
apiVersion: security.istio.io/v1beta1
kind: PeerAuthentication
metadata:
  name: default-grafana
  namespace: {{ .Release.Namespace }}
spec:
  mtls:
    mode: STRICT
......@@ -11,7 +11,7 @@ spec:
- to:
- namespaceSelector:
matchLabels:
app.kubernetes.io/name: istio-controlplane
kubernetes.io/metadata.name: istio-system
podSelector:
matchLabels:
app: istiod
......
tasks:
- name: run
actions:
- description: Validate...
cmd: "echo Replace Me"
# wait:
# cluster:
- description: Validate grafana is up
wait:
cluster:
kind: Pod
name: "app.kubernetes.io/instance=grafana"
namespace: grafana
condition: Ready
\ No newline at end of file
......@@ -3,3 +3,17 @@ service:
uds/istio-gateway: admin
uds/istio-host: grafana
uds/istio-port: "80"
sidecar:
image:
# -- The Docker registry
registry: ghcr.io
repository: kiwigrid/k8s-sidecar
tag: 1.25.2
dashboards:
enabled: true
label: grafana_dashboard
datasources:
enabled: true
label: grafana_datasource
kind: ZarfPackageConfig
metadata:
name: uds-capabilities-grafana
name: uds-capability-grafana
description: "UDS Grafana Capability"
url: https://grafana.com/grafana
......@@ -11,17 +11,16 @@ components:
charts:
- name: grafana
url: https://grafana.github.io/helm-charts/
version: 6.61.1
version: 7.0.6
namespace: grafana
valuesFiles:
- values/values.yaml
images:
- docker.io/grafana/grafana:10.1.5
- name: uds-grafana-config
required: true
charts:
- name: uds-grafana-config
namespace: grafana
version: 0.1.0
localPath: chart
images:
- docker.io/grafana/grafana:10.1.5
- docker.io/curlimages/curl:7.85.0
- docker.io/library/busybox:1.31.1
- ghcr.io/kiwigrid/k8s-sidecar:1.25.2
\ No newline at end of file
......@@ -15,4 +15,4 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
\ No newline at end of file
version: 0.2.0
\ No newline at end of file
{{- if .Values.grafana.enabled }}
{{- $files := .Files.Glob "src/dashboards/*.json" }}
apiVersion: v1
kind: ConfigMap
metadata:
name: loki-grafana-dashboards
namespace: monitoring
namespace: grafana
labels:
grafana_dashboard: "1"
data:
grafana-loki-general.json: |
{{ .Files.Get "dashboards/loki-dashboard-quick-search.json" | nindent 4 }}
---
{{- $files := .Files.Glob "src/dashboards/*.json" }}
{{- if $files }}
apiVersion: v1
kind: ConfigMapList
items:
{{- range $path, $fileContents := $files }}
{{- $dashboardName := regexReplaceAll "(^.*/)(.*)\\.json$" $path "${2}" }}
- apiVersion: v1
kind: ConfigMap
metadata:
name: {{ printf "%s-%s" (include "loki.name" $) $dashboardName | trunc 63 | trimSuffix "-" }}
namespace: monitoring
labels:
grafana_dashboard: "1"
data:
{{ $dashboardName }}.json: {{ $.Files.Get $path | toJson }}
{{- end }}
{{- end }}
{{- end }}
......@@ -8,7 +8,7 @@ spec:
- from:
- namespaceSelector:
matchLabels:
app.kubernetes.io/name: monitoring
kubernetes.io/metadata.name: monitoring
podSelector:
matchLabels:
app: prometheus
......
......@@ -6,7 +6,7 @@ metadata:
spec:
podSelector:
matchLabels:
{{- include "loki.selectorLabels" . | nindent 6 }}
app.kubernetes.io/name: loki
policyTypes:
- Egress
egress:
......
......@@ -6,23 +6,25 @@ metadata:
spec:
podSelector:
matchLabels:
{{- include "loki.selectorLabels" . | nindent 6 }}
app.kubernetes.io/name: loki
policyTypes:
- Ingress
ingress:
- from:
- namespaceSelector:
matchLabels:
app.kubernetes.io/name: monitoring
kubernetes.io/metadata.name: grafana
podSelector:
matchLabels:
app.kubernetes.io/name: grafana
- namespaceSelector:
matchLabels:
app.kubernetes.io/name: monitoring
kubernetes.io/metadata.name: monitoring
podSelector:
matchLabels:
app.kubernetes.io/name: prometheus
ports:
- port: 8080
protocol: TCP
- port: 3100
protocol: TCP
......@@ -6,17 +6,17 @@ metadata:
spec:
podSelector:
matchLabels:
{{- include "loki.selectorLabels" . | nindent 6 }}
app.kubernetes.io/name: loki
policyTypes:
- Ingress
ingress:
- from:
- namespaceSelector:
matchLabels:
app.kubernetes.io/name: promtail
kubernetes.io/metadata.name: promtail
podSelector:
matchLabels:
app.kubernetes.io/name: promtail
ports:
- port: 3100
- port: 8080
protocol: TCP
{{- if .Values.scalable }}
apiVersion: "security.istio.io/v1beta1"
kind: PeerAuthentication
metadata:
......@@ -8,8 +9,9 @@ spec:
mode: STRICT
selector:
matchLabels:
{{- include "loki.selectorLabels" . | nindent 6 }}
app.kubernetes.io/name: loki
portLevelMtls:
# GRPC exception to support Loki internal communication
"9095":
mode: PERMISSIVE
{{- end }}
# PeerAuthentication requiring STRICT mTLS for the workloads matched by the
# chart's "loki.selectorLabels" helper.
apiVersion: "security.istio.io/v1beta1"
kind: PeerAuthentication
metadata:
  name: loki
  namespace: {{ .Release.Namespace }}
spec:
  mtls:
    mode: STRICT
  selector:
    matchLabels:
      # nindent 6 places the rendered labels one level below matchLabels.
      {{- include "loki.selectorLabels" . | nindent 6 }}
# Chart defaults. The templates read these as .Values.grafana.enabled and
# .Values.scalable, so `scalable` is a top-level key, not a child of `grafana`.
grafana:
  # Gates rendering of the Grafana dashboard ConfigMap template.
  enabled: false
# Gates rendering of the PeerAuthentication that carries the port 9095 exception.
scalable: false
\ No newline at end of file
tasks:
- name: run
actions:
- description: Validate...
cmd: "echo Replace Me"
# wait:
# cluster:
- description: Validate loki
wait:
cluster:
kind: Pod
name: app.kubernetes.io/name=loki
namespace: loki
condition: Ready
- description: Validate loki-gw
wait:
cluster:
kind: Pod
name: app.kubernetes.io/component=gateway
namespace: loki
condition: Ready
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment