From ba19ae05a50b7eee862f81c5921ed9ede18baf59 Mon Sep 17 00:00:00 2001
From: Doug Lagemann <douglagemann@seed-innovations.com>
Date: Thu, 10 Oct 2024 14:20:54 -0600
Subject: [PATCH] trufflehog emulation prototype

---
 scripts/trufflehog/docker-compose.yml       |  17 ++
 scripts/trufflehog/entrypoint.sh            |  19 +++
 scripts/trufflehog/rules.yml                | 172 ++++++++++++++++++++
 scripts/trufflehog/trufflehog-gate-check.py |  31 ++++
 4 files changed, 239 insertions(+)
 create mode 100644 scripts/trufflehog/docker-compose.yml
 create mode 100755 scripts/trufflehog/entrypoint.sh
 create mode 100644 scripts/trufflehog/rules.yml
 create mode 100755 scripts/trufflehog/trufflehog-gate-check.py

diff --git a/scripts/trufflehog/docker-compose.yml b/scripts/trufflehog/docker-compose.yml
new file mode 100644
index 0000000..edd40fe
--- /dev/null
+++ b/scripts/trufflehog/docker-compose.yml
@@ -0,0 +1,17 @@
+version: "0.1" # NOTE(review): not a Compose file-format version ("2.x"/"3.x"); Compose v2 treats this field as informational only - confirm intent
+services:
+  trufflehog:
+    image: registry1.dso.mil/ironbank/opensource/trufflehog/trufflehog3:3.0.10
+    container_name: trufflehog3
+    entrypoint: ["/root/scripts/trufflehog/entrypoint.sh"]
+    working_dir: /root
+    # env_file:
+      # - .env # TODO: Defining env vars in .env would let this file reference them itself (e.g. for the entrypoint directory). Probably good to define a BASE_SCRIPTS_DIR for all jobs.
+    environment:
+      - REPORT_DIR=/root/reports/trufflehog
+      - SCRIPTS_DIR=/root/scripts/trufflehog
+      - SCAN_DIR=/app
+      # - TRUFFLEHOG_EXCLUDE_PATHS= # TODO: Refine (entrypoint.sh passes this value to trufflehog3 --exclude)
+    volumes:
+      - ./:/root
+      - ./<<projectName>>:/app
diff --git a/scripts/trufflehog/entrypoint.sh b/scripts/trufflehog/entrypoint.sh
new file mode 100755
index 0000000..847f719
--- /dev/null
+++ b/scripts/trufflehog/entrypoint.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Scan SCAN_DIR with trufflehog3 using SCRIPTS_DIR/rules.yml, write the report/log/version into REPORT_DIR, then run the gate check.
+ls -al # TODO: Delete
+mkdir -p "${REPORT_DIR}"
+REPORT_FILE="${REPORT_DIR}/trufflehog_report.json"
+# Enable globstar so '**/*' exclude patterns match recursively; TRUFFLEHOG_EXCLUDE_PATHS is deliberately left unquoted below so it can word-split and glob.
+shopt -s globstar
+configfiletoremove=".trufflehog3.yml" # NOTE(review): presumably removed so a repo-supplied config cannot override rules.yml - confirm
+if [ -f "$configfiletoremove" ]; then rm -f "$configfiletoremove" && echo "I deleted trufflehog3.yml"; fi
+# set -x so we can see the real command being run
+set -x
+trufflehog3 -vvv --ignore-nosecret --exclude ${TRUFFLEHOG_EXCLUDE_PATHS} \
+  --format json --zero --no-history \
+  -r "${SCRIPTS_DIR}/rules.yml" \
+  -o "${REPORT_FILE}" "${SCAN_DIR}" 2>&1 | tee "${REPORT_DIR}/trufflehog_log.txt" # TODO: Writing to trufflehog_log.txt not currently working
+set +x
+shopt -u globstar
+trufflehog3 --version > "${REPORT_DIR}/trufflehog_version.txt"
+python "${SCRIPTS_DIR}/trufflehog-gate-check.py" "${REPORT_FILE}"
diff --git a/scripts/trufflehog/rules.yml b/scripts/trufflehog/rules.yml
new file mode 100644
index 0000000..63415ec
--- /dev/null
+++ b/scripts/trufflehog/rules.yml
@@ -0,0 +1,172 @@
+# Rules originally from https://github.com/feeltheajf/trufflehog3/blob/master/trufflehog3/static/rules.yml
+# Any severity "MEDIUM" or "HIGH" rules will fail the P1 gate check; "LOW" rules will just be warnings.
+
+#
+# entropy-based rules
+#
+- id: high-entropy
+  message: High Entropy
+  minlen: 20
+  alphabet: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/="
+  threshold: 4.5
+  severity: MEDIUM
+- id: high-entropy
+  message: High Entropy
+  minlen: 20
+  alphabet: "0123456789abcdefABCDEF"
+  threshold: 3.0
+  severity: MEDIUM
+#
+# regex-based rules (NOTE: inside [...] a "|" is a literal character, so e.g. [f|F] also matches "|")
+#
+- id: private.pgp-key
+  message: PGP Private Key Block
+  pattern: "-----BEGIN PGP PRIVATE KEY BLOCK-----"
+  severity: HIGH
+- id: private.key
+  message: Private Key
+  pattern: "-----BEGIN (RSA|DSA|EC|OPENSSH) PRIVATE KEY-----"
+  severity: HIGH
+
+- id: amazon.aws-api-key
+  message: AWS API Key
+  pattern: "AKIA[0-9A-Z]{16}"
+  severity: MEDIUM
+- id: amazon.mws-auth-token
+  message: Amazon MWS Auth Token
+  pattern: "amzn\\.mws\\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
+  severity: MEDIUM
+
+- id: facebook.access-token
+  message: Facebook Access Token
+  pattern: "EAACEdEose0cBA[0-9A-Za-z]+"
+  severity: MEDIUM
+- id: facebook.oauth-token
+  message: Facebook OAuth Token
+  pattern: '[f|F][a|A][c|C][e|E][b|B][o|O][o|O][k|K].*[''|"][0-9a-f]{32}[''|"]'
+  severity: MEDIUM
+
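+# TODO: add the newer GitHub token formats (the pattern below only matches a 35-40 character alphanumeric value in quotes following "github")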
+- id: github.token
+  message: GitHub Token
+  pattern: '[g|G][i|I][t|T][h|H][u|U][b|B].*[''|"][0-9a-zA-Z]{35,40}[''|"]'
+  severity: MEDIUM
+
+- id: google.access-token
+  message: Google Access Token
+  pattern: "ya29\\.[0-9A-Za-z\\-_]+"
+  severity: MEDIUM
+- id: google.api-key
+  message: Google API Key
+  pattern: "AIza[0-9A-Za-z\\-_]{35}"
+  severity: MEDIUM
+- id: google.oauth-token
+  message: Google OAuth
+  pattern: "[0-9]+-[0-9A-Za-z_]{32}\\.apps\\.googleusercontent\\.com"
+  severity: MEDIUM
+- id: google.gcp-service-account
+  message: Google Cloud Platform Service Account
+  pattern: '"type": "service_account"'
+  severity: MEDIUM
+
+- id: heroku.api-key
+  message: Heroku API Key
+  pattern: "[h|H][e|E][r|R][o|O][k|K][u|U].*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}"
+  severity: MEDIUM
+
+- id: mailchimp.api-key
+  message: MailChimp API Key
+  pattern: "[0-9a-f]{32}-us[0-9]{1,2}"
+  severity: MEDIUM
+
+- id: mailgun.api-key
+  message: Mailgun API Key
+  pattern: "key-[0-9a-zA-Z]{32}"
+  severity: MEDIUM
+
+- id: paypal.braintree-access-token
+  message: PayPal Braintree Access Token
+  pattern: "access_token\\$production\\$[0-9a-z]{16}\\$[0-9a-f]{32}"
+  severity: MEDIUM
+
+- id: picatic.api-key
+  message: Picatic API Key
+  pattern: "sk_live_[0-9a-z]{32}"
+  severity: MEDIUM
+
+- id: slack.token
+  message: Slack Token
+  pattern: "xox[p|b|o|a]-[0-9]{12}-[0-9]{12}-[0-9]{12}-[a-z0-9]{32}"
+  severity: MEDIUM
+- id: slack.bot-token
+  message: Slack Bot Token
+  pattern: "xox[p|b|o|a]-[0-9]{12}-[0-9]{12}-[a-z0-9]{24}"
+  severity: MEDIUM
+- id: slack.webhook
+  message: Slack Webhook
+  pattern: "https://hooks.slack.com/services/T[a-zA-Z0-9_]{8}/B[a-zA-Z0-9_]{8}/[a-zA-Z0-9_]{24}"
+  severity: MEDIUM
+
+- id: stripe.api-key
+  message: Stripe API Key
+  pattern: "sk_live_[0-9a-zA-Z]{24}"
+  severity: MEDIUM
+- id: stripe.restricted-api-key
+  message: Stripe Restricted API Key
+  pattern: "rk_live_[0-9a-zA-Z]{24}"
+  severity: MEDIUM
+
+- id: square.access-token
+  message: Square Access Token
+  pattern: "sq0atp-[0-9A-Za-z\\-_]{22}"
+  severity: MEDIUM
+- id: square.oauth-secret
+  message: Square OAuth Secret
+  pattern: "sq0csp-[0-9A-Za-z\\-_]{43}"
+  severity: MEDIUM
+
+- id: twilio.api-key
+  message: Twilio API Key
+  pattern: "SK[0-9a-fA-F]{32}"
+  severity: MEDIUM
+
+- id: twitter.access-token
+  message: Twitter Access Token
+  pattern: "[t|T][w|W][i|I][t|T][t|T][e|E][r|R].*[1-9][0-9]+-[0-9a-zA-Z]{40}"
+  severity: MEDIUM
+- id: twitter.oauth-token
+  message: Twitter OAuth Token
+  pattern: '[t|T][w|W][i|I][t|T][t|T][e|E][r|R].*[''|"][0-9a-zA-Z]{35,44}[''|"]'
+  severity: MEDIUM
+
+  #
+  # These 3 upstream rules were originally LOW severity, but P1 upgraded them to MEDIUM:
+  #
+- id: generic.api-key
+  message: Generic API Key
+  pattern: '[a|A][p|P][i|I][_]?[k|K][e|E][y|Y].*[''|"][0-9a-zA-Z]{32,45}[''|"]'
+  severity: MEDIUM
+- id: generic.secret
+  message: Generic Secret
+  pattern: '[s|S][e|E][c|C][r|R][e|E][t|T].*[''|"][0-9a-zA-Z]{32,45}[''|"]'
+  severity: MEDIUM
+- id: generic.password-in-url
+  message: Password in URL
+  pattern: "[a-zA-Z]{3,10}://[^/\\s:@]{3,20}:[^/\\s:@]{3,20}@.{1,100}[\"'\\s]"
+  severity: MEDIUM
+
+  #
+  # P1 additions to upstream rules:
+  #
+- id: generic.password
+  message: Password 
+  pattern: "[p|P]assword\\s*[:|=]"
+  severity: HIGH
+- id: generic.dso-url
+  message: Hardcoded dso.mil URL
+  pattern: "(?i)https?://[0-9a-z.-]+\\.dso\\.mil"
+  severity: LOW
+- id: polyfill-urls
+  message: PolyFill URLs
+  pattern: "polyfill[.]io|bootcdn[.]net|bootcss[.]com|kuurza[.]com|newcrbpc[.]com|staticfile[.]net|staticfile[.]org|macoms[.]la|unionadjs[.]com|googie-anaiytics[.]com|xhsbpza[.]com"
+  severity: HIGH
diff --git a/scripts/trufflehog/trufflehog-gate-check.py b/scripts/trufflehog/trufflehog-gate-check.py
new file mode 100755
index 0000000..515fd75
--- /dev/null
+++ b/scripts/trufflehog/trufflehog-gate-check.py
@@ -0,0 +1,31 @@
+import sys
+import json
+
+def parseTrufflehogReport(path):
+  """ Parse the JSON report format and
+      separate the findings (severity "MEDIUM" or "HIGH")
+      from the warnings (severity "LOW")
+  """
+  with open(path, 'r') as f:
+    data = json.load(f)
+  findings = [x for x in data if x["rule"]["severity"]=="HIGH" or x["rule"]["severity"]=="MEDIUM"]
+  warnings = [x for x in data if x["rule"]["severity"]=="LOW"]
+  return (findings, warnings)
+
+if __name__ == "__main__":
+  if len(sys.argv) < 2:
+    filename = "trufflehog_report.json"
+    print("No filename supplied; using default.")
+  else:
+    filename = sys.argv[1]
+  print(f'Parsing {filename}')
+  (findings, warnings) = parseTrufflehogReport(filename)
+  if len(findings) > 0:
+    print("Trufflehog gate check findings - ")
+    print(json.dumps(findings,indent=2))
+    print(f'Trufflehog gate check Failed with finding count -  {len(findings)}')
+    print('For help, see https://confluence.il2.dso.mil/display/P1MDOHD/TS+-+Trufflehog+-+Stage+Failure')
+    sys.exit(1)
+  if len(warnings) > 0:
+    print(json.dumps(warnings,indent=2))
+  print(f'Trufflehog gate check Passed with {len(warnings)} warnings')
-- 
GitLab