From ba19ae05a50b7eee862f81c5921ed9ede18baf59 Mon Sep 17 00:00:00 2001
From: Doug Lagemann <douglagemann@seed-innovations.com>
Date: Thu, 10 Oct 2024 14:20:54 -0600
Subject: [PATCH] trufflehog emulation prototype

---
Review note: line breaks and indentation were restored from a
whitespace-collapsed copy of this patch, and trufflehog-gate-check.py was
revised in review; the blob ids on the "index" lines are carried over from
the original patch and were not recomputed.

 scripts/trufflehog/docker-compose.yml       |  17 ++
 scripts/trufflehog/entrypoint.sh            |  19 +++
 scripts/trufflehog/rules.yml                | 172 ++++++++++++++++++++
 scripts/trufflehog/trufflehog-gate-check.py |  78 +++++++++
 4 files changed, 286 insertions(+)
 create mode 100644 scripts/trufflehog/docker-compose.yml
 create mode 100755 scripts/trufflehog/entrypoint.sh
 create mode 100644 scripts/trufflehog/rules.yml
 create mode 100755 scripts/trufflehog/trufflehog-gate-check.py

diff --git a/scripts/trufflehog/docker-compose.yml b/scripts/trufflehog/docker-compose.yml
new file mode 100644
index 0000000..edd40fe
--- /dev/null
+++ b/scripts/trufflehog/docker-compose.yml
@@ -0,0 +1,17 @@
+version: "0.1"
+services:
+  trufflehog:
+    image: registry1.dso.mil/ironbank/opensource/trufflehog/trufflehog3:3.0.10
+    container_name: trufflehog3
+    entrypoint: ["/root/scripts/trufflehog/entrypoint.sh"]
+    working_dir: /root
+    # env_file:
+    #   - .env # TODO: Setting env vars in here allows use of env vars inside this file itself, i.e. the entrypoint directory. Probably good to define a BASE_SCRIPTS_DIR for all jobs.
+    environment:
+      - REPORT_DIR=/root/reports/trufflehog
+      - SCRIPTS_DIR=/root/scripts/trufflehog
+      - SCAN_DIR=/app
+      # - TRUFFLEHOG_EXCLUDE_PATHS= # TODO: Refine
+    volumes:
+      - ./:/root
+      - ./<<projectName>>:/app
diff --git a/scripts/trufflehog/entrypoint.sh b/scripts/trufflehog/entrypoint.sh
new file mode 100755
index 0000000..847f719
--- /dev/null
+++ b/scripts/trufflehog/entrypoint.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+ls -al # TODO: Delete
+mkdir -p ${REPORT_DIR}
+REPORT_FILE=${REPORT_DIR}/trufflehog_report.json
+# enable shell globbing for recursive exclude matching. allows use of '**/*' format
+shopt -s globstar
+configfiletoremove=".trufflehog3.yml"
+if [ -f "$configfiletoremove" ]; then rm -f "$configfiletoremove" && echo "I deleted trufflehog3.yml"; fi
+# set -x so we can see the real command being run
+set -x
+trufflehog3 -vvv --ignore-nosecret --exclude ${TRUFFLEHOG_EXCLUDE_PATHS} \
+    --format json --zero --no-history \
+    -r ${SCRIPTS_DIR}/rules.yml \
+    -o ${REPORT_FILE} ${SCAN_DIR} 2>&1 | tee ${REPORT_DIR}/trufflehog_log.txt # TODO: Writing to trufflehog_log.txt not currently working
+set +x
+shopt -u globstar
+trufflehog3 --version > ${REPORT_DIR}/trufflehog_version.txt
+python ${SCRIPTS_DIR}/trufflehog-gate-check.py "${REPORT_FILE}"
diff --git a/scripts/trufflehog/rules.yml b/scripts/trufflehog/rules.yml
new file mode 100644
index 0000000..63415ec
--- /dev/null
+++ b/scripts/trufflehog/rules.yml
@@ -0,0 +1,172 @@
+# Rules originally from https://github.com/feeltheajf/trufflehog3/blob/master/trufflehog3/static/rules.yml
+# Any severity "MEDIUM" or "HIGH" rules will fail the P1 gate check; "LOW" rules will just be warnings.
+
+#
+# entropy-based rules
+#
+- id: high-entropy
+  message: High Entropy
+  minlen: 20
+  alphabet: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/="
+  threshold: 4.5
+  severity: MEDIUM
+- id: high-entropy
+  message: High Entropy
+  minlen: 20
+  alphabet: "0123456789abcdefABCDEF"
+  threshold: 3.0
+  severity: MEDIUM
+#
+# regexes-based rules
+#
+- id: private.pgp-key
+  message: PGP Private Key Block
+  pattern: "-----BEGIN PGP PRIVATE KEY BLOCK-----"
+  severity: HIGH
+- id: private.key
+  message: Private Key
+  pattern: "-----BEGIN (RSA|DSA|EC|OPENSSH) PRIVATE KEY-----"
+  severity: HIGH
+
+- id: amazon.aws-api-key
+  message: AWS API Key
+  pattern: "AKIA[0-9A-Z]{16}"
+  severity: MEDIUM
+- id: amazon.mws-auth-token
+  message: Amazon MWS Auth Token
+  pattern: "amzn\\.mws\\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
+  severity: MEDIUM
+
+- id: facebook.access-token
+  message: Facebook Access Token
+  pattern: "EAACEdEose0cBA[0-9A-Za-z]+"
+  severity: MEDIUM
+- id: facebook.oauth-token
+  message: Facebook OAuth Token
+  pattern: '[f|F][a|A][c|C][e|E][b|B][o|O][o|O][k|K].*[''|"][0-9a-f]{32}[''|"]'
+  severity: MEDIUM
+
+# TODO add new github token formats
+- id: github.token
+  message: GitHub Token
+  pattern: '[g|G][i|I][t|T][h|H][u|U][b|B].*[''|"][0-9a-zA-Z]{35,40}[''|"]'
+  severity: MEDIUM
+
+- id: google.access-token
+  message: Google Access Token
+  pattern: "ya29\\.[0-9A-Za-z\\-_]+"
+  severity: MEDIUM
+- id: google.api-key
+  message: Google API Key
+  pattern: "AIza[0-9A-Za-z\\-_]{35}"
+  severity: MEDIUM
+- id: google.oauth-token
+  message: Google OAuth
+  pattern: "[0-9]+-[0-9A-Za-z_]{32}\\.apps\\.googleusercontent\\.com"
+  severity: MEDIUM
+- id: google.gcp-service-account
+  message: Google Cloud Platform Service Account
+  pattern: '"type": "service_account"'
+  severity: MEDIUM
+
+- id: heroku.api-key
+  message: Heroku API Key
+  pattern: "[h|H][e|E][r|R][o|O][k|K][u|U].*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}"
+  severity: MEDIUM
+
+- id: mailchimp.api-key
+  message: MailChimp API Key
+  pattern: "[0-9a-f]{32}-us[0-9]{1,2}"
+  severity: MEDIUM
+
+- id: mailgun.api-key
+  message: Mailgun API Key
+  pattern: "key-[0-9a-zA-Z]{32}"
+  severity: MEDIUM
+
+- id: paypal.braintree-access-token
+  message: PayPal Braintree Access Token
+  pattern: "access_token\\$production\\$[0-9a-z]{16}\\$[0-9a-f]{32}"
+  severity: MEDIUM
+
+- id: picatic.api-key
+  message: Picatic API Key
+  pattern: "sk_live_[0-9a-z]{32}"
+  severity: MEDIUM
+
+- id: slack.token
+  message: Slack Token
+  pattern: "xox[p|b|o|a]-[0-9]{12}-[0-9]{12}-[0-9]{12}-[a-z0-9]{32}"
+  severity: MEDIUM
+- id: slack.bot-token
+  message: Slack Bot Token
+  pattern: "xox[p|b|o|a]-[0-9]{12}-[0-9]{12}-[a-z0-9]{24}"
+  severity: MEDIUM
+- id: slack.webhook
+  message: Slack Webhook
+  pattern: "https://hooks.slack.com/services/T[a-zA-Z0-9_]{8}/B[a-zA-Z0-9_]{8}/[a-zA-Z0-9_]{24}"
+  severity: MEDIUM
+
+- id: stripe.api-key
+  message: Stripe API Key
+  pattern: "sk_live_[0-9a-zA-Z]{24}"
+  severity: MEDIUM
+- id: stripe.restricted-api-key
+  message: Stripe Restricted API Key
+  pattern: "rk_live_[0-9a-zA-Z]{24}"
+  severity: MEDIUM
+
+- id: square.access-token
+  message: Square Access Token
+  pattern: "sq0atp-[0-9A-Za-z\\-_]{22}"
+  severity: MEDIUM
+- id: square.oauth-secret
+  message: Square OAuth Secret
+  pattern: "sq0csp-[0-9A-Za-z\\-_]{43}"
+  severity: MEDIUM
+
+- id: twilio.api-key
+  message: Twilio API Key
+  pattern: "SK[0-9a-fA-F]{32}"
+  severity: MEDIUM
+
+- id: twitter.access-token
+  message: Twitter Access Token
+  pattern: "[t|T][w|W][i|I][t|T][t|T][e|E][r|R].*[1-9][0-9]+-[0-9a-zA-Z]{40}"
+  severity: MEDIUM
+- id: twitter.oauth-token
+  message: Twitter OAuth Token
+  pattern: '[t|T][w|W][i|I][t|T][t|T][e|E][r|R].*[''|"][0-9a-zA-Z]{35,44}[''|"]'
+  severity: MEDIUM
+
+  #
+  # These 3 upstream rules were originally LOW severity, but P1 upgraded them to MEDIUM:
+  #
+- id: generic.api-key
+  message: Generic API Key
+  pattern: '[a|A][p|P][i|I][_]?[k|K][e|E][y|Y].*[''|"][0-9a-zA-Z]{32,45}[''|"]'
+  severity: MEDIUM
+- id: generic.secret
+  message: Generic Secret
+  pattern: '[s|S][e|E][c|C][r|R][e|E][t|T].*[''|"][0-9a-zA-Z]{32,45}[''|"]'
+  severity: MEDIUM
+- id: generic.password-in-url
+  message: Password in URL
+  pattern: "[a-zA-Z]{3,10}://[^/\\s:@]{3,20}:[^/\\s:@]{3,20}@.{1,100}[\"'\\s]"
+  severity: MEDIUM
+
+  #
+  # P1 additions to upstream rules:
+  #
+- id: generic.password
+  message: Password
+  pattern: "[p|P]assword\\s*[:|=]"
+  severity: HIGH
+- id: generic.dso-url
+  message: Hardcoded dso.mil URL
+  pattern: "(?i)https?://[0-9a-z.-]+\\.dso\\.mil"
+  severity: LOW
+- id: polyfill-urls
+  message: PolyFill URLs
+  pattern: "polyfill[.]io|bootcdn[.]net|bootcss[.]com|kuurza[.]com|newcrbpc[.]com|staticfile[.]net|staticfile[.]org|macoms[.]la|unionadjs[.]com|googie-anaiytics[.]com|xhsbpza[.]com"
+  severity: HIGH
diff --git a/scripts/trufflehog/trufflehog-gate-check.py b/scripts/trufflehog/trufflehog-gate-check.py
new file mode 100755
index 0000000..515fd75
--- /dev/null
+++ b/scripts/trufflehog/trufflehog-gate-check.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""Gate check for a trufflehog3 JSON report.
+
+Exits with status 1 when the report contains any "MEDIUM" or "HIGH"
+severity issue; "LOW" severity issues are only printed as warnings.
+"""
+import json
+import sys
+
+# Severities that fail the gate vs. severities that are only reported.
+FAILING_SEVERITIES = frozenset({"MEDIUM", "HIGH"})
+WARNING_SEVERITIES = frozenset({"LOW"})
+DEFAULT_REPORT = "trufflehog_report.json"
+HELP_URL = "https://confluence.il2.dso.mil/display/P1MDOHD/TS+-+Trufflehog+-+Stage+Failure"
+
+
+def parseTrufflehogReport(path):
+    """Split the issues of a trufflehog3 JSON report by severity.
+
+    Args:
+        path: Path of the JSON report; it must hold a list of issues,
+            each carrying its severity at issue["rule"]["severity"].
+
+    Returns:
+        A (findings, warnings) tuple: findings are the "MEDIUM"/"HIGH"
+        issues, warnings are the "LOW" issues, both in report order.
+        Issues with any other severity are left out of both lists.
+
+    Raises:
+        OSError: The report cannot be opened.
+        json.JSONDecodeError: The report is not valid JSON.
+        KeyError: An issue has no "rule" or "severity" entry.
+    """
+    with open(path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    findings = []
+    warnings = []
+    for issue in data:
+        severity = issue["rule"]["severity"]
+        if severity in FAILING_SEVERITIES:
+            findings.append(issue)
+        elif severity in WARNING_SEVERITIES:
+            warnings.append(issue)
+    return (findings, warnings)
+
+
+def main(argv):
+    """Run the gate check and return the process exit status.
+
+    Args:
+        argv: Command-line arguments; argv[1], when present, is the report
+            path, otherwise DEFAULT_REPORT is used.
+
+    Returns:
+        1 when the report has MEDIUM/HIGH findings, 0 otherwise.
+    """
+    if len(argv) < 2:
+        filename = DEFAULT_REPORT
+        print("No filename supplied; using default.")
+    else:
+        filename = argv[1]
+    print(f'Parsing {filename}')
+    findings, warnings = parseTrufflehogReport(filename)
+    if findings:
+        print("Trufflehog gate check findings - ")
+        print(json.dumps(findings, indent=2))
+        print(f'Trufflehog gate check Failed with finding count - {len(findings)}')
+        print(f'For help, see {HELP_URL}')
+        return 1
+    if warnings:
+        print(json.dumps(warnings, indent=2))
+    # Reported for every passing run, including one with zero warnings.
+    print(f'Trufflehog gate check Passed with {len(warnings)} warnings')
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
-- 
GitLab