From e960b09001e453fc49b7229be54b3b046437c008 Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 09:31:58 -0500 Subject: [PATCH 01/16] initial test --- Dockerfile | 30 ++++++ LICENSE | 202 ++++++++++++++++++++++++++++++++++++++++ README.md | 109 +++++++++++++++++++++- hardening_manifest.yaml | 51 ++++++++++ 4 files changed, 390 insertions(+), 2 deletions(-) create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 hardening_manifest.yaml diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..2f7b2cd --- /dev/null +++ b/Dockerfile @@ -0,0 +1,30 @@ +ARG BASE_REGISTRY=repo1.dso.mil +ARG BASE_IMAGE=ironbank/redhat/python/python38 +ARG BASE_TAG=3.8 + +FROM spark-operator/spark-operator:v1beta2-1.0.0-2.4.4 as base + +FROM ${BASE_REGISTRY}/${BASE_IMAGE}:${BASE_TAG} + + +ARG spark_uid=185 + +USER root + +RUN dnf -y update && dnf -y upgrade && \ + pip3 install --upgrade pip setuptools && \ + rm -rf /var/cache/dnf + +COPY --from=base /python/spark /opt/spark/python/pyspark +COPY --from=base /python/lib /opt/spark/python/lib + + +ENV SPARK_HOME /opt/spark + +WORKDIR /opt/spark/work-dir +RUN chmod g+w /opt/spark/work-dir + +ENTRYPOINT [ "/opt/entrypoint.sh" ] + +# Specify the User that the actual main process will run as +USER ${spark_uid} diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 5dc6fa6..aa7d1dd 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,108 @@ -# +# Apache Spark -Project template for all Iron Bank container repositories. \ No newline at end of file +Spark is a unified analytics engine for large-scale data processing. It provides +high-level APIs in Scala, Java, Python, and R, and an optimized engine that +supports general computation graphs for data analysis. It also supports a +rich set of higher-level tools including Spark SQL for SQL and DataFrames, +MLlib for machine learning, GraphX for graph processing, +and Structured Streaming for stream processing. + + + +[![Jenkins Build](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-3.2/badge/icon)](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-3.2) +[![AppVeyor Build](https://img.shields.io/appveyor/ci/ApacheSoftwareFoundation/spark/master.svg?style=plastic&logo=appveyor)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/spark) +[![PySpark Coverage](https://img.shields.io/badge/dynamic/xml.svg?label=pyspark%20coverage&url=https%3A%2F%2Fspark-test.github.io%2Fpyspark-coverage-site&query=%2Fhtml%2Fbody%2Fdiv%5B1%5D%2Fdiv%2Fh1%2Fspan&colorB=brightgreen&style=plastic)](https://spark-test.github.io/pyspark-coverage-site) + + +## Online Documentation + +You can find the latest Spark documentation, including a programming +guide, on the [project web page](https://spark.apache.org/documentation.html). +This README file only contains basic setup instructions. + +## Building Spark + +Spark is built using [Apache Maven](https://maven.apache.org/). +To build Spark and its example programs, run: + + ./build/mvn -DskipTests clean package + +(You do not need to do this if you downloaded a pre-built package.) + +More detailed documentation is available from the project site, at +["Building Spark"](https://spark.apache.org/docs/latest/building-spark.html). + +For general development tips, including info on developing Spark using an IDE, see ["Useful Developer Tools"](https://spark.apache.org/developer-tools.html). + +## Interactive Scala Shell + +The easiest way to start using Spark is through the Scala shell: + + ./bin/spark-shell + +Try the following command, which should return 1,000,000,000: + + scala> spark.range(1000 * 1000 * 1000).count() + +## Interactive Python Shell + +Alternatively, if you prefer Python, you can use the Python shell: + + ./bin/pyspark + +And run the following command, which should also return 1,000,000,000: + + >>> spark.range(1000 * 1000 * 1000).count() + +## Example Programs + +Spark also comes with several sample programs in the `examples` directory. +To run one of them, use `./bin/run-example [params]`. For example: + + ./bin/run-example SparkPi + +will run the Pi example locally. + +You can set the MASTER environment variable when running examples to submit +examples to a cluster. This can be a mesos:// or spark:// URL, +"yarn" to run on YARN, and "local" to run +locally with one thread, or "local[N]" to run locally with N threads. You +can also use an abbreviated class name if the class is in the `examples` +package. For instance: + + MASTER=spark://host:7077 ./bin/run-example SparkPi + +Many of the example programs print usage help if no params are given. + +## Running Tests + +Testing first requires [building Spark](#building-spark). Once Spark is built, tests +can be run using: + + ./dev/run-tests + +Please see the guidance on how to +[run tests for a module, or individual tests](https://spark.apache.org/developer-tools.html#individual-tests). + +There is also a Kubernetes integration test, see resource-managers/kubernetes/integration-tests/README.md + +## A Note About Hadoop Versions + +Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported +storage systems. Because the protocols have changed in different versions of +Hadoop, you must build Spark against the same version that your cluster runs. + +Please refer to the build documentation at +["Specifying the Hadoop Version and Enabling YARN"](https://spark.apache.org/docs/latest/building-spark.html#specifying-the-hadoop-version-and-enabling-yarn) +for detailed guidance on building for a particular distribution of Hadoop, including +building for particular Hive and Hive Thriftserver distributions. + +## Configuration + +Please refer to the [Configuration Guide](https://spark.apache.org/docs/latest/configuration.html) +in the online documentation for an overview on how to configure Spark. + +## Contributing + +Please review the [Contribution to Spark guide](https://spark.apache.org/contributing.html) +for information on how to get started contributing to the project. diff --git a/hardening_manifest.yaml b/hardening_manifest.yaml new file mode 100644 index 0000000..8a5b109 --- /dev/null +++ b/hardening_manifest.yaml @@ -0,0 +1,51 @@ +--- +apiVersion: v1 + +# The repository name in registry1, excluding /ironbank/ +name: "opensource/spark-operator/spark" + +# List of tags to push for the repository in registry1 +# The most specific version should be the first tag and will be shown +# on ironbank.dsop.io +tags: +- "v1beta2-1.0.0-2.4.4" +- "latest" + +# Build args passed to Dockerfile ARGs +args: + BASE_IMAGE: "redhat/python/python38" + BASE_TAG: "3.8" + +# Docker image labels +labels: + org.opencontainers.image.title: "Spark" + # Human-readable description of the software packaged in the image + org.opencontainers.image.description: "Apache Spark is a unified analytics engine for large-scale data processing." + # License(s) under which contained software is distributed + org.opencontainers.image.licenses: "Apache-2.0" + # URL to find more information on the image + org.opencontainers.image.url: "https://spark.apache.org/docs/latest/index.html" + # Name of the distributing entity, organization or individual + org.opencontainers.image.vendor: "Apache Software Foundation" + org.opencontainers.image.version: "v1beta2-1.0.0-2.4.4" + # Keywords to help with search (ex. "cicd,gitops,golang") + mil.dso.ironbank.image.keywords: "apache,python,data processing,large scale,hadoop,kubeflow" + # This value can be "opensource" or "commercial" + mil.dso.ironbank.image.type: "opensource" + # Product the image belongs to for grouping multiple images + mil.dso.ironbank.product.name: "spark-operator" + +# List of resources to make available to the offline build context +resources: + - url: "docker://gcr.io/spark-operator/spark-operator@sha256:52b257911a5c47c480aae25f83c80151a2ed8c99e8b50317d451c157c80666fd" + tag: "spark-operator/spark-operator:v1beta2-1.0.0-2.4.4" + + +# List of project maintainers +maintainers: +- email: "jweatherford@oteemo.com" + # The name of the current container owner + name: "Jeff Weatherford" + # The gitlab username of the current container owner + username: "jweatherford" + cht_member: true -- GitLab From 6268d933ae57d869e7afc4ec877cb5cd4dc1144a Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 09:40:33 -0500 Subject: [PATCH 02/16] forgot to rename --- hardening_manifest.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hardening_manifest.yaml b/hardening_manifest.yaml index 8a5b109..8808250 100644 --- a/hardening_manifest.yaml +++ b/hardening_manifest.yaml @@ -2,7 +2,7 @@ apiVersion: v1 # The repository name in registry1, excluding /ironbank/ -name: "opensource/spark-operator/spark" +name: "opensource/spark-operator/spark-py" # List of tags to push for the repository in registry1 # The most specific version should be the first tag and will be shown @@ -18,7 +18,7 @@ args: # Docker image labels labels: - org.opencontainers.image.title: "Spark" + org.opencontainers.image.title: "Python Spark" # Human-readable description of the software packaged in the image org.opencontainers.image.description: "Apache Spark is a unified analytics engine for large-scale data processing." # License(s) under which contained software is distributed -- GitLab From f1ea33f2da31b915d9700056cff0d33b3796381b Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 09:49:50 -0500 Subject: [PATCH 03/16] udpated path --- Dockerfile | 2 +- hardening_manifest.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2f7b2cd..fcc5dba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ ARG BASE_REGISTRY=repo1.dso.mil -ARG BASE_IMAGE=ironbank/redhat/python/python38 +ARG BASE_IMAGE=ironbank/opensource/python/python38 ARG BASE_TAG=3.8 FROM spark-operator/spark-operator:v1beta2-1.0.0-2.4.4 as base diff --git a/hardening_manifest.yaml b/hardening_manifest.yaml index 8808250..44ab478 100644 --- a/hardening_manifest.yaml +++ b/hardening_manifest.yaml @@ -13,7 +13,7 @@ tags: # Build args passed to Dockerfile ARGs args: - BASE_IMAGE: "redhat/python/python38" + BASE_IMAGE: "opensource/python/python38" BASE_TAG: "3.8" # Docker image labels -- GitLab From 946cc54cd3a2f8288768214c02f5e2f88147e909 Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 10:22:42 -0500 Subject: [PATCH 04/16] trying to solve python deps --- Dockerfile | 2 +- hardening_manifest.yaml | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index fcc5dba..034072f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,7 @@ ARG spark_uid=185 USER root RUN dnf -y update && dnf -y upgrade && \ - pip3 install --upgrade pip setuptools && \ + pip3 install --upgrade pip-21.0.1-py3-none-any.whl setuptools-53.0.0-py3-none-any.whl && \ rm -rf /var/cache/dnf COPY --from=base /python/spark /opt/spark/python/pyspark diff --git a/hardening_manifest.yaml b/hardening_manifest.yaml index 44ab478..76d8290 100644 --- a/hardening_manifest.yaml +++ b/hardening_manifest.yaml @@ -39,7 +39,16 @@ labels: resources: - url: "docker://gcr.io/spark-operator/spark-operator@sha256:52b257911a5c47c480aae25f83c80151a2ed8c99e8b50317d451c157c80666fd" tag: "spark-operator/spark-operator:v1beta2-1.0.0-2.4.4" - + - name: pip-21.0.1-py3-none-any.whl + url: https://files.pythonhosted.org/packages/fe/ef/60d7ba03b5c442309ef42e7d69959f73aacccd0d86008362a681c4698e83/pip-21.0.1-py3-none-any.whl + validation: + type: sha256 + value: 37fd50e056e2aed635dec96594606f0286640489b0db0ce7607f7e51890372d5 + - name: setuptools-53.0.0-py3-none-any.whl + url: https://files.pythonhosted.org/packages/15/0e/255e3d57965f318973e417d5b7034223f1223de500d91b945ddfaef42a37/setuptools-53.0.0-py3-none-any.whl + validation: + type: sha256 + value: 0e86620d658c5ca87a71a283bd308fcaeb4c33e17792ef6f081aec17c171347f # List of project maintainers maintainers: -- GitLab From e39bbb31c77685f079010886d19fec09141146d3 Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 10:28:28 -0500 Subject: [PATCH 05/16] added quotes --- hardening_manifest.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hardening_manifest.yaml b/hardening_manifest.yaml index 76d8290..8fceb49 100644 --- a/hardening_manifest.yaml +++ b/hardening_manifest.yaml @@ -39,13 +39,13 @@ labels: resources: - url: "docker://gcr.io/spark-operator/spark-operator@sha256:52b257911a5c47c480aae25f83c80151a2ed8c99e8b50317d451c157c80666fd" tag: "spark-operator/spark-operator:v1beta2-1.0.0-2.4.4" - - name: pip-21.0.1-py3-none-any.whl - url: https://files.pythonhosted.org/packages/fe/ef/60d7ba03b5c442309ef42e7d69959f73aacccd0d86008362a681c4698e83/pip-21.0.1-py3-none-any.whl + - name: "pip-21.0.1-py3-none-any.whl" + url: "https://files.pythonhosted.org/packages/fe/ef/60d7ba03b5c442309ef42e7d69959f73aacccd0d86008362a681c4698e83/pip-21.0.1-py3-none-any.whl" validation: type: sha256 value: 37fd50e056e2aed635dec96594606f0286640489b0db0ce7607f7e51890372d5 - - name: setuptools-53.0.0-py3-none-any.whl - url: https://files.pythonhosted.org/packages/15/0e/255e3d57965f318973e417d5b7034223f1223de500d91b945ddfaef42a37/setuptools-53.0.0-py3-none-any.whl + - name: "setuptools-53.0.0-py3-none-any.whl" + url: "https://files.pythonhosted.org/packages/15/0e/255e3d57965f318973e417d5b7034223f1223de500d91b945ddfaef42a37/setuptools-53.0.0-py3-none-any.whl" validation: type: sha256 value: 0e86620d658c5ca87a71a283bd308fcaeb4c33e17792ef6f081aec17c171347f -- GitLab From a90a2d0a58b2d4ea04270c9084ec4ba66ba65728 Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 10:37:13 -0500 Subject: [PATCH 06/16] more typos --- hardening_manifest.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hardening_manifest.yaml b/hardening_manifest.yaml index 8fceb49..b4a43ad 100644 --- a/hardening_manifest.yaml +++ b/hardening_manifest.yaml @@ -39,13 +39,13 @@ labels: resources: - url: "docker://gcr.io/spark-operator/spark-operator@sha256:52b257911a5c47c480aae25f83c80151a2ed8c99e8b50317d451c157c80666fd" tag: "spark-operator/spark-operator:v1beta2-1.0.0-2.4.4" - - name: "pip-21.0.1-py3-none-any.whl" - url: "https://files.pythonhosted.org/packages/fe/ef/60d7ba03b5c442309ef42e7d69959f73aacccd0d86008362a681c4698e83/pip-21.0.1-py3-none-any.whl" + - filename: pip-21.0.1-py3-none-any.whl + url: https://files.pythonhosted.org/packages/fe/ef/60d7ba03b5c442309ef42e7d69959f73aacccd0d86008362a681c4698e83/pip-21.0.1-py3-none-any.whl validation: type: sha256 value: 37fd50e056e2aed635dec96594606f0286640489b0db0ce7607f7e51890372d5 - - name: "setuptools-53.0.0-py3-none-any.whl" - url: "https://files.pythonhosted.org/packages/15/0e/255e3d57965f318973e417d5b7034223f1223de500d91b945ddfaef42a37/setuptools-53.0.0-py3-none-any.whl" + - filename: setuptools-53.0.0-py3-none-any.whl + url: https://files.pythonhosted.org/packages/15/0e/255e3d57965f318973e417d5b7034223f1223de500d91b945ddfaef42a37/setuptools-53.0.0-py3-none-any.whl validation: type: sha256 value: 0e86620d658c5ca87a71a283bd308fcaeb4c33e17792ef6f081aec17c171347f -- GitLab From 428dcbc1b7c5b95ffe07a196f77b67aa8026f8e6 Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 11:09:00 -0500 Subject: [PATCH 07/16] forgot to copy the .whl files in --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 034072f..9965846 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,8 +11,10 @@ ARG spark_uid=185 USER root +COPY pip-21.0.1-py3-none-any.whl setuptools-53.0.0-py3-none-any.whl ./ + RUN dnf -y update && dnf -y upgrade && \ - pip3 install --upgrade pip-21.0.1-py3-none-any.whl setuptools-53.0.0-py3-none-any.whl && \ + pip3 install --upgrade ./pip-21.0.1-py3-none-any.whl ./setuptools-53.0.0-py3-none-any.whl && \ rm -rf /var/cache/dnf COPY --from=base /python/spark /opt/spark/python/pyspark -- GitLab From f8727b2c4f32d4e8e2931fbc9260d9930ad9bd41 Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 11:20:11 -0500 Subject: [PATCH 08/16] rebase on the spark-py 2.4.4 container --- Dockerfile | 2 +- hardening_manifest.yaml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9965846..8496894 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ ARG BASE_REGISTRY=repo1.dso.mil ARG BASE_IMAGE=ironbank/opensource/python/python38 ARG BASE_TAG=3.8 -FROM spark-operator/spark-operator:v1beta2-1.0.0-2.4.4 as base +FROM spark-operator/spark-py:2.4.4 as base FROM ${BASE_REGISTRY}/${BASE_IMAGE}:${BASE_TAG} diff --git a/hardening_manifest.yaml b/hardening_manifest.yaml index b4a43ad..083e54c 100644 --- a/hardening_manifest.yaml +++ b/hardening_manifest.yaml @@ -8,7 +8,7 @@ name: "opensource/spark-operator/spark-py" # The most specific version should be the first tag and will be shown # on ironbank.dsop.io tags: -- "v1beta2-1.0.0-2.4.4" +- "2.4.4" - "latest" # Build args passed to Dockerfile ARGs @@ -27,7 +27,7 @@ labels: org.opencontainers.image.url: "https://spark.apache.org/docs/latest/index.html" # Name of the distributing entity, organization or individual org.opencontainers.image.vendor: "Apache Software Foundation" - org.opencontainers.image.version: "v1beta2-1.0.0-2.4.4" + org.opencontainers.image.version: "2.4.4" # Keywords to help with search (ex. "cicd,gitops,golang") mil.dso.ironbank.image.keywords: "apache,python,data processing,large scale,hadoop,kubeflow" # This value can be "opensource" or "commercial" @@ -37,8 +37,8 @@ labels: # List of resources to make available to the offline build context resources: - - url: "docker://gcr.io/spark-operator/spark-operator@sha256:52b257911a5c47c480aae25f83c80151a2ed8c99e8b50317d451c157c80666fd" - tag: "spark-operator/spark-operator:v1beta2-1.0.0-2.4.4" + - url: "gcr.io/spark-operator/spark-py@sha256:f4980fb33077ae6e03329cf6f835ec671c1b99de391ef9494e28ed19cf3de298" + tag: "spark-operator/spark-py:2.4.4" - filename: pip-21.0.1-py3-none-any.whl url: https://files.pythonhosted.org/packages/fe/ef/60d7ba03b5c442309ef42e7d69959f73aacccd0d86008362a681c4698e83/pip-21.0.1-py3-none-any.whl validation: -- GitLab From dbe511b4429fbc195262a5ebadbc0e039606b69f Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 11:25:35 -0500 Subject: [PATCH 09/16] forgot the docker type for the URI --- hardening_manifest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hardening_manifest.yaml b/hardening_manifest.yaml index 083e54c..0c420ac 100644 --- a/hardening_manifest.yaml +++ b/hardening_manifest.yaml @@ -37,7 +37,7 @@ labels: # List of resources to make available to the offline build context resources: - - url: "gcr.io/spark-operator/spark-py@sha256:f4980fb33077ae6e03329cf6f835ec671c1b99de391ef9494e28ed19cf3de298" + - url: "docker://gcr.io/spark-operator/spark-py@sha256:f4980fb33077ae6e03329cf6f835ec671c1b99de391ef9494e28ed19cf3de298" tag: "spark-operator/spark-py:2.4.4" - filename: pip-21.0.1-py3-none-any.whl url: https://files.pythonhosted.org/packages/fe/ef/60d7ba03b5c442309ef42e7d69959f73aacccd0d86008362a681c4698e83/pip-21.0.1-py3-none-any.whl -- GitLab From ec4886da8b8163c411414466fd646da8cf1376af Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 12:00:18 -0500 Subject: [PATCH 10/16] path info change --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8496894..d988f94 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,8 +17,8 @@ RUN dnf -y update && dnf -y upgrade && \ pip3 install --upgrade ./pip-21.0.1-py3-none-any.whl ./setuptools-53.0.0-py3-none-any.whl && \ rm -rf /var/cache/dnf -COPY --from=base /python/spark /opt/spark/python/pyspark -COPY --from=base /python/lib /opt/spark/python/lib +COPY --from=base /opt/spark/python/spark /opt/spark/python/pyspark +COPY --from=base /opt/spark/python/lib /opt/spark/python/lib ENV SPARK_HOME /opt/spark -- GitLab From 4243f368b49d451eb8505030994928e49466ea01 Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 12:14:20 -0500 Subject: [PATCH 11/16] patch change for pyspark --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d988f94..34883a1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,7 +17,7 @@ RUN dnf -y update && dnf -y upgrade && \ pip3 install --upgrade ./pip-21.0.1-py3-none-any.whl ./setuptools-53.0.0-py3-none-any.whl && \ rm -rf /var/cache/dnf -COPY --from=base /opt/spark/python/spark /opt/spark/python/pyspark +COPY --from=base /opt/spark/python/pyspark /opt/spark/python/pyspark COPY --from=base /opt/spark/python/lib /opt/spark/python/lib -- GitLab From 72d7da0d0f887754b481e368fc025ece6f8f2626 Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 12:30:14 -0500 Subject: [PATCH 12/16] lets try new paths --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 34883a1..d8286a7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,8 +17,8 @@ RUN dnf -y update && dnf -y upgrade && \ pip3 install --upgrade ./pip-21.0.1-py3-none-any.whl ./setuptools-53.0.0-py3-none-any.whl && \ rm -rf /var/cache/dnf -COPY --from=base /opt/spark/python/pyspark /opt/spark/python/pyspark -COPY --from=base /opt/spark/python/lib /opt/spark/python/lib +COPY --from=base python/pyspark /opt/spark/python/pyspark +COPY --from=base python/lib /opt/spark/python/lib ENV SPARK_HOME /opt/spark -- GitLab From 83325051aa22fa03312cac168fe82717e3c74562 Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 12:54:58 -0500 Subject: [PATCH 13/16] need a mkdir --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d8286a7..b88e593 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,9 @@ COPY pip-21.0.1-py3-none-any.whl setuptools-53.0.0-py3-none-any.whl ./ RUN dnf -y update && dnf -y upgrade && \ pip3 install --upgrade ./pip-21.0.1-py3-none-any.whl ./setuptools-53.0.0-py3-none-any.whl && \ - rm -rf /var/cache/dnf + rm -rf /var/cache/dnf && \ + mkdir -p /opt/spark/python && \ + chown -R 185:185 /opt/spark/ COPY --from=base python/pyspark /opt/spark/python/pyspark COPY --from=base python/lib /opt/spark/python/lib -- GitLab From 6b3758d810cee28364876742211acf778dcd9d9b Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 13:14:03 -0500 Subject: [PATCH 14/16] directory doesn't seem to exist --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index b88e593..0d247ff 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,7 +19,7 @@ RUN dnf -y update && dnf -y upgrade && \ mkdir -p /opt/spark/python && \ chown -R 185:185 /opt/spark/ -COPY --from=base python/pyspark /opt/spark/python/pyspark +#COPY --from=base python/pyspark /opt/spark/python/pyspark COPY --from=base python/lib /opt/spark/python/lib -- GitLab From 857415079c372521775ce839af8f55e6ff62e373 Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 13:29:43 -0500 Subject: [PATCH 15/16] directory doesn't seem to exist --- Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0d247ff..b551f89 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,10 +16,11 @@ COPY pip-21.0.1-py3-none-any.whl setuptools-53.0.0-py3-none-any.whl ./ RUN dnf -y update && dnf -y upgrade && \ pip3 install --upgrade ./pip-21.0.1-py3-none-any.whl ./setuptools-53.0.0-py3-none-any.whl && \ rm -rf /var/cache/dnf && \ - mkdir -p /opt/spark/python && \ + mkdir -p /opt/spark/python/pyspark && \ + mkdir -p /opt/spark/python/lib && \ chown -R 185:185 /opt/spark/ -#COPY --from=base python/pyspark /opt/spark/python/pyspark +COPY --from=base python/pyspark /opt/spark/python/pyspark COPY --from=base python/lib /opt/spark/python/lib -- GitLab From 479978c3e9a4a918019096ce0cfbbf6670b55764 Mon Sep 17 00:00:00 2001 From: jweatherford Date: Wed, 24 Feb 2021 13:47:20 -0500 Subject: [PATCH 16/16] let's try again --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index b551f89..9aa782b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,8 +20,8 @@ RUN dnf -y update && dnf -y upgrade && \ mkdir -p /opt/spark/python/lib && \ chown -R 185:185 /opt/spark/ -COPY --from=base python/pyspark /opt/spark/python/pyspark -COPY --from=base python/lib /opt/spark/python/lib +#COPY --from=base /opt/spark/python/pyspark /opt/spark/python/pyspark +COPY --from=base /opt/spark/python/lib /opt/spark/python/lib ENV SPARK_HOME /opt/spark -- GitLab