diff --git a/.github/workflows/dev_ny-tlc-report.yaml b/.github/workflows/dev_ny-tlc-report.yaml
new file mode 100644
index 00000000..a7d3c851
--- /dev/null
+++ b/.github/workflows/dev_ny-tlc-report.yaml
@@ -0,0 +1,104 @@
+---
+# Builds the ny-tlc-report image on an amd64 and an arm64 runner, publishes each
+# image to docker.stackable.tech and oci.stackable.tech, then publishes the
+# image index manifest tagged IMAGE_VERSION to both registries.
+name: Build and publish ny-tlc-report
+
+env:
+  IMAGE_NAME: ny-tlc-report
+  IMAGE_VERSION: "0.2.0"
+  # Repository namespace used for image-repository on both registries.
+  REGISTRY_PATH: stackable
+  DOCKERFILE_PATH: "apps/docker/Dockerfile"
+
+# Runs on manual dispatch and on pushes to main that change one of the paths
+# listed below.
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+    paths:
+      - apps/docker/Dockerfile
+      - apps/ny_tlc_report.py
+      - .github/workflows/dev_ny-tlc-report.yaml
+
+jobs:
+  # Runs once per matrix entry, i.e. once per runner/architecture pair.
+  build:
+    name: Build/Publish ${{ matrix.runner.arch }} Image
+    permissions:
+      # Unlisted permissions drop to `none` once this block exists, so grant
+      # the read access actions/checkout relies on explicitly.
+      contents: read
+      # NOTE(review): presumably consumed by the stackabletech actions for
+      # image signing -- confirm against their documentation.
+      id-token: write
+    runs-on: ${{ matrix.runner.name }}
+    strategy:
+      matrix:
+        runner:
+          - {name: "ubuntu-latest", arch: "amd64"}
+          - {name: "ubicloud-standard-8-arm", arch: "arm64"}
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+
+      - name: Build image
+        id: build
+        uses: stackabletech/actions/build-container-image@013e6482fbc0edf2d38cf9220fc931f6a81336fb # v0.0.6
+        with:
+          image-name: ${{ env.IMAGE_NAME }}
+          image-index-manifest-tag: ${{ env.IMAGE_VERSION }}
+          container-file: ${{ env.DOCKERFILE_PATH }}
+
+      - name: Publish Container Image on docker.stackable.tech
+        uses: stackabletech/actions/publish-image@013e6482fbc0edf2d38cf9220fc931f6a81336fb # v0.0.6
+        with:
+          image-registry-uri: docker.stackable.tech
+          image-registry-username: github
+          image-registry-password: ${{ secrets.NEXUS_PASSWORD }}
+          image-repository: ${{ env.REGISTRY_PATH }}/${{ env.IMAGE_NAME }}
+          image-manifest-tag: ${{ steps.build.outputs.image-manifest-tag }}
+          source-image-uri: ${{ steps.build.outputs.image-manifest-uri }}
+
+      - name: Publish Container Image on oci.stackable.tech
+        uses: stackabletech/actions/publish-image@013e6482fbc0edf2d38cf9220fc931f6a81336fb # v0.0.6
+        with:
+          image-registry-uri: oci.stackable.tech
+          image-registry-username: robot$stackable+github-action-build
+          image-registry-password: ${{ secrets.HARBOR_ROBOT_STACKABLE_GITHUB_ACTION_BUILD_SECRET }}
+          image-repository: ${{ env.REGISTRY_PATH }}/${{ env.IMAGE_NAME }}
+          image-manifest-tag: ${{ steps.build.outputs.image-manifest-tag }}
+          source-image-uri: ${{ steps.build.outputs.image-manifest-uri }}
+
+  # Waits for every matrix run of `build` before publishing the index manifest.
+  publish_manifests:
+    name: Build/Publish Manifest
+    needs: [build]
+    permissions:
+      # Keep checkout's read access explicit (unlisted permissions are `none`).
+      contents: read
+      id-token: write
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+
+      - name: Publish and Sign Image Index Manifest to docker.stackable.tech
+        uses: stackabletech/actions/publish-index-manifest@013e6482fbc0edf2d38cf9220fc931f6a81336fb # v0.0.6
+        with:
+          image-registry-uri: docker.stackable.tech
+          image-registry-username: github
+          image-registry-password: ${{ secrets.NEXUS_PASSWORD }}
+          image-repository: ${{ env.REGISTRY_PATH }}/${{ env.IMAGE_NAME }}
+          image-index-manifest-tag: ${{ env.IMAGE_VERSION }}
+
+      - name: Publish and Sign Image Index Manifest to oci.stackable.tech
+        uses: stackabletech/actions/publish-index-manifest@013e6482fbc0edf2d38cf9220fc931f6a81336fb # v0.0.6
+        with:
+          image-registry-uri: oci.stackable.tech
+          image-registry-username: robot$stackable+github-action-build
+          image-registry-password: ${{ secrets.HARBOR_ROBOT_STACKABLE_GITHUB_ACTION_BUILD_SECRET }}
+          image-repository: ${{ env.REGISTRY_PATH }}/${{ env.IMAGE_NAME }}
+          image-index-manifest-tag: ${{ env.IMAGE_VERSION }}
diff --git a/apps/README.md b/apps/README.md
index 7f423e87..29e1167b 100644
--- a/apps/README.md
+++ b/apps/README.md
@@ -1,9 +1,5 @@
 
-## Build job image
-
-    docker build -t docker.stackable.tech/stackable/ny-tlc-report:0.2.0 -t docker.stackable.tech/stackable/ny-tlc-report:latest -f apps/docker/Dockerfile apps/
-
-## Generate report from the public data set
+# Generate report from the public data set
 
     spark-submit --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider --packages org.apache.hadoop:hadoop-aws:3.2.0,com.amazonaws:aws-java-sdk-s3:1.12.180,com.amazonaws:aws-java-sdk-core:1.12.180 ny_tlc_report.py --input 's3a://nyc-tlc/trip data/yellow_tripdata_2021-07.csv'
 
@@ -14,8 +10,8 @@ Notes:
 * Only one file is used for reporting in this example `yellow_tripdata_2021-07.csv`
-* This example works with `spark-3.1.3-bin-hadoop3.2` and Python 3.9.7. Other versions may require different dependency versions or even complete different dependencies altogether.
+* This example works with `spark-3.1.3-bin-hadoop3.2` and Python 3.9.7. Other versions may require different dependency versions or even completely different dependencies altogether.
 
-### Links
+## Links
 
-[0] TLC trip data set https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page
-[1] AWS Java SDK https://github.com/aws/aws-sdk-java
-[2] Hadoop AWS module https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html
+[0] [TLC trip data set](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page)
+[1] [AWS Java SDK](https://github.com/aws/aws-sdk-java)
+[2] [Hadoop AWS module](https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html)
diff --git a/examples/README-examples.md b/examples/README-examples.md
index 4466c7ea..7d739f74 100644
--- a/examples/README-examples.md
+++ b/examples/README-examples.md
@@ -9,11 +9,11 @@ This note outlines a few things that are needed to run these examples on a local
 Create a new local cluster (e.g. with [Kind](https://kind.sigs.k8s.io/docs/user/quick-start/) and the [stackablectl tool](https://github.com/stackabletech/stackablectl)). This creates a cluster named `stackable-data-platform`.
 
 ````text
-kind delete clusters --all
-stackablectl operator install spark-k8s commons secret -k
+kind create cluster --name stackable-data-platform
+stackablectl operator install spark-k8s commons secret
 ````
 
-Build the `ny-tlc-report` image from the Dockerfile in this repository (apps/docker/Dockerfile) and then load it to the cluster:
+Load the `ny-tlc-report` image to the cluster:
 
 ````text
 kind load docker-image docker.stackable.tech/stackable/ny-tlc-report:0.2.0 --name stackable-data-platform