Merge pull request #38 from whylabs/dev/aberg/gr-nginx-#00000001
Adding support for proxy cache
AnchorArray authored Dec 5, 2024
2 parents 51a3a09 + 32baa84 commit 17ade86
Showing 12 changed files with 746 additions and 266 deletions.
19 changes: 19 additions & 0 deletions charts/guardrails/CHANGELOG.md
@@ -6,6 +6,25 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.5.0] - 2024-12-02

### Added

- Add caching support, enabled with `cache.enable: true` (default is `true`)
- HPA support for configuring scaling behavior

## [0.4.0] - 2024-11-26

### Changed

- Default image tag from `2.0.1` to `2.2.2`

### Added

- Caching support enabled with `cache.enable: true`
- Horizontal Pod Autoscaler (HPA) support for configuring scaling behavior
- Startup probe to support more graceful startup and scaling behavior

## [0.3.1] - 2024-10-31

### Fixed
5 changes: 3 additions & 2 deletions charts/guardrails/Chart.yaml
@@ -2,5 +2,6 @@ apiVersion: v2
name: guardrails
description: A Helm chart for WhyLabs Guardrails
type: application
version: 0.3.1
appVersion: "2.0.1"
version: 0.5.0
appVersion: "2.2.2"
icon: "https://whylabs.ai/_next/static/images/whylabs-favicon-192c009321aebbb96c19921a170fc880.png"
265 changes: 181 additions & 84 deletions charts/guardrails/README.md

Large diffs are not rendered by default.

225 changes: 153 additions & 72 deletions charts/guardrails/README.md.gotmpl
@@ -11,93 +11,101 @@

{{ template "chart.sourcesSection" . }}

- [Prerequisites](#prerequisites)
- [Configuring WhyLabs credentials](#whylabs-credentials)
- [Helm Chart Installation & Upgrades](#installation--upgrades)
- [Exposing Guardrails Outside Kubernetes](#exposing-guardrails-outside-kubernetes)
- [Horizontal Pod Autoscaling (HPA)](#horizontal-pod-autoscaling-hpa)

## Prerequisites

### API Key and Secrets Management
- [Create and configure WhyLabs credentials](#whylabs-credentials)

Create a [WhyLabs API Key](https://docs.whylabs.ai/docs/whylabs-api/#creating-an-api-token)
that will be used when creating the required Kubernetes secrets to authenticate
with the WhyLabs API.
## WhyLabs credentials

You can manage the API keys and container secrets in one of two ways, depending on your preferred setup:
- [WhyLabs API Key](#whylabs-api-key)
- [WhyLabs Container Password](#whylabs-container-password)

1. **Kubernetes Secret-based Management (default)**
### WhyLabs API Key

In this setup, secrets are passed as environment variables by creating Kubernetes Secrets manually. The following commands show how to create secrets for the API key and container authentication:
1. Create a [WhyLabs API Key](https://docs.whylabs.ai/docs/whylabs-api/#creating-an-api-token)

Use the following `kubectl` commands to create the required Kubernetes
`Secrets`. These secrets must exist prior to installing the Helm chart.
2. Store the API key in one of the following locations:

- [Kubernetes Secret](#kubernetes-secret-default)
- [Mounted Volume](#mounted-volume)

#### Kubernetes Secret (Default)

```shell
# WhyLabs API key
whylabs_api_key=""

# Change this to the desired namespace
target_namespace="default"

# The `WHYLABS_API_KEY` key is used as the env variable name within the `Pod`
kubectl create secret generic "whylabs-guardrails-api-key" \
  --namespace "${target_namespace}" \
  --from-literal=WHYLABS_API_KEY="${whylabs_api_key}"
```

```shell
# API key that was created above
whylabs_api_key=""
# Arbitrary value that will be required to make requests to the containers
container_password=""
# Change this to the desired namespace
target_namespace="default"
# Helm release name (see Installation for release_name usage)
release_name=""

kubectl create secret generic "whylabs-${release_name}-api-key" \
  --namespace "${target_namespace}" \
  --from-literal=WHYLABS_API_KEY="${whylabs_api_key}"

kubectl create secret generic "whylabs-${release_name}-api-secret" \
  --namespace "${target_namespace}" \
  --from-literal=CONTAINER_PASSWORD="${container_password}"

kubectl create secret docker-registry "whylabs-${release_name}-registry-credentials" \
  --namespace "${target_namespace}" \
  --docker-server="registry.gitlab.com" \
  --docker-username="<whylabs-provided-username>" \
  --docker-password="<whylabs-provided-token>" \
  --docker-email="<whylabs-provided-email>"
```

2. **File-based Secrets Management with CSI Drivers**

If you prefer to use file-based secrets with tools like the AWS Secrets Store CSI Driver, you can configure the Helm chart to load secrets from files mounted into the container. To use file-based secrets, set `envFrom: {}` in your `values.yaml` file to disable the environment-variable-based configuration.
#### Mounted Volume

Example configuration for file-based secrets:
Alternatively, any file mounted to `/var/run/secrets/whylabs.ai/env` will be automatically picked up by the container and used as an environment variable. The environment variable name will be the filename, and the file contents will be the value, e.g.:

```shell
$ tree /var/run/secrets/whylabs.ai/env

/var/run/secrets/whylabs.ai/env
├── whylabs_api_key
├── container_password
└── any_other_env_vars

$ cat /var/run/secrets/whylabs.ai/env/whylabs_api_key

MyS3cr3tWhyL@b5@piK3y
```

- Modify the `envFrom` section in your `values.yaml`:

```yaml
envFrom: {}
```

- Use your CSI driver to mount secrets as files into the container, which allows
the application to read the secrets directly from the filesystem.

### Choose Your Secret Management Strategy

- Environment Variables: This is the default method and requires you to populate secrets as Kubernetes environment variables. Leave the `envFrom` section in `values.yaml` unchanged or configure it with your Kubernetes secret references:

```yaml
envFrom:
  whylabs-guardrails-api-key:
    type: secretRef
    optional: true
  whylabs-guardrails-api-secret:
    type: secretRef
    optional: true
```
- File-based Secrets: If you are using a CSI driver, set `envFrom: {}` in your
`values.yaml` and ensure your secrets are available as mounted files.

Declare and mount the volumes by overriding `extraVolumes` and `extraVolumeMounts` in the `values.yaml` file. The following example assumes the use of the [AWS Secrets Store CSI Driver](https://github.com/aws/secrets-store-csi-driver-provider-aws), but the concept is the same for any other method of mounting files into the container.
```yaml
extraVolumeMounts:
  - name: whylabs-secret-provider
    mountPath: /var/run/secrets/whylabs.ai/env
    readOnly: true

extraVolumes:
  - name: whylabs-secret-provider
    csi:
      driver: secrets-store.csi.k8s.io
      readOnly: true
      volumeAttributes:
        secretProviderClass: "your-whylabs-secret-provider-name"
```
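For the mount above to work, a matching `SecretProviderClass` must exist in the cluster. The following is only a sketch assuming the AWS provider; the `objectName` path is hypothetical and must match your own secret store layout:

```shell
# Write a hypothetical SecretProviderClass manifest (AWS provider assumed)
cat > secret-provider.yaml <<'EOF'
apiVersion: secrets-store.csi.x-k8s.io/v1
kind: SecretProviderClass
metadata:
  name: your-whylabs-secret-provider-name
spec:
  provider: aws
  parameters:
    objects: |
      - objectName: "whylabs/guardrails/api-key"  # hypothetical secret name
        objectAlias: "whylabs_api_key"            # becomes the mounted filename
EOF

# Apply it once the Secrets Store CSI Driver and AWS provider are installed:
# kubectl apply -f secret-provider.yaml
```

The `objectAlias` determines the filename under the mount path, and therefore the resulting environment variable name.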

## Installation & Upgrades
### WhyLabs Container Password

> :warning: To expose Guardrails to callers outside of your K8s cluster, you will
need an Ingress Controller such as
[NGINX Ingress Controller](https://kubernetes.github.io/ingress-nginx/), a
Gateway Controller such as [Ambassador](https://www.getambassador.io/), a
Service Mesh such as [Istio](https://istio.io/), or a Load Balancer Controller
such as [AWS Load Balancer Controller](https://kubernetes-sigs.github.io/aws-load-balancer-controller).
The installation and configuration of the aforementioned controllers are outside
the scope of this document. However, for a quickstart guide to expose Guardrails
to the public internet via AWS LBC, see the
[Exposing Guardrails Outside Kubernetes](#exposing-guardrails-outside-kubernetes)
section.
The container password is an arbitrary value that must be included with every Guardrails container request (it is required by default). To disable the container password requirement, set the `DISABLE_CONTAINER_PASSWORD` environment variable to `True`.

To store the container password in a Kubernetes Secret, run the following command:

```shell
# Arbitrary value that will be required to make requests to the containers
container_password=""

# Change this to the desired namespace
target_namespace="default"

# The `CONTAINER_PASSWORD` key is used as the env variable name within the `Pod`
kubectl create secret generic "whylabs-guardrails-api-secret" \
  --namespace "${target_namespace}" \
  --from-literal=CONTAINER_PASSWORD="${container_password}"
```

Alternatively, the container password can be provided as a [mounted volume](#mounted-volume) as described in the [WhyLabs API Key](#whylabs-api-key) section.
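The mounted-volume convention (files under `/var/run/secrets/whylabs.ai/env` become environment variables named after the file) can be sketched locally. This is hypothetical loader logic for illustration only; the container implements this internally:

```shell
# Simulate the convention against a temporary directory standing in for
# /var/run/secrets/whylabs.ai/env: each file becomes an env var named after
# the file, holding the file's contents.
secrets_dir=$(mktemp -d)
printf 'MyS3cr3tValue' > "${secrets_dir}/container_password"

for f in "${secrets_dir}"/*; do
  export "$(basename "$f")=$(cat "$f")"
done

echo "$container_password"   # prints MyS3cr3tValue
```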

## Installation & Upgrades

### How to Use WhyLabs Helm Repository

@@ -138,6 +146,16 @@ helm upgrade --install \

## Exposing Guardrails Outside Kubernetes

> :warning: To expose Guardrails to callers outside of your K8s cluster, you will
need an Ingress Controller such as
[NGINX Ingress Controller](https://kubernetes.github.io/ingress-nginx/), a
Gateway Controller such as [Ambassador](https://www.getambassador.io/), a
Service Mesh such as [Istio](https://istio.io/), or a Load Balancer Controller
such as [AWS Load Balancer Controller](https://kubernetes-sigs.github.io/aws-load-balancer-controller).
The installation and configuration of the aforementioned controllers are outside
the scope of this document. However, for a quickstart guide to expose Guardrails
to the public internet via AWS LBC, see the following section.

This section serves as a quickstart guide to install AWS LBC and configure the
Helm chart to expose Guardrails outside of your Kubernetes cluster via an
internal NLB.
@@ -171,9 +189,7 @@ service:

The Horizontal Pod Autoscaler automatically scales the number of pods in a
replication controller, deployment, replica set or stateful set based on
observed CPU utilization (or, with custom metrics support, on some other
application-provided metrics). The Horizontal Pod Autoscaler uses the following
formula to calculate the desired number of pods:
observed CPU utilization (among other metrics that are not in scope for this document). The Horizontal Pod Autoscaler uses the following default formula to calculate the desired number of pods:

```text
Desired Replicas = ⌈ (Current Utilization / Target Utilization) * Current Replicas ⌉
@@ -192,10 +208,75 @@ Desired Replicas = ⌈ (90% / 50%) * 3 ⌉

HPA uses the same formula for both increasing and decreasing the number of pods.
Horizontal pod scaling is disabled by default. To enable it, set the
`hpa.enabled` key to `true`. The pods' QoS class will impact HPA behavior: a
deployment that is allowed to burst CPU usage will cause more aggressive HPA
scaling than a deployment with a `Guaranteed` QoS class that does not exceed
100% utilization.
`autoscaling.enabled` to `true`.
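As a quick sanity check, the formula can be evaluated with shell integer arithmetic (the `+ target - 1` term implements the ceiling):

```shell
current_utilization=90   # percent
target_utilization=50    # percent
current_replicas=3

# ceil((current / target) * replicas) via integer arithmetic
desired=$(( (current_utilization * current_replicas + target_utilization - 1) / target_utilization ))
echo "$desired"   # prints 6
```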

### Scaling Behavior configuration

When using Horizontal Pod Autoscalers (HPAs) with default configurations, users may encounter the following issues:

- Frequent and rapid scaling operations
- Resource contention caused by aggressive scaling
- Startup time delays and queue buildup
- General behavior that appears as though the HPA is not working

The following Horizontal Pod Autoscaler configuration is intended to provide a
reasonable starting point. :warning: Each deployment has unique
characteristics that require tuning scaling behavior based on factors
such as node size and type, starting replica count, request load, and traffic
patterns. The following concepts, referencing the example configuration
below, provide a framework for understanding how the HPA behavior configuration
works and how to tune it for optimal scaling.

- The `scaleUp` and `scaleDown` policies limit the number of pods added or removed in a single evaluation period.
- The `stabilizationWindowSeconds` parameter ensures scaling decisions are based on an averaged utilization over 300 seconds, smoothing out temporary spikes or dips in resource usage.
- The 180-second `periodSeconds` ensures scaling operations are spaced out, allowing the system to stabilize before further scale operations occur.

```yaml
autoscaling:
  # Enable or disable HPA (Horizontal Pod Autoscaler).
  enabled: false

  # The lower limit for the number of replicas to scale down to
  minReplicas: 1

  # The upper limit for the number of replicas to scale up to
  maxReplicas: 100

  # The specifications to use for calculating the desired replica count
  targetCPUUtilizationPercentage: 70

  # The behavior configuration for scaling up/down.
  behavior:
    # This configuration provides two policies: a policy that scales the number
    # of replicas by a fixed amount (4 pods), and a policy that scales the
    # number of replicas by a percentage (50%). Setting `selectPolicy` to `Min`
    # will select the scaling policy that creates the fewest number of replicas.
    # The `stabilizationWindowSeconds` parameter smooths out temporary
    # fluctuations in CPU utilization by evaluating recommendations over a
    # 300-second window.
    scaleUp:
      policies:
        - type: Pods
          value: 4
          periodSeconds: 180
        - type: Percent
          value: 50
          periodSeconds: 180
      selectPolicy: Min
      stabilizationWindowSeconds: 300

    scaleDown:
      policies:
        - type: Pods
          value: 4
          periodSeconds: 180
        - type: Percent
          value: 30
          periodSeconds: 180
      selectPolicy: Max
      stabilizationWindowSeconds: 300
```
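To make the `selectPolicy` semantics concrete, here is a sketch of a single scale-up evaluation under the example configuration above, assuming 10 current replicas (a hypothetical figure):

```shell
current_replicas=10

# Policy 1: add a fixed number of pods per period
by_pods=4
# Policy 2: add 50% of the current replica count per period
by_percent=$(( current_replicas * 50 / 100 ))

# selectPolicy: Min -> choose the policy that adds the fewest pods
if [ "$by_pods" -lt "$by_percent" ]; then add=$by_pods; else add=$by_percent; fi
echo "$add"   # prints 4
```

With `selectPolicy: Max` on `scaleDown`, the same comparison picks the larger change instead, so scale-downs proceed at the faster of the two policies.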

{{ template "chart.requirementsSection" . }}

1 change: 0 additions & 1 deletion charts/guardrails/templates/_helpers.tpl
@@ -35,7 +35,6 @@ Common labels
*/}}
{{- define "guardrails.labels" -}}
helm.sh/chart: {{ include "guardrails.chart" . }}
{{ include "guardrails.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
63 changes: 63 additions & 0 deletions charts/guardrails/templates/configmap-nginx.yaml
@@ -0,0 +1,63 @@
{{- if .Values.cache.enable }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Release.Name }}-nginx
data:
  nginx.conf: |
    pid /tmp/nginx.pid;
    events {
      worker_connections 1024;
    }
    http {
      proxy_cache_path /tmp/nginx_cache levels=1:2 keys_zone=my_cache:10m max_size=1g inactive=60m use_temp_path=off;
      client_body_temp_path /tmp/client_temp;
      proxy_temp_path /tmp/proxy_temp_path;
      fastcgi_temp_path /tmp/fastcgi_temp;
      uwsgi_temp_path /tmp/uwsgi_temp;
      scgi_temp_path /tmp/scgi_temp;
      server {
        listen 8080;
        server_name localhost;
        location /v1/policy/list {
          include cache_config.conf;
          include common_proxy.conf;
          proxy_cache_key "$request_uri|$http_x_whylabs_organization";
        }
        location /v1/organizations/managed-organizations {
          include cache_config.conf;
          include common_proxy.conf;
          proxy_cache_key "$request_uri|$http_x_whylabs_organization";
        }
        location /v1/policy {
          include cache_config.conf;
          include common_proxy.conf;
          proxy_cache_key "$request_uri|$http_x_whylabs_organization";
        }
        location /v1/api-key/validate {
          include cache_config.conf;
          include common_proxy.conf;
          proxy_cache_key "$request_uri|$http_x_api_key";
        }
        location / {
          include common_proxy.conf;
        }
      }
    }
  cache_config.conf: |
    proxy_cache my_cache;
    proxy_cache_valid 200 403 {{ .Values.cache.duration }};
    proxy_cache_use_stale error timeout http_500 http_502 http_503 http_504;
    add_header X-Cache-Status $upstream_cache_status always;
  common_proxy.conf: |
    proxy_pass https://{{ .Values.cache.endpoint }};
    proxy_set_header Host {{ .Values.cache.endpoint }};
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    proxy_set_header Accept $http_accept;
    proxy_set_header Accept-Encoding $http_accept_encoding;
    proxy_set_header X-API-Key $http_x_api_key;
    proxy_set_header X-WhyLabs-Organization $http_x_whylabs_organization;
{{- end }}
