From b8dfb1dd96d467a8686b47625c955d1c150b5c5e Mon Sep 17 00:00:00 2001 From: Justin Brooks Date: Fri, 6 Oct 2023 12:07:37 -0400 Subject: [PATCH 01/12] init otel --- charts/operator-wandb/Chart.yaml | 5 +++++ charts/operator-wandb/templates/otel.yaml | 12 +++++++++++ charts/operator-wandb/values.yaml | 26 +++++++++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 charts/operator-wandb/templates/otel.yaml diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index e2e2e564..7881d0e1 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -40,3 +40,8 @@ dependencies: version: "18.*.*" condition: redis.install repository: https://charts.bitnami.com/bitnami + - name: opentelemetry-collector + version: "0.70.*" + condition: otel.install + alias: otel + repository: https://open-telemetry.github.io/opentelemetry-helm-charts diff --git a/charts/operator-wandb/templates/otel.yaml b/charts/operator-wandb/templates/otel.yaml new file mode 100644 index 00000000..751096f5 --- /dev/null +++ b/charts/operator-wandb/templates/otel.yaml @@ -0,0 +1,12 @@ +{{- if .Values.otel.install }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Release.Name }}-otel + labels: + {{- include "wandb.commonLabels" . 
| nindent 4 }} +data: + config.yaml: | + exporters: + debug: {} +{{- end }} \ No newline at end of file diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index 2d43ab8a..7fcb0608 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -144,5 +144,31 @@ prometheus: mysql-exporter: install: true +otel: + install: true + mode: daemonset + + resources: + limits: + cpu: 100m + memory: 200M + configMap: + create: false + + command: + name: otelcol-contrib + extraArgs: ["--config=/conf/config.yaml"] + extraVolumes: + - name: {{ .Release.Name }}-otel + configMap: + name: {{ .Release.Name }}-otel + items: + - key: config + path: config.yaml + defaultMode: 420 + extraVolumeMounts: + - name: {{ .Release.Name }}-otel + mountPath: /conf/config.yaml + nameOverride: "" fullnameOverride: "" From d52f377d46e36d16df0cb2e5e0b642db20a21937 Mon Sep 17 00:00:00 2001 From: Justin Brooks Date: Mon, 16 Oct 2023 19:54:38 -0400 Subject: [PATCH 02/12] working otel --- charts/operator-wandb/Chart.lock | 9 +- charts/operator-wandb/Chart.yaml | 7 +- .../operator-wandb/charts/console/values.yaml | 8 +- charts/operator-wandb/charts/otel/.helmignore | 23 +++ charts/operator-wandb/charts/otel/Chart.yaml | 15 ++ charts/operator-wandb/charts/otel/README.md | 87 +++++++++ .../charts/otel/templates/_helpers.tpl | 102 +++++++++++ .../charts/otel/templates/configmap.yaml | 168 ++++++++++++++++++ .../charts/otel/templates/deamonset.yaml | 116 ++++++++++++ .../charts/otel/templates/serviceaccount.yaml | 15 ++ charts/operator-wandb/charts/otel/values.yaml | 50 ++++++ .../operator-wandb/charts/weave/values.yaml | 6 +- charts/operator-wandb/templates/otel.yaml | 44 +++-- charts/operator-wandb/values.yaml | 47 ++--- 14 files changed, 648 insertions(+), 49 deletions(-) create mode 100644 charts/operator-wandb/charts/otel/.helmignore create mode 100644 charts/operator-wandb/charts/otel/Chart.yaml create mode 100644 
charts/operator-wandb/charts/otel/README.md create mode 100644 charts/operator-wandb/charts/otel/templates/_helpers.tpl create mode 100644 charts/operator-wandb/charts/otel/templates/configmap.yaml create mode 100644 charts/operator-wandb/charts/otel/templates/deamonset.yaml create mode 100644 charts/operator-wandb/charts/otel/templates/serviceaccount.yaml create mode 100644 charts/operator-wandb/charts/otel/values.yaml diff --git a/charts/operator-wandb/Chart.lock b/charts/operator-wandb/Chart.lock index 9e67be2e..7e3157aa 100644 --- a/charts/operator-wandb/Chart.lock +++ b/charts/operator-wandb/Chart.lock @@ -19,6 +19,9 @@ dependencies: version: 0.1.0 - name: redis repository: https://charts.bitnami.com/bitnami - version: 18.1.0 -digest: sha256:0e062062405e017968fb5ad0e5064936cb55e2b441ddb1c2048f34eaf6de11a8 -generated: "2023-09-27T12:33:43.680199603-04:00" + version: 18.1.5 +- name: otel + repository: file://charts/otel + version: 0.1.0 +digest: sha256:d6f7dbed1f8fcbbd34d18b0911891fb27eeef0021092b69ea35e7ca5dcede038 +generated: "2023-10-16T19:07:10.090393-04:00" diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 023496b7..6f9d01d7 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -40,8 +40,7 @@ dependencies: version: "18.*.*" condition: redis.install repository: https://charts.bitnami.com/bitnami - - name: opentelemetry-collector - version: "0.70.*" + - name: otel + version: "*.*.*" + repository: file://charts/otel condition: otel.install - alias: otel - repository: https://open-telemetry.github.io/opentelemetry-helm-charts diff --git a/charts/operator-wandb/charts/console/values.yaml b/charts/operator-wandb/charts/console/values.yaml index 3a7fc07f..b1c7a542 100644 --- a/charts/operator-wandb/charts/console/values.yaml +++ b/charts/operator-wandb/charts/console/values.yaml @@ -38,8 +38,8 @@ resources: # specify resources, uncomment the following lines, adjust them as necessary, # and 
remove the curly braces after 'resources:'. requests: - cpu: 500m - memory: 1Gi + cpu: 200m + memory: 200Mi limits: - cpu: 4000m - memory: 8Gi + cpu: 1 + memory: 500Mi diff --git a/charts/operator-wandb/charts/otel/.helmignore b/charts/operator-wandb/charts/otel/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/charts/operator-wandb/charts/otel/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/operator-wandb/charts/otel/Chart.yaml b/charts/operator-wandb/charts/otel/Chart.yaml new file mode 100644 index 00000000..95db1884 --- /dev/null +++ b/charts/operator-wandb/charts/otel/Chart.yaml @@ -0,0 +1,15 @@ +apiVersion: v2 +name: otel +type: application +description: A Helm chart for Kubernetes + +version: 0.1.0 +appVersion: "0.33.0" + +home: https://wandb.ai +icon: https://wandb.ai/logo.svg + +maintainers: + - name: wandb + email: support@wandb.com + url: https://wandb.com diff --git a/charts/operator-wandb/charts/otel/README.md b/charts/operator-wandb/charts/otel/README.md new file mode 100644 index 00000000..df63bb6b --- /dev/null +++ b/charts/operator-wandb/charts/otel/README.md @@ -0,0 +1,87 @@ +# Local + +- [Local](#local) + - [Requirements](#requirements) + - [Configuration](#configuration) + - [Globals](#globals) + - [Options](#options) + - [Chart configuration examples](#chart-configuration-examples) + - [extraEnv](#extraenv) + - [extraEnvFrom](#extraenvfrom) + - [extraEnvFrom](#extraenvfrom-1) + +## Requirements + +This chart depends on Redis, Bucket and MySQL services, either as part of the +complete W&B Server chart or provided as external services reachable from the 
+Kubernetes cluster this chart is deployed onto. + +## Configuration + +### Globals + +### Options + +| Parameter | Default | Description | +| ------------------- | ------------- | -------------------------------------------------------------------------------------------------- | +| `enabled` | `true` | Server enable flag | +| `priorityClassName` | `""` | Allows configuring the pods' `priorityClassName`, which controls pod priority in case of eviction | +| `common.labels` | | Supplemental labels that are applied to all objects created by this chart. | +| `podLabels` | | Supplemental Pod labels. Will not be used for selectors. | +| `extraEnv` | | List of extra environment variables to expose | +| `extraEnvFrom` | | List of extra environment variables from other data sources to expose | +| `image.pullPolicy` | `Always` | Server image pull policy | +| `image.pullSecrets` | | Secrets for the image repository | +| `image.repository` | `wandb/local` | Server image repository | +| `image.tag` | | Server image tag | +| `tolerations` | `[]` | Toleration labels for pod assignment | + +### Chart configuration examples + +#### extraEnv + +#### extraEnvFrom + +`extraEnv` allows you to expose additional environment variables in all +containers in the pods. + +Below is an example use of `extraEnv`: + +```yaml +extraEnv: + SOME_KEY: some_value + SOME_OTHER_KEY: some_other_value +``` + +When the container is started, you can confirm that the environment variables +are exposed: + +```bash +env | grep SOME +SOME_KEY=some_value +SOME_OTHER_KEY=some_other_value +``` + +#### extraEnvFrom + +`extraEnvFrom` allows you to expose additional environment variables from other +data sources in all containers in the pods. Subsequent variables can be +overridden per deployment. 
+ +Below is an example use of `extraEnvFrom`: + +```yaml +extraEnvFrom: + MY_NODE_NAME: + fieldRef: + fieldPath: spec.nodeName + MY_CPU_REQUEST: + resourceFieldRef: + containerName: test-container + resource: requests.cpu + SECRET_THING: + secretKeyRef: + name: special-secret + key: special_token + # optional: boolean +``` diff --git a/charts/operator-wandb/charts/otel/templates/_helpers.tpl b/charts/operator-wandb/charts/otel/templates/_helpers.tpl new file mode 100644 index 00000000..daad4c48 --- /dev/null +++ b/charts/operator-wandb/charts/otel/templates/_helpers.tpl @@ -0,0 +1,102 @@ +{{/* vim: set filetype=mustache: */}} + +{{/* +Expand the name of the chart. +*/}} +{{- define "otel.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "otel.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "otel.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "otel.labels" -}} +helm.sh/chart: {{ include "otel.chart" . }} +{{ include "otel.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +wandb.com/app-name: {{ include "otel.chart" . 
}} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "otel.selectorLabels" -}} +app.kubernetes.io/name: {{ include "otel.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "otel.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "otel.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Returns the extraEnv keys and values to inject into containers. + +Chart-specific values take precedence over global values (`merge` favors its first argument). +*/}} +{{- define "otel.extraEnv" -}} +{{- $allExtraEnv := merge (default (dict) .local.extraEnv) .global.extraEnv -}} +{{- range $key, $value := $allExtraEnv }} +- name: {{ $key }} + value: {{ $value | quote }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _common_ labels to be shared across all +app deployments and other shared objects. +*/}} +{{- define "otel.commonLabels" -}} +{{- $commonLabels := default (dict) .Values.common.labels -}} +{{- if $commonLabels }} +{{- range $key, $value := $commonLabels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} +{{- end -}} + +{{/* +Returns a list of _pod_ labels to be shared across all +app deployments. +*/}} +{{- define "otel.podLabels" -}} +{{- range $key, $value := .Values.pod.labels }} +{{ $key }}: {{ $value | quote }} +{{- end }} +{{- end -}} + diff --git a/charts/operator-wandb/charts/otel/templates/configmap.yaml b/charts/operator-wandb/charts/otel/templates/configmap.yaml new file mode 100644 index 00000000..ed962f51 --- /dev/null +++ b/charts/operator-wandb/charts/otel/templates/configmap.yaml @@ -0,0 +1,168 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "otel.fullname" . }} + labels: + {{- include "wandb.commonLabels" . | nindent 4 }} + {{- include "otel.commonLabels" . 
| nindent 4 }} + {{- include "otel.labels" . | nindent 4 }} + {{- if .Values.configMap.labels -}} + {{- toYaml .Values.configMap.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.configMap.annotations -}} + {{- toYaml .Values.configMap.annotations | nindent 4 }} + {{- end }} +data: + config: | + exporters: + debug: {} + logging: {} + extensions: + health_check: {} + memory_ballast: + size_in_percentage: 40 + processors: + batch: {} + memory_limiter: + check_interval: 5s + limit_percentage: 80 + spike_limit_percentage: 25 + receivers: + filelog: + exclude: [] + include: + - /var/log/pods/*/*/*.log + include_file_name: false + include_file_path: true + operators: + - id: get-format + routes: + - expr: body matches "^\\{" + output: parser-docker + - expr: body matches "^[^ Z]+ " + output: parser-crio + - expr: body matches "^[^ Z]+Z" + output: parser-containerd + type: router + - id: parser-crio + regex: ^(?P