From 1db09043c395b80f90d0ee0f4280321e70d3e128 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guillermo=20Julia=CC=81n?=
Date: Thu, 26 Dec 2024 11:55:02 +0100
Subject: [PATCH] Auto-enable agent check if system-probe gpu monitoring is enabled

---
 .../agent/cont-init.d/60-sysprobe-check.sh    | 13 ++++++++++++
 cmd/agent/dist/conf.d/gpu.d/conf.yaml.example | 20 +++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 cmd/agent/dist/conf.d/gpu.d/conf.yaml.example

diff --git a/Dockerfiles/agent/cont-init.d/60-sysprobe-check.sh b/Dockerfiles/agent/cont-init.d/60-sysprobe-check.sh
index d556aac48974b..0f371931a657f 100644
--- a/Dockerfiles/agent/cont-init.d/60-sysprobe-check.sh
+++ b/Dockerfiles/agent/cont-init.d/60-sysprobe-check.sh
@@ -13,3 +13,16 @@ if grep -Eq '^ *enable_oom_kill *: *true' /etc/datadog-agent/system-probe.yaml |
            /etc/datadog-agent/conf.d/oom_kill.d/conf.yaml.default
     fi
 fi
+
+# Match the key gpu_monitoring.enabled: true, allowing for other keys to be present below gpu_monitoring.
+# regex breakdown:
+#   gpu_monitoring:[ \t]*\r?\n           - match the gpu_monitoring parent key line
+#   (?:[ \t]+\S.*\n|[ \t]*\r?\n)*        - match any number of indented child keys (or blank lines) under gpu_monitoring. Only spaces/tabs count as indentation, so the match stops at the next unindented key and never reaches a sibling section's "enabled: true"
+#   [ \t]+enabled[ \t]*:[ \t]*true       - match the indented enabled: true key-value pair
+# We use perl to read the whole file at once (-0777). The END block sets the exit status: 0 if the regex matched, 1 otherwise (including when the file could not be read and the -n loop body never ran).
+if perl -0777 -ne '$m = 1 if /gpu_monitoring:[ \t]*\r?\n(?:[ \t]+\S.*\n|[ \t]*\r?\n)*[ \t]+enabled[ \t]*:[ \t]*true/; END { $? = $m ? 0 : 1 }' /etc/datadog-agent/system-probe.yaml || [[ "$DD_GPU_MONITORING_ENABLED" == "true" ]]; then
+    if [ -f /etc/datadog-agent/conf.d/gpu.d/conf.yaml.example ]; then
+        mv /etc/datadog-agent/conf.d/gpu.d/conf.yaml.example \
+           /etc/datadog-agent/conf.d/gpu.d/conf.yaml.default
+    fi
+fi
diff --git a/cmd/agent/dist/conf.d/gpu.d/conf.yaml.example b/cmd/agent/dist/conf.d/gpu.d/conf.yaml.example
new file mode 100644
index 0000000000000..18bcc8ed94127
--- /dev/null
+++ b/cmd/agent/dist/conf.d/gpu.d/conf.yaml.example
@@ -0,0 +1,20 @@
+init_config:
+
+instances:
+
+  -
+
+    ## @param nvml_library_path - string - optional - default: ""
+    ## Configure an alternative path for the NVML NVIDIA library. Necessary
+    ## if the library is in a location where the agent cannot automatically find it.
+    #
+    # nvml_library_path: ""
+
+    ## @param tags - list of strings following the pattern: "key:value" - optional
+    ## List of tags to attach to every metric, event, and service check emitted by this integration.
+    ##
+    ## Learn more about tagging: https://docs.datadoghq.com/tagging/
+    #
+    # tags:
+    #   - :
+    #   - :