Add metric for command processor latency #1529

Open
wants to merge 15 commits into main
Changes from 7 commits
@@ -86,6 +86,26 @@ public interface JsonApiMetricsConfig {
@WithDefault("index.usage.count")
String indexUsageCounterMetrics();

@NotBlank
@WithDefault("command.processor.latency")
String commandProcessorLatencyMetrics();

/**
* The minimum value that the meter is expected to observe for the command processor latency, in
* milliseconds.
*/
@Positive
@WithDefault("100")
int minExpectedCommandProcessorLatency();

/**
* The maximum value that the meter is expected to observe for the command processor latency, in
* milliseconds.
*/
@Positive
@WithDefault("15000")
int maxExpectedCommandProcessorLatency();

/** List of values that can be used as value for metrics sort_type. */
enum SortType {
// Uses vector search sorting for document resolution
@@ -29,6 +29,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
@@ -110,9 +111,17 @@ public <T extends Command, U extends SchemaObject> Uni<CommandResult> processCom
.onItem()
.invoke(
result -> {
Tags tags = getCustomTags(commandContext, command, result);
// add metrics
sample.stop(meterRegistry.timer(jsonApiMetricsConfig.metricsName(), tags));
Tags complexTags = getCustomTags(commandContext, command, result);
Tags simpleTags = getSimpleTags(command);
// add command metrics with complex tags
long durationNs =
sample.stop(meterRegistry.timer(jsonApiMetricsConfig.metricsName(), complexTags));
// add command latency histogram with simple tags, reusing the duration returned by
// sample.stop() so the two metrics observe identical values
Timer.builder(jsonApiMetricsConfig.commandProcessorLatencyMetrics())
.tags(simpleTags)
.register(meterRegistry)
.record(durationNs, TimeUnit.NANOSECONDS);

if (isCommandLevelLoggingEnabled(result, false)) {
logger.info(buildCommandLog(commandContext, command, result));
@@ -262,6 +271,13 @@ private <T extends SchemaObject> Tags getCustomTags(
return tags;
}

private Tags getSimpleTags(Command command) {
Tag commandTag = Tag.of(jsonApiMetricsConfig.command(), command.getClass().getSimpleName());
String tenant = dataApiRequestInfo.getTenantId().orElse(UNKNOWN_VALUE);
Tag tenantTag = Tag.of(tenantConfig.tenantTag(), tenant);
return Tags.of(commandTag, tenantTag);
}

private JsonApiMetricsConfig.SortType getVectorTypeTag(Command command) {
int filterCount = 0;
if (command instanceof Filterable fc && fc.filterClause() != null) {
@@ -308,6 +324,22 @@ public DistributionStatisticConfig configure(
.build()
.merge(config);
}

// reduce the number of buckets by setting the min and max expected values to avoid the high
// cardinality problem in Grafana
if (id.getName().startsWith(jsonApiMetricsConfig.commandProcessorLatencyMetrics())) {
return DistributionStatisticConfig.builder()
.percentiles(0.5, 0.90, 0.95, 0.99)
.percentilesHistogram(true)
.minimumExpectedValue(
TimeUnit.MILLISECONDS.toNanos(
jsonApiMetricsConfig.minExpectedCommandProcessorLatency())) // 0.1 seconds
.maximumExpectedValue(
TimeUnit.MILLISECONDS.toNanos(
jsonApiMetricsConfig.maxExpectedCommandProcessorLatency())) // 15 seconds
.build()
.merge(config);
}
Contributor Author:
To address the potential high cardinality issue in Grafana, I adjusted the bucket range by increasing the minimum value (from 0.001 sec to 0.1 sec) and decreasing the maximum value (from 30 sec to 15 sec). This change reduces the number of buckets from 69 to 36.
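
For reference, a minimal standalone sketch (not part of this PR) of how the bucket count for a given range could be checked locally with Micrometer's PercentileHistogramBuckets helper; the BucketCountCheck class is hypothetical and the double-valued minimum/maximum overloads assume a recent Micrometer version:

import io.micrometer.core.instrument.distribution.DistributionStatisticConfig;
import io.micrometer.core.instrument.distribution.PercentileHistogramBuckets;
import java.util.concurrent.TimeUnit;

public class BucketCountCheck {
  public static void main(String[] args) {
    // Same range as the defaults above: 100 ms .. 15 s, expressed in nanoseconds.
    DistributionStatisticConfig config =
        DistributionStatisticConfig.builder()
            .percentilesHistogram(true)
            .minimumExpectedValue((double) TimeUnit.MILLISECONDS.toNanos(100))
            .maximumExpectedValue((double) TimeUnit.MILLISECONDS.toNanos(15_000))
            .build()
            .merge(DistributionStatisticConfig.DEFAULT);
    // Prints how many histogram buckets Micrometer would publish for this range.
    System.out.println(PercentileHistogramBuckets.buckets(config).size());
  }
}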

Contributor:
While this makes sense wrt cardinality, 0.1 sec is 100 msec, which is quite coarse. This might be ok when trying to find the slowest queries, but we need to make sure it is useful for us in general. Could you check what using 0.05 (for example) would give for the number of buckets?

Contributor Author:
Changed to 0.05, which gives 41 buckets. The metrics will look like:

# HELP command_processor_latency_seconds  
# TYPE command_processor_latency_seconds histogram
command_processor_latency_seconds{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",quantile="0.5",} 0.012058624
command_processor_latency_seconds{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",quantile="0.9",} 0.200802304
command_processor_latency_seconds{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",quantile="0.95",} 0.200802304
command_processor_latency_seconds{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",quantile="0.99",} 0.200802304
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.05",} 3.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.050331646",} 3.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.055924051",} 3.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.061516456",} 3.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.067108864",} 3.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.089478485",} 3.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.111848106",} 3.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.134217727",} 3.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.156587348",} 3.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.178956969",} 3.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.20132659",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.223696211",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.246065832",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.268435456",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.357913941",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.447392426",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.536870911",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.626349396",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.715827881",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.805306366",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.894784851",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="0.984263336",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="1.073741824",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="1.431655765",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="1.789569706",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="2.147483647",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="2.505397588",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="2.863311529",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="3.22122547",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="3.579139411",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="3.937053352",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="4.294967296",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="5.726623061",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="7.158278826",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="8.589934591",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="10.021590356",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="11.453246121",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="12.884901886",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="14.316557651",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="15.0",} 4.0
command_processor_latency_seconds_bucket{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",le="+Inf",} 4.0
command_processor_latency_seconds_count{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",} 4.0
command_processor_latency_seconds_sum{command="InsertOneCommand",module="sgv2-jsonapi",tenant="unknown",} 0.253723748

The numbers are from my local laptop, so they are not representative; I will deploy this to dev to see whether 0.05 makes sense and whether Grafana can work with it.
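
As a quick local sanity check, a minimal hypothetical sketch (not part of this PR) that registers a timer under the new metric name with the simple tags and records one observation, using Micrometer's SimpleMeterRegistry; the LatencyTimerSmokeCheck class and the tag values are illustrative only:

import io.micrometer.core.instrument.Timer;
import io.micrometer.core.instrument.simple.SimpleMeterRegistry;
import java.util.concurrent.TimeUnit;

public class LatencyTimerSmokeCheck {
  public static void main(String[] args) {
    SimpleMeterRegistry registry = new SimpleMeterRegistry();
    Timer timer =
        Timer.builder("command.processor.latency")
            .tags("command", "InsertOneCommand", "tenant", "unknown")
            .register(registry);
    // Record one observation of 120 ms, as the command processor would after sample.stop().
    timer.record(120, TimeUnit.MILLISECONDS);
    System.out.println(timer.count());                     // 1
    System.out.println(timer.totalTime(TimeUnit.SECONDS)); // 0.12
  }
}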

return config;
}
};
@@ -88,6 +88,43 @@ public void metrics() throws Exception {
assertThat(line).contains("module=\"sgv2-jsonapi\"");
});
});

List<String> commandLatencyMetrics =
metrics
.lines()
.filter(
line ->
line.startsWith("command_processor_latency")
&& line.contains("CountDocumentsCommand")
&& !line.startsWith("command_processor_latency_seconds_bucket")
&& !line.contains("quantile"))
.toList();
assertThat(commandLatencyMetrics)
.satisfies(
lines -> {
assertThat(lines.size()).isEqualTo(3);
lines.forEach(
line -> {
assertThat(line).contains("command=\"CountDocumentsCommand\"");
assertThat(line).contains("tenant=\"test-tenant\"");
assertThat(line).contains("module=\"sgv2-jsonapi\"");
assertThat(line).doesNotContain("sort_type");
assertThat(line).doesNotContain("error");
assertThat(line).doesNotContain("error_code");
assertThat(line).doesNotContain("error_class");
assertThat(line).doesNotContain("vector_enabled");
});
});
List<String> commandLatencyHistogram =
metrics
.lines()
.filter(line -> line.startsWith("command_processor_latency_seconds_bucket"))
.toList();
assertThat(commandLatencyHistogram)
.satisfies(
lines -> {
assertThat(lines.size()).isNotZero();
});
}

@Test