Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nim tab #3605

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
337 changes: 337 additions & 0 deletions frontend/src/api/prometheus/NimPerformanceMetrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,337 @@
import React from 'react';
import { NimMetricGraphDefinition } from '~/concepts/metrics/kserve/types';
import { defaultResponsePredicate } from '~/api/prometheus/usePrometheusQueryRange';
import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas';
import { TimeframeTitle } from '~/concepts/metrics/types';
import useQueryRangeResourceData from '~/api/prometheus/useQueryRangeResourceData';
import { PendingContextResourceData, PrometheusQueryRangeResultValue } from '~/types';
import { DEFAULT_PENDING_CONTEXT_RESOURCE } from '~/api/prometheus/const';

/**
 * Pair of request-count series (successful and failed) together with a
 * `refreshAll` callback.
 *
 * NOTE(review): this alias is not referenced in the visible part of this file and
 * has the same shape as `RequestsOutcomesData` — confirm whether it is still needed.
 */
type RequestCountData = {
data: {
successCount: PendingContextResourceData<PrometheusQueryRangeResultValue>;
failedCount: PendingContextResourceData<PrometheusQueryRangeResultValue>;
};
refreshAll: () => void;
};


// Graph #1 - KV Cache usage over time
/**
 * Return shape of `useFetchNimKVCacheUsageData`: a single range-query series
 * (`kvCacheUsage`) plus a `refreshAll` callback.
 */
type KVCacheUsageData = {
data: {
kvCacheUsage: PendingContextResourceData<PrometheusQueryRangeResultValue>;
};
refreshAll: () => void;
};


/**
 * Hook that loads the KV cache usage series for a NIM metrics graph.
 *
 * The first query of `metricsDef` is run through `useQueryRangeResourceData`
 * and the outcome is wrapped by `useAllSettledContextResourceData`.
 *
 * @param metricsDef - Graph definition; only `queries[0]` is read.
 * @param timeframe - Timeframe forwarded to the range query.
 * @param endInMs - End of the queried range, forwarded to the range query.
 * @param namespace - Namespace forwarded to the range query.
 * @returns `{ data: { kvCacheUsage }, refreshAll }`.
 */
export const useFetchNimKVCacheUsageData = (
  metricsDef: NimMetricGraphDefinition,
  timeframe: TimeframeTitle,
  endInMs: number,
  namespace: string,
): KVCacheUsageData => {
  // Availability of the KServe metrics area is handed to the query hook as its `active` flag.
  const { status: metricsAvailable } = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS);
  const usageQuery = metricsDef.queries[0]?.query;

  const usageSeries = useQueryRangeResourceData(
    metricsAvailable,
    usageQuery,
    endInMs,
    timeframe,
    defaultResponsePredicate,
    namespace,
  );

  // Keep the wrapper object referentially stable while the series itself is unchanged.
  const seriesByName = React.useMemo(() => ({ kvCacheUsage: usageSeries }), [usageSeries]);

  return useAllSettledContextResourceData(seriesByName, {
    kvCacheUsage: DEFAULT_PENDING_CONTEXT_RESOURCE,
  });
};


// Graph #3 - Total Prompt Token Count and Total Generation Token Count
/**
 * Return shape of `useFetchNimTokensCountData`: two range-query series
 * (`totalPromptTokenCount`, `totalGenerationTokenCount`) plus a `refreshAll` callback.
 */
type TokensCountData = {
data: {
totalPromptTokenCount: PendingContextResourceData<PrometheusQueryRangeResultValue>;
totalGenerationTokenCount: PendingContextResourceData<PrometheusQueryRangeResultValue>;
};
refreshAll: () => void;
};


/**
 * Hook that loads the prompt-token and generation-token count series for a
 * NIM metrics graph.
 *
 * `queries[0]` feeds `totalPromptTokenCount` and `queries[1]` feeds
 * `totalGenerationTokenCount`; each is run through `useQueryRangeResourceData`
 * and both are wrapped by `useAllSettledContextResourceData`.
 *
 * @param metricsDef - Graph definition; `queries[0]` and `queries[1]` are read.
 * @param timeframe - Timeframe forwarded to both range queries.
 * @param endInMs - End of the queried range, forwarded to both range queries.
 * @param namespace - Namespace forwarded to both range queries.
 * @returns `{ data: { totalPromptTokenCount, totalGenerationTokenCount }, refreshAll }`.
 */
export const useFetchNimTokensCountData = (
  metricsDef: NimMetricGraphDefinition,
  timeframe: TimeframeTitle,
  endInMs: number,
  namespace: string,
): TokensCountData => {
  // Availability of the KServe metrics area is handed to the query hook as its `active` flag.
  const { status: metricsAvailable } = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS);

  // By position: first query is the prompt token count, second is the generation token count.
  const promptQuery = metricsDef.queries[0]?.query;
  const promptSeries = useQueryRangeResourceData(
    metricsAvailable,
    promptQuery,
    endInMs,
    timeframe,
    defaultResponsePredicate,
    namespace,
  );

  const generationQuery = metricsDef.queries[1]?.query;
  const generationSeries = useQueryRangeResourceData(
    metricsAvailable,
    generationQuery,
    endInMs,
    timeframe,
    defaultResponsePredicate,
    namespace,
  );

  // Keep the wrapper object referentially stable while neither series changes.
  const seriesByName = React.useMemo(
    () => ({
      totalPromptTokenCount: promptSeries,
      totalGenerationTokenCount: generationSeries,
    }),
    [promptSeries, generationSeries],
  );

  return useAllSettledContextResourceData(seriesByName, {
    totalPromptTokenCount: DEFAULT_PENDING_CONTEXT_RESOURCE,
    totalGenerationTokenCount: DEFAULT_PENDING_CONTEXT_RESOURCE,
  });
};


// Graph #4 - Time to First Token
/**
 * Return shape of `useFetchNimTimeToFirstTokenData`: a single range-query series
 * (`timeToFirstToken`) plus a `refreshAll` callback.
 */
type TimeToFirstTokenData = {
data: {
timeToFirstToken: PendingContextResourceData<PrometheusQueryRangeResultValue>;
};
refreshAll: () => void;
};


/**
 * Hook that loads the time-to-first-token series for a NIM metrics graph.
 *
 * The first query of `metricsDef` is run through `useQueryRangeResourceData`
 * and the outcome is wrapped by `useAllSettledContextResourceData`.
 *
 * @param metricsDef - Graph definition; only `queries[0]` is read.
 * @param timeframe - Timeframe forwarded to the range query.
 * @param endInMs - End of the queried range, forwarded to the range query.
 * @param namespace - Namespace forwarded to the range query.
 * @returns `{ data: { timeToFirstToken }, refreshAll }`.
 */
export const useFetchNimTimeToFirstTokenData = (
  metricsDef: NimMetricGraphDefinition,
  timeframe: TimeframeTitle,
  endInMs: number,
  namespace: string,
): TimeToFirstTokenData => {
  // Availability of the KServe metrics area is handed to the query hook as its `active` flag.
  const { status: metricsAvailable } = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS);
  const firstTokenQuery = metricsDef.queries[0]?.query;

  const firstTokenSeries = useQueryRangeResourceData(
    metricsAvailable,
    firstTokenQuery,
    endInMs,
    timeframe,
    defaultResponsePredicate,
    namespace,
  );

  // Keep the wrapper object referentially stable while the series itself is unchanged.
  const seriesByName = React.useMemo(
    () => ({ timeToFirstToken: firstTokenSeries }),
    [firstTokenSeries],
  );

  return useAllSettledContextResourceData(seriesByName, {
    timeToFirstToken: DEFAULT_PENDING_CONTEXT_RESOURCE,
  });
};

// Graph #5 - Time per Output Token
/**
 * Return shape of `useFetchNimTimePerOutputTokenData`: a single range-query series
 * (`timePerOutputToken`) plus a `refreshAll` callback.
 */
type TimePerOutputTokenData = {
data: {
timePerOutputToken: PendingContextResourceData<PrometheusQueryRangeResultValue>;
};
refreshAll: () => void;
};
/**
 * Hook that loads the time-per-output-token series for a NIM metrics graph.
 *
 * The first query of `metricsDef` is run through `useQueryRangeResourceData`
 * and the outcome is wrapped by `useAllSettledContextResourceData`.
 *
 * @param metricsDef - Graph definition; only `queries[0]` is read.
 * @param timeframe - Timeframe forwarded to the range query.
 * @param endInMs - End of the queried range, forwarded to the range query.
 * @param namespace - Namespace forwarded to the range query.
 * @returns `{ data: { timePerOutputToken }, refreshAll }`.
 */
export const useFetchNimTimePerOutputTokenData = (
  metricsDef: NimMetricGraphDefinition,
  timeframe: TimeframeTitle,
  endInMs: number,
  namespace: string,
): TimePerOutputTokenData => {
  // Availability of the KServe metrics area is handed to the query hook as its `active` flag.
  const active = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS).status;

  // The time-per-output-token query is expected to be the first entry of the definition.
  // Optional chaining keeps an empty `queries` array from throwing during render, the
  // same way the sibling hooks in this file read their queries.
  const timePerOutputTokenQuery = metricsDef.queries[0]?.query;

  const timePerOutputToken = useQueryRangeResourceData(
    active,
    timePerOutputTokenQuery,
    endInMs,
    timeframe,
    defaultResponsePredicate,
    namespace,
  );

  // Keep the wrapper object referentially stable while the series itself is unchanged.
  const data = React.useMemo(
    () => ({
      timePerOutputToken,
    }),
    [timePerOutputToken],
  );

  return useAllSettledContextResourceData(data, {
    timePerOutputToken: DEFAULT_PENDING_CONTEXT_RESOURCE,
  });
};

// Graph #6 - Requests outcomes (successful and failed counts)
/**
 * Return shape of `useFetchNimRequestsOutcomesData`: two range-query series
 * (`successCount`, `failedCount`) plus a `refreshAll` callback.
 */
type RequestsOutcomesData = {
data: {
successCount: PendingContextResourceData<PrometheusQueryRangeResultValue>;
failedCount: PendingContextResourceData<PrometheusQueryRangeResultValue>;
};
refreshAll: () => void;
};

/**
 * Hook that loads the successful and failed request count series for a NIM
 * metrics graph.
 *
 * `queries[0]` feeds `successCount` and `queries[1]` feeds `failedCount`; each
 * is run through `useQueryRangeResourceData` and both are wrapped by
 * `useAllSettledContextResourceData`.
 *
 * @param metricsDef - Graph definition; `queries[0]` and `queries[1]` are read.
 * @param timeframe - Timeframe forwarded to both range queries.
 * @param endInMs - End of the queried range, forwarded to both range queries.
 * @param namespace - Namespace forwarded to both range queries.
 * @returns `{ data: { successCount, failedCount }, refreshAll }`.
 */
export const useFetchNimRequestsOutcomesData = (
  metricsDef: NimMetricGraphDefinition,
  timeframe: TimeframeTitle,
  endInMs: number,
  namespace: string,
): RequestsOutcomesData => {
  // Availability of the KServe metrics area is handed to the query hook as its `active` flag.
  const { status: metricsAvailable } = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS);

  // By position: first query counts successes, second counts failures.
  const [succeeded, failed] = [metricsDef.queries[0]?.query, metricsDef.queries[1]?.query];

  const succeededSeries = useQueryRangeResourceData(
    metricsAvailable,
    succeeded,
    endInMs,
    timeframe,
    defaultResponsePredicate,
    namespace,
  );
  const failedSeries = useQueryRangeResourceData(
    metricsAvailable,
    failed,
    endInMs,
    timeframe,
    defaultResponsePredicate,
    namespace,
  );

  // Keep the wrapper object referentially stable while neither series changes.
  const seriesByName = React.useMemo(
    () => ({ successCount: succeededSeries, failedCount: failedSeries }),
    [failedSeries, succeededSeries],
  );

  return useAllSettledContextResourceData(seriesByName, {
    successCount: DEFAULT_PENDING_CONTEXT_RESOURCE,
    failedCount: DEFAULT_PENDING_CONTEXT_RESOURCE,
  });
};

// Graph #2 - Current requests (waiting, running, max)
/**
 * Return shape of `useFetchNimCurrentRequestsData`: three range-query series
 * (`requestsWaiting`, `requestsRunning`, `maxRequests`) plus a `refreshAll` callback.
 */
type CurrentRequestsData = {
data: {
requestsWaiting: PendingContextResourceData<PrometheusQueryRangeResultValue>;
requestsRunning: PendingContextResourceData<PrometheusQueryRangeResultValue>;
maxRequests: PendingContextResourceData<PrometheusQueryRangeResultValue>;
};
refreshAll: () => void;
};

/**
 * Hook that loads the waiting, running and max request series for a NIM
 * metrics graph.
 *
 * `queries[0]`, `queries[1]` and `queries[2]` feed `requestsWaiting`,
 * `requestsRunning` and `maxRequests` respectively; each is run through
 * `useQueryRangeResourceData` and all three are wrapped by
 * `useAllSettledContextResourceData`.
 *
 * @param metricsDef - Graph definition; the first three entries of `queries` are read.
 * @param timeframe - Timeframe forwarded to every range query.
 * @param endInMs - End of the queried range, forwarded to every range query.
 * @param namespace - Namespace forwarded to every range query.
 * @returns `{ data: { requestsWaiting, requestsRunning, maxRequests }, refreshAll }`.
 */
export const useFetchNimCurrentRequestsData = (
  metricsDef: NimMetricGraphDefinition,
  timeframe: TimeframeTitle,
  endInMs: number,
  namespace: string,
): CurrentRequestsData => {
  // Availability of the KServe metrics area is handed to the query hook as its `active` flag.
  const active = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS).status;

  // By position: "Requests waiting", "Requests running", "Max requests".
  // Optional chaining keeps a definition with fewer than three queries from throwing
  // during render, the same way the sibling hooks in this file read their queries.
  const requestsWaitingQuery = metricsDef.queries[0]?.query;
  const requestsRunningQuery = metricsDef.queries[1]?.query;
  const maxRequestsQuery = metricsDef.queries[2]?.query;

  const requestsWaiting = useQueryRangeResourceData(
    active,
    requestsWaitingQuery,
    endInMs,
    timeframe,
    defaultResponsePredicate,
    namespace,
  );

  const requestsRunning = useQueryRangeResourceData(
    active,
    requestsRunningQuery,
    endInMs,
    timeframe,
    defaultResponsePredicate,
    namespace,
  );

  const maxRequests = useQueryRangeResourceData(
    active,
    maxRequestsQuery,
    endInMs,
    timeframe,
    defaultResponsePredicate,
    namespace,
  );

  // Keep the wrapper object referentially stable while none of the series change.
  const data = React.useMemo(
    () => ({
      requestsWaiting,
      requestsRunning,
      maxRequests,
    }),
    [requestsWaiting, requestsRunning, maxRequests],
  );

  return useAllSettledContextResourceData(data, {
    requestsWaiting: DEFAULT_PENDING_CONTEXT_RESOURCE,
    requestsRunning: DEFAULT_PENDING_CONTEXT_RESOURCE,
    maxRequests: DEFAULT_PENDING_CONTEXT_RESOURCE,
  });
};


/**
 * Exposes a group of context resources only once none of them is pending.
 *
 * On every call the current `data` is inspected: if no entry has `pending` set,
 * the `{ data, refreshAll }` pair for this call is stored in a ref and returned.
 * Otherwise the previously stored pair is returned; before anything has settled
 * that is `{ data: defaultValue, refreshAll }`.
 *
 * @param data - Map of named resources; each must expose `pending` and `refresh()`.
 * @param defaultValue - Map used as `data` until every resource has settled once.
 * @returns The most recent fully settled `{ data, refreshAll }`, or the default pair.
 */
const useAllSettledContextResourceData = <
  T,
  U extends Record<string, PendingContextResourceData<T>>,
>(
  data: U,
  defaultValue: U,
) => {
  // Triggers `refresh()` on every resource in the current `data` map.
  const refreshAll = React.useCallback(() => {
    for (const resource of Object.values(data)) {
      resource.refresh();
    }
  }, [data]);

  const latest = React.useMemo(() => ({ data, refreshAll }), [data, refreshAll]);

  // Holds the last snapshot in which nothing was pending; seeded with the defaults.
  const settledRef = React.useRef({ data: defaultValue, refreshAll });

  // Only replace the stored snapshot when every resource has settled.
  const everythingSettled = Object.values(latest.data).every((resource) => !resource.pending);
  if (everythingSettled) {
    settledRef.current = latest;
  }

  return settledRef.current;
};
Loading