chore(weave): Eval Compare Perf: Split Query #3295

Open · wants to merge 8 commits into base: master
@@ -4,6 +4,7 @@

import {Box} from '@material-ui/core';
import {Alert} from '@mui/material';
import {WaveLoader} from '@wandb/weave/components/Loaders/WaveLoader';
import {Tailwind} from '@wandb/weave/components/Tailwind';
import {maybePluralizeWord} from '@wandb/weave/core/util/string';
import React, {FC, useCallback, useContext, useMemo, useState} from 'react';
@@ -179,8 +180,10 @@ const CompareEvaluationsPageInner: React.FC<{
}> = props => {
const {state, setSelectedMetrics} = useCompareEvaluationsState();
const showExampleFilter =
Object.keys(state.data.evaluationCalls).length === 2;
const showExamples = Object.keys(state.data.resultRows).length > 0;
Object.keys(state.summary.evaluationCalls).length === 2;
const showExamples =
Object.keys(state.results.result?.resultRows ?? {}).length > 0;
const resultsLoading = state.results.loading;
return (
<Box
sx={{
@@ -195,12 +198,23 @@
gridGap: STANDARD_PADDING * 2,
}}>
<InvalidEvaluationBanner
evaluationCalls={Object.values(state.data.evaluationCalls)}
evaluationCalls={Object.values(state.summary.evaluationCalls)}
/>
<ComparisonDefinitionSection state={state} />
<SummaryPlots state={state} setSelectedMetrics={setSelectedMetrics} />
<ScorecardSection state={state} />
{showExamples ? (
{resultsLoading ? (
<Box
sx={{
width: '100%',
display: 'flex',
justifyContent: 'center',
alignItems: 'center',
height: '50px',
}}>
<WaveLoader size="small" />
</Box>
) : showExamples ? (
<>
{showExampleFilter && <ExampleFilterSection state={state} />}
<ResultExplorer state={state} height={props.height} />
@@ -1,7 +1,7 @@
/**
* This file contains a few utilities for working with the
* `MetricDefinitionMap`s in the `EvaluationComparisonData` object. The
* `EvaluationComparisonData` state is a normalized representation of the data,
* `MetricDefinitionMap`s in the `EvaluationComparisonSummary` object. The
* `EvaluationComparisonSummary` state is a normalized representation of the data,
* which is good for not duplicating data, but does present some challenges when
* trying to build the final rendering of the data. As an application-specific
* consideration, when comparing evaluations, metrics can be represented by the
@@ -13,7 +13,7 @@
*/
import _ from 'lodash';

import {EvaluationComparisonData, MetricDefinition} from './ecpTypes';
import {EvaluationComparisonSummary, MetricDefinition} from './ecpTypes';
import {MetricType} from './ecpTypes';
import {getScoreKeyNameFromScorerRef} from './ecpUtil';
import {flattenedDimensionPath} from './ecpUtil';
@@ -69,12 +69,12 @@ export type CompositeSummaryMetricGroupForKeyPath = {
};

/**
* Builds a `CompositeScoreMetrics` object from the `EvaluationComparisonData`.
* Builds a `CompositeScoreMetrics` object from the `EvaluationComparisonSummary`.
* This is the primary utility for converting the normalized data into a form
* that is more useful for rendering the data.
*/
export const buildCompositeMetricsMap = (
data: EvaluationComparisonData,
summaryData: EvaluationComparisonSummary,
mType: MetricType,
selectedMetrics: Record<string, boolean> | undefined = undefined
): CompositeScoreMetrics => {
@@ -83,9 +83,9 @@ export const buildCompositeMetricsMap = (
// Get the metric definition map based on the metric type
let metricDefinitionMap;
if (mType === 'score') {
metricDefinitionMap = data.scoreMetrics;
metricDefinitionMap = summaryData.scoreMetrics;
} else if (mType === 'summary') {
metricDefinitionMap = data.summaryMetrics;
metricDefinitionMap = summaryData.summaryMetrics;
} else {
throw new Error(`Invalid metric type: ${mType}`);
}
@@ -128,9 +128,10 @@
};
}

const evals = Object.values(data.evaluationCalls)
const evals = Object.values(summaryData.evaluationCalls)
.filter(evaluationCall => {
const evaluation = data.evaluations[evaluationCall.evaluationRef];
const evaluation =
summaryData.evaluations[evaluationCall.evaluationRef];
return (
metric.scorerOpOrObjRef == null ||
evaluation.scorerRefs.includes(metric.scorerOpOrObjRef)
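
Note for reviewers of this file: only the input type is renamed here (the combined data object becomes the summary object); the shape of the returned `CompositeScoreMetrics` is unchanged. A minimal sketch of a call site under the new name follows; the helper, the import paths, and the surrounding variables are illustrative assumptions, not code from this PR:

```ts
import {buildCompositeMetricsMap} from './compositeMetricsUtil'; // path assumed
import {EvaluationComparisonSummary} from './ecpTypes';

// Illustrative helper: build both metric groupings from the summary slice that the
// comparison state now exposes (previously the combined `EvaluationComparisonData`).
const buildAllGroups = (summary: EvaluationComparisonSummary) => ({
  score: buildCompositeMetricsMap(summary, 'score'),
  summary: buildCompositeMetricsMap(summary, 'summary'),
});
```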
@@ -7,17 +7,25 @@

import {useMemo} from 'react';

import {useEvaluationComparisonData} from '../wfReactInterface/tsDataModelHooksEvaluationComparison';
import {
useEvaluationComparisonResults,
useEvaluationComparisonSummary,
} from '../wfReactInterface/tsDataModelHooksEvaluationComparison';
import {Loadable} from '../wfReactInterface/wfDataModelHooksInterface';
import {EvaluationComparisonData} from './ecpTypes';
import {
EvaluationComparisonResults,
EvaluationComparisonSummary,
} from './ecpTypes';
import {getMetricIds} from './ecpUtil';

/**
* The global state object used to render the Evaluations Comparison Page.
*/
export type EvaluationComparisonState = {
// The normalized data for the evaluations
data: EvaluationComparisonData;
summary: EvaluationComparisonSummary;
// The results of the evaluations
results: Loadable<EvaluationComparisonResults>;
// The dimensions to compare & filter results
comparisonDimensions?: ComparisonDimensionsType;
// The current digest which is in view
@@ -50,18 +58,28 @@ export const useEvaluationComparisonState = (
const orderedCallIds = useMemo(() => {
return getCallIdsOrderedForQuery(evaluationCallIds);
}, [evaluationCallIds]);
const data = useEvaluationComparisonData(entity, project, orderedCallIds);
const summaryData = useEvaluationComparisonSummary(
entity,
project,
orderedCallIds
);

Review comment from the PR author: This is the major change in this PR: splits up the data fetch into 2 stages
const resultsData = useEvaluationComparisonResults(
entity,
project,
orderedCallIds,
summaryData.result
);
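
As the author's comment above says, the fetch is now split into two stages: the summary loads first, and the per-row results hook receives `summaryData.result` so it can wait for stage one before firing. A minimal sketch of what downstream code sees after the split; the hook internals are not shown in this diff, and the helper name and import path below are assumptions for illustration:

```ts
import {EvaluationComparisonState} from './ecpState'; // path assumed

// Illustrative consumer: summary fields are always present on the state, while the
// heavier per-row results arrive later behind a Loadable and must be null-checked.
const describeLoadState = (state: EvaluationComparisonState): string => {
  const numEvals = Object.keys(state.summary.evaluationCalls).length;
  if (state.results.loading || state.results.result == null) {
    return `${numEvals} evaluations (result rows still loading)`;
  }
  const numRows = Object.keys(state.results.result.resultRows).length;
  return `${numEvals} evaluations, ${numRows} result rows`;
};
```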

const value = useMemo(() => {
if (data.result == null || data.loading) {
if (summaryData.result == null || summaryData.loading) {
return {loading: true, result: null};
}

const scorerDimensions = Object.keys(
getMetricIds(data.result, 'score', 'scorer')
getMetricIds(summaryData.result, 'score', 'scorer')
);
const derivedDimensions = Object.keys(
getMetricIds(data.result, 'score', 'derived')
getMetricIds(summaryData.result, 'score', 'derived')
);

let newComparisonDimensions = comparisonDimensions;
@@ -93,17 +111,19 @@ return {
return {
loading: false,
result: {
data: data.result,
summary: summaryData.result,
results: resultsData,
comparisonDimensions: newComparisonDimensions,
selectedInputDigest,
selectedMetrics,
evaluationCallIdsOrdered: evaluationCallIds,
},
};
}, [
data.result,
data.loading,
summaryData.result,
summaryData.loading,
comparisonDimensions,
resultsData,
selectedInputDigest,
selectedMetrics,
evaluationCallIds,
@@ -132,8 +152,8 @@ const getCallIdsOrderedForQuery = (callIds: string[]) => {
*/
export const getOrderedModelRefs = (state: EvaluationComparisonState) => {
const baselineCallId = getBaselineCallId(state);
const baselineRef = state.data.evaluationCalls[baselineCallId].modelRef;
const refs = Object.keys(state.data.models);
const baselineRef = state.summary.evaluationCalls[baselineCallId].modelRef;
const refs = Object.keys(state.summary.models);
// Make sure the baseline model is first
moveItemToFront(refs, baselineRef);
return refs;
@@ -7,7 +7,7 @@
*/
import {TraceCallSchema} from '../wfReactInterface/traceServerClientTypes';

export type EvaluationComparisonData = {
export type EvaluationComparisonSummary = {
// Entity and Project are constant across all calls
entity: string;
project: string;
@@ -23,18 +23,27 @@ export type EvaluationComparisonData = {
[callId: string]: EvaluationCall;
};

// Models are the Weave Objects used to define the model logic and properties.
models: {
[modelRef: string]: ModelObj;
};

// ScoreMetrics define the metrics that are associated on each individual prediction
scoreMetrics: MetricDefinitionMap;

// SummaryMetrics define the metrics that are associated with the evaluation as a whole
// often aggregated from the scoreMetrics.
summaryMetrics: MetricDefinitionMap;
};

export type EvaluationComparisonResults = {
// Inputs are the intersection of all inputs used in the evaluations.
// Note, we are able to "merge" the same input digest even if it is
// used in different evaluations.
inputs: {
[rowDigest: string]: DatasetRow;
};

// Models are the Weave Objects used to define the model logic and properties.
models: {
[modelRef: string]: ModelObj;
};

// ResultRows are the actual results of running the evaluation against
// the inputs.
resultRows: {
@@ -54,15 +63,7 @@
};
};
};

// ScoreMetrics define the metrics that are associated on each individual prediction
scoreMetrics: MetricDefinitionMap;

// SummaryMetrics define the metrics that are associated with the evaluation as a whole
// often aggregated from the scoreMetrics.
summaryMetrics: MetricDefinitionMap;
};
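
Reading of this hunk: `evaluationCalls`, `models`, `scoreMetrics`, and `summaryMetrics` stay on `EvaluationComparisonSummary`, while `inputs` and `resultRows` move to the new `EvaluationComparisonResults`. A hedged sketch of a helper that consumes both halves once loaded; the function and import path are illustrative, not part of this PR:

```ts
import {EvaluationComparisonResults, EvaluationComparisonSummary} from './ecpTypes'; // path assumed

// Illustrative helper: label the comparison with the evaluation call names (summary)
// and the number of shared result rows (results).
const describeComparison = (
  summary: EvaluationComparisonSummary,
  results: EvaluationComparisonResults
): string => {
  const names = Object.values(summary.evaluationCalls).map(call => call.name);
  const rowCount = Object.keys(results.resultRows).length;
  return `${names.join(' vs ')} over ${rowCount} result rows`;
};
```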

/**
* The EvaluationObj is the primary object that defines the evaluation itself.
*/
@@ -84,6 +85,7 @@ export type EvaluationCall = {
name: string;
color: string;
summaryMetrics: MetricResultMap;
traceId: string;
};

/**
@@ -1,5 +1,5 @@
/**
* This file contains a handful of utilities for working with the `EvaluationComparisonData` destructure.
* This file contains a handful of utilities for working with the `EvaluationComparisonSummary` destructure.
* These are mostly convenience functions for extracting and resolving metrics from the data, but also
* include some helper functions for working with the `MetricDefinition` objects and constructing
* strings correctly.
@@ -8,7 +8,7 @@
import {parseRef, WeaveObjectRef} from '../../../../../../react';
import {
EvaluationCall,
EvaluationComparisonData,
EvaluationComparisonSummary,
MetricDefinition,
MetricDefinitionMap,
MetricResult,
@@ -79,11 +79,12 @@ export const resolveSummaryMetricValueForEvaluateCall = (
};

export const getMetricIds = (
data: EvaluationComparisonData,
summaryData: EvaluationComparisonSummary,
type: MetricType,
source: SourceType
): MetricDefinitionMap => {
const metrics = type === 'score' ? data.scoreMetrics : data.summaryMetrics;
const metrics =
type === 'score' ? summaryData.scoreMetrics : summaryData.summaryMetrics;
return Object.fromEntries(
Object.entries(metrics).filter(([k, v]) => v.source === source)
);
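
For context, this mirrors the call sites updated in the state hook above. A minimal usage sketch under the new parameter type; the wrapper function and import paths are assumptions for illustration:

```ts
import {getMetricIds} from './ecpUtil'; // path assumed
import {EvaluationComparisonSummary} from './ecpTypes';

// Illustrative: list score metrics produced by scorers vs. derived ones, now read
// from the summary object rather than the old combined data object.
const listScoreMetricIds = (summary: EvaluationComparisonSummary) => ({
  scorer: Object.keys(getMetricIds(summary, 'score', 'scorer')),
  derived: Object.keys(getMetricIds(summary, 'score', 'derived')),
});
```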
@@ -44,9 +44,9 @@ export const ComparisonDefinitionSection: React.FC<{
return callIds.map(callId => ({
key: 'evaluations',
value: callId,
label: props.state.data.evaluationCalls[callId]?.name ?? callId,
label: props.state.summary.evaluationCalls[callId]?.name ?? callId,
}));
}, [callIds, props.state.data.evaluationCalls]);
}, [callIds, props.state.summary.evaluationCalls]);

const onSetBaseline = (value: string | null) => {
if (!value) {
@@ -130,8 +130,8 @@ const AddEvaluationButton: React.FC<{

// Calls query for just evaluations
const evaluationsFilter = useEvaluationsFilter(
props.state.data.entity,
props.state.data.project
props.state.summary.entity,
props.state.summary.project
);
const page = useMemo(
() => ({
@@ -144,8 +144,8 @@
// Don't query for output here, re-queried in tsDataModelHooksEvaluationComparison.ts
const columns = useMemo(() => ['inputs', 'display_name'], []);
const calls = useCallsForQuery(
props.state.data.entity,
props.state.data.project,
props.state.summary.entity,
props.state.summary.project,
evaluationsFilter,
DEFAULT_FILTER_CALLS,
page,
@@ -21,11 +21,11 @@ export const EvaluationCallLink: React.FC<{
callId: string;
state: EvaluationComparisonState;
}> = props => {
const evaluationCall = props.state.data.evaluationCalls?.[props.callId];
const evaluationCall = props.state.summary.evaluationCalls?.[props.callId];
if (!evaluationCall) {
return null;
}
const {entity, project} = props.state.data;
const {entity, project} = props.state.summary;

return (
<CallLink
@@ -51,8 +51,8 @@ export const EvaluationModelLink: React.FC<{
state: EvaluationComparisonState;
}> = props => {
const {useObjectVersion} = useWFHooks();
const evaluationCall = props.state.data.evaluationCalls[props.callId];
const modelObj = props.state.data.models[evaluationCall.modelRef];
const evaluationCall = props.state.summary.evaluationCalls[props.callId];
const modelObj = props.state.summary.models[evaluationCall.modelRef];
const objRef = useMemo(
() => parseRef(modelObj.ref) as WeaveObjectRef,
[modelObj.ref]
@@ -95,9 +95,9 @@ export const EvaluationDatasetLink: React.FC<{
callId: string;
state: EvaluationComparisonState;
}> = props => {
const evaluationCall = props.state.data.evaluationCalls[props.callId];
const evaluationCall = props.state.summary.evaluationCalls[props.callId];
const evaluationObj =
props.state.data.evaluations[evaluationCall.evaluationRef];
props.state.summary.evaluations[evaluationCall.evaluationRef];
const parsed = parseRef(evaluationObj.datasetRef);
if (!parsed) {
return null;