Skip to content

Commit

Permalink
vk: start refactoring profiler
Browse files Browse the repository at this point in the history
Convert direct stack manipulation to simple and cheap event writing.
Draw rudimentary frame times graph.

Related to #412
  • Loading branch information
w23 committed Mar 9, 2023
1 parent 01de595 commit 6ff146e
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 75 deletions.
29 changes: 29 additions & 0 deletions ref/vk/NOTES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Frame structure wrt calls from the engine
- (eng) SCR_UpdateScreen()
- (eng) V_PreRender()
- **(ref) R_BeginFrame()**
- (eng) V_RenderView()
- **(ref) GL_BackendStartFrame()** -- ref_gl only sets speeds string to empty here
- (eng) loop over ref_params_t views
- **(ref) GL_RenderFrame()**
- (eng) ??? SV_DrawDebugTriangles()
- **(ref) GL_BackendEndFrame()** -- ref_gl only produces speeds string here
- (eng) V_PostRender()
- **(ref) R_AllowFog(), R_Set2DMode(true)**
- **(ref) R_DrawTileClear()** x N
- (vgui) Paint() -> ???
- (eng) SCR_RSpeeds()
- **(ref) R_SpeedsMessage()**
- (eng) CL_DrawString() ...
- **(ref) GL_SetRenderMode()**
- **(ref) RefGetParm()** for texture resolution
- **(ref) Color4ub()**
- **(ref) R_DrawStretchPic()**
- (eng) SRC_DrawNetGraph()
- **(ref) many TriApi calls** -- 2D usage of triapi. we were not ready for this (maybe track R_Set2DMode()?)
- **(ref) R_ShowTextures()** kekw
- **(ref) VID_ScreenShot()**
- **(ref) R_AllowFog(true)**
- **(ref) R_EndFrame()**


99 changes: 49 additions & 50 deletions ref/vk/profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,15 @@

typedef int aprof_scope_id_t;

// scope_name should be static const, and not on stack
aprof_scope_id_t aprof_scope_init(const char *scope_name);
void aprof_scope_event(aprof_scope_id_t, int begin);
void aprof_scope_frame( void );
// Returns event index for previous frame
uint32_t aprof_scope_frame( void );
uint64_t aprof_time_now_ns( void );

typedef struct {
const char *name;

struct {
uint64_t duration;
uint64_t duration_children;
int count;
} frame;
} aprof_scope_t;

#define APROF_MAX_SCOPES 256
Expand All @@ -49,12 +45,43 @@ typedef struct {
uint64_t time_begin;
} aprof_stack_frame_t;

#define APROF_EVENT_TYPE_MASK 0x0full
#define APROF_EVENT_TYPE_SHIFT 0
#define APROF_EVENT_TYPE(event) (((event)&APROF_EVENT_TYPE_MASK) >> APROF_EVENT_TYPE_SHIFT)

#define APROF_EVENT_SCOPE_ID_MASK 0xff00ull
#define APROF_EVENT_SCOPE_ID_SHIFT 8
#define APROF_EVENT_SCOPE_ID(event) (((event)&APROF_EVENT_SCOPE_ID_MASK) >> APROF_EVENT_SCOPE_ID_SHIFT)

#define APROF_EVENT_TIMESTAMP_SHIFT 16
#define APROF_EVENT_TIMESTAMP(event) ((event) >> APROF_EVENT_TIMESTAMP_SHIFT)

#define APROF_EVENT_MAKE(type, scope_id, timestamp) \
(((type) << APROF_EVENT_TYPE_SHIFT) & APROF_EVENT_TYPE_MASK) | \
(((scope_id) << APROF_EVENT_SCOPE_ID_SHIFT) & APROF_EVENT_SCOPE_ID_MASK) | \
((timestamp) << APROF_EVENT_TIMESTAMP_SHIFT)

enum {
APROF_EVENT_FRAME_BOUNDARY = 0,
APROF_EVENT_SCOPE_BEGIN = 1,
APROF_EVENT_SCOPE_END = 2,
};

// MUST be power of 2
#define APROF_EVENT_BUFFER_SIZE (1<<20)
#define APROF_EVENT_BUFFER_SIZE_MASK (APROF_EVENT_BUFFER_SIZE - 1)

typedef uint64_t aprof_event_t;

typedef struct {
aprof_scope_t scopes[APROF_MAX_SCOPES];
int num_scopes;

aprof_stack_frame_t stack[APROF_MAX_STACK_DEPTH];
int stack_depth;
aprof_event_t events[APROF_EVENT_BUFFER_SIZE];
uint32_t events_write;
uint32_t events_last_frame;

int current_frame_wraparounds;

// TODO event log for chrome://trace (or similar) export and analysis
} aprof_state_t;
Expand Down Expand Up @@ -104,52 +131,24 @@ void aprof_scope_event(aprof_scope_id_t scope_id, int begin) {
if (scope_id < 0 || scope_id >= g_aprof.num_scopes)
return;

// TODO improve performance by just writing into an event array here
// analysis should be done on-demand later
if (begin) {
const int s = g_aprof.stack_depth;
if (g_aprof.stack_depth == APROF_MAX_STACK_DEPTH)
return;

g_aprof.stack[s].scope = scope_id;
g_aprof.stack[s].time_begin = now;
g_aprof.stack_depth++;
} else {
aprof_scope_t *scope;
const aprof_stack_frame_t *const frame = g_aprof.stack + g_aprof.stack_depth - 1;
uint64_t frame_duration;

assert(g_aprof.stack_depth > 0);
if (g_aprof.stack_depth == 0)
return;

assert(frame->scope == scope_id);
g_aprof.events[g_aprof.events_write] = APROF_EVENT_MAKE(begin?APROF_EVENT_SCOPE_BEGIN:APROF_EVENT_SCOPE_END, scope_id, now);
g_aprof.events_write = (g_aprof.events_write + 1) & APROF_EVENT_BUFFER_SIZE_MASK;

scope = g_aprof.scopes + frame->scope;
frame_duration = now - frame->time_begin;
scope->frame.duration += frame_duration;
scope->frame.count++;

if (g_aprof.stack_depth > 1) {
const aprof_stack_frame_t *const parent_frame = g_aprof.stack + g_aprof.stack_depth - 2;
aprof_scope_t *const parent_scope = g_aprof.scopes + parent_frame->scope;
if (g_aprof.events_write == g_aprof.events_last_frame)
++g_aprof.current_frame_wraparounds;
}

assert(parent_frame->scope >= 0);
assert(parent_frame->scope < g_aprof.num_scopes);
uint32_t aprof_scope_frame( void ) {
const uint64_t now = aprof_time_now_ns();
const uint32_t previous_frame = g_aprof.events_last_frame;

parent_scope->frame.duration_children += frame_duration;
}
g_aprof.events_last_frame = g_aprof.events_write;
g_aprof.events[g_aprof.events_write] = APROF_EVENT_MAKE(APROF_EVENT_FRAME_BOUNDARY, 0, now);
g_aprof.events_write = (g_aprof.events_write + 1) & APROF_EVENT_BUFFER_SIZE_MASK;

g_aprof.stack_depth--;
}
}
g_aprof.current_frame_wraparounds = 0;

void aprof_scope_frame( void ) {
assert(g_aprof.stack_depth == 0);
for (int i = 0; i < g_aprof.num_scopes; ++i) {
aprof_scope_t *const scope = g_aprof.scopes + i;
memset(&scope->frame, 0, sizeof(scope->frame));
}
return previous_frame;
}

#endif
57 changes: 57 additions & 0 deletions ref/vk/r_slows.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "r_slows.h"
#include "vk_light.h" // For stats
#include "shaders/ray_interop.h" // stats: struct LightCluster
#include "vk_overlay.h"
#include "vk_framectl.h"

#include "profiler.h"

#define MAX_FRAMES_HISTORY 256

static struct {
float frame_times[MAX_FRAMES_HISTORY];
uint32_t frame_num;
} g_slows;


static float linearstep(float min, float max, float v) {
if (v <= min) return 0;
if (v >= max) return 1;
return (v - min) / (max - min);
}

// FIXME move this to r_speeds or something like that
void R_ShowExtendedProfilingData(uint32_t prev_frame_index) {
{
const int dirty = g_lights.stats.dirty_cells;
gEngine.Con_NPrintf(4, "Dirty light cells: %d, size = %dKiB, ranges = %d\n", dirty, (int)(dirty * sizeof(struct LightCluster) / 1024), g_lights.stats.ranges_uploaded);
}

const uint32_t events = g_aprof.events_last_frame - prev_frame_index;
const unsigned long long delta_ns = APROF_EVENT_TIMESTAMP(g_aprof.events[g_aprof.events_last_frame]) - APROF_EVENT_TIMESTAMP(g_aprof.events[prev_frame_index]);
const float frame_time = delta_ns / 1e6;

gEngine.Con_NPrintf(5, "aprof events this frame: %u, wraps: %d, frame time: %.03fms (%lluns)\n", events, g_aprof.current_frame_wraparounds, frame_time, delta_ns);

g_slows.frame_times[g_slows.frame_num] = frame_time;
g_slows.frame_num = (g_slows.frame_num + 1) % MAX_FRAMES_HISTORY;

const float width = (float)vk_frame.width / MAX_FRAMES_HISTORY;
for (int i = 0; i < MAX_FRAMES_HISTORY; ++i) {
const float frame_time = g_slows.frame_times[i];
CL_FillRGBA(i * width, 100, width, frame_time * 2.f,
//255, 255, 255, 255);
255*linearstep(16.67, 16.67*2.f, frame_time), 255, 0, 127);
}

/* gEngine.Con_NPrintf(5, "Perf scopes:"); */
/* for (int i = 0; i < g_aprof.num_scopes; ++i) { */
/* const aprof_scope_t *const scope = g_aprof.scopes + i; */
/* gEngine.Con_NPrintf(6 + i, "%s: c%d t%.03f(%.03f)ms s%.03f(%.03f)ms", scope->name, */
/* scope->frame.count, */
/* scope->frame.duration / 1e6, */
/* (scope->frame.duration / 1e6) / scope->frame.count, */
/* (scope->frame.duration - scope->frame.duration_children) / 1e6, */
/* (scope->frame.duration - scope->frame.duration_children) / 1e6 / scope->frame.count); */
/* } */
}
4 changes: 4 additions & 0 deletions ref/vk/r_slows.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#pragma once
#include <stdint.h>

void R_ShowExtendedProfilingData(uint32_t prev_frame_event_index);
34 changes: 9 additions & 25 deletions ref/vk/vk_framectl.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "shaders/ray_interop.h" // stats: struct LightCluster

#include "profiler.h"
#include "r_slows.h"

#include "eiface.h" // ARRAYSIZE

Expand Down Expand Up @@ -185,33 +186,16 @@ static void updateGamma( void ) {
}
}

// FIXME move this to r_speeds or something like that
static void showProfilingData( void ) {
{
const int dirty = g_lights.stats.dirty_cells;
gEngine.Con_NPrintf(4, "Dirty light cells: %d, size = %dKiB, ranges = %d\n", dirty, (int)(dirty * sizeof(struct LightCluster) / 1024), g_lights.stats.ranges_uploaded);
}

gEngine.Con_NPrintf(5, "Perf scopes:");
for (int i = 0; i < g_aprof.num_scopes; ++i) {
const aprof_scope_t *const scope = g_aprof.scopes + i;
gEngine.Con_NPrintf(6 + i, "%s: c%d t%.03f(%.03f)ms s%.03f(%.03f)ms", scope->name,
scope->frame.count,
scope->frame.duration / 1e6,
(scope->frame.duration / 1e6) / scope->frame.count,
(scope->frame.duration - scope->frame.duration_children) / 1e6,
(scope->frame.duration - scope->frame.duration_children) / 1e6 / scope->frame.count);
}
}

void R_BeginFrame( qboolean clearScene ) {
ASSERT(g_frame.current.phase == Phase_Submitted || g_frame.current.phase == Phase_Idle);
const int prev_frame_index = g_frame.current.index % MAX_CONCURRENT_FRAMES;
g_frame.current.index = (g_frame.current.index + 1) % MAX_CONCURRENT_FRAMES;
const VkCommandBuffer cmdbuf = vk_frame.cmdbuf = g_frame.command.buffers[g_frame.current.index];

showProfilingData();
aprof_scope_frame();
{
const uint32_t prev_frame_event_index = aprof_scope_frame();
R_ShowExtendedProfilingData(prev_frame_event_index);
}

APROF_SCOPE_BEGIN(frame);
APROF_SCOPE_BEGIN(begin_frame);
Expand Down Expand Up @@ -349,10 +333,10 @@ static void submit( VkCommandBuffer cmdbuf, qboolean wait ) {
XVK_CHECK(vkWaitForFences(vk_core.device, 1, g_frame.fence_done + g_frame.current.index, VK_TRUE, INT64_MAX));
APROF_SCOPE_END(frame_gpu_wait);

if (vk_core.debug) {
// FIXME more scopes
XVK_CHECK(vkQueueWaitIdle(vk_core.queue));
}
/* if (vk_core.debug) { */
/* // FIXME more scopes */
/* XVK_CHECK(vkQueueWaitIdle(vk_core.queue)); */
/* } */
g_frame.current.phase = Phase_Idle;
}
}
Expand Down

0 comments on commit 6ff146e

Please sign in to comment.