diff --git a/Common/Math/CrossSIMD.h b/Common/Math/CrossSIMD.h index 0824ee0e10ed..4f0c48cbe283 100644 --- a/Common/Math/CrossSIMD.h +++ b/Common/Math/CrossSIMD.h @@ -111,6 +111,13 @@ struct Vec4F32 { return Vec4F32{ _mm_and_ps(v, _mm_load_ps((float *)mask)) }; } + // Swaps the two lower elements. Useful for reversing triangles.. + Vec4F32 SwapLowerElements() { + return Vec4F32{ + _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 2, 0, 1)) + }; + } + inline Vec4F32 AsVec3ByMatrix44(const Mat4F32 &m) { return Vec4F32{ _mm_add_ps( _mm_add_ps( @@ -210,8 +217,8 @@ struct Vec4S32 { // Swaps the two lower elements, but NOT the two upper ones. Useful for reversing triangles.. // This is quite awkward on ARM64 :/ Maybe there's a better solution? Vec4S32 SwapLowerElements() { - float32x2_t upper = vget_high_s32(v); - float32x2_t lowerSwapped = vrev64_s32(vget_low_s32(v)); + int32x2_t upper = vget_high_s32(v); + int32x2_t lowerSwapped = vrev64_s32(vget_low_s32(v)); return Vec4S32{ vcombine_s32(lowerSwapped, upper) }; }; @@ -281,6 +288,13 @@ struct Vec4F32 { return Vec4F32{ vsetq_lane_f32(0.0f, v, 3) }; } + // Swaps the two lower elements, but NOT the two upper ones. Useful for reversing triangles.. + // This is quite awkward on ARM64 :/ Maybe there's a better solution? + Vec4F32 SwapLowerElements() { + float32x2_t lowerSwapped = vrev64_f32(vget_low_f32(v)); + return Vec4F32{ vcombine_f32(lowerSwapped, vget_high_f32(v)) }; + }; + // One of many possible solutions. Sometimes we could also use vld4q_f32 probably.. 
static void Transpose(Vec4F32 &col0, Vec4F32 &col1, Vec4F32 &col2, Vec4F32 &col3) { #if PPSSPP_ARCH(ARM64_NEON) diff --git a/GPU/Common/DepthRaster.cpp b/GPU/Common/DepthRaster.cpp index 920dd93e398c..f9e3f3b165e2 100644 --- a/GPU/Common/DepthRaster.cpp +++ b/GPU/Common/DepthRaster.cpp @@ -81,10 +81,19 @@ struct Edge { } }; +enum class TriangleResult { + OK, + NoPixels, + Backface, + TooSmall, +}; + +constexpr int MIN_TRI_AREA = 10; + // Adapted from Intel's depth rasterizer example. // Started with the scalar version, will SIMD-ify later. // x1/y1 etc are the scissor rect. -void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, ZCompareMode compareMode) { +TriangleResult DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const float *tz, ZCompareMode compareMode) { int tileStartX = x1; int tileEndX = x2; @@ -95,16 +104,12 @@ void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, // Due to the many multiplications, we might want to do it in floating point as 32-bit integer muls // are slow on SSE2. - // Convert to whole pixels for now. Later subpixel precision. int v0x = tx[0]; int v0y = ty[0]; - int v0z = tz[0]; int v1x = tx[1]; int v1y = ty[1]; - int v1z = tz[1]; int v2x = tx[2]; int v2y = ty[2]; - int v2z = tz[2]; // use fixed-point only for X and Y. Avoid work for Z and W. // We use 4x1 tiles for simplicity. @@ -114,13 +119,16 @@ void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int maxY = std::min(std::max(std::max(v0y, v1y), v2y), tileEndY); if (maxX == minX || maxY == minY) { // No pixels, or outside screen. - return; + return TriangleResult::NoPixels; } - // TODO: Cull really small triangles here. + // Really small triangles are culled below (see MIN_TRI_AREA). TODO: We can probably increase the threshold a bit. 
int triArea = (v1y - v2y) * v0x + (v2x - v1x) * v0y + (v1x * v2y - v2x * v1y); if (triArea <= 0) { - return; + return TriangleResult::Backface; + } + if (triArea < MIN_TRI_AREA) { + return TriangleResult::TooSmall; } float oneOverTriArea = 1.0f / (float)triArea; @@ -132,20 +140,25 @@ void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, Vec4S32 w2_row = e01.init(v0x, v0y, v1x, v1y, minX, minY); // Prepare to interpolate Z - Vec4F32 zz0 = Vec4F32::Splat((float)v0z); - Vec4F32 zz1 = Vec4F32::Splat((float)(v1z - v0z) * oneOverTriArea); - Vec4F32 zz2 = Vec4F32::Splat((float)(v2z - v0z) * oneOverTriArea); + Vec4F32 zz0 = Vec4F32::Splat(tz[0]); + Vec4F32 zz1 = Vec4F32::Splat((tz[1] - tz[0]) * oneOverTriArea); + Vec4F32 zz2 = Vec4F32::Splat((tz[2] - tz[0]) * oneOverTriArea); + + Vec4F32 zdeltaX = zz1 * Vec4F32FromS32(e20.oneStepX) + zz2 * Vec4F32FromS32(e01.oneStepX); + Vec4F32 zdeltaY = zz1 * Vec4F32FromS32(e20.oneStepY) + zz2 * Vec4F32FromS32(e01.oneStepY); + Vec4F32 zrow = zz0 + Vec4F32FromS32(w1_row) * zz1 + Vec4F32FromS32(w2_row) * zz2; // Rasterize - for (int y = minY; y <= maxY; y += Edge::stepYSize, w0_row += e12.oneStepY, w1_row += e20.oneStepY, w2_row += e01.oneStepY) { + for (int y = minY; y <= maxY; y += Edge::stepYSize, w0_row += e12.oneStepY, w1_row += e20.oneStepY, w2_row += e01.oneStepY, zrow += zdeltaY) { // Barycentric coordinates at start of row Vec4S32 w0 = w0_row; Vec4S32 w1 = w1_row; Vec4S32 w2 = w2_row; + Vec4F32 zs = zrow; uint16_t *rowPtr = depthBuf + stride * y; - for (int x = minX; x <= maxX; x += Edge::stepXSize, w0 += e12.oneStepX, w1 += e20.oneStepX, w2 += e01.oneStepX) { + for (int x = minX; x <= maxX; x += Edge::stepXSize, w0 += e12.oneStepX, w1 += e20.oneStepX, w2 += e01.oneStepX, zs += zdeltaX) { // If p is on or inside all edges for any pixels, // render those pixels. 
Vec4S32 signCalc = w0 | w1 | w2; @@ -157,9 +170,7 @@ void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, Vec4U16 shortMaskInv = SignBits32ToMaskU16(signCalc); // Now, the mask has 1111111 where we should preserve the contents of the depth buffer. - // Compute the Z value for all four pixels. - // float depth = zz[0] + beta * zz[1] + gamma * zz[2]; - Vec4U16 shortZ = Vec4U16::FromVec4F32(zz0 + Vec4F32FromS32(w1) * zz1 + Vec4F32FromS32(w2) * zz2); + Vec4U16 shortZ = Vec4U16::FromVec4F32(zs); // TODO: Lift this switch out of the inner loop, or even out of the function with templating. switch (compareMode) { @@ -180,6 +191,7 @@ void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, } } } + return TriangleResult::OK; } void DecodeAndTransformForDepthRaster(float *dest, const float *worldviewproj, const void *vertexData, int indexLowerBound, int indexUpperBound, VertexDecoder *dec, u32 vertTypeID) { @@ -249,7 +261,7 @@ void ConvertPredecodedThroughForDepthRaster(float *dest, const void *decodedVert } } -int DepthRasterClipIndexedRectangles(int *tx, int *ty, int *tz, const float *transformed, const uint16_t *indexBuffer, int count) { +int DepthRasterClipIndexedRectangles(int *tx, int *ty, float *tz, const float *transformed, const uint16_t *indexBuffer, int count) { // TODO: On ARM we can do better by keeping these in lanes instead of splatting. // However, hard to find a common abstraction. 
const Vec4F32 viewportX = Vec4F32::Splat(gstate.getViewportXCenter()); @@ -289,20 +301,21 @@ int DepthRasterClipIndexedRectangles(int *tx, int *ty, int *tz, const float *tra y *= recipW; z *= recipW; - Vec4S32 screen[3]; + Vec4S32 screen[2]; + Vec4F32 depth; screen[0] = Vec4S32FromF32((x * viewportScaleX + viewportX) - offsetX); screen[1] = Vec4S32FromF32((y * viewportScaleY + viewportY) - offsetY); - screen[2] = Vec4S32FromF32((z * viewportScaleZ + viewportZ).Clamp(0.0f, 65535.0f)); + depth = (z * viewportScaleZ + viewportZ).Clamp(0.0f, 65535.0f); screen[0].Store(tx + outCount); screen[1].Store(ty + outCount); - screen[2].Store(tz + outCount); + depth.Store(tz + outCount); outCount += 2; } return outCount; } -int DepthRasterClipIndexedTriangles(int *tx, int *ty, int *tz, const float *transformed, const uint16_t *indexBuffer, int count) { +int DepthRasterClipIndexedTriangles(int *tx, int *ty, float *tz, const float *transformed, const uint16_t *indexBuffer, int count) { bool cullEnabled = gstate.isCullEnabled(); GECullMode cullMode = gstate.getCullMode(); @@ -351,14 +364,14 @@ int DepthRasterClipIndexedTriangles(int *tx, int *ty, int *tz, const float *tran y *= recipW; z *= recipW; - Vec4S32 screen[3]; + Vec4S32 screen[2]; screen[0] = Vec4S32FromF32((x * viewportScaleX + viewportX) - offsetX); screen[1] = Vec4S32FromF32((y * viewportScaleY + viewportY) - offsetY); - screen[2] = Vec4S32FromF32((z * viewportScaleZ + viewportZ).Clamp(0.0f, 65535.0f)); + Vec4F32 depth = (z * viewportScaleZ + viewportZ).Clamp(0.0f, 65535.0f); screen[0].Store(tx + outCount); screen[1].Store(ty + outCount); - screen[2].Store(tz + outCount); + depth.Store(tz + outCount); outCount += 3; if (!cullEnabled) { @@ -371,25 +384,25 @@ int DepthRasterClipIndexedTriangles(int *tx, int *ty, int *tz, const float *tran screen[0].SwapLowerElements().Store(tx + outCount); screen[1].SwapLowerElements().Store(ty + outCount); - screen[2].SwapLowerElements().Store(tz + outCount); + 
depth.SwapLowerElements().Store(tz + outCount); outCount += 3; } } return outCount; } -void DepthRasterConvertTransformed(int *tx, int *ty, int *tz, const float *transformed, const uint16_t *indexBuffer, int count) { +void DepthRasterConvertTransformed(int *tx, int *ty, float *tz, const float *transformed, const uint16_t *indexBuffer, int count) { // TODO: This is basically a transpose, or AoS->SoA conversion. There may be fast ways. for (int i = 0; i < count; i++) { const float *pos = transformed + indexBuffer[i] * 4; tx[i] = (int)pos[0]; ty[i] = (int)pos[1]; - tz[i] = (u16)pos[2]; + tz[i] = pos[2]; // TODO: Clamp to [0, 65535]? The DepthRasterClipIndexed* paths clamp Z, this one doesn't. } } // Rasterizes screen-space vertices. -void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType prim, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, int count) { +void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType prim, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const float *tz, int count) { // Prim should now be either TRIANGLES or RECTs. _dbg_assert_(prim == GE_PRIM_RECTANGLES || prim == GE_PRIM_TRIANGLES); @@ -438,17 +451,26 @@ void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType pr switch (prim) { case GE_PRIM_RECTANGLES: for (int i = 0; i < count; i += 2) { - uint16_t z = tz[i + 1]; // depth from second vertex + uint16_t z = (uint16_t)tz[i + 1]; // depth from second vertex // TODO: Should clip coordinates to the scissor rectangle. // We remove the subpixel information here. 
DepthRasterRect(depth, depthStride, tx[i], ty[i], tx[i + 1], ty[i + 1], z, comp); } + gpuStats.numDepthRasterPrims += count / 2; break; case GE_PRIM_TRIANGLES: + { + int stats[4]{}; for (int i = 0; i < count; i += 3) { - DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, &tx[i], &ty[i], &tz[i], comp); + TriangleResult result = DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, &tx[i], &ty[i], &tz[i], comp); + stats[(int)result]++; } + gpuStats.numDepthRasterBackface += stats[(int)TriangleResult::Backface]; + gpuStats.numDepthRasterNoPixels += stats[(int)TriangleResult::NoPixels]; + gpuStats.numDepthRasterTooSmall += stats[(int)TriangleResult::TooSmall]; + gpuStats.numDepthRasterPrims += stats[(int)TriangleResult::OK]; break; + } default: _dbg_assert_(false); } diff --git a/GPU/Common/DepthRaster.h b/GPU/Common/DepthRaster.h index ba9788eebe91..fb10e6d2ee65 100644 --- a/GPU/Common/DepthRaster.h +++ b/GPU/Common/DepthRaster.h @@ -17,13 +17,13 @@ struct DepthScreenVertex { class VertexDecoder; struct TransformedVertex; -int DepthRasterClipIndexedTriangles(int *tx, int *ty, int *tz, const float *transformed, const uint16_t *indexBuffer, int count); -int DepthRasterClipIndexedRectangles(int *tx, int *ty, int *tz, const float *transformed, const uint16_t *indexBuffer, int count); +int DepthRasterClipIndexedTriangles(int *tx, int *ty, float *tz, const float *transformed, const uint16_t *indexBuffer, int count); +int DepthRasterClipIndexedRectangles(int *tx, int *ty, float *tz, const float *transformed, const uint16_t *indexBuffer, int count); void DecodeAndTransformForDepthRaster(float *dest, const float *worldviewproj, const void *vertexData, int indexLowerBound, int indexUpperBound, VertexDecoder *dec, u32 vertTypeID); void TransformPredecodedForDepthRaster(float *dest, const float *worldviewproj, const void *decodedVertexData, VertexDecoder *dec, int count); void ConvertPredecodedThroughForDepthRaster(float *dest, const void *decodedVertexData, VertexDecoder 
*dec, int count); -void DepthRasterConvertTransformed(int *tx, int *ty, int *tz, const float *transformed, const uint16_t *indexBuffer, int count); +void DepthRasterConvertTransformed(int *tx, int *ty, float *tz, const float *transformed, const uint16_t *indexBuffer, int count); // void DepthRasterConvertTransformed(int *tx, int *ty, int *tz, GEPrimitiveType prim, const TransformedVertex *transformed, int count); -void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType prim, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, int count); +void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType prim, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const float *tz, int count); diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 088efd052ee5..5f9694952c3d 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -23,6 +23,7 @@ #include "Common/LogReporting.h" #include "Common/Math/SIMDHeaders.h" #include "Common/Math/lin/matrix4x4.h" +#include "Common/TimeUtil.h" #include "Core/System.h" #include "Core/Config.h" #include "GPU/Common/DrawEngineCommon.h" @@ -914,6 +915,7 @@ inline void ComputeFinalProjMatrix(float *worldviewproj) { } void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder *dec, uint32_t vertTypeID, int vertexCount) { + switch (prim) { case GE_PRIM_INVALID: case GE_PRIM_KEEP_PREVIOUS: @@ -929,6 +931,8 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder return; } + TimeCollector collectStat(&gpuStats.msRasterizingDepth, coreCollectDebugStats); + float worldviewproj[16]; ComputeFinalProjMatrix(worldviewproj); @@ -953,7 +957,7 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder int *tx = depthScreenVerts_; int *ty = depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT; - int *tz = depthScreenVerts_ + 
DEPTH_SCREENVERTS_COMPONENT_COUNT * 2; + float *tz = (float *)(depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT * 2); // Clip and triangulate using the index buffer. int outVertCount = DepthRasterClipIndexedTriangles(tx, ty, tz, depthTransformed_, decIndex_, vertexCount); @@ -962,7 +966,7 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder for (int i = outVertCount; i < ((outVertCount + 16) & ~15); i++) { tx[i] = 0; ty[i] = 0; - tz[i] = 0; + tz[i] = 0.0f; } } @@ -972,6 +976,8 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder } void DrawEngineCommon::DepthRasterPredecoded(GEPrimitiveType prim, const void *inVerts, int numDecoded, VertexDecoder *dec, int vertexCount) { + TimeCollector collectStat(&gpuStats.msRasterizingDepth, coreCollectDebugStats); + switch (prim) { case GE_PRIM_INVALID: case GE_PRIM_KEEP_PREVIOUS: @@ -987,7 +993,7 @@ void DrawEngineCommon::DepthRasterPredecoded(GEPrimitiveType prim, const void *i int *tx = depthScreenVerts_; int *ty = depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT; - int *tz = depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT * 2; + float *tz = (float *)(depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT * 2); int outVertCount = 0; @@ -996,6 +1002,9 @@ void DrawEngineCommon::DepthRasterPredecoded(GEPrimitiveType prim, const void *i DepthRasterConvertTransformed(tx, ty, tz, depthTransformed_, decIndex_, vertexCount); outVertCount = vertexCount; } else { + if (dec->VertexType() & (GE_VTYPE_WEIGHT_MASK | GE_VTYPE_MORPHCOUNT_MASK)) { + return; + } float worldviewproj[16]; ComputeFinalProjMatrix(worldviewproj); TransformPredecodedForDepthRaster(depthTransformed_, worldviewproj, decoded_, dec, numDecoded); @@ -1018,7 +1027,7 @@ void DrawEngineCommon::DepthRasterPredecoded(GEPrimitiveType prim, const void *i for (int i = outVertCount; i < ((outVertCount + 16) & ~15); i++) { tx[i] = 0; ty[i] = 0; - tz[i] = 0; + tz[i] = 0.0f; } } 
DepthRasterScreenVerts((uint16_t *)Memory::GetPointerWrite(gstate.getDepthBufRawAddress() | 0x04000000), gstate.DepthBufStride(), diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index e919ba61133a..0ecfbdfd9676 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1866,6 +1866,8 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, char tag[128]; size_t len = FormatFramebufferName(vfb, tag, sizeof(tag)); + gpuStats.numFBOsCreated++; + vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), msaaLevel_, true, tag }); if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) { NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, vfb->BufferByteSize(RASTER_COLOR), tag, len); diff --git a/GPU/GPU.h b/GPU/GPU.h index 4c86c7e815bf..9b98e8634466 100644 --- a/GPU/GPU.h +++ b/GPU/GPU.h @@ -91,6 +91,7 @@ struct GPUStatistics { numPlaneUpdates = 0; numTexturesDecoded = 0; numFramebufferEvaluations = 0; + numFBOsCreated = 0; numBlockingReadbacks = 0; numReadbacks = 0; numUploads = 0; @@ -107,6 +108,11 @@ struct GPUStatistics { numCachedReplacedTextures = 0; numClutTextures = 0; msProcessingDisplayLists = 0; + msRasterizingDepth = 0.0f; + numDepthRasterPrims = 0; + numDepthRasterBackface = 0; + numDepthRasterNoPixels = 0; + numDepthRasterTooSmall = 0; vertexGPUCycles = 0; otherGPUCycles = 0; } @@ -129,6 +135,7 @@ struct GPUStatistics { int numTextureDataBytesHashed; int numTexturesDecoded; int numFramebufferEvaluations; + int numFBOsCreated; int numBlockingReadbacks; int numReadbacks; int numUploads; @@ -145,9 +152,13 @@ struct GPUStatistics { int numCachedReplacedTextures; int numClutTextures; double msProcessingDisplayLists; + double msRasterizingDepth; int vertexGPUCycles; int otherGPUCycles; - + int numDepthRasterPrims; + int numDepthRasterBackface; + int numDepthRasterNoPixels; + int 
numDepthRasterTooSmall; // Flip count. Doesn't really belong here. int numFlips; }; diff --git a/GPU/GPUCommonHW.cpp b/GPU/GPUCommonHW.cpp index 9b5389c8750a..552a38179c42 100644 --- a/GPU/GPUCommonHW.cpp +++ b/GPU/GPUCommonHW.cpp @@ -1769,13 +1769,14 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) { "DL processing time: %0.2f ms, %d drawsync, %d listsync\n" "Draw: %d (%d dec, %d culled), flushes %d, clears %d, bbox jumps %d (%d updates)\n" "Vertices: %d dec: %d drawn: %d\n" - "FBOs active: %d (evaluations: %d)\n" + "FBOs active: %d (evaluations: %d, created %d)\n" "Textures: %d, dec: %d, invalidated: %d, hashed: %d kB, clut %d\n" "readbacks %d (%d non-block), upload %d (cached %d), depal %d\n" "block transfers: %d\n" "replacer: tracks %d references, %d unique textures\n" "Cpy: depth %d, color %d, reint %d, blend %d, self %d\n" - "GPU cycles: %d (%0.1f per vertex)\n%s", + "GPU cycles: %d (%0.1f per vertex)\n" + "Depth raster: %0.2f ms, %d prim, %d nopix, %d small, %d backface\n%s", gpuStats.msProcessingDisplayLists * 1000.0f, gpuStats.numDrawSyncs, gpuStats.numListSyncs, @@ -1791,6 +1792,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) { gpuStats.numUncachedVertsDrawn, (int)framebufferManager_->NumVFBs(), gpuStats.numFramebufferEvaluations, + gpuStats.numFBOsCreated, (int)textureCache_->NumLoadedTextures(), gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, @@ -1811,6 +1813,11 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) { gpuStats.numCopiesForSelfTex, gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles, vertexAverageCycles, + gpuStats.msRasterizingDepth * 1000.0, + gpuStats.numDepthRasterPrims, + gpuStats.numDepthRasterNoPixels, + gpuStats.numDepthRasterTooSmall, + gpuStats.numDepthRasterBackface, debugRecording_ ? 
"(debug-recording)" : "" ); } diff --git a/UI/ImDebugger/ImGe.h b/UI/ImDebugger/ImGe.h index 41dc6a8b1f94..38ac0e6d5288 100644 --- a/UI/ImDebugger/ImGe.h +++ b/UI/ImDebugger/ImGe.h @@ -96,6 +96,8 @@ struct ImGeReadbackViewer : public PixelLookup { } bool FormatValueAt(char *buf, size_t bufSize, int x, int y) const override; + // TODO: This is unsafe! If you load state for example with the debugger open... + // We need to re-fetch this each frame from the parameters. VirtualFramebuffer *vfb = nullptr; // This specifies what to show