diff --git a/GPU/Common/DepthRaster.cpp b/GPU/Common/DepthRaster.cpp index 23e506ff7be9..dbf3a2f06824 100644 --- a/GPU/Common/DepthRaster.cpp +++ b/GPU/Common/DepthRaster.cpp @@ -81,7 +81,7 @@ void DepthRasterRect(uint16_t *dest, int stride, int x1, int y1, int x2, int y2, // Adapted from Intel's depth rasterizer example. // Started with the scalar version, will SIMD-ify later. // x1/y1 etc are the scissor rect. -void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const DepthScreenVertex vertsSub[3], GEComparison compareMode) { +void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, GEComparison compareMode) { int tileStartX = x1; int tileEndX = x2; @@ -94,15 +94,15 @@ void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, // Convert to whole pixels for now. Later subpixel precision. DepthScreenVertex verts[3]; - verts[0].x = vertsSub[0].x; - verts[0].y = vertsSub[0].y; - verts[0].z = vertsSub[0].z; - verts[1].x = vertsSub[2].x; - verts[1].y = vertsSub[2].y; - verts[1].z = vertsSub[2].z; - verts[2].x = vertsSub[1].x; - verts[2].y = vertsSub[1].y; - verts[2].z = vertsSub[1].z; + verts[0].x = tx[0]; + verts[0].y = ty[0]; + verts[0].z = tz[0]; + verts[1].x = tx[2]; + verts[1].y = ty[2]; + verts[1].z = tz[2]; + verts[2].x = tx[1]; + verts[2].y = ty[1]; + verts[2].z = tz[1]; // use fixed-point only for X and Y. Avoid work for Z and W. int startX = std::max(std::min(std::min(verts[0].x, verts[1].x), verts[2].x), tileStartX); @@ -242,7 +242,7 @@ void DecodeAndTransformForDepthRaster(float *dest, GEPrimitiveType prim, const f } } -int DepthRasterClipIndexedTriangles(DepthScreenVertex *screenVerts, const float *transformed, const uint16_t *indexBuffer, int count) { +int DepthRasterClipIndexedTriangles(int *tx, int *ty, int *tz, const float *transformed, const uint16_t *indexBuffer, int count) { bool cullEnabled = gstate.isCullEnabled(); const float viewportX = gstate.getViewportXCenter(); @@ -289,28 +289,28 @@ int DepthRasterClipIndexedTriangles(DepthScreenVertex *screenVerts, const float if (screen[2] >= 65535.0f) { screen[2] = 65535.0f; } - screenVerts[outCount].x = screen[0] * (1.0f / 16.0f); // We ditch the subpixel precision here. - screenVerts[outCount].y = screen[1] * (1.0f / 16.0f); - screenVerts[outCount].z = screen[2]; - + tx[outCount] = screen[0] * (1.0f / 16.0f); // We ditch the subpixel precision here. + ty[outCount] = screen[1] * (1.0f / 16.0f); + tz[outCount] = screen[2]; outCount++; } } return outCount; } -void DepthRasterConvertTransformed(DepthScreenVertex *screenVerts, GEPrimitiveType prim, const TransformedVertex *transformed, int count) { +void DepthRasterConvertTransformed(int *tx, int *ty, int *tz, GEPrimitiveType prim, const TransformedVertex *transformed, int count) { _dbg_assert_(prim == GE_PRIM_RECTANGLES || prim == GE_PRIM_TRIANGLES); + // TODO: This is basically a transpose, or AoS->SoA conversion. There may be fast ways. for (int i = 0; i < count; i++) { - screenVerts[i].x = (int)transformed[i].pos[0]; - screenVerts[i].y = (int)transformed[i].pos[1]; - screenVerts[i].z = (u16)transformed[i].pos[2]; + tx[i] = (int)transformed[i].pos[0]; + ty[i] = (int)transformed[i].pos[1]; + tz[i] = (u16)transformed[i].pos[2]; } } // Rasterizes screen-space vertices. -void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType prim, int x1, int y1, int x2, int y2, const DepthScreenVertex *screenVerts, int count) { +void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType prim, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, int count) { // Prim should now be either TRIANGLES or RECTs. _dbg_assert_(prim == GE_PRIM_RECTANGLES || prim == GE_PRIM_TRIANGLES); @@ -327,17 +327,16 @@ void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType pr switch (prim) { case GE_PRIM_RECTANGLES: - for (int i = 0; i < count / 2; i++) { - uint16_t z = screenVerts[i + 1].z; // depth from second vertex + for (int i = 0; i < count; i += 2) { + uint16_t z = tz[i + 1]; // depth from second vertex // TODO: Should clip coordinates to the scissor rectangle. // We remove the subpixel information here. - DepthRasterRect(depth, depthStride, screenVerts[i].x, screenVerts[i].y, screenVerts[i + 1].x, screenVerts[i + 1].y, - z, compareMode); + DepthRasterRect(depth, depthStride, tx[i], ty[i], tx[i + 1], ty[i + 1], z, compareMode); } break; case GE_PRIM_TRIANGLES: - for (int i = 0; i < count / 3; i++) { - DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, screenVerts + i * 3, compareMode); + for (int i = 0; i < count; i += 3) { + DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, &tx[i], &ty[i], &tz[i], compareMode); } break; default: diff --git a/GPU/Common/DepthRaster.h b/GPU/Common/DepthRaster.h index 50ef309d936c..50e7a2577ddb 100644 --- a/GPU/Common/DepthRaster.h +++ b/GPU/Common/DepthRaster.h @@ -6,7 +6,7 @@ struct DepthScreenVertex { int x; int y; - uint16_t z; + int z; }; // Specialized, very limited depth-only rasterizer. @@ -17,7 +17,7 @@ struct DepthScreenVertex { class VertexDecoder; struct TransformedVertex; -int DepthRasterClipIndexedTriangles(DepthScreenVertex *screenVerts, const float *transformed, const uint16_t *indexBuffer, int count); +int DepthRasterClipIndexedTriangles(int *tx, int *ty, int *tz, const float *transformed, const uint16_t *indexBuffer, int count); void DecodeAndTransformForDepthRaster(float *dest, GEPrimitiveType prim, const float *worldviewproj, const void *vertexData, int count, VertexDecoder *dec, u32 vertTypeID); -void DepthRasterConvertTransformed(DepthScreenVertex *screenVerts, GEPrimitiveType prim, const TransformedVertex *transformed, int count); -void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType prim, int x1, int y1, int x2, int y2, const DepthScreenVertex *screenVerts, int count); +void DepthRasterConvertTransformed(int *tx, int *ty, int *tz, GEPrimitiveType prim, const TransformedVertex *transformed, int count); +void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType prim, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, int count); diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 60024326fbe5..8f857f20acb9 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -38,7 +38,9 @@ enum { TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof(TransformedVertex), DEPTH_TRANSFORMED_SIZE = VERTEX_BUFFER_MAX * 4, - DEPTH_SCREENVERTS_SIZE = VERTEX_BUFFER_MAX * sizeof(DepthScreenVertex), + DEPTH_SCREENVERTS_COMPONENT_COUNT = VERTEX_BUFFER_MAX, + DEPTH_SCREENVERTS_COMPONENT_SIZE = DEPTH_SCREENVERTS_COMPONENT_COUNT * sizeof(int) + 384, + DEPTH_SCREENVERTS_SIZE = DEPTH_SCREENVERTS_COMPONENT_SIZE * 3, }; DrawEngineCommon::DrawEngineCommon() : decoderMap_(32) { @@ -54,7 +56,7 @@ DrawEngineCommon::DrawEngineCommon() : decoderMap_(32) { useDepthRaster_ = PSP_CoreParameter().compat.flags().SoftwareRasterDepth; if (useDepthRaster_) { depthTransformed_ = (float *)AllocateMemoryPages(DEPTH_TRANSFORMED_SIZE, MEM_PROT_READ | MEM_PROT_WRITE); - depthScreenVerts_ = (DepthScreenVertex *)AllocateMemoryPages(DEPTH_SCREENVERTS_SIZE, MEM_PROT_READ | MEM_PROT_WRITE); + depthScreenVerts_ = (int *)AllocateMemoryPages(DEPTH_SCREENVERTS_SIZE, MEM_PROT_READ | MEM_PROT_WRITE); } } @@ -933,12 +935,16 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder numDec += drawVerts_[i].vertexCount; } + int *tx = depthScreenVerts_; + int *ty = depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT; + int *tz = depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT * 2; + // Clip and triangulate using the index buffer. - int outVertCount = DepthRasterClipIndexedTriangles(depthScreenVerts_, depthTransformed_, decIndex_, numDec); + int outVertCount = DepthRasterClipIndexedTriangles(tx, ty, tz, depthTransformed_, decIndex_, numDec); DepthRasterScreenVerts((uint16_t *)Memory::GetPointerWrite(gstate.getDepthBufRawAddress() | 0x04000000), gstate.DepthBufStride(), GE_PRIM_TRIANGLES, gstate.getScissorX1(), gstate.getScissorY1(), gstate.getScissorX2(), gstate.getScissorY2(), - depthScreenVerts_, outVertCount); + tx, ty, tz, outVertCount); } void DrawEngineCommon::DepthRasterPretransformed(GEPrimitiveType prim, const TransformedVertex *inVerts, int count) { @@ -955,8 +961,12 @@ void DrawEngineCommon::DepthRasterPretransformed(GEPrimitiveType prim, const Tra _dbg_assert_(prim != GE_PRIM_TRIANGLE_STRIP && prim != GE_PRIM_TRIANGLE_FAN); - DepthRasterConvertTransformed(depthScreenVerts_, prim, inVerts, count); + int *tx = depthScreenVerts_; + int *ty = depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT; + int *tz = depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT * 2; + + DepthRasterConvertTransformed(tx, ty, tz, prim, inVerts, count); DepthRasterScreenVerts((uint16_t *)Memory::GetPointerWrite(gstate.getDepthBufRawAddress() | 0x04000000), gstate.DepthBufStride(), prim, gstate.getScissorX1(), gstate.getScissorY1(), gstate.getScissorX2(), gstate.getScissorY2(), - depthScreenVerts_, count); + tx, ty, tz, count); } diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index 64e8478cd06e..2df31f2fba96 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -357,5 +357,5 @@ class DrawEngineCommon { bool useDepthRaster_ = false; float *depthTransformed_ = nullptr; - DepthScreenVertex *depthScreenVerts_ = nullptr; + int *depthScreenVerts_ = nullptr; };