Skip to content

Commit

Permalink
Depth raster: Switch to a SoA data layout for the screen space verts
Browse files Browse the repository at this point in the history
  • Loading branch information
hrydgard committed Dec 20, 2024
1 parent d9cfb4f commit 8fa1429
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 37 deletions.
51 changes: 25 additions & 26 deletions GPU/Common/DepthRaster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ void DepthRasterRect(uint16_t *dest, int stride, int x1, int y1, int x2, int y2,
// Adapted from Intel's depth rasterizer example.
// Started with the scalar version, will SIMD-ify later.
// x1/y1 etc are the scissor rect.
void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const DepthScreenVertex vertsSub[3], GEComparison compareMode) {
void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, GEComparison compareMode) {
int tileStartX = x1;
int tileEndX = x2;

Expand All @@ -94,15 +94,15 @@ void DepthRasterTriangle(uint16_t *depthBuf, int stride, int x1, int y1, int x2,

// Convert to whole pixels for now. Later subpixel precision.
DepthScreenVertex verts[3];
verts[0].x = vertsSub[0].x;
verts[0].y = vertsSub[0].y;
verts[0].z = vertsSub[0].z;
verts[1].x = vertsSub[2].x;
verts[1].y = vertsSub[2].y;
verts[1].z = vertsSub[2].z;
verts[2].x = vertsSub[1].x;
verts[2].y = vertsSub[1].y;
verts[2].z = vertsSub[1].z;
verts[0].x = tx[0];
verts[0].y = ty[0];
verts[0].z = tz[0];
verts[1].x = tx[2];
verts[1].y = ty[2];
verts[1].z = tz[2];
verts[2].x = tx[1];
verts[2].y = ty[1];
verts[2].z = tz[1];

// use fixed-point only for X and Y. Avoid work for Z and W.
int startX = std::max(std::min(std::min(verts[0].x, verts[1].x), verts[2].x), tileStartX);
Expand Down Expand Up @@ -242,7 +242,7 @@ void DecodeAndTransformForDepthRaster(float *dest, GEPrimitiveType prim, const f
}
}

int DepthRasterClipIndexedTriangles(DepthScreenVertex *screenVerts, const float *transformed, const uint16_t *indexBuffer, int count) {
int DepthRasterClipIndexedTriangles(int *tx, int *ty, int *tz, const float *transformed, const uint16_t *indexBuffer, int count) {
bool cullEnabled = gstate.isCullEnabled();

const float viewportX = gstate.getViewportXCenter();
Expand Down Expand Up @@ -289,28 +289,28 @@ int DepthRasterClipIndexedTriangles(DepthScreenVertex *screenVerts, const float
if (screen[2] >= 65535.0f) {
screen[2] = 65535.0f;
}
screenVerts[outCount].x = screen[0] * (1.0f / 16.0f); // We ditch the subpixel precision here.
screenVerts[outCount].y = screen[1] * (1.0f / 16.0f);
screenVerts[outCount].z = screen[2];

tx[outCount] = screen[0] * (1.0f / 16.0f); // We ditch the subpixel precision here.
ty[outCount] = screen[1] * (1.0f / 16.0f);
tz[outCount] = screen[2];
outCount++;
}
}
return outCount;
}

// Copies the position of each already-transformed vertex into the integer screen-space
// vertex storage consumed by the depth rasterizer.
//
// NOTE: this is a diff rendering, so both versions of every changed line are present.
// The signature and stores that use `screenVerts` are the pre-change array-of-structs
// form; the signature and stores that use tx/ty/tz are the struct-of-arrays form that
// replaces them.
//
//   tx, ty, tz   - output arrays; entry i receives the x, y and z of transformed[i].
//   prim         - only checked by the assert below (must be RECTANGLES or TRIANGLES).
//   transformed  - input vertices; pos[0], pos[1] and pos[2] are read.
//   count        - number of vertices to convert.
void DepthRasterConvertTransformed(DepthScreenVertex *screenVerts, GEPrimitiveType prim, const TransformedVertex *transformed, int count) {
void DepthRasterConvertTransformed(int *tx, int *ty, int *tz, GEPrimitiveType prim, const TransformedVertex *transformed, int count) {
_dbg_assert_(prim == GE_PRIM_RECTANGLES || prim == GE_PRIM_TRIANGLES);

// TODO: This is basically a transpose, or AoS->SoA conversion. There may be faster ways to do it.
for (int i = 0; i < count; i++) {
screenVerts[i].x = (int)transformed[i].pos[0];
screenVerts[i].y = (int)transformed[i].pos[1];
screenVerts[i].z = (u16)transformed[i].pos[2];
// x and y are converted to int with a plain cast.
tx[i] = (int)transformed[i].pos[0];
ty[i] = (int)transformed[i].pos[1];
// z is cast through u16 first and only then stored into the int array.
tz[i] = (u16)transformed[i].pos[2];
}
}

// Rasterizes screen-space vertices.
void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType prim, int x1, int y1, int x2, int y2, const DepthScreenVertex *screenVerts, int count) {
void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType prim, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, int count) {
// Prim should now be either TRIANGLES or RECTs.
_dbg_assert_(prim == GE_PRIM_RECTANGLES || prim == GE_PRIM_TRIANGLES);

Expand All @@ -327,17 +327,16 @@ void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType pr

switch (prim) {
case GE_PRIM_RECTANGLES:
for (int i = 0; i < count / 2; i++) {
uint16_t z = screenVerts[i + 1].z; // depth from second vertex
for (int i = 0; i < count; i += 2) {
uint16_t z = tz[i + 1]; // depth from second vertex
// TODO: Should clip coordinates to the scissor rectangle.
// We remove the subpixel information here.
DepthRasterRect(depth, depthStride, screenVerts[i].x, screenVerts[i].y, screenVerts[i + 1].x, screenVerts[i + 1].y,
z, compareMode);
DepthRasterRect(depth, depthStride, tx[i], ty[i], tx[i + 1], ty[i + 1], z, compareMode);
}
break;
case GE_PRIM_TRIANGLES:
for (int i = 0; i < count / 3; i++) {
DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, screenVerts + i * 3, compareMode);
for (int i = 0; i < count; i += 3) {
DepthRasterTriangle(depth, depthStride, x1, y1, x2, y2, &tx[i], &ty[i], &tz[i], compareMode);
}
break;
default:
Expand Down
8 changes: 4 additions & 4 deletions GPU/Common/DepthRaster.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
// Screen-space vertex in array-of-structs form: integer x/y position plus depth z.
// NOTE: this is a diff rendering, so both versions of the z member are shown.
// `uint16_t z` is the removed line and `int z` is the line that replaces it.
struct DepthScreenVertex {
int x;
int y;
uint16_t z;
int z;
};

// Specialized, very limited depth-only rasterizer.
Expand All @@ -17,7 +17,7 @@ struct DepthScreenVertex {
class VertexDecoder;
struct TransformedVertex;

int DepthRasterClipIndexedTriangles(DepthScreenVertex *screenVerts, const float *transformed, const uint16_t *indexBuffer, int count);
int DepthRasterClipIndexedTriangles(int *tx, int *ty, int *tz, const float *transformed, const uint16_t *indexBuffer, int count);
void DecodeAndTransformForDepthRaster(float *dest, GEPrimitiveType prim, const float *worldviewproj, const void *vertexData, int count, VertexDecoder *dec, u32 vertTypeID);
void DepthRasterConvertTransformed(DepthScreenVertex *screenVerts, GEPrimitiveType prim, const TransformedVertex *transformed, int count);
void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType prim, int x1, int y1, int x2, int y2, const DepthScreenVertex *screenVerts, int count);
void DepthRasterConvertTransformed(int *tx, int *ty, int *tz, GEPrimitiveType prim, const TransformedVertex *transformed, int count);
void DepthRasterScreenVerts(uint16_t *depth, int depthStride, GEPrimitiveType prim, int x1, int y1, int x2, int y2, const int *tx, const int *ty, const int *tz, int count);
22 changes: 16 additions & 6 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
// Buffer size constants.
// NOTE: this is a diff rendering. The first DEPTH_SCREENVERTS_SIZE line (the one using
// sizeof(DepthScreenVertex)) is the removed definition; the three DEPTH_SCREENVERTS_*
// lines after it are its replacement.
enum {
TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof(TransformedVertex),
// NOTE(review): this is the size passed to AllocateMemoryPages for the float buffer
// depthTransformed_. Presumably that size is in bytes, which would give only 4 bytes
// (one float) per vertex - confirm the intended unit and per-vertex float count.
DEPTH_TRANSFORMED_SIZE = VERTEX_BUFFER_MAX * 4,
DEPTH_SCREENVERTS_SIZE = VERTEX_BUFFER_MAX * sizeof(DepthScreenVertex),
// Number of int entries between the starts of the x, y and z component arrays.
DEPTH_SCREENVERTS_COMPONENT_COUNT = VERTEX_BUFFER_MAX,
// Bytes reserved per component array, including 384 bytes of slack.
// NOTE(review): ty and tz are derived by advancing the int pointer by
// DEPTH_SCREENVERTS_COMPONENT_COUNT elements, not by this byte size, so all of the
// slack ends up after the z array rather than after each component - confirm intended.
DEPTH_SCREENVERTS_COMPONENT_SIZE = DEPTH_SCREENVERTS_COMPONENT_COUNT * sizeof(int) + 384,
// Total allocation covering the three component arrays.
DEPTH_SCREENVERTS_SIZE = DEPTH_SCREENVERTS_COMPONENT_SIZE * 3,
};

DrawEngineCommon::DrawEngineCommon() : decoderMap_(32) {
Expand All @@ -54,7 +56,7 @@ DrawEngineCommon::DrawEngineCommon() : decoderMap_(32) {
useDepthRaster_ = PSP_CoreParameter().compat.flags().SoftwareRasterDepth;
if (useDepthRaster_) {
depthTransformed_ = (float *)AllocateMemoryPages(DEPTH_TRANSFORMED_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
depthScreenVerts_ = (DepthScreenVertex *)AllocateMemoryPages(DEPTH_SCREENVERTS_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
depthScreenVerts_ = (int *)AllocateMemoryPages(DEPTH_SCREENVERTS_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
}
}

Expand Down Expand Up @@ -933,12 +935,16 @@ void DrawEngineCommon::DepthRasterTransform(GEPrimitiveType prim, VertexDecoder
numDec += drawVerts_[i].vertexCount;
}

int *tx = depthScreenVerts_;
int *ty = depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT;
int *tz = depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT * 2;

// Clip and triangulate using the index buffer.
int outVertCount = DepthRasterClipIndexedTriangles(depthScreenVerts_, depthTransformed_, decIndex_, numDec);
int outVertCount = DepthRasterClipIndexedTriangles(tx, ty, tz, depthTransformed_, decIndex_, numDec);

DepthRasterScreenVerts((uint16_t *)Memory::GetPointerWrite(gstate.getDepthBufRawAddress() | 0x04000000), gstate.DepthBufStride(),
GE_PRIM_TRIANGLES, gstate.getScissorX1(), gstate.getScissorY1(), gstate.getScissorX2(), gstate.getScissorY2(),
depthScreenVerts_, outVertCount);
tx, ty, tz, outVertCount);
}

void DrawEngineCommon::DepthRasterPretransformed(GEPrimitiveType prim, const TransformedVertex *inVerts, int count) {
Expand All @@ -955,8 +961,12 @@ void DrawEngineCommon::DepthRasterPretransformed(GEPrimitiveType prim, const Tra

_dbg_assert_(prim != GE_PRIM_TRIANGLE_STRIP && prim != GE_PRIM_TRIANGLE_FAN);

DepthRasterConvertTransformed(depthScreenVerts_, prim, inVerts, count);
int *tx = depthScreenVerts_;
int *ty = depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT;
int *tz = depthScreenVerts_ + DEPTH_SCREENVERTS_COMPONENT_COUNT * 2;

DepthRasterConvertTransformed(tx, ty, tz, prim, inVerts, count);
DepthRasterScreenVerts((uint16_t *)Memory::GetPointerWrite(gstate.getDepthBufRawAddress() | 0x04000000), gstate.DepthBufStride(),
prim, gstate.getScissorX1(), gstate.getScissorY1(), gstate.getScissorX2(), gstate.getScissorY2(),
depthScreenVerts_, count);
tx, ty, tz, count);
}
2 changes: 1 addition & 1 deletion GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -357,5 +357,5 @@ class DrawEngineCommon {
bool useDepthRaster_ = false;

float *depthTransformed_ = nullptr;
DepthScreenVertex *depthScreenVerts_ = nullptr;
int *depthScreenVerts_ = nullptr;
};

0 comments on commit 8fa1429

Please sign in to comment.