int triangle; // triangle this span was generated by
int x; // framebuffer x coord
int y; // framebuffer y coord
- int length; // pixel count
int startx; // usable range (according to pixelmask)
int endx; // usable range (according to pixelmask)
unsigned char *pixelmask; // true for pixels that passed depth test, false for others
DPSOFTRAST_BLENDMODE_MUL2,
DPSOFTRAST_BLENDMODE_SUBALPHA,
DPSOFTRAST_BLENDMODE_PSEUDOALPHA,
+ DPSOFTRAST_BLENDMODE_INVADD,
DPSOFTRAST_BLENDMODE_TOTAL
}
DPSOFTRAST_BLENDMODE;
// derived values (DPSOFTRAST_VALIDATE_FB)
int fb_colormask;
- int fb_clearscissor[4];
+ int fb_scissor[4];
ALIGN(float fb_viewportcenter[4]);
ALIGN(float fb_viewportscale[4]);
if (x2 > dpsoftrast.fb_width) x2 = dpsoftrast.fb_width;
if (y1 < 0) y1 = 0;
if (y2 > dpsoftrast.fb_height) y2 = dpsoftrast.fb_height;
- thread->fb_clearscissor[0] = x1;
- thread->fb_clearscissor[1] = y1;
- thread->fb_clearscissor[2] = x2 - x1;
- thread->fb_clearscissor[3] = y2 - y1;
+ thread->fb_scissor[0] = x1;
+ thread->fb_scissor[1] = y1;
+ thread->fb_scissor[2] = x2 - x1;
+ thread->fb_scissor[3] = y2 - y1;
DPSOFTRAST_RecalcViewport(thread->viewport, thread->fb_viewportcenter, thread->fb_viewportscale);
}
{
#define BLENDFUNC(sfactor, dfactor, blendmode) \
case (sfactor<<16)|dfactor: thread->fb_blendmode = blendmode; break;
- BLENDFUNC(GL_SRC_COLOR, GL_ONE, DPSOFTRAST_BLENDMODE_SUBALPHA)
+ BLENDFUNC(GL_SRC_ALPHA, GL_ONE, DPSOFTRAST_BLENDMODE_SUBALPHA)
default: thread->fb_blendmode = DPSOFTRAST_BLENDMODE_OPAQUE; break;
}
}
BLENDFUNC(GL_DST_COLOR, GL_ZERO, DPSOFTRAST_BLENDMODE_MUL)
BLENDFUNC(GL_DST_COLOR, GL_SRC_COLOR, DPSOFTRAST_BLENDMODE_MUL2)
BLENDFUNC(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, DPSOFTRAST_BLENDMODE_PSEUDOALPHA)
- BLENDFUNC(GL_SRC_COLOR, GL_ONE, DPSOFTRAST_BLENDMODE_SUBALPHA)
+ BLENDFUNC(GL_ONE_MINUS_DST_COLOR, GL_ONE, DPSOFTRAST_BLENDMODE_INVADD)
default: thread->fb_blendmode = DPSOFTRAST_BLENDMODE_OPAQUE; break;
}
}
unsigned int *p;
unsigned int c;
DPSOFTRAST_Validate(thread, DPSOFTRAST_VALIDATE_FB);
- x1 = thread->fb_clearscissor[0];
- y1 = thread->fb_clearscissor[1];
- x2 = thread->fb_clearscissor[0] + thread->fb_clearscissor[2];
- y2 = thread->fb_clearscissor[1] + thread->fb_clearscissor[3];
+ x1 = thread->fb_scissor[0];
+ y1 = thread->fb_scissor[1];
+ x2 = thread->fb_scissor[0] + thread->fb_scissor[2];
+ y2 = thread->fb_scissor[1] + thread->fb_scissor[3];
if (y1 < miny1) y1 = miny1;
if (y2 > maxy2) y2 = maxy2;
w = x2 - x1;
unsigned int *p;
unsigned int c;
DPSOFTRAST_Validate(thread, DPSOFTRAST_VALIDATE_FB);
- x1 = thread->fb_clearscissor[0];
- y1 = thread->fb_clearscissor[1];
- x2 = thread->fb_clearscissor[0] + thread->fb_clearscissor[2];
- y2 = thread->fb_clearscissor[1] + thread->fb_clearscissor[3];
+ x1 = thread->fb_scissor[0];
+ y1 = thread->fb_scissor[1];
+ x2 = thread->fb_scissor[0] + thread->fb_scissor[2];
+ y2 = thread->fb_scissor[1] + thread->fb_scissor[3];
if (y1 < miny1) y1 = miny1;
if (y2 > maxy2) y2 = maxy2;
w = x2 - x1;
pixel[x*4+3] = d[3];
}
break;
+ case DPSOFTRAST_BLENDMODE_INVADD:
+ for (x = startx;x < endx;x++)
+ {
+ if (!pixelmask[x])
+ continue;
+ d[0] = (int)((255.0f-pixel[x*4+2])*in4f[x*4+0] + pixel[x*4+2]);if (d[0] > 255) d[0] = 255;
+ d[1] = (int)((255.0f-pixel[x*4+1])*in4f[x*4+1] + pixel[x*4+1]);if (d[1] > 255) d[1] = 255;
+ d[2] = (int)((255.0f-pixel[x*4+0])*in4f[x*4+2] + pixel[x*4+0]);if (d[2] > 255) d[2] = 255;
+ d[3] = (int)((255.0f-pixel[x*4+3])*in4f[x*4+3] + pixel[x*4+3]);if (d[3] > 255) d[3] = 255;
+ pixel[x*4+0] = d[0];
+ pixel[x*4+1] = d[1];
+ pixel[x*4+2] = d[2];
+ pixel[x*4+3] = d[3];
+ }
+ break;
}
}
dst = _mm_add_epi16(src, _mm_sub_epi16(dst, _mm_srli_epi16(_mm_mullo_epi16(dst, blend), 8)));
});
break;
+ case DPSOFTRAST_BLENDMODE_INVADD:
+ FINISHBLEND({
+ dst = _mm_add_epi16(dst, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(_mm_set1_epi16(255), dst), 4), _mm_slli_epi16(src, 4)));
+ }, {
+ dst = _mm_add_epi16(dst, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(_mm_set1_epi16(255), dst), 4), _mm_slli_epi16(src, 4)));
+ });
}
#endif
}
// if no texture is bound, just fill it with white
if (!texture)
{
- memset(out4ub + startx*4, 255, span->length*4);
+ memset(out4ub + startx*4, 255, (span->endx - span->startx)*4);
return;
}
mip = triangle->mip[texunitindex];
void DPSOFTRAST_Draw_Span_TextureCubeVaryingBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char * RESTRICT out4ub, int texunitindex, int arrayindex, const float * RESTRICT zf)
{
// TODO: IMPLEMENT
- memset(out4ub, 255, span->length*4);
+ // placeholder until cube sampling is implemented: fill the usable pixel
+ // range [startx, endx) of the 4-byte-per-pixel output with 255.
+ // the byte count must be (endx - startx)*4; the reversed subtraction is
+ // negative and becomes a huge size_t, overrunning out4ub.
+ memset(out4ub + span->startx*4, 255, (span->endx - span->startx)*4);
}
float DPSOFTRAST_SampleShadowmap(const float *vector)
float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z);
- memset(buffer_FragColorbgra8, 0, span->length*4);
+ memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4);
DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8);
}
float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z);
- memset(buffer_FragColorbgra8, 0, span->length*4);
+ memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4);
DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8);
}
float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z);
- memset(buffer_FragColorbgra8, 0, span->length*4);
+ memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4);
DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8);
}
float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z);
- memset(buffer_FragColorbgra8, 0, span->length*4);
+ memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4);
DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8);
}
float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z);
- memset(buffer_FragColorbgra8, 0, span->length*4);
+ memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4);
DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8);
}
float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z);
- memset(buffer_FragColorbgra8, 0, span->length*4);
+ memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4);
DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8);
}
float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z);
- memset(buffer_FragColorbgra8, 0, span->length*4);
+ memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4);
DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8);
}
depthslope = (int)(wslope*DPSOFTRAST_DEPTHSCALE);
depth = (int)(w*DPSOFTRAST_DEPTHSCALE - DPSOFTRAST_DEPTHOFFSET*(thread->polygonoffset[1] + fabs(wslope)*thread->polygonoffset[0]));
depthpixel = dpsoftrast.fb_depthpixels + span->y * dpsoftrast.fb_width + span->x;
+ startx = span->startx;
+ endx = span->endx;
switch(thread->fb_depthfunc)
{
default:
- case GL_ALWAYS: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = true; break;
- case GL_LESS: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] < d; break;
- case GL_LEQUAL: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] <= d; break;
- case GL_EQUAL: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] == d; break;
- case GL_GEQUAL: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] >= d; break;
- case GL_GREATER: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] > d; break;
- case GL_NEVER: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = false; break;
+ case GL_ALWAYS: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = true; break;
+ case GL_LESS: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] < d; break;
+ case GL_LEQUAL: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] <= d; break;
+ case GL_EQUAL: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] == d; break;
+ case GL_GEQUAL: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] >= d; break;
+ case GL_GREATER: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] > d; break;
+ case GL_NEVER: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = false; break;
}
//colorpixel = dpsoftrast.fb_colorpixels[0] + (span->y * dpsoftrast.fb_width + span->x) * 4;;
- //for (x = 0;x < span->length;x++)
+ //for (x = startx;x < endx;x++)
// colorpixel[x] = (depthpixel[x] & 0xFF000000) ? (0x00FF0000) : (depthpixel[x] & 0x00FF0000);
// if there is no color buffer, skip pixel shader
- startx = 0;
- endx = span->length;
while (startx < endx && !pixelmask[startx])
startx++;
while (endx > startx && !pixelmask[endx-1])
// if there is no color buffer, skip pixel shader
if (dpsoftrast.fb_colorpixels[0] && thread->fb_colormask)
{
- memset(pixelmask, 1, span->length);
+ memset(pixelmask + span->startx, 1, span->endx - span->startx);
span->pixelmask = pixelmask;
- span->startx = 0;
- span->endx = span->length;
DPSOFTRAST_ShaderModeTable[thread->shader_mode].Span(thread, triangle, span);
}
}
{
#ifdef SSE2_PRESENT
int cullface = thread->cullface;
- int width = dpsoftrast.fb_width;
- int miny1 = thread->miny1;
- int maxy1 = thread->maxy1;
- int miny2 = thread->miny2;
- int maxy2 = thread->maxy2;
+ int minx, maxx, miny, maxy;
+ int miny1, maxy1, miny2, maxy2;
__m128i fbmin, fbmax;
__m128 viewportcenter, viewportscale;
int firstvertex = command->firstvertex;
__m128 screen[4];
DPSOFTRAST_State_Triangle *triangle;
DPSOFTRAST_Texture *texture;
+ DPSOFTRAST_ValidateQuick(thread, DPSOFTRAST_VALIDATE_DRAW);
+ miny = thread->fb_scissor[1];
+ maxy = thread->fb_scissor[1] + thread->fb_scissor[3];
+ miny1 = bound(miny, thread->miny1, maxy);
+ maxy1 = bound(miny, thread->maxy1, maxy);
+ miny2 = bound(miny, thread->miny2, maxy);
+ maxy2 = bound(miny, thread->maxy2, maxy);
if ((command->starty >= maxy1 || command->endy <= miny1) && (command->starty >= maxy2 || command->endy <= miny2))
{
if (!ATOMIC_DECREMENT(command->refcount))
}
return;
}
- DPSOFTRAST_ValidateQuick(thread, DPSOFTRAST_VALIDATE_DRAW);
- fbmin = _mm_setr_epi16(0, miny1, 0, miny1, 0, miny1, 0, miny1);
- fbmax = _mm_sub_epi16(_mm_setr_epi16(width, maxy2, width, maxy2, width, maxy2, width, maxy2), _mm_set1_epi16(1));
+ minx = thread->fb_scissor[0];
+ maxx = thread->fb_scissor[0] + thread->fb_scissor[2];
+ fbmin = _mm_setr_epi16(minx, miny1, minx, miny1, minx, miny1, minx, miny1);
+ fbmax = _mm_sub_epi16(_mm_setr_epi16(maxx, maxy2, maxx, maxy2, maxx, maxy2, maxx, maxy2), _mm_set1_epi16(1));
viewportcenter = _mm_load_ps(thread->fb_viewportcenter);
viewportscale = _mm_load_ps(thread->fb_viewportscale);
screen[3] = _mm_setzero_ps();
int startx, endx, offset;
startx = _mm_cvtss_si32(xcoords);
endx = _mm_cvtss_si32(_mm_movehl_ps(xcoords, xcoords));
- if (startx < 0) startx = 0;
- if (endx > dpsoftrast.fb_width) endx = dpsoftrast.fb_width;
+ if (startx < minx)
+ {
+ if (startx < 0) startx = 0;
+ startx += (minx-startx)&~(DPSOFTRAST_DRAW_MAXSPANLENGTH-1);
+ }
+ if (endx > maxx) endx = maxx;
if (startx >= endx) continue;
- for (offset = startx; offset < endx;)
+ for (offset = startx; offset < endx;offset += DPSOFTRAST_DRAW_MAXSPANLENGTH)
{
DPSOFTRAST_State_Span *span = &thread->spans[thread->numspans];
span->triangle = thread->numtriangles;
span->x = offset;
span->y = y;
- span->length = endx - offset;
- if (span -> length > DPSOFTRAST_DRAW_MAXSPANLENGTH)
- span -> length = DPSOFTRAST_DRAW_MAXSPANLENGTH;
- offset += span->length;
+ span->startx = max(minx - offset, 0);
+ span->endx = min(endx - offset, DPSOFTRAST_DRAW_MAXSPANLENGTH);
+ if (span->startx >= span->endx)
+ continue;
if (++thread->numspans >= DPSOFTRAST_DRAW_MAXSPANS)
DPSOFTRAST_Draw_ProcessSpans(thread);
}