From: eihrul
Date: Sun, 20 Mar 2011 14:14:47 +0000 (+0000)
Subject: fix some redundant shuffles
X-Git-Tag: xonotic-v0.5.0~393
X-Git-Url: https://git.rm.cloudns.org/?a=commitdiff_plain;h=2917611dcf9f1d0ad5b63a92a3153b6c85725082;p=xonotic%2Fdarkplaces.git

fix some redundant shuffles

git-svn-id: svn://svn.icculus.org/twilight/trunk/darkplaces@10946 d7cf8633-e32d-0410-b094-e92efae38249
::stable-branch::merge=d633843993690961f6e17fc4a129dda82a66dc95
---

diff --git a/dpsoftrast.c b/dpsoftrast.c
index 9598c63f..93b42ada 100644
--- a/dpsoftrast.c
+++ b/dpsoftrast.c
@@ -2992,7 +2992,7 @@ void DPSOFTRAST_Draw_Span_AddBloomBGRA8(const DPSOFTRAST_State_Triangle * RESTRI
 #ifdef SSE2_PRESENT
 	int x, startx = span->startx, endx = span->endx;
 	__m128i localcolor = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(subcolor), _mm_set1_ps(255.0f))), _MM_SHUFFLE(3, 0, 1, 2));
-	localcolor = _mm_shuffle_epi32(_mm_packs_epi32(localcolor, localcolor), _MM_SHUFFLE(1, 0, 1, 0));
+	localcolor = _mm_packs_epi32(localcolor, localcolor);
 	for (x = startx;x+2 <= endx;x+=2)
 	{
 		__m128i pix1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&ina4ub[x*4]), _mm_setzero_si128());
@@ -3057,7 +3057,7 @@ void DPSOFTRAST_Draw_Span_TintedAddBuffersBGRA8(const DPSOFTRAST_State_Triangle
 #ifdef SSE2_PRESENT
 	int x, startx = span->startx, endx = span->endx;
 	__m128i tint = _mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(inbtintbgra), _mm_set1_ps(256.0f)));
-	tint = _mm_shuffle_epi32(_mm_packs_epi32(tint, tint), _MM_SHUFFLE(1, 0, 1, 0));
+	tint = _mm_packs_epi32(tint, tint);
 	for (x = startx;x+2 <= endx;x+=2)
 	{
 		__m128i pix1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&ina4ub[x*4]), _mm_setzero_si128());
@@ -3103,7 +3103,7 @@ void DPSOFTRAST_Draw_Span_MixUniformColorBGRA8(const DPSOFTRAST_State_Triangle *
 #ifdef SSE2_PRESENT
 	int x, startx = span->startx, endx = span->endx;
 	__m128i localcolor = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(color), _mm_set1_ps(255.0f))), _MM_SHUFFLE(3, 0, 1, 2)), blend;
-	localcolor = _mm_shuffle_epi32(_mm_packs_epi32(localcolor, localcolor), _MM_SHUFFLE(1, 0, 1, 0));
+	localcolor = _mm_packs_epi32(localcolor, localcolor);
 	blend = _mm_slli_epi16(_mm_shufflehi_epi16(_mm_shufflelo_epi16(localcolor, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)), 4);
 	for (x = startx;x+2 <= endx;x+=2)
 	{