12 #ifndef BLITTER_32BPP_SSE_FUNC_HPP 13 #define BLITTER_32BPP_SSE_FUNC_HPP 17 static inline void InsertFirstUint32(
const uint32 value, __m128i &into)
19 #if (SSE_VERSION >= 4) 20 into = _mm_insert_epi32(into, value, 0);
22 into = _mm_insert_epi16(into, value, 0);
23 into = _mm_insert_epi16(into, value >> 16, 1);
27 static inline void InsertSecondUint32(
const uint32 value, __m128i &into)
29 #if (SSE_VERSION >= 4) 30 into = _mm_insert_epi32(into, value, 1);
32 into = _mm_insert_epi16(into, value, 2);
33 into = _mm_insert_epi16(into, value >> 16, 3);
37 static inline void LoadUint64(
const uint64 value, __m128i &into)
40 into = _mm_cvtsi64_si128(value);
42 #if (SSE_VERSION >= 4) 43 into = _mm_cvtsi32_si128(value);
44 InsertSecondUint32(value >> 32, into);
46 (*(um128i*) &into).m128i_u64[0] = value;
51 static inline __m128i PackUnsaturated(__m128i from,
const __m128i &mask)
53 #if (SSE_VERSION == 2) 54 from = _mm_and_si128(from, mask);
55 return _mm_packus_epi16(from, from);
57 return _mm_shuffle_epi8(from, mask);
61 static inline __m128i DistributeAlpha(
const __m128i from,
const __m128i &mask)
63 #if (SSE_VERSION == 2) 64 __m128i alphaAB = _mm_shufflelo_epi16(from, 0x3F);
65 return _mm_shufflehi_epi16(alphaAB, 0x3F);
67 return _mm_shuffle_epi8(from, mask);
71 static inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst,
const __m128i &distribution_mask,
const __m128i &pack_mask)
73 __m128i srcAB = _mm_unpacklo_epi8(src, _mm_setzero_si128());
74 __m128i dstAB = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
76 __m128i alphaAB = _mm_cmpgt_epi16(srcAB, _mm_setzero_si128());
77 alphaAB = _mm_srli_epi16(alphaAB, 15);
78 alphaAB = _mm_add_epi16(alphaAB, srcAB);
79 alphaAB = DistributeAlpha(alphaAB, distribution_mask);
81 srcAB = _mm_sub_epi16(srcAB, dstAB);
82 srcAB = _mm_mullo_epi16(srcAB, alphaAB);
83 srcAB = _mm_srli_epi16(srcAB, 8);
84 srcAB = _mm_add_epi16(srcAB, dstAB);
85 return PackUnsaturated(srcAB, pack_mask);
91 static inline __m128i DarkenTwoPixels(__m128i src, __m128i dst,
const __m128i &distribution_mask,
const __m128i &tr_nom_base)
93 __m128i srcAB = _mm_unpacklo_epi8(src, _mm_setzero_si128());
94 __m128i dstAB = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
95 __m128i alphaAB = DistributeAlpha(srcAB, distribution_mask);
96 alphaAB = _mm_srli_epi16(alphaAB, 2);
97 __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
98 dstAB = _mm_mullo_epi16(dstAB, nom);
99 dstAB = _mm_srli_epi16(dstAB, 8);
100 return _mm_packus_epi16(dstAB, dstAB);
/*
 * Adjust the brightness of one colour; the alpha byte of the input is carried
 * over to the result unchanged.
 *
 * NOTE(review): this listing has lost lines. c16_ob, ret and c128 are used
 * below without a visible declaration, the braces are gone, and no visible
 * statement uses the `brightness` parameter. Restore the missing lines from
 * the upstream file before building.
 */
103 IGNORE_UNINITIALIZED_WARNING_START
104 static Colour ReallyAdjustBrightness(
Colour colour, uint8 brightness)
/* Place b, g and r in three separate 16-bit fields of one 64-bit integer. */
106 uint64 c16 = colour.b | (uint64) colour.g << 16 | (uint64) colour.r << 32;
109 c16 /= Blitter_32bppBase::DEFAULT_BRIGHTNESS;
/* Keep 9 bits per 16-bit field. */
110 c16 &= 0x01FF01FF01FFULL;
/* Bit 15 of each field of c16_ob selects an all-ones byte (1 * 0xFF) that is then ANDed with c16. */
113 c16_ob = (((c16_ob >> (8 + 7)) & 0x0100010001ULL) * 0xFF) & c16;
/* Half of the sum of the three 16-bit fields. */
114 const uint ob = ((uint16) c16_ob + (uint16) (c16_ob >> 16) + (uint16) (c16_ob >> 32)) / 2;
/* Top byte of the input colour, OR-ed back into the result at the end. */
116 const uint32 alpha32 = colour.
data & 0xFF000000;
118 LoadUint64(c16, ret);
120 __m128i ob128 = _mm_cvtsi32_si128(ob);
/* Selector 0xC0 copies lane 0 into lanes 1 and 2; lane 3 keeps its own value. */
121 ob128 = _mm_shufflelo_epi16(ob128, 0xC0);
122 __m128i white = OVERBRIGHT_VALUE_MASK;
/* ret = (white - c128) * ob128 / 256 + c128, per 16-bit lane (the subtraction saturates at 0). */
124 ret = _mm_subs_epu16(white, c128);
125 ret = _mm_mullo_epi16(ret, ob128);
126 ret = _mm_srli_epi16(ret, 8);
127 ret = _mm_add_epi16(ret, c128);
/* Narrow to bytes with unsigned saturation and merge with the saved top byte. */
130 ret = _mm_packus_epi16(ret, ret);
131 return alpha32 | _mm_cvtsi128_si32(ret);
133 IGNORE_UNINITIALIZED_WARNING_STOP
138 static inline Colour AdjustBrightneSSE(
Colour colour, uint8 brightness)
141 if (brightness == Blitter_32bppBase::DEFAULT_BRIGHTNESS)
return colour;
143 return ReallyAdjustBrightness(colour, brightness);
/*
 * Brightness-adjust two pixels held in the low 64 bits of `from`.
 * `brightness` carries two brightness bytes, in bits 8..15 and 24..31 (the
 * only bits that survive the 0xFF00FF00 mask below).
 *
 * NOTE(review): this listing has lost lines. The braces and the contents of
 * the `#if (SSE_VERSION < 3)` branch, together with its #else/#endif, are not
 * visible; the intrinsics used below (_mm_shuffle_epi8, _mm_hadd_epi16)
 * require SSSE3, so they cannot belong to the `< 3` branch.
 */
146 static inline __m128i AdjustBrightnessOfTwoPixels(__m128i from, uint32 brightness)
/* Keep only the two brightness bytes, then add DEFAULT_BRIGHTNESS into the lowest byte. */
148 #if (SSE_VERSION < 3) 155 brightness &= 0xFF00FF00;
156 brightness += Blitter_32bppBase::DEFAULT_BRIGHTNESS;
/* Widen every colour byte to a 16-bit lane and multiply by the per-lane brightness. */
158 __m128i colAB = _mm_unpacklo_epi8(from, _mm_setzero_si128());
159 __m128i briAB = _mm_cvtsi32_si128(brightness);
160 briAB = _mm_shuffle_epi8(briAB, BRIGHTNESS_LOW_CONTROL_MASK);
161 colAB = _mm_mullo_epi16(colAB, briAB);
/* colAB_ob keeps bit 15 of each product; colAB becomes product / 128. */
162 __m128i colAB_ob = _mm_srli_epi16(colAB, 8 + 7);
163 colAB = _mm_srli_epi16(colAB, 7);
169 colAB = _mm_and_si128(colAB, BRIGHTNESS_DIV_CLEANER);
170 colAB_ob = _mm_and_si128(colAB_ob, OVERBRIGHT_PRESENCE_MASK);
171 colAB_ob = _mm_mullo_epi16(colAB_ob, OVERBRIGHT_VALUE_MASK);
172 colAB_ob = _mm_and_si128(colAB_ob, colAB);
/* Two horizontal adds: lane 0 = sum of lanes 0..3 (first pixel), lane 1 = sum of lanes 4..7 (second pixel). */
173 __m128i obAB = _mm_hadd_epi16(_mm_hadd_epi16(colAB_ob, _mm_setzero_si128()), _mm_setzero_si128());
/* Halve the sums before distributing them with the control mask. */
175 obAB = _mm_srli_epi16(obAB, 1);
176 obAB = _mm_shuffle_epi8(obAB, OVERBRIGHT_CONTROL_MASK);
/* retAB = (OVERBRIGHT_VALUE_MASK - colAB) * obAB / 256 + colAB, per 16-bit lane (the subtraction saturates at 0). */
177 __m128i retAB = OVERBRIGHT_VALUE_MASK;
178 retAB = _mm_subs_epu16(retAB, colAB);
179 retAB = _mm_mullo_epi16(retAB, obAB);
180 retAB = _mm_srli_epi16(retAB, 8);
181 retAB = _mm_add_epi16(retAB, colAB);
/* Narrow to bytes with unsigned saturation. */
183 return _mm_packus_epi16(retAB, retAB);
/*
 * Templated sprite drawing routine, specialised on blitter mode, read mode,
 * last-block type and translucency. It walks the sprite line by line and
 * writes to the destination buffer, mostly two pixels per iteration through
 * the SSE helpers.
 *
 * NOTE(review): this listing has lost lines. The function signature, the
 * braces, the switch/case structure, several #else/#endif lines and a number
 * of statements (e.g. the declarations of dst and dst_line, and the
 * pointer increments inside the loops) are not visible. The comments below
 * only describe what the visible statements do.
 */
187 #if FULL_ANIMATION == 0 195 IGNORE_UNINITIALIZED_WARNING_START
196 template <BlitterMode mode, Blitter_32bppSSE2::ReadMode read_mode, Blitter_32bppSSE2::BlockType bt_last,
bool translucent>
197 #if (SSE_VERSION == 2) 199 #elif (SSE_VERSION == 3) 201 #elif (SSE_VERSION == 4) 205 const byte *
const remap = bp->
remap;
207 int effective_width = bp->
width;
/* Locate the sprite data for the requested zoom level and skip the first skip_top lines of both the map-value and the colour data. */
210 const SpriteData *
const sd = (
const SpriteData *) bp->
sprite;
211 const SpriteInfo *
const si = &sd->infos[zoom];
212 const MapValue *src_mv_line = (
const MapValue *) &sd->data[si->mv_offset] + bp->
skip_top * si->sprite_width;
213 const Colour *src_rgba_line = (
const Colour *) ((
const byte *) &sd->data[si->sprite_offset] + bp->
skip_top * si->sprite_line_size);
215 if (read_mode != RM_WITH_MARGIN) {
219 const MapValue *src_mv = src_mv_line;
/* Per SSE version, select which constants are handed to AlphaBlendTwoPixels() and DarkenTwoPixels(). */
222 #if (SSE_VERSION == 2) 223 const __m128i clear_hi = CLEAR_HIGH_BYTE_MASK;
224 #define ALPHA_BLEND_PARAM_1 clear_hi 225 #define ALPHA_BLEND_PARAM_2 clear_hi 226 #define DARKEN_PARAM_1 tr_nom_base 227 #define DARKEN_PARAM_2 tr_nom_base 229 const __m128i a_cm = ALPHA_CONTROL_MASK;
230 const __m128i pack_low_cm = PACK_LOW_CONTROL_MASK;
231 #define ALPHA_BLEND_PARAM_1 a_cm 232 #define ALPHA_BLEND_PARAM_2 pack_low_cm 233 #define DARKEN_PARAM_1 a_cm 234 #define DARKEN_PARAM_2 tr_nom_base 236 const __m128i tr_nom_base = TRANSPARENT_NOM_BASE;
/* One iteration per line to draw. */
238 for (
int y = bp->
height; y != 0; y--) {
/* The first META_LENGTH entries of a colour line are metadata, not pixels. */
240 const Colour *src = src_rgba_line + META_LENGTH;
/* Margin mode: entry 0 of the line metadata is added to both src and dst, and the width to draw is reduced using entries 0 and 1; a line with nothing left is skipped. */
243 if (read_mode == RM_WITH_MARGIN) {
244 assert(bt_last == BT_NONE);
245 src += src_rgba_line[0].
data;
246 dst += src_rgba_line[0].
data;
248 const int width_diff = si->sprite_width - bp->
width;
249 effective_width = bp->
width - (int) src_rgba_line[0].data;
250 const int delta_diff = (int) src_rgba_line[1].data - width_diff;
251 const int new_width = effective_width - delta_diff;
252 effective_width = delta_diff > 0 ? new_width : effective_width;
253 if (effective_width <= 0)
goto next_line;
/* Plain copy: a source pixel replaces the destination pixel whenever its alpha is non-zero. */
259 for (uint x = (uint) effective_width; x > 0; x--) {
260 if (src->
a) *dst = *src;
/* Alpha blend, two pixels (64 bits) per iteration. */
267 for (uint x = (uint) effective_width / 2; x > 0; x--) {
268 __m128i srcABCD = _mm_loadl_epi64((
const __m128i*) src);
269 __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
270 _mm_storel_epi64((__m128i*) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
/* Blend the single remaining pixel when the width is odd (or the block type says so). */
275 if ((bt_last == BT_NONE && effective_width & 1) || bt_last == BT_ODD) {
276 __m128i srcABCD = _mm_cvtsi32_si128(src->
data);
277 __m128i dstABCD = _mm_cvtsi32_si128(dst->
data);
278 dst->
data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
/* Two-pixel variant that also reads two MapValues at once as a single 32-bit word. */
283 #if (SSE_VERSION >= 3) 284 for (uint x = (uint) effective_width / 2; x > 0; x--) {
285 __m128i srcABCD = _mm_loadl_epi64((
const __m128i*) src);
286 __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
287 uint32 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
/* Only remap when at least one of the two low bytes (bits 0..7 and 16..23) of the pair is non-zero. */
290 if (mvX2 & 0x00FF00FF) {
/* CMOV_REMAP: pick between the source colour and the palette colour of remap[m] (keeping the source alpha byte) using conditional expressions. */
291 #define CMOV_REMAP(m_colour, m_colour_init, m_src, m_m) \ 293 Colour m_colour = m_colour_init; \ 295 const Colour srcm = (Colour) (m_src); \ 296 const uint m = (byte) (m_m); \ 297 const uint r = remap[m]; \ 298 const Colour cmap = (this->LookupColourInPalette(r).data & 0x00FFFFFF) | (srcm.data & 0xFF000000); \ 299 m_colour = r == 0 ? m_colour : cmap; \ 300 m_colour = m != 0 ? m_colour : srcm; \ 303 uint64 srcs = _mm_cvtsi128_si64(srcABCD);
304 uint64 remapped_src = 0;
305 CMOV_REMAP(c0, 0, srcs, mvX2);
306 remapped_src = c0.data;
307 CMOV_REMAP(c1, 0, srcs >> 32, mvX2 >> 16);
308 remapped_src |= (uint64) c1.data << 32;
309 srcABCD = _mm_cvtsi64_si128(remapped_src);
312 CMOV_REMAP(c0, 0, _mm_cvtsi128_si32(srcABCD), mvX2);
313 remapped_src[0] = c0.
data;
314 CMOV_REMAP(c1, 0, src[1], mvX2 >> 16);
315 remapped_src[1] = c1.
data;
316 srcABCD = _mm_loadl_epi64((__m128i*) &remapped_src);
/* Skip the brightness adjustment when both high bytes of the pair equal 0x80 (presumably the default brightness — confirm). */
319 if ((mvX2 & 0xFF00FF00) != 0x80008000) srcABCD = AdjustBrightnessOfTwoPixels(srcABCD, mvX2);
323 _mm_storel_epi64((__m128i *) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
329 if ((bt_last == BT_NONE && effective_width & 1) || bt_last == BT_ODD) {
/* One-pixel-at-a-time remap: look up remap[m] in the palette, adjust by v, then either store directly or blend with the source alpha. */
331 for (uint x = (uint) effective_width; x > 0; x--) {
336 const uint r = remap[src_mv->m];
338 Colour remapped_colour = AdjustBrightneSSE(this->LookupColourInPalette(r), src_mv->v);
340 *dst = remapped_colour;
342 remapped_colour.
a = src->
a;
343 srcABCD = _mm_cvtsi32_si128(remapped_colour.
data);
344 goto bmcr_alpha_blend_single;
348 srcABCD = _mm_cvtsi32_si128(src->
data);
350 bmcr_alpha_blend_single:
351 __m128i dstABCD = _mm_cvtsi32_si128(dst->
data);
352 srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2);
354 dst->
data = _mm_cvtsi128_si32(srcABCD);
/* Darkening, two pixels per iteration, followed by the odd trailing pixel. */
356 #if (SSE_VERSION == 2) 366 for (uint x = (uint) bp->
width / 2; x > 0; x--) {
367 __m128i srcABCD = _mm_loadl_epi64((
const __m128i*) src);
368 __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
369 _mm_storel_epi64((__m128i *) dst, DarkenTwoPixels(srcABCD, dstABCD, DARKEN_PARAM_1, DARKEN_PARAM_2));
374 if ((bt_last == BT_NONE && bp->
width & 1) || bt_last == BT_ODD) {
375 __m128i srcABCD = _mm_cvtsi32_si128(src->
data);
376 __m128i dstABCD = _mm_cvtsi32_si128(dst->
data);
377 dst->
data = _mm_cvtsi128_si32(DarkenTwoPixels(srcABCD, dstABCD, DARKEN_PARAM_1, DARKEN_PARAM_2));
/* Per pixel: with m == 0 the source colour is reduced to one MakeDark() value used for r, g and b; otherwise the remapped palette colour is composed onto dst when remap[m] is non-zero. */
382 for (uint x = (uint) bp->
width; x > 0; x--) {
383 if (src_mv->m == 0) {
385 uint8 g = MakeDark(src->r, src->g, src->b);
386 *dst = ComposeColourRGBA(g, g, g, src->
a, *dst);
389 uint r = remap[src_mv->m];
390 if (r != 0) *dst = ComposeColourPANoCheck(this->AdjustBrightness(this->LookupColourInPalette(r), src_mv->v), src->
a, *dst);
399 for (uint x = (uint) bp->
width; x > 0; x--) {
/* Advance to the next source line (byte stride sprite_line_size) and the next destination line (pitch). */
412 src_rgba_line = (
const Colour*) ((
const byte*) src_rgba_line + si->sprite_line_size);
413 dst_line += bp->
pitch;
416 IGNORE_UNINITIALIZED_WARNING_STOP
/*
 * Dispatcher that selects the Draw<> template instantiation to run.
 *
 * NOTE(review): this listing has lost lines. The function signature and the
 * switch/if structure around the calls are not visible, so the conditions
 * under which each call is reached cannot be fully read from here.
 */
/* bt_last is the lowest bit of the width, i.e. whether the width is odd. */
425 #if (SSE_VERSION == 2) 427 #elif (SSE_VERSION == 3) 429 #elif (SSE_VERSION == 4) 437 const BlockType bt_last = (BlockType) (bp->
width & 1);
439 default: Draw<BM_NORMAL, RM_WITH_SKIP, BT_EVEN, true>(bp, zoom);
return;
440 case BT_ODD: Draw<BM_NORMAL, RM_WITH_SKIP, BT_ODD, true>(bp, zoom);
return;
/* Margin mode: the `translucent` template argument follows the sprite's SF_TRANSLUCENT flag. */
443 if (((
const Blitter_32bppSSE_Base::SpriteData *) bp->
sprite)->flags & SF_TRANSLUCENT) {
444 Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, true>(bp, zoom);
446 Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, false>(bp, zoom);
/* A sprite flagged SF_NO_REMAP is drawn through the bm_normal path instead of being remapped. */
453 if (((
const Blitter_32bppSSE_Base::SpriteData *) bp->
sprite)->flags & SF_NO_REMAP)
goto bm_normal;
455 Draw<BM_COLOUR_REMAP, RM_WITH_SKIP, BT_NONE, true>(bp, zoom);
return;
457 Draw<BM_COLOUR_REMAP, RM_WITH_MARGIN, BT_NONE, true>(bp, zoom);
return;
/* The remaining modes each map to a single instantiation. */
459 case BM_TRANSPARENT: Draw<BM_TRANSPARENT, RM_NONE, BT_NONE, true>(bp, zoom);
return;
460 case BM_CRASH_REMAP: Draw<BM_CRASH_REMAP, RM_NONE, BT_NONE, true>(bp, zoom);
return;
461 case BM_BLACK_REMAP: Draw<BM_BLACK_REMAP, RM_NONE, BT_NONE, true>(bp, zoom);
return;
int left
The left offset in the 'dst' in pixels to start drawing.
int height
The height in pixels that needs to be drawn to dst.
Perform transparency colour remapping.
int skip_top
How many pixels of the source to skip at the top (based on the zoom of dst).
uint32 data
Conversion of the channel information to a 32 bit number.
int width
The width in pixels that needs to be drawn to dst.
uint8 a
colour channels in LE order
int skip_left
How many pixels of the source to skip on the left (based on the zoom of dst).
Parameters related to blitting.
int pitch
The pitch of the destination buffer.
Perform a crash remapping.
Perform remapping to a completely blackened sprite.
ZoomLevel
All zoom levels we know.
int top
The top offset in the 'dst' in pixels to start drawing.
const byte * remap
XXX – Temporary storage for remap array.
const void * sprite
Pointer to the sprite, however the encoder stored it.
Perform a colour remapping.
void * dst
Destination buffer.
Structure to access the alpha, red, green, and blue channels from a 32 bit number.
BlitterMode
The modes of blitting we can do.