32bpp_anim_sse4.cpp

00001 /* $Id: 32bpp_anim_sse4.cpp 26541 2014-04-29 18:18:52Z frosch $ */
00002 
00003 /*
00004  * This file is part of OpenTTD.
00005  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
00006  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00007  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
00008  */
00009 
00012 #ifdef WITH_SSE
00013 
00014 #include "../stdafx.h"
00015 #include "../video/video_driver.hpp"
00016 #include "../table/sprites.h"
00017 #include "32bpp_anim_sse4.hpp"
00018 #include "32bpp_sse_func.hpp"
00019 
/* File-local instance of FBlitter_32bppSSE4_Anim. NOTE(review): presumably constructing it is what makes this blitter known to the blitter factory -- confirm in 32bpp_anim_sse4.hpp. */
00021 static FBlitter_32bppSSE4_Anim iFBlitter_32bppSSE4_Anim;
00022 
/**
 * Draws a sprite into a 32bpp destination and updates the matching entries of the animation buffer.
 *
 * @tparam mode        blitter mode; selects which per-pixel operation of the switch below is used
 * @tparam read_mode   with RM_WITH_MARGIN every line is trimmed using the two metadata entries stored in front of its pixels; any other value applies bp->skip_left instead
 * @tparam bt_last     in the modes that process pixel pairs: BT_ODD always handles one trailing pixel, BT_NONE decides that from the width at run time, any other value never does
 * @tparam translucent only consulted by the default (normal) mode: when false, pixels with non-zero alpha are copied without blending
 * @tparam animated    when false, the main pixel loops of the normal and remap modes write 0 to the animation buffer for drawn pixels instead of a map value
 * @param bp   blitter parameters (destination, clipping, remap table, sprite)
 * @param zoom zoom level; used as index into the sprite's per-zoom info table
 */
00030 IGNORE_UNINITIALIZED_WARNING_START
00031 template <BlitterMode mode, Blitter_32bppSSE2::ReadMode read_mode, Blitter_32bppSSE2::BlockType bt_last, bool translucent, bool animated>
00032 inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom)
00033 {
00034   const byte * const remap = bp->remap;
00035   Colour *dst_line = (Colour *) bp->dst + bp->top * bp->pitch + bp->left;
        /* The distance of bp->dst from the start of the screen buffer, counted in 32-bit pixels, is reused unchanged as offset into the animation buffer. */
00036   uint16 *anim_line = this->anim_buf + ((uint32 *)bp->dst - (uint32 *)_screen.dst_ptr) + bp->top * this->anim_buf_width + bp->left;
00037   int effective_width = bp->width;
00038 
00039   /* Find where to start reading in the source sprite. */
00040   const Blitter_32bppSSE_Base::SpriteData * const sd = (const Blitter_32bppSSE_Base::SpriteData *) bp->sprite;
00041   const SpriteInfo * const si = &sd->infos[zoom];
00042   const MapValue *src_mv_line = (const MapValue *) &sd->data[si->mv_offset] + bp->skip_top * si->sprite_width;
00043   const Colour *src_rgba_line = (const Colour *) ((const byte *) &sd->data[si->sprite_offset] + bp->skip_top * si->sprite_line_size);
00044 
        /* In margin mode the left clip is not applied; the dispatcher in this file only selects RM_WITH_MARGIN when bp->skip_left is 0. */
00045   if (read_mode != RM_WITH_MARGIN) {
00046     src_rgba_line += bp->skip_left;
00047     src_mv_line += bp->skip_left;
00048   }
00049   const MapValue *src_mv = src_mv_line;
00050 
00051   /* Load these variables into register before loop. */
00052   const __m128i a_cm        = ALPHA_CONTROL_MASK;
00053   const __m128i pack_low_cm = PACK_LOW_CONTROL_MASK;
00054   const __m128i tr_nom_base = TRANSPARENT_NOM_BASE;
00055 
        /* One iteration per destination line. */
00056   for (int y = bp->height; y != 0; y--) {
00057     Colour *dst = dst_line;
          /* The first META_LENGTH entries of every RGBA line are metadata, not pixels. */
00058     const Colour *src = src_rgba_line + META_LENGTH;
          /* BM_TRANSPARENT never reads the map values, so the pointer is not reset for it. */
00059     if (mode != BM_TRANSPARENT) src_mv = src_mv_line;
00060     uint16 *anim = anim_line;
00061 
00062     if (read_mode == RM_WITH_MARGIN) {
00063       assert(bt_last == BT_NONE); // or you must ensure block type is preserved
            /* Metadata entry 0: number of pixels to skip at the start of this line. */
00064       anim += src_rgba_line[0].data;
00065       src += src_rgba_line[0].data;
00066       dst += src_rgba_line[0].data;
00067       if (mode != BM_TRANSPARENT) src_mv += src_rgba_line[0].data;
            /* Sprite columns that lie beyond the requested width. */
00068       const int width_diff = si->sprite_width - bp->width;
00069       effective_width = bp->width - (int) src_rgba_line[0].data;
            /* Metadata entry 1 is compared with the clipped columns; only the part exceeding them shortens the line further.
             * NOTE(review): presumably entry 1 is the number of skippable pixels at the end of the line -- confirm against the sprite encoder. */
00070       const int delta_diff = (int) src_rgba_line[1].data - width_diff;
00071       const int new_width = effective_width - delta_diff;
00072       effective_width = delta_diff > 0 ? new_width : effective_width;
            /* Nothing left to draw on this line. */
00073       if (effective_width <= 0) goto next_line;
00074     }
00075 
00076     switch (mode) {
00077       default:
00078         if (!translucent) {
                /* No blending: any non-zero alpha is treated as opaque and the pixel is copied. */
00079           for (uint x = (uint) effective_width; x > 0; x--) {
00080             if (src->a) {
00081               if (animated) {
                      /* Store the first 16 bits of the map value as anim entry; palette indices from PALETTE_ANIM_START on are drawn from the palette, adjusted by v. */
00082                 *anim = *(const uint16*) src_mv;
00083                 *dst = (src_mv->m >= PALETTE_ANIM_START) ? AdjustBrightneSSE(this->LookupColourInPalette(src_mv->m), src_mv->v) : src->data;
00084               } else {
00085                 *anim = 0;
00086                 *dst = *src;
00087               }
00088             }
00089             if (animated) src_mv++;
00090             anim++;
00091             src++;
00092             dst++;
00093           }
00094           break;
00095         }
00096 
              /* Two pixels per iteration. mvX2 holds both map values: low 16 bits for the first pixel, high 16 bits for the second;
               * within each half the low byte is used as m and the high byte as v (see the decoding below). */
00097         for (uint x = (uint) effective_width/2; x != 0; x--) {
00098           uint32 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
00099           __m128i srcABCD = _mm_loadl_epi64((const __m128i*) src);
00100           __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
00101 
00102           if (animated) {
00103             /* Remap colours. */
                  /* Animated palette indices replace the RGB of the source pixel while its alpha is kept. */
00104             const byte m0 = mvX2;
00105             if (m0 >= PALETTE_ANIM_START) {
00106               const Colour c0 = (this->LookupColourInPalette(m0).data & 0x00FFFFFF) | (src[0].data & 0xFF000000);
00107               InsertFirstUint32(AdjustBrightneSSE(c0, (byte) (mvX2 >> 8)).data, srcABCD);
00108             }
00109             const byte m1 = mvX2 >> 16;
00110             if (m1 >= PALETTE_ANIM_START) {
00111               const Colour c1 = (this->LookupColourInPalette(m1).data & 0x00FFFFFF) | (src[1].data & 0xFF000000);
00112               InsertSecondUint32(AdjustBrightneSSE(c1, (byte) (mvX2 >> 24)).data, srcABCD);
00113             }
00114 
00115             /* Update anim buffer. */
                  /* A fully opaque pixel stores its map value, a partially transparent one stores 0, a fully transparent one keeps its entry.
                   * NOTE(review): the one exception is a0 == 0 with 0 < a1 < 255, which jumps to the blend without touching anim[1],
                   * whereas every other partially transparent case (including the single-pixel tail below) writes 0 -- confirm whether this is intended. */
00116             const byte a0 = src[0].a;
00117             const byte a1 = src[1].a;
00118             uint32 anim01 = 0;
00119             if (a0 == 255) {
00120               if (a1 == 255) {
                      /* Both opaque: store both map values at once and skip the blend. */
00121                 *(uint32*) anim = mvX2;
00122                 goto bmno_full_opacity;
00123               }
00124               anim01 = (uint16) mvX2;
00125             } else if (a0 == 0) {
00126               if (a1 == 0) {
                      /* Both invisible: neither dst nor anim is written. */
00127                 goto bmno_full_transparency;
00128               } else {
00129                 if (a1 == 255) anim[1] = (uint16) (mvX2 >> 16);
00130                 goto bmno_alpha_blend;
00131               }
00132             }
                  /* Here the first pixel is visible; write one or both entries depending on the second. */
00133             if (a1 > 0) {
00134               if (a1 == 255) anim01 |= mvX2 & 0xFFFF0000;
00135               *(uint32*) anim = anim01;
00136             } else {
00137               anim[0] = (uint16) anim01;
00138             }
00139           } else {
00140             if (src[0].a) anim[0] = 0;
00141             if (src[1].a) anim[1] = 0;
00142           }
00143 
00144           /* Blend colours. */
00145 bmno_alpha_blend:
00146           srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
00147 bmno_full_opacity:
00148           _mm_storel_epi64((__m128i *) dst, srcABCD);
00149 bmno_full_transparency:
00150           src_mv += 2;
00151           src += 2;
00152           anim += 2;
00153           dst += 2;
00154         }
00155 
              /* Trailing single pixel. The condition parses as (bt_last == BT_NONE && (effective_width & 1)) || bt_last == BT_ODD. */
00156         if ((bt_last == BT_NONE && effective_width & 1) || bt_last == BT_ODD) {
00157           if (src->a == 0) {
                  /* Fully transparent: nothing to draw. */
00158           } else if (src->a == 255) {
                  /* This path does not consult the 'animated' template parameter. */
00159             *anim = *(const uint16*) src_mv;
00160             *dst = (src_mv->m >= PALETTE_ANIM_START) ? AdjustBrightneSSE(LookupColourInPalette(src_mv->m), src_mv->v) : *src;
00161           } else {
                  /* Partially transparent: clear the anim entry and blend, using the palette colour with the source alpha for animated indices. */
00162             *anim = 0;
00163             __m128i srcABCD;
00164             __m128i dstABCD = _mm_cvtsi32_si128(dst->data);
00165             if (src_mv->m >= PALETTE_ANIM_START) {
00166               Colour colour = AdjustBrightneSSE(LookupColourInPalette(src_mv->m), src_mv->v);
00167               colour.a = src->a;
00168               srcABCD = _mm_cvtsi32_si128(colour.data);
00169             } else {
00170               srcABCD = _mm_cvtsi32_si128(src->data);
00171             }
00172             dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm));
00173           }
00174         }
00175         break;
00176 
00177       case BM_COLOUR_REMAP:
              /* Two pixels per iteration, same mvX2 packing as in the default mode. */
00178         for (uint x = (uint) effective_width / 2; x != 0; x--) {
00179           uint32 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
00180           __m128i srcABCD = _mm_loadl_epi64((const __m128i*) src);
00181           __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
00182 
00183           /* Remap colours. */
00184           const uint m0 = (byte) mvX2;
00185           const uint r0 = remap[m0];
00186           const uint m1 = (byte) (mvX2 >> 16);
00187           const uint r1 = remap[m1];
                /* Only remap when at least one of the two pixels has a non-zero m. */
00188           if (mvX2 & 0x00FF00FF) {
                  /* CMOV_REMAP declares m_colour as: m_src itself when m is 0; m_colour_init when remap[m] is 0;
                   * otherwise the palette colour remap[m] carrying the alpha of m_src.
                   * NOTE(review): the macro is defined inside the loop body and is not #undef'd in the visible code. */
00189             #define CMOV_REMAP(m_colour, m_colour_init, m_src, m_m) \
00190               /* Written so the compiler uses CMOV. */ \
00191               Colour m_colour = m_colour_init; \
00192               { \
00193               const Colour srcm = (Colour) (m_src); \
00194               const uint m = (byte) (m_m); \
00195               const uint r = remap[m]; \
00196               const Colour cmap = (this->LookupColourInPalette(r).data & 0x00FFFFFF) | (srcm.data & 0xFF000000); \
00197               m_colour = r == 0 ? m_colour : cmap; \
00198               m_colour = m != 0 ? m_colour : srcm; \
00199               }
00200 #ifdef _SQ64
                  /* 64-bit variant: both pixels are moved through one 64-bit integer. */
00201             uint64 srcs = _mm_cvtsi128_si64(srcABCD);
00202             uint64 dsts;
00203             if (animated) dsts = _mm_cvtsi128_si64(dstABCD);
00204             uint64 remapped_src = 0;
00205             CMOV_REMAP(c0, animated ? dsts : 0, srcs, mvX2);
00206             remapped_src = c0.data;
00207             CMOV_REMAP(c1, animated ? dsts >> 32 : 0, srcs >> 32, mvX2 >> 16);
00208             remapped_src |= (uint64) c1.data << 32;
00209             srcABCD = _mm_cvtsi64_si128(remapped_src);
00210 #else
                  /* 32-bit variant: the pixels are remapped one by one and reloaded from a two-element array. */
00211             Colour remapped_src[2];
00212             CMOV_REMAP(c0, animated ? _mm_cvtsi128_si32(dstABCD) : 0, _mm_cvtsi128_si32(srcABCD), mvX2);
00213             remapped_src[0] = c0.data;
00214             CMOV_REMAP(c1, animated ? dst[1] : 0, src[1], mvX2 >> 16);
00215             remapped_src[1] = c1.data;
00216             srcABCD = _mm_loadl_epi64((__m128i*) &remapped_src);
00217 #endif
00218 
                  /* The brightness adjustment is skipped only when both v bytes equal 0x80. */
00219             if ((mvX2 & 0xFF00FF00) != 0x80008000) srcABCD = AdjustBrightnessOfTwoPixels(srcABCD, mvX2);
00220           }
00221 
00222           /* Update anim buffer. */
                /* anim01 starts with both v bytes in place; the remapped index (r0/r1) is added only for fully opaque pixels,
                 * so a partially transparent pixel stores its v byte with index 0.
                 * NOTE(review): as in the default mode, a0 == 0 with 0 < a1 < 255 leaves anim[1] untouched -- confirm whether this is intended. */
00223           if (animated) {
00224             const byte a0 = src[0].a;
00225             const byte a1 = src[1].a;
00226             uint32 anim01 = mvX2 & 0xFF00FF00;
00227             if (a0 == 255) {
00228               anim01 |= r0;
00229               if (a1 == 255) {
00230                 *(uint32*) anim = anim01 | (r1 << 16);
00231                 goto bmcr_full_opacity;
00232               }
00233             } else if (a0 == 0) {
00234               if (a1 == 0) {
00235                 goto bmcr_full_transparency;
00236               } else {
00237                 if (a1 == 255) {
00238                   anim[1] = r1 | (anim01 >> 16);
00239                 }
00240                 goto bmcr_alpha_blend;
00241               }
00242             }
00243             if (a1 > 0) {
00244               if (a1 == 255) anim01 |= r1 << 16;
00245               *(uint32*) anim = anim01;
00246             } else {
00247               anim[0] = (uint16) anim01;
00248             }
00249           } else {
00250             if (src[0].a) anim[0] = 0;
00251             if (src[1].a) anim[1] = 0;
00252           }
00253 
00254           /* Blend colours. */
00255 bmcr_alpha_blend:
00256           srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
00257 bmcr_full_opacity:
00258           _mm_storel_epi64((__m128i *) dst, srcABCD);
00259 bmcr_full_transparency:
00260           src_mv += 2;
00261           dst += 2;
00262           src += 2;
00263           anim += 2;
00264         }
00265 
              /* Trailing single pixel; same condition as in the default mode. */
00266         if ((bt_last == BT_NONE && effective_width & 1) || bt_last == BT_ODD) {
00267           /* In case the m-channel is zero, do not remap this pixel in any way. */
00268           __m128i srcABCD;
                /* Invisible pixel: leave the switch without writing anything. */
00269           if (src->a == 0) break;
00270           if (src_mv->m) {
00271             const uint r = remap[src_mv->m];
                  /* Written even when r turns out to be 0 and the pixel itself is not drawn. */
00272             *anim = (animated && src->a == 255) ? r | ((uint16) src_mv->v << 8 ) : 0;
00273             if (r != 0) {
00274               Colour remapped_colour = AdjustBrightneSSE(this->LookupColourInPalette(r), src_mv->v);
00275               if (src->a == 255) {
00276                 *dst = remapped_colour;
00277               } else {
00278                 remapped_colour.a = src->a;
00279                 srcABCD = _mm_cvtsi32_si128(remapped_colour.data);
                      /* Continue in the else branch below, which blends and stores a single pixel. */
00280                 goto bmcr_alpha_blend_single;
00281               }
00282             }
00283           } else {
00284             *anim = 0;
00285             srcABCD = _mm_cvtsi32_si128(src->data);
00286             if (src->a < 255) {
00287 bmcr_alpha_blend_single:
00288               __m128i dstABCD = _mm_cvtsi32_si128(dst->data);
00289               srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
00290             }
00291             dst->data = _mm_cvtsi128_si32(srcABCD);
00292           }
00293         }
00294         break;
00295 
00296       case BM_TRANSPARENT:
00297         /* Make the current colour a bit more black, so it looks like this image is transparent. */
              /* This mode iterates over bp->width rather than effective_width. */
00298         for (uint x = (uint) bp->width / 2; x > 0; x--) {
00299           __m128i srcABCD = _mm_loadl_epi64((const __m128i*) src);
00300           __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
00301           _mm_storel_epi64((__m128i *) dst, DarkenTwoPixels(srcABCD, dstABCD, a_cm, tr_nom_base));
00302           src += 2;
00303           dst += 2;
00304           anim += 2;
                /* The pointers have already been advanced, hence the negative indices: clear the anim entry of every pixel with non-zero alpha. */
00305           if (src[-2].a) anim[-2] = 0;
00306           if (src[-1].a) anim[-1] = 0;
00307         }
00308 
00309         if ((bt_last == BT_NONE && bp->width & 1) || bt_last == BT_ODD) {
00310           __m128i srcABCD = _mm_cvtsi32_si128(src->data);
00311           __m128i dstABCD = _mm_cvtsi32_si128(dst->data);
00312           dst->data = _mm_cvtsi128_si32(DarkenTwoPixels(srcABCD, dstABCD, a_cm, tr_nom_base));
00313           if (src[0].a) anim[0] = 0;
00314         }
00315         break;
00316 
00317       case BM_CRASH_REMAP:
              /* Scalar loop, one pixel at a time, over bp->width. */
00318         for (uint x = (uint) bp->width; x > 0; x--) {
00319           if (src_mv->m == 0) {
                  /* No map value: compose a grey made by MakeDark() from the source RGB over dst and clear the anim entry. */
00320             if (src->a != 0) {
00321               uint8 g = MakeDark(src->r, src->g, src->b);
00322               *dst = ComposeColourRGBA(g, g, g, src->a, *dst);
00323               *anim = 0;
00324             }
00325           } else {
                  /* Remapped palette colour composed over dst; pixels remapped to 0 are skipped. The anim buffer is not written on this path. */
00326             uint r = remap[src_mv->m];
00327             if (r != 0) *dst = ComposeColourPANoCheck(this->AdjustBrightness(this->LookupColourInPalette(r), src_mv->v), src->a, *dst);
00328           }
00329           src_mv++;
00330           dst++;
00331           src++;
00332           anim++;
00333         }
00334         break;
00335     }
00336 
00337 next_line:
          /* Advance all line pointers; the RGBA line stride is given in bytes, the others in elements. */
00338     if (mode != BM_TRANSPARENT) src_mv_line += si->sprite_width;
00339     src_rgba_line = (const Colour*) ((const byte*) src_rgba_line + si->sprite_line_size);
00340     dst_line += bp->pitch;
00341     anim_line += this->anim_buf_width;
00342   }
00343 }
00344 IGNORE_UNINITIALIZED_WARNING_STOP
00345 
00353 void Blitter_32bppSSE4_Anim::Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom)
00354 {
00355   const Blitter_32bppSSE_Base::SpriteFlags sprite_flags = ((const Blitter_32bppSSE_Base::SpriteData *) bp->sprite)->flags;
00356   switch (mode) {
00357     default: {
00358 bm_normal:
00359       if (bp->skip_left != 0 || bp->width <= MARGIN_NORMAL_THRESHOLD) {
00360         const BlockType bt_last = (BlockType) (bp->width & 1);
00361         if (bt_last == BT_EVEN) {
00362           if (sprite_flags & SF_NO_ANIM) Draw<BM_NORMAL, RM_WITH_SKIP, BT_EVEN, true, false>(bp, zoom);
00363           else                           Draw<BM_NORMAL, RM_WITH_SKIP, BT_EVEN, true, true>(bp, zoom);
00364         } else {
00365           if (sprite_flags & SF_NO_ANIM) Draw<BM_NORMAL, RM_WITH_SKIP, BT_ODD, true, false>(bp, zoom);
00366           else                           Draw<BM_NORMAL, RM_WITH_SKIP, BT_ODD, true, true>(bp, zoom);
00367         }
00368       } else {
00369 #ifdef _SQ64
00370         if (sprite_flags & SF_TRANSLUCENT) {
00371           if (sprite_flags & SF_NO_ANIM) Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, true, false>(bp, zoom);
00372           else                           Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, true, true>(bp, zoom);
00373         } else {
00374           if (sprite_flags & SF_NO_ANIM) Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, false, false>(bp, zoom);
00375           else                           Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, false, true>(bp, zoom);
00376         }
00377 #else
00378         if (sprite_flags & SF_NO_ANIM) Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, true, false>(bp, zoom);
00379         else                           Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, true, true>(bp, zoom);
00380 #endif
00381       }
00382       break;
00383     }
00384     case BM_COLOUR_REMAP:
00385       if (sprite_flags & SF_NO_REMAP) goto bm_normal;
00386       if (bp->skip_left != 0 || bp->width <= MARGIN_REMAP_THRESHOLD) {
00387         if (sprite_flags & SF_NO_ANIM) Draw<BM_COLOUR_REMAP, RM_WITH_SKIP, BT_NONE, true, false>(bp, zoom);
00388         else                           Draw<BM_COLOUR_REMAP, RM_WITH_SKIP, BT_NONE, true, true>(bp, zoom);
00389       } else {
00390         if (sprite_flags & SF_NO_ANIM) Draw<BM_COLOUR_REMAP, RM_WITH_MARGIN, BT_NONE, true, false>(bp, zoom);
00391         else                           Draw<BM_COLOUR_REMAP, RM_WITH_MARGIN, BT_NONE, true, true>(bp, zoom);
00392       }
00393       break;
00394     case BM_TRANSPARENT:  Draw<BM_TRANSPARENT, RM_NONE, BT_NONE, true, true>(bp, zoom); return;
00395     case BM_CRASH_REMAP:  Draw<BM_CRASH_REMAP, RM_NONE, BT_NONE, true, true>(bp, zoom); return;
00396   }
00397 }
00398 
00399 #endif /* WITH_SSE */