From 2eb550826dd1d28718fcd2ed5f73f63c57ed26ad Mon Sep 17 00:00:00 2001 From: Shaun Inman Date: Thu, 16 Feb 2023 20:31:52 -0500 Subject: [PATCH] added mixed scalers, using them proved difficult will revisit later --- src/common/scaler_neon.c | 81 ++++++++++++++++++++++++++++++++++++++++ src/common/scaler_neon.h | 6 +++ src/minarch/main.c | 17 ++++++++- 3 files changed, 102 insertions(+), 2 deletions(-) diff --git a/src/common/scaler_neon.c b/src/common/scaler_neon.c index 19c3803..91dbde5 100755 --- a/src/common/scaler_neon.c +++ b/src/common/scaler_neon.c @@ -1039,3 +1039,84 @@ void scale6x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32 memcpy((uint8_t*)dst+dp*5, dst, swl); } } + +// +// mixed scalers +// + +void scale1x2_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw*sizeof(uint16_t); + if (!sp) { sp = swl; } if (!dp) { dp = swl*1; } + for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*2) { + memcpy(dst, src, swl); + memcpy((uint8_t*)dst+dp, dst, swl); + } +} + +void scale2x1_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t x, dx, pix, dpix1, dpix2, swl = sw*sizeof(uint16_t); + if (!sp) { sp = swl; } swl*=2; if (!dp) { dp = swl; } + for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*1) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x<(sw/2); x++, dx+=2) { + pix = s[x]; + dpix1=(pix & 0x0000FFFF)|(pix<<16); + dpix2=(pix & 0xFFFF0000)|(pix>>16); + d[dx] = dpix1; d[dx+1] = dpix2; + } + if (sw&1) { + uint16_t *s16 = (uint16_t*)s; + uint16_t pix16 = s16[x*2]; + d[dx] = pix16|(pix16<<16); + } + } +} + +void scale2x3_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t x, dx, pix, dpix1, dpix2, swl = sw*sizeof(uint16_t); + if (!sp) { sp = swl; } swl*=2; if (!dp) { dp = swl; } + for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*3) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x<(sw/2); x++, dx+=2) { + pix = s[x]; + dpix1=(pix & 0x0000FFFF)|(pix<<16); + dpix2=(pix & 0xFFFF0000)|(pix>>16); + d[dx] = dpix1; d[dx+1] = dpix2; + } + if (sw&1) { + uint16_t *s16 = (uint16_t*)s; + uint16_t pix16 = s16[x*2]; + d[dx] = pix16|(pix16<<16); + } + memcpy((uint8_t*)dst+dp*1, dst, swl); + memcpy((uint8_t*)dst+dp*2, dst, swl); + } +} + +void scale4x2_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t x, dx, pix, dpix1, dpix2, swl = sw*sizeof(uint16_t); + if (!sp) { sp = swl; } swl*=4; if (!dp) { dp = swl; } + for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*2) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x<(sw/2); x++, dx+=4) { + pix = s[x]; + dpix1=(pix & 0x0000FFFF)|(pix<<16); + dpix2=(pix & 0xFFFF0000)|(pix>>16); + d[dx] = dpix1; d[dx+1] = dpix1; d[dx+2] = dpix2; d[dx+3] = dpix2; + } + if (sw&1) { + uint16_t *s16 = (uint16_t*)s; + uint16_t pix16 = s16[x*2]; + dpix1 = pix16|(pix16<<16); + d[dx] = dpix1; d[dx+1] = dpix1; + } + memcpy((uint8_t*)dst+dp*1, dst, swl); + } +} \ No newline at end of file diff --git a/src/common/scaler_neon.h b/src/common/scaler_neon.h index cdbd9f9..7ca4670 100755 --- a/src/common/scaler_neon.h +++ b/src/common/scaler_neon.h @@ -51,4 +51,10 @@ void scale5x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32 void scale6x_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); void scale6x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +// mixed scalers +void scale1x2_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale2x1_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale2x3_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale4x2_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); + #endif diff --git a/src/minarch/main.c b/src/minarch/main.c index 011790a..bb002ad 100644 --- a/src/minarch/main.c +++ b/src/minarch/main.c @@ -2242,7 +2242,6 @@ static void selectScaler_PAR(int width, int height, int pitch) { } } } - ////////////////////////////// // DEBUG HUD @@ -2266,8 +2265,8 @@ static void selectScaler_AR(int width, int height, int pitch) { if (scale>2) scale = 4; // TODO: pillar/letterboxing at 3x produces vertical banding (some kind of alignment issue?) // reduce scale if we don't have enough memory to accomodate it - // TODO: some resolutions are getting through here unadjusted? oh maybe because of aspect ratio adjustments below? revisit // scaled width and height can't be greater than our fixed page width or height + // TODO: some resolutions are getting through here unadjusted? oh maybe because of aspect ratio adjustments below? revisit while (src_w * scale * FIXED_BPP * src_h * scale > PAGE_SIZE || src_w * scale > PAGE_WIDTH || src_h * scale > PAGE_HEIGHT) scale -= 1; int dst_w = src_w * scale; @@ -2317,6 +2316,20 @@ static void selectScaler_AR(int width, int height, int pitch) { case 3: renderer.scaler = scale3x_n16; break; case 2: renderer.scaler = scale2x_n16; break; default: renderer.scaler = scale1x_n16; break; + // case 4: + // if (scale_y==2) renderer.scaler = scale4x2_c16; + // else renderer.scaler = scale4x_c16; + // break; + // case 3: + // if (scale_x==2) renderer.scaler = scale2x3_c16; + // else renderer.scaler = scale3x_c16; + // break; + // case 2: + // if (scale_x==1) renderer.scaler = scale1x2_c16; + // else if (scale_y==1) renderer.scaler = scale2x1_c16; + // else renderer.scaler = scale2x_c16; + // break; + // default: renderer.scaler = scale1x_n16; break; } if (scaler_surface) SDL_FreeSurface(scaler_surface);