commit ec15d449e1a815256373a4a72abbb5f3779fcc2a Author: Shaun Inman Date: Sun Jan 1 21:48:35 2023 -0500 initial (partial) commit diff --git a/src/common/defines.h b/src/common/defines.h new file mode 100644 index 0000000..ab77f97 --- /dev/null +++ b/src/common/defines.h @@ -0,0 +1,49 @@ +#ifndef __DEFS_H__ +#define __DEFS_H__ + +#define CODE_UP 0x5A +#define CODE_DOWN 0x5B +#define CODE_LEFT 0x5C +#define CODE_RIGHT 0x5D +#define CODE_A 0x5E +#define CODE_B 0x5F +#define CODE_X 0x60 +#define CODE_Y 0x61 +#define CODE_START 0x62 +#define CODE_SELECT 0x63 +#define CODE_L1 0x64 +#define CODE_R1 0x65 +#define CODE_L2 0x66 +#define CODE_R2 0x67 +#define CODE_MENU 0x68 +#define CODE_VOL_UP 0x6C +#define CODE_VOL_DN 0x6D +#define CODE_POWER 0x74 + +#define VOLUME_MIN 0 +#define VOLUME_MAX 20 +#define BRIGHTNESS_MIN 0 +#define BRIGHTNESS_MAX 10 + +#define SDCARD_PATH "/mnt/sdcard" +#define SYSTEM_PATH SDCARD_PATH "/.system/" PLATFORM +#define USERDATA_PATH SDCARD_PATH "/.userdata/" PLATFORM +#define MAX_PATH 512 + +#define SCREEN_WIDTH 640 +#define SCREEN_HEIGHT 480 +#define SCREEN_DEPTH 16 +#define SCREEN_PITCH 1280 +#define SCREEN_BPP 2 +#define SCREEN_BUFFER_COUNT 3 + + +/////////////////////////////// + +#define STR_HELPER(x) #x +#define STR(x) STR_HELPER(x) + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) /* larger of a and b; whole expansion parenthesized so it is safe inside bigger expressions; each argument may be evaluated twice */ +#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) /* smaller of a and b; whole expansion parenthesized so it is safe inside bigger expressions; each argument may be evaluated twice */ + +#endif // __DEFS_H__ \ No newline at end of file diff --git a/src/common/scaler_neon.c b/src/common/scaler_neon.c new file mode 100755 index 0000000..19c3803 --- /dev/null +++ b/src/common/scaler_neon.c @@ -0,0 +1,1041 @@ +#include +#include +#include +#include "scaler_neon.h" + +// +// arm NEON / C integer scalers for miyoomini +// args/ src : src offset address of top left corner +// dst : dst offset address of top left corner +// sw : src width pixels +// sh : src height pixels +// sp : src pitch (stride) bytes if 0, (src width * [2|4]) is used +// dp : dst pitch (stride) bytes if 0, (src width * [2|4] * multiplier) is used +// +// ** NOTE ** +// since 32bit aligned addresses need to be processed for NEON scalers, +// x-offset and stride pixels must be even# in the case of 16bpp, +// if odd#, then handled by the C scaler +// + +// memcpy_neon (dst/src must be aligned 4, size must be aligned 2) +static inline void memcpy_neon(void* dst, void* src, uint32_t size) { + asm volatile ( + " bic r4, %2, #127 ;" + " add r3, %0, %2 ;" // r3 = endofs + " add r4, %0, r4 ;" // r4 = s128ofs + " cmp %[s], r4 ;" + " beq 2f ;" + "1: vldmia %[s]!, {q8-q15} ;" // 128 bytes + " vstmia %[d]!, {q8-q15} ;" + " cmp %[s], r4 ;" + " bne 1b ;" + "2: cmp %[s], r3 ;" + " beq 7f ;" + " tst %[sz], #64 ;" + " beq 3f ;" + " vldmia %[s]!, {q8-q11} ;" // 64 bytes + " vstmia %[d]!, {q8-q11} ;" + " cmp %[s], r3 ;" + " beq 7f ;" + "3: tst %[sz], #32 ;" + " beq 4f ;" + " vldmia %[s]!, {q12-q13} ;" // 32 bytes + " vstmia %[d]!, {q12-q13} ;" + " cmp %[s], r3 ;" + " beq 7f ;" + "4: tst %[sz], #16 ;" + " beq 5f ;" + " vldmia %[s]!, {q14} ;" // 16 bytes + " vstmia %[d]!, {q14} ;" + " cmp %[s], r3 ;" + " beq 7f ;" + "5: tst %[sz], #8 ;" + " beq 6f ;" + " vldmia %[s]!, {d30} ;" // 8 bytes + " vstmia %[d]!, {d30} ;" + " cmp %[s], r3 ;" + " beq 7f ;" + "6: ldrh r4, [%[s]],#2 ;" // 
rest + " strh r4, [%[d]],#2 ;" + " cmp %[s], r3 ;" + " bne 6b ;" + "7: " + : [s]"+r"(src), [d]"+r"(dst) + : 
[sz]"r"(size) + : "r3","r4","q8","q9","q10","q11","q12","q13","q14","q15","memory","cc" + ); +} + +// +// NEON scalers +// + +void scale1x_n16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw*sizeof(uint16_t); + if (!sp) { sp = swl; } if (!dp) { dp = swl*1; } + if ( ((uintptr_t)src&3)||((uintptr_t)dst&3)||(sp&3)||(dp&3) ) { scale1x_c16(src,dst,sw,sh,sp,dp); return; } + if ((swl == sp)&&(sp == dp)) memcpy_neon(dst, src, sp*sh); + else for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp) memcpy_neon(dst, src, swl); +} + +void scale1x_n32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw*sizeof(uint32_t); + if (!sp) { sp = swl; } if (!dp) { dp = swl*1; } + if ( ((uintptr_t)src&3)||((uintptr_t)dst&3)||(sp&3)||(dp&3) ) { scale1x_c32(src,dst,sw,sh,sp,dp); return; } + if ((swl == sp)&&(sp == dp)) memcpy_neon(dst, src, sp*sh); + else for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp) memcpy_neon(dst, src, swl); +} + +void scale2x_n16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw * sizeof(uint16_t); + if (!sp) { sp = swl; } if (!dp) { dp = swl*2; } + if ( ((uintptr_t)src&3)||((uintptr_t)dst&3)||(sp&3)||(dp&3) ) { scale2x_c16(src,dst,sw,sh,sp,dp); return; } + uint32_t swl64 = swl & ~63; + uint32_t swrest = swl & 63; + uint32_t sadd = sp - swl; + uint32_t dadd = dp*2 - swl*2; + uint8_t* finofs = (uint8_t*)src + (sp*sh); + asm volatile ( + "1: add lr, %0, %2 ;" // lr = x64bytes offset + " add r9, %0, %3 ;" // r9 = lineend offset + " add r10, %1, %7 ;" // r10 = 2x line offset + " cmp %0, lr ;" + " beq 3f ;" + "2: vldmia %0!, {q8-q11} ;" // 32 pixels 64 bytes + " vdup.16 d0, d23[3] ;" + " vdup.16 d1, d23[2] ;" + " vext.16 d31, d1,d0,#2 ;" + " vdup.16 d0, d23[1] ;" + " 
vdup.16 d1, d23[0] ;" + " vext.16 d30, d1,d0,#2 ;" + " vdup.16 d0, d22[3] ;" + " vdup.16 d1, d22[2] ;" + " vext.16 d29, d1,d0,#2 ;" + " vdup.16 d0, d22[1] ;" + " vdup.16 d1, d22[0] ;" + " vext.16 d28, d1,d0,#2 ;" + " vdup.16 d0, d21[3] ;" + " vdup.16 d1, d21[2] ;" + " vext.16 d27, d1,d0,#2 ;" + " vdup.16 d0, d21[1] ;" + " vdup.16 d1, d21[0] ;" + " vext.16 d26, d1,d0,#2 ;" + " vdup.16 d0, d20[3] ;" + " vdup.16 d1, d20[2] ;" + " vext.16 d25, d1,d0,#2 ;" + " vdup.16 d0, d20[1] ;" + " vdup.16 d1, d20[0] ;" + " vext.16 d24, d1,d0,#2 ;" + " vdup.16 d0, d19[3] ;" + " vdup.16 d1, d19[2] ;" + " vext.16 d23, d1,d0,#2 ;" + " vdup.16 d0, d19[1] ;" + " vdup.16 d1, d19[0] ;" + " vext.16 d22, d1,d0,#2 ;" + " vdup.16 d0, d18[3] ;" + " vdup.16 d1, d18[2] ;" + " vext.16 d21, d1,d0,#2 ;" + " vdup.16 d0, d18[1] ;" + " vdup.16 d1, d18[0] ;" + " vext.16 d20, d1,d0,#2 ;" + " vdup.16 d0, d17[3] ;" + " vdup.16 d1, d17[2] ;" + " vext.16 d19, d1,d0,#2 ;" + " vdup.16 d0, d17[1] ;" + " vdup.16 d1, d17[0] ;" + " vext.16 d18, d1,d0,#2 ;" + " vdup.16 d0, d16[3] ;" + " vdup.16 d1, d16[2] ;" + " vext.16 d17, d1,d0,#2 ;" + " vdup.16 d0, d16[1] ;" + " vdup.16 d1, d16[0] ;" + " vext.16 d16, d1,d0,#2 ;" + " cmp %0, lr ;" + " vstmia %1!, {q8-q15} ;" + " vstmia r10!, {q8-q15} ;" + " bne 2b ;" + "3: cmp %0, r9 ;" + " beq 5f ;" + " tst %8, #32 ;" + " beq 4f ;" + " vldmia %0!,{q8-q9} ;" // 16 pixels + " vdup.16 d0, d19[3] ;" + " vdup.16 d1, d19[2] ;" + " vext.16 d23, d1,d0,#2 ;" + " vdup.16 d0, d19[1] ;" + " vdup.16 d1, d19[0] ;" + " vext.16 d22, d1,d0,#2 ;" + " vdup.16 d0, d18[3] ;" + " vdup.16 d1, d18[2] ;" + " vext.16 d21, d1,d0,#2 ;" + " vdup.16 d0, d18[1] ;" + " vdup.16 d1, d18[0] ;" + " vext.16 d20, d1,d0,#2 ;" + " vdup.16 d0, d17[3] ;" + " vdup.16 d1, d17[2] ;" + " vext.16 d19, d1,d0,#2 ;" + " vdup.16 d0, d17[1] ;" + " vdup.16 d1, d17[0] ;" + " vext.16 d18, d1,d0,#2 ;" + " vdup.16 d0, d16[3] ;" + " vdup.16 d1, d16[2] ;" + " vext.16 d17, d1,d0,#2 ;" + " vdup.16 d0, d16[1] ;" + " vdup.16 d1, d16[0] ;" 
+ " vext.16 d16, d1,d0,#2 ;" + " cmp %0, r9 ;" + " vstmia %1!, {q8-q11} ;" + " vstmia r10!, {q8-q11} ;" + " beq 5f ;" + "4: ldrh lr, [%0],#2 ;" // rest + " orr lr, lr, lsl #16 ;" + " cmp %0, r9 ;" + " str lr, [%1],#4 ;" + " str lr, [r10],#4 ;" + " bne 4b ;" + "5: add %0, %0, %4 ;" + " add %1, %1, %5 ;" + " cmp %0, %6 ;" + " bne 1b " + : "+r"(src), "+r"(dst) + : "r"(swl64), "r"(swl), "r"(sadd), "r"(dadd), "r"(finofs), "r"(dp), "r"(swrest) + : "r9","r10","lr","q0","q8","q9","q10","q11","q12","q13","q14","q15","memory","cc" + ); +} + +void scale2x_n32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw * sizeof(uint32_t); + if (!sp) { sp = swl; } if (!dp) { dp = swl*2; } + if ( ((uintptr_t)src&3)||((uintptr_t)dst&3)||(sp&3)||(dp&3) ) { scale2x_c32(src,dst,sw,sh,sp,dp); return; } + uint32_t swl64 = swl & ~63; + uint32_t sadd = sp - swl; + uint32_t dadd = dp*2 - swl*2; + uint8_t* finofs = (uint8_t*)src + (sp*sh); + asm volatile ( + "1: add lr, %0, %2 ;" // lr = x64bytes offset + " add r8, %0, %3 ;" // r8 = lineend offset + " add r9, %1, %7 ;" // r9 = 2x line offset + " cmp %0, lr ;" + " beq 3f ;" + "2: vldmia %0!, {q8-q11} ;" // 16 pixels 64 bytes + " vdup.32 d31, d23[1] ;" + " vdup.32 d30, d23[0] ;" + " vdup.32 d29, d22[1] ;" + " vdup.32 d28, d22[0] ;" + " vdup.32 d27, d21[1] ;" + " vdup.32 d26, d21[0] ;" + " vdup.32 d25, d20[1] ;" + " vdup.32 d24, d20[0] ;" + " vdup.32 d23, d19[1] ;" + " vdup.32 d22, d19[0] ;" + " vdup.32 d21, d18[1] ;" + " vdup.32 d20, d18[0] ;" + " vdup.32 d19, d17[1] ;" + " vdup.32 d18, d17[0] ;" + " vdup.32 d17, d16[1] ;" + " vdup.32 d16, d16[0] ;" + " cmp %0, lr ;" + " vstmia %1!, {q8-q15} ;" + " vstmia r9!, {q8-q15} ;" + " bne 2b ;" + "3: cmp %0, r8 ;" + " beq 5f ;" + "4: ldr lr, [%0],#4 ;" // rest + " vdup.32 d16, lr ;" + " cmp %0, r8 ;" + " vstmia %1!, {d16} ;" + " vstmia r9!, {d16} ;" + " bne 4b ;" + "5: add %0, %0, %4 ;" + " add %1, %1, %5 ;" + " 
cmp %0, %6 ;" + " bne 1b " + : "+r"(src), "+r"(dst) + : "r"(swl64), "r"(swl), "r"(sadd), "r"(dadd), "r"(finofs), "r"(dp) + : "r8","r9","lr","q8","q9","q10","q11","q12","q13","q14","q15","memory","cc" + ); +} + +void scale3x_n16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw * sizeof(uint16_t); + if (!sp) { sp = swl; } if (!dp) { dp = swl*3; } + if ( ((uintptr_t)src&3)||((uintptr_t)dst&3)||(sp&3)||(dp&3) ) { scale3x_c16(src,dst,sw,sh,sp,dp); return; } + uint32_t swl32 = swl & ~31; + uint32_t sadd = sp - swl; + uint32_t dadd = dp - swl*3; + uint32_t dwl = swl*3; + uint32_t dwl128 = dwl & ~127; + uint32_t dwrest = dwl & 127; + uint8_t* finofs = (uint8_t*)src + (sp*sh); + asm volatile ( + "1: mov r11,%1 ;" // dst push + " add lr, %0, %2 ;" // lr = x32bytes offset + " add r10, %0, %3 ;" // r10 = lineend offset + " cmp %0, lr ;" + " beq 3f ;" + "2: vldmia %0!, {q8-q9} ;" // 16 pixels 32 bytes + " vdup.16 d31, d19[3] ;" // FFFF + " vdup.16 d30, d19[2] ;" // EEEE + " vdup.16 d29, d19[1] ;" // DDDD + " vdup.16 d28, d19[0] ;" // CCCC + " vext.16 d27, d30,d31,#3 ;" // EFFF + " vext.16 d26, d29,d30,#2 ;" // DDEE + " vext.16 d25, d28,d29,#1 ;" // CCCD + " vdup.16 d31, d18[3] ;" // BBBB + " vdup.16 d30, d18[2] ;" // AAAA + " vdup.16 d29, d18[1] ;" // 9999 + " vdup.16 d28, d18[0] ;" // 8888 + " vext.16 d24, d30,d31,#3 ;" // ABBB + " vext.16 d23, d29,d30,#2 ;" // 99AA + " vext.16 d22, d28,d29,#1 ;" // 8889 + " vdup.16 d31, d17[3] ;" // 7777 + " vdup.16 d30, d17[2] ;" // 6666 + " vdup.16 d29, d17[1] ;" // 5555 + " vdup.16 d28, d17[0] ;" // 4444 + " vext.16 d21, d30,d31,#3 ;" // 6777 + " vext.16 d20, d29,d30,#2 ;" // 5566 + " vext.16 d19, d28,d29,#1 ;" // 4445 + " vdup.16 d31, d16[3] ;" // 3333 + " vdup.16 d30, d16[2] ;" // 2222 + " vdup.16 d29, d16[1] ;" // 1111 + " vdup.16 d28, d16[0] ;" // 0000 + " vext.16 d18, d30,d31,#3 ;" // 2333 + " vext.16 d17, d29,d30,#2 ;" // 1122 + " vext.16 
d16, d28,d29,#1 ;" // 0001 + " cmp %0, lr ;" + " vstmia %1!, {q8-q13} ;" + " bne 2b ;" + "3: cmp %0, r10 ;" + " beq 5f ;" + "4: ldrh lr, [%0],#2 ;" // rest + " orr lr, lr, lsl #16 ;" + " cmp %0, r10 ;" + " str lr, [%1],#4 ;" + " strh lr, [%1],#2 ;" + " bne 4b ;" + "5: add %0, %4 ;" + " add %1, %5 ;" + " mov r12, %1 ;" // r12 = 2x line offset + " add %1, %8 ;" // + " add %1, %5 ;" // %1 = 3x line offset + " add lr, r11, %7 ;" // lr = x128bytes offset + " add r10, r11, %8 ;" // r10 = lineend offset + " cmp r11, lr ;" + " beq 7f ;" + "6: vldmia r11!, {q8-q15} ;" // 64 pixels 128 bytes + " vstmia r12!, {q8-q15} ;" + " vstmia %1!, {q8-q15} ;" + " cmp r11, lr ;" + " bne 6b ;" + "7: cmp r11, r10 ;" + " beq 10f ;" + " tst %9, #64 ;" + " beq 8f ;" + " vldmia r11!, {q8-q11} ;" // 32 pixels + " vstmia r12!, {q8-q11} ;" + " vstmia %1!, {q8-q11} ;" + " cmp r11, r10 ;" + " beq 10f ;" + "8: tst %9, #32 ;" + " beq 9f ;" + " vldmia r11!, {q8-q9} ;" // 16 pixels + " vstmia r12!, {q8-q9} ;" + " vstmia %1!, {q8-q9} ;" + " cmp r11, r10 ;" + " beq 10f ;" + "9: ldrh lr, [r11],#2 ;" // rest + " strh lr, [r12],#2 ;" + " strh lr, [%1],#2 ;" + " cmp r11, r10 ;" + " bne 9b ;" + "10: add %1, %5 ;" + " cmp %0, %6 ;" + " bne 1b " + : "+r"(src), "+r"(dst) + : "r"(swl32), "r"(swl), "r"(sadd), "r"(dadd), "r"(finofs), "r"(dwl128), "r"(dwl), "r"(dwrest) + : "r10","r11","r12","lr","q8","q9","q10","q11","q12","q13","q14","q15","memory","cc" + ); +} + +void scale3x_n32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw * sizeof(uint32_t); + if (!sp) { sp = swl; } if (!dp) { dp = swl*3; } + if ( ((uintptr_t)src&3)||((uintptr_t)dst&3)||(sp&3)||(dp&3) ) { scale3x_c32(src,dst,sw,sh,sp,dp); return; } + uint32_t swl32 = swl & ~31; + uint32_t sadd = sp - swl; + uint32_t dadd = dp - swl*3; + uint32_t dwl = swl*3; + uint32_t dwl128 = dwl & ~127; + uint32_t dwrest = dwl & 127; + uint8_t* finofs = (uint8_t*)src + 
(sp*sh); + asm volatile ( + "1: mov r11,%1 ;" // dst push + " add lr, %0, %2 ;" // lr = x32bytes offset + " add r10, %0, %3 ;" // r10 = lineend offset + " cmp %0, lr ;" + " beq 3f ;" + "2: vldmia %0!,{q8-q9} ;" // 8 pixels 32 bytes + " vdup.32 q15, d19[1] ;" // 7777 + " vdup.32 q14, d19[0] ;" // 6666 + " vdup.32 q1, d18[1] ;" // 5555 + " vdup.32 q0, d18[0] ;" // 4444 + " vext.32 q13, q14,q15,#3 ;" // 6777 + " vext.32 q12, q1,q14,#2 ;" // 5566 + " vext.32 q11, q0,q1,#1 ;" // 4445 + " vdup.32 q15, d17[1] ;" // 3333 + " vdup.32 q14, d17[0] ;" // 2222 + " vdup.32 q1, d16[1] ;" // 1111 + " vdup.32 q0, d16[0] ;" // 0000 + " vext.32 q10, q14,q15,#3 ;" // 2333 + " vext.32 q9, q1,q14,#2 ;" // 1122 + " vext.32 q8, q0,q1,#1 ;" // 0001 + " cmp %0, lr ;" + " vstmia %1!,{q8-q13} ;" + " bne 2b ;" + "3: cmp %0, r10 ;" + " beq 5f ;" + "4: ldr lr, [%0],#4 ;" // rest + " vdup.32 d16, lr ;" + " cmp %0, r10 ;" + " vstmia %1!, {d16} ;" + " str lr, [%1],#4 ;" + " bne 4b ;" + "5: add %0, %4 ;" + " add %1, %5 ;" + " mov r12, %1 ;" // r12 = 2x line offset + " add %1, %8 ;" // + " add %1, %5 ;" // %1 = 3x line offset + " add lr, r11, %7 ;" // lr = x128bytes offset + " add r10, r11, %8 ;" // r10 = lineend offset + " cmp r11, lr ;" + " beq 7f ;" + "6: vldmia r11!, {q8-q15} ;" // 32 pixels 128 bytes + " vstmia r12!, {q8-q15} ;" + " vstmia %1!, {q8-q15} ;" + " cmp r11, lr ;" + " bne 6b ;" + "7: cmp r11, r10 ;" + " beq 10f ;" + " tst %9, #64 ;" + " beq 8f ;" + " vldmia r11!, {q8-q11} ;" // 16 pixels + " vstmia r12!, {q8-q11} ;" + " vstmia %1!, {q8-q11} ;" + " cmp r11, r10 ;" + " beq 10f ;" + "8: tst %9, #32 ;" + " beq 9f ;" + " vldmia r11!, {q8-q9} ;" // 8 pixels + " vstmia r12!, {q8-q9} ;" + " vstmia %1!, {q8-q9} ;" + " cmp r11, r10 ;" + " beq 10f ;" + "9: ldr lr, [r11],#4 ;" // rest + " str lr, [r12],#4 ;" + " str lr, [%1],#4 ;" + " cmp r11, r10 ;" + " bne 9b ;" + "10: add %1, %5 ;" + " cmp %0, %6 ;" + " bne 1b " + : "+r"(src), "+r"(dst) + : "r"(swl32), "r"(swl), "r"(sadd), "r"(dadd), 
"r"(finofs), "r"(dwl128), "r"(dwl), "r"(dwrest) + : "r10","r11","r12","lr","q0","q1","q8","q9","q10","q11","q12","q13","q14","q15","memory","cc" + ); +} + +void scale4x_n16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw * sizeof(uint16_t); + if (!sp) { sp = swl; } if (!dp) { dp = swl*4; } + if ( ((uintptr_t)src&3)||((uintptr_t)dst&3)||(sp&3)||(dp&3) ) { scale4x_c16(src,dst,sw,sh,sp,dp); return; } + uint32_t swl32 = swl & ~31; + uint32_t sadd = sp - swl; + uint32_t dadd = dp*4 - swl*4; + uint8_t* finofs = (uint8_t*)src + (sp*sh); + asm volatile ( + "1: add lr, %0, %2 ;" // lr = x32bytes offset + " add r8, %0, %3 ;" // r8 = lineend offset + " add r9, %1, %7 ;" // r9 = 2x line offset + " add r10, r9, %7 ;" // r10 = 3x line offset + " add r11, r10, %7 ;" // r11 = 4x line offset + " cmp %0, lr ;" + " beq 3f ;" + "2: vldmia %0!,{q8-q9} ;" // 16 pixels 32 bytes + " vdup.16 d31,d19[3] ;" + " vdup.16 d30,d19[2] ;" + " vdup.16 d29,d19[1] ;" + " vdup.16 d28,d19[0] ;" + " vdup.16 d27,d18[3] ;" + " vdup.16 d26,d18[2] ;" + " vdup.16 d25,d18[1] ;" + " vdup.16 d24,d18[0] ;" + " vdup.16 d23,d17[3] ;" + " vdup.16 d22,d17[2] ;" + " vdup.16 d21,d17[1] ;" + " vdup.16 d20,d17[0] ;" + " vdup.16 d19,d16[3] ;" + " vdup.16 d18,d16[2] ;" + " vdup.16 d17,d16[1] ;" + " vdup.16 d16,d16[0] ;" + " cmp %0, lr ;" + " vstmia %1!,{q8-q15} ;" + " vstmia r9!,{q8-q15} ;" + " vstmia r10!,{q8-q15} ;" + " vstmia r11!,{q8-q15} ;" + " bne 2b ;" + "3: cmp %0, r8 ;" + " beq 5f ;" + "4: ldrh lr, [%0],#2 ;" // rest + " vdup.16 d16, lr ;" + " cmp %0, r8 ;" + " vstmia %1!, {d16} ;" + " vstmia r9!, {d16} ;" + " vstmia r10!, {d16} ;" + " vstmia r11!, {d16} ;" + " bne 4b ;" + "5: add %0, %0, %4 ;" + " add %1, %1, %5 ;" + " cmp %0, %6 ;" + " bne 1b " + : "+r"(src), "+r"(dst) + : "r"(swl32), "r"(swl), "r"(sadd), "r"(dadd), "r"(finofs), "r"(dp) + : 
"r8","r9","r10","r11","lr","q8","q9","q10","q11","q12","q13","q14","q15","memory","cc" + ); +} + +void scale4x_n32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw * sizeof(uint32_t); + if (!sp) { sp = swl; } if (!dp) { dp = swl*4; } + if ( ((uintptr_t)src&3)||((uintptr_t)dst&3)||(sp&3)||(dp&3) ) { scale4x_c32(src,dst,sw,sh,sp,dp); return; } + uint32_t swl32 = swl & ~31; + uint32_t sadd = sp - swl; + uint32_t dadd = dp*4 - swl*4; + uint8_t* finofs = (uint8_t*)src + (sp*sh); + asm volatile ( + "1: add lr, %0, %2 ;" // lr = x32bytes offset + " add r8, %0, %3 ;" // r8 = lineend offset + " add r9, %1, %7 ;" // r9 = 2x line offset + " add r10, r9, %7 ;" // r10 = 3x line offset + " add r11, r10, %7 ;" // r11 = 4x line offset + " cmp %0, lr ;" + " beq 3f ;" + "2: vldmia %0!,{q8-q9} ;" // 8 pixels 32 bytes + " vdup.32 q15,d19[1] ;" + " vdup.32 q14,d19[0] ;" + " vdup.32 q13,d18[1] ;" + " vdup.32 q12,d18[0] ;" + " vdup.32 q11,d17[1] ;" + " vdup.32 q10,d17[0] ;" + " vdup.32 q9,d16[1] ;" + " vdup.32 q8,d16[0] ;" + " cmp %0, lr ;" + " vstmia %1!,{q8-q15} ;" + " vstmia r9!,{q8-q15} ;" + " vstmia r10!,{q8-q15} ;" + " vstmia r11!,{q8-q15} ;" + " bne 2b ;" + "3: cmp %0, r8 ;" + " beq 5f ;" + "4: ldr lr, [%0],#4 ;" // rest + " vdup.32 q8, lr ;" + " cmp %0, r8 ;" + " vstmia %1!, {q8} ;" + " vstmia r9!, {q8} ;" + " vstmia r10!, {q8} ;" + " vstmia r11!, {q8} ;" + " bne 4b ;" + "5: add %0, %0, %4 ;" + " add %1, %1, %5 ;" + " cmp %0, %6 ;" + " bne 1b " + : "+r"(src), "+r"(dst) + : "r"(swl32), "r"(swl), "r"(sadd), "r"(dadd), "r"(finofs), "r"(dp) + : "r8","r9","r10","r11","lr","q8","q9","q10","q11","q12","q13","q14","q15","memory","cc" + ); +} + +static inline void scale5x_n16line(void* src, void* dst, uint32_t swl) { + asm volatile ( + " bic r4, %2, #15 ;" // r4 = swl16 + " add r3, %0, %2 ;" // r3 = lineend offset + " add r4, %0, r4 ;" // r4 = x16bytes offset + " cmp %0, r4 ;" + " beq 2f ;" + 
"1: vldmia %0!, {q8} ;" // 8 pixels 16 bytes + " vdup.16 d25, d17[3] ;" // 7777 + " vdup.16 d27, d17[2] ;" // 6666 + " vdup.16 d26, d17[1] ;" // 5555 + " vdup.16 d21, d17[0] ;" // 4444 + " vext.16 d24, d27,d25,#1 ;" // 6667 + " vext.16 d23, d26,d27,#2 ;" // 5566 + " vext.16 d22, d21,d26,#3 ;" // 4555 + " vdup.16 d20, d16[3] ;" // 3333 + " vdup.16 d27, d16[2] ;" // 2222 + " vdup.16 d26, d16[1] ;" // 1111 + " vdup.16 d16, d16[0] ;" // 0000 + " vext.16 d19, d27,d20,#1 ;" // 2223 + " vext.16 d18, d26,d27,#2 ;" // 1122 + " vext.16 d17, d16,d26,#3 ;" // 0111 + " cmp %0, r4 ;" + " vstmia %1!, {q8-q12} ;" + " bne 1b ;" + "2: cmp %0, r3 ;" + " beq 4f ;" + "3: ldrh r4, [%0],#2 ;" // rest + " orr r4, r4, lsl #16 ;" + " cmp %0, r3 ;" + " str r4, [%1],#4 ;" + " str r4, [%1],#4 ;" + " strh r4, [%1],#2 ;" + " bne 3b ;" + "4: " + : "+r"(src), "+r"(dst) + : "r"(swl) + : "r3","r4","q8","q9","q10","q11","q12","q13","memory","cc" + ); +} + +void scale5x_n16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw * sizeof(uint16_t); + uint32_t dwl = swl*5; + if (!sp) { sp = swl; } if (!dp) { dp = dwl; } + if ( ((uintptr_t)src&3)||((uintptr_t)dst&3)||(sp&3)||(dp&3) ) { scale5x_c16(src,dst,sw,sh,sp,dp); return; } + void* __restrict dstsrc; + for (; sh>0; sh--, src=(uint8_t*)src+sp) { + scale5x_n16line(src, dst, swl); + dstsrc = dst; dst = (uint8_t*)dst+dp; + for (uint32_t i=4; i>0; i--, dst=(uint8_t*)dst+dp) memcpy_neon(dst, dstsrc, dwl); + } +} + +static inline void scale5x_n32line(void* src, void* dst, uint32_t swl) { + asm volatile ( + " bic r4, %2, #15 ;" // r4 = swl16 + " add r3, %0, %2 ;" // r3 = lineend offset + " add r4, %0, r4 ;" // r4 = x16bytes offset + " cmp %0, r4 ;" + " beq 2f ;" + "1: vldmia %0!,{q8} ;" // 4 pixels 16 bytes + " vdup.32 q12, d17[1] ;" // 3333 + " vdup.32 q14, d17[0] ;" // 2222 + " vdup.32 q13, d16[1] ;" // 1111 + " vdup.32 q8, d16[0] ;" // 0000 + " vext.32 q11, 
q14,q12,#1 ;" // 2223 + " vext.32 q10, q13,q14,#2 ;" // 1122 + " vext.32 q9, q8,q13,#3 ;" // 0111 + " cmp %0, r4 ;" + " vstmia %1!,{q8-q12} ;" + " bne 1b ;" + "2: cmp %0, r3 ;" + " beq 4f ;" + "3: ldr r4, [%0],#4 ;" // rest + " vdup.32 q8, r4 ;" + " cmp %0, r3 ;" + " vstmia %1!, {q8} ;" + " str r4, [%1],#4 ;" + " bne 3b ;" + "4: " + : "+r"(src), "+r"(dst) + : "r"(swl) + : "r3","r4","q8","q9","q10","q11","q12","q13","q14","memory","cc" + ); +} + +void scale5x_n32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw * sizeof(uint32_t); + uint32_t dwl = swl*5; + if (!sp) { sp = swl; } if (!dp) { dp = dwl; } + if ( ((uintptr_t)src&3)||((uintptr_t)dst&3)||(sp&3)||(dp&3) ) { scale5x_c32(src,dst,sw,sh,sp,dp); return; } + void* __restrict dstsrc; + for (; sh>0; sh--, src=(uint8_t*)src+sp) { + scale5x_n32line(src, dst, swl); + dstsrc = dst; dst = (uint8_t*)dst+dp; + for (uint32_t i=4; i>0; i--, dst=(uint8_t*)dst+dp) memcpy_neon(dst, dstsrc, dwl); + } +} + +static inline void scale6x_n16line(void* src, void* dst, uint32_t swl) { + asm volatile ( + " bic r4, %2, #15 ;" // r4 = swl16 + " add r3, %0, %2 ;" // r3 = lineend offset + " add r4, %0, r4 ;" // r4 = x16bytes offset + " cmp %0, r4 ;" + " beq 2f ;" + "1: vldmia %0!, {q8} ;" // 8 pixels 16 bytes + " vdup.16 d27, d17[3] ;" // 7777 + " vdup.16 d25, d17[2] ;" // 6666 + " vdup.16 d24, d17[1] ;" // 5555 + " vdup.16 d22, d17[0] ;" // 4444 + " vext.16 d26, d25,d27,#2 ;" // 6677 + " vext.16 d23, d22,d24,#2 ;" // 4455 + " vdup.16 d21, d16[3] ;" // 3333 + " vdup.16 d19, d16[2] ;" // 2222 + " vdup.16 d18, d16[1] ;" // 1111 + " vdup.16 d16, d16[0] ;" // 0000 + " vext.16 d20, d19,d21,#2 ;" // 2233 + " vext.16 d17, d16,d18,#2 ;" // 0011 + " cmp %0, r4 ;" + " vstmia %1!, {q8-q13} ;" + " bne 1b ;" + "2: cmp %0, r3 ;" + " beq 4f ;" + "3: ldrh r4, [%0],#2 ;" // rest + " orr r4, r4, lsl #16 ;" + " vdup.32 d16, r4 ;" + " cmp %0, r3 ;" + " vstmia %1!, 
{d16} ;" + " str r4, [%1],#4 ;" + " bne 3b ;" + "4: " + : "+r"(src), "+r"(dst) + : "r"(swl) + : "r3","r4","q8","q9","q10","q11","q12","q13","memory","cc" + ); +} + +void scale6x_n16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw * sizeof(uint16_t); + uint32_t dwl = swl*6; + if (!sp) { sp = swl; } if (!dp) { dp = dwl; } + if ( ((uintptr_t)src&3)||((uintptr_t)dst&3)||(sp&3)||(dp&3) ) { scale6x_c16(src,dst,sw,sh,sp,dp); return; } + void* __restrict dstsrc; + for (; sh>0; sh--, src=(uint8_t*)src+sp) { + scale6x_n16line(src, dst, swl); + dstsrc = dst; dst = (uint8_t*)dst+dp; + for (uint32_t i=5; i>0; i--, dst=(uint8_t*)dst+dp) memcpy_neon(dst, dstsrc, dwl); + } +} + +static inline void scale6x_n32line(void* src, void* dst, uint32_t swl) { + asm volatile ( + " bic r4, %2, #15 ;" // r4 = swl16 + " add r3, %0, %2 ;" // r3 = lineend offset + " add r4, %0, r4 ;" // r4 = x16bytes offset + " cmp %0, r4 ;" + " beq 2f ;" + "1: vldmia %0!,{q8} ;" // 4 pixels 16 bytes + " vdup.32 q13, d17[1] ;" // 3333 + " vdup.32 q11, d17[0] ;" // 2222 + " vdup.32 q10, d16[1] ;" // 1111 + " vdup.32 q8, d16[0] ;" // 0000 + " vext.32 q12, q11,q13,#2 ;" // 2233 + " vext.32 q9, q8,q10,#2 ;" // 0011 + " cmp %0, r4 ;" + " vstmia %1!,{q8-q13} ;" + " bne 1b ;" + "2: cmp %0, r3 ;" + " beq 4f ;" + "3: ldr r4, [%0],#4 ;" // rest + " vdup.32 q8, r4 ;" + " vmov d18, d16 ;" + " cmp %0, r3 ;" + " vstmia %1!, {d16-d18} ;" + " bne 3b ;" + "4: " + : "+r"(src), "+r"(dst) + : "r"(swl) + : "r3","r4","q8","q9","q10","q11","q12","q13","memory","cc" + ); +} + +void scale6x_n32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw * sizeof(uint32_t); + uint32_t dwl = swl*6; + if (!sp) { sp = swl; } if (!dp) { dp = dwl; } + if ( ((uintptr_t)src&3)||((uintptr_t)dst&3)||(sp&3)||(dp&3) ) { scale6x_c32(src,dst,sw,sh,sp,dp); return; } + 
void* __restrict dstsrc; + for (; sh>0; sh--, src=(uint8_t*)src+sp) { + scale6x_n32line(src, dst, swl); + dstsrc = dst; dst = (uint8_t*)dst+dp; + for (uint32_t i=5; i>0; i--, dst=(uint8_t*)dst+dp) memcpy_neon(dst, dstsrc, dwl); + } +} + +// +// C scalers +// + +void scale1x_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw*sizeof(uint16_t); + if (!sp) { sp = swl; } if (!dp) { dp = swl*1; } + if ((swl == sp)&&(sp == dp)) memcpy(dst, src, sp*sh); + else for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp) memcpy(dst, src, swl); +} + +void scale1x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t swl = sw*sizeof(uint32_t); + if (!sp) { sp = swl; } if (!dp) { dp = swl*1; } + if ((swl == sp)&&(sp == dp)) memcpy(dst, src, sp*sh); + else for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp) memcpy(dst, src, swl); +} + +void scale2x_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t x, dx, pix, dpix1, dpix2, swl = sw*sizeof(uint16_t); + if (!sp) { sp = swl; } swl*=2; if (!dp) { dp = swl; } + for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*2) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x<(sw/2); x++, dx+=2) { + pix = s[x]; + dpix1=(pix & 0x0000FFFF)|(pix<<16); + dpix2=(pix & 0xFFFF0000)|(pix>>16); + d[dx] = dpix1; d[dx+1] = dpix2; + } + if (sw&1) { + uint16_t *s16 = (uint16_t*)s; + uint16_t pix16 = s16[x*2]; + d[dx] = pix16|(pix16<<16); + } + memcpy((uint8_t*)dst+dp*1, dst, swl); + } +} + +void scale2x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t x, dx, pix, swl = sw*sizeof(uint32_t); + if (!sp) { sp = swl; } 
swl*=2; if (!dp) { dp = swl; } + for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*2) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*3) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x<(sw/2); x++, dx+=3) { + pix = s[x]; + dpix1=(pix & 0x0000FFFF)|(pix<<16); + dpix2=(pix & 0xFFFF0000)|(pix>>16); + d[dx] = dpix1; d[dx+1] = pix; d[dx+2] = dpix2; + } + if (sw&1) { + uint16_t *s16 = (uint16_t*)s; + uint16_t *d16 = (uint16_t*)d; + uint16_t pix16 = s16[x*2]; + dpix1 = pix16|(pix16<<16); + d[dx] = dpix1; d16[(dx+1)*2] = pix16; + } + memcpy((uint8_t*)dst+dp*1, dst, swl); + memcpy((uint8_t*)dst+dp*2, dst, swl); + } +} + +void scale3x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t x, dx, pix, swl = sw*sizeof(uint32_t); + if (!sp) { sp = swl; } swl*=3; if (!dp) { dp = swl; } + for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*3) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*4) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x<(sw/2); x++, dx+=4) { + pix = s[x]; + dpix1=(pix & 0x0000FFFF)|(pix<<16); + dpix2=(pix & 0xFFFF0000)|(pix>>16); + d[dx] = dpix1; d[dx+1] = dpix1; d[dx+2] = dpix2; d[dx+3] = dpix2; + } + if (sw&1) { + uint16_t *s16 = (uint16_t*)s; + uint16_t pix16 = s16[x*2]; + dpix1 = pix16|(pix16<<16); + d[dx] = dpix1; d[dx+1] = dpix1; + } + memcpy((uint8_t*)dst+dp*1, dst, swl); + memcpy((uint8_t*)dst+dp*2, dst, swl); + memcpy((uint8_t*)dst+dp*3, dst, swl); + } +} + +// faster than 4x_c16 when -Ofast/-O3 and aligned width, however dp must be 4xN +void scale4x_c16b(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t 
sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } if (!sp) { sp = sw*sizeof(uint16_t); } if (!dp) { dp = sw*sizeof(uint16_t)*4; } + uint32_t x, dx, pix, dpix1, dpix2, dp32 = dp / sizeof(uint32_t); + for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*4) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x<(sw/2); x++, dx+=4) { + pix = s[x]; + dpix1=(pix & 0x0000FFFF)|(pix<<16); + dpix2=(pix & 0xFFFF0000)|(pix>>16); + d[dx] = dpix1; d[dx+1] = dpix1; d[dx+2] = dpix2; d[dx+3] = dpix2; + d[dp32+dx] = dpix1; d[dp32+dx+1]= dpix1; d[dp32+dx+2]= dpix2; d[dp32+dx+3]= dpix2; + d[dp32*2+dx] = dpix1; d[dp32*2+dx+1]= dpix1; d[dp32*2+dx+2]= dpix2; d[dp32*2+dx+3]= dpix2; + d[dp32*3+dx] = dpix1; d[dp32*3+dx+1]= dpix1; d[dp32*3+dx+2]= dpix2; d[dp32*3+dx+3]= dpix2; + } + if (sw&1) { + uint16_t *s16 = (uint16_t*)s; + uint16_t pix16 = s16[x*2]; + dpix1 = pix16|(pix16<<16); + d[dx] = dpix1; d[dx+1] = dpix1; + d[dp32+dx] = dpix1; d[dp32+dx+1] = dpix1; + d[dp32*2+dx] = dpix1; d[dp32*2+dx+1] = dpix1; + d[dp32*3+dx] = dpix1; d[dp32*3+dx+1] = dpix1; + } + } +} + +void scale4x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t x, dx, pix, swl = sw*sizeof(uint32_t); + if (!sp) { sp = swl; } swl*=4; if (!dp) { dp = swl; } + for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*4) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*4) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*5) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x<(sw/2); x++, dx+=5) { + pix = s[x]; + dpix1=(pix & 0x0000FFFF)|(pix<<16); + dpix2=(pix & 0xFFFF0000)|(pix>>16); + d[dx] = dpix1; 
d[dx+1] = dpix1; d[dx+2] = pix; d[dx+3] = dpix2; d[dx+4] = dpix2; + } + if (sw&1) { + uint16_t *s16 = (uint16_t*)s; + uint16_t *d16 = (uint16_t*)d; + uint16_t pix16 = s16[x*2]; + dpix1 = pix16|(pix16<<16); + d[dx] = dpix1; d[dx+1] = dpix1; d16[(dx+2)*2] = pix16; + } + memcpy((uint8_t*)dst+dp*1, dst, swl); + memcpy((uint8_t*)dst+dp*2, dst, swl); + memcpy((uint8_t*)dst+dp*3, dst, swl); + memcpy((uint8_t*)dst+dp*4, dst, swl); + } +} + +void scale5x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t x, dx, pix, swl = sw*sizeof(uint32_t); + if (!sp) { sp = swl; } swl*=5; if (!dp) { dp = swl; } + for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*5) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*6) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* __restrict)dst; + for (x=dx=0; x<(sw/2); x++, dx+=6) { + pix = s[x]; + dpix1=(pix & 0x0000FFFF)|(pix<<16); + dpix2=(pix & 0xFFFF0000)|(pix>>16); + d[dx] = dpix1; d[dx+1] = dpix1; d[dx+2] = dpix1; d[dx+3] = dpix2; d[dx+4] = dpix2; d[dx+5] = dpix2; + } + if (sw&1) { + uint16_t *s16 = (uint16_t*)s; + uint16_t pix16 = s16[x*2]; + dpix1 = pix16|(pix16<<16); + d[dx] = dpix1; d[dx+1] = dpix1; d[dx+2] = dpix1; + } + memcpy((uint8_t*)dst+dp*1, dst, swl); + memcpy((uint8_t*)dst+dp*2, dst, swl); + memcpy((uint8_t*)dst+dp*3, dst, swl); + memcpy((uint8_t*)dst+dp*4, dst, swl); + memcpy((uint8_t*)dst+dp*5, dst, swl); + } +} + +void scale6x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp) { + if (!sw||!sh) { return; } + uint32_t x, dx, pix, swl = sw*sizeof(uint32_t); + if (!sp) { sp = swl; } swl*=6; if (!dp) { dp = swl; } + for (; sh>0; sh--, src=(uint8_t*)src+sp, dst=(uint8_t*)dst+dp*6) { + uint32_t *s = (uint32_t* __restrict)src; + uint32_t *d = (uint32_t* 
__restrict)dst; + for (x=dx=0; x + +// +// arm NEON / C integer scalers for miyoomini +// args/ src : src offset address of top left corner +// dst : dst offset address of top left corner +// sw : src width pixels +// sh : src height pixels +// sp : src pitch (stride) bytes if 0, (src width * [2|4]) is used +// dp : dst pitch (stride) bytes if 0, (src width * [2|4] * multiplier) is used +// +// ** NOTE ** +// since 32bit aligned addresses need to be processed for NEON scalers, +// x-offset and stride pixels must be even# in the case of 16bpp, +// if odd#, then handled by the C scaler +// + +typedef void (*scale_neon_t)(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); + + +// NEON scalers +void scale1x_n16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale1x_n32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale2x_n16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale2x_n32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale3x_n16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale3x_n32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale4x_n16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale4x_n32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale5x_n16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale5x_n32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale6x_n16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, 
uint32_t dp); +void scale6x_n32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); + +// C scalers +void scale1x_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale1x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale2x_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale2x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale3x_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale3x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale4x_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale4x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +// c16b/c32b: faster when -Ofast/-O3 and aligned width, however dp must be 4xN +void scale4x_c16b(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale4x_c32b(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale5x_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale5x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale6x_c16(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); +void scale6x_c32(void* __restrict src, void* __restrict dst, uint32_t sw, uint32_t sh, uint32_t sp, uint32_t dp); + +#endif diff --git a/src/keymon/credits.txt b/src/keymon/credits.txt new file mode 100644 index 0000000..e885e5d --- /dev/null +++ b/src/keymon/credits.txt @@ 
-0,0 +1,3 @@ +based on eggs custom keymon for Trimui: + https://www.dropbox.com/sh/5e9xwvp672vt8cr/AABUIdw1vLYp9h0waoCUqHPOa/source?dl=0&subfolder_nav_tracking=1 +modified to use libmsettings \ No newline at end of file diff --git a/src/keymon/keymon.c b/src/keymon/keymon.c new file mode 100644 index 0000000..565ff38 --- /dev/null +++ b/src/keymon/keymon.c @@ -0,0 +1,143 @@ +// miyoomini/keymon.c + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include "defines.h" + +// for ev.value +#define RELEASED 0 +#define PRESSED 1 +#define REPEAT 2 + +#define INPUT_COUNT 2 +static int inputs[INPUT_COUNT]; +static struct input_event ev; +static int jack_fd; +static pthread_t jack_pt; + +// TODO: HDMI? + +#define JACK_STATE_PATH "/sys/class/switch/h2w/state" +#define HDMI_STATE_PATH "/sys/class/switch/hdmi/state" + +static void* watchJack(void *arg) { + uint32_t has_headphones; + uint32_t had_headphones; + + FILE *file = fopen(JACK_STATE_PATH, "r"); + fscanf(file, "%i", &has_headphones); + had_headphones = has_headphones; + SetJack(has_headphones); + + while(1) { + sleep(1); + rewind(file); + fscanf(file, "%i", &has_headphones); + if (had_headphones!=has_headphones) { + had_headphones = has_headphones; + SetJack(has_headphones); + } + } + return 0; +} + +int main (int argc, char *argv[]) { + InitSettings(); + pthread_create(&jack_pt, NULL, &watchJack, NULL); + + char path[32]; + for (int i=0; i REPEAT )) continue; + switch (ev.code) { + case CODE_MENU: + if ( val != REPEAT ) menu_pressed = val; + break; + case CODE_POWER: + if ( val != REPEAT ) power_pressed = val; + break; + case CODE_VOL_DN: + if ( val == REPEAT ) { + // Adjust repeat speed to 1/2 + val = repeat_volume; + repeat_volume ^= PRESSED; + } else { + repeat_volume = 0; + } + if ( val == PRESSED ) { + if (menu_pressed) { + val = GetBrightness(); + if (val>BRIGHTNESS_MIN) SetBrightness(--val); + } + else { + val = GetVolume(); 
+ if (val>VOLUME_MIN) SetVolume(--val); + } + } + break; + case CODE_VOL_UP: + if ( val == REPEAT ) { + // Adjust repeat speed to 1/2 + val = repeat_volume; + repeat_volume ^= PRESSED; + } else { + repeat_volume = 0; + } + if ( val == PRESSED ) { + if (menu_pressed) { + val = GetBrightness(); + if (val /proc/sysrq-trigger; echo u > /proc/sysrq-trigger; echo o > /proc/sysrq-trigger"); + // while (1) pause(); + // } + } + } + usleep(16666); // 60fps + } +} diff --git a/src/keymon/makefile b/src/keymon/makefile new file mode 100755 index 0000000..4d623af --- /dev/null +++ b/src/keymon/makefile @@ -0,0 +1,14 @@ +ifeq (,$(CROSS_COMPILE)) +$(error missing CROSS_COMPILE for this toolchain) +endif + +TARGET = keymon.elf + +CC = $(CROSS_COMPILE)gcc +CFLAGS = -Os -lmsettings -lpthread -lrt -ldl -Wl,--gc-sections -s +CFLAGS += -I. -I../common -DPLATFORM=\"$(UNION_PLATFORM)\" + +all: + $(CC) keymon.c -o $(TARGET) $(CFLAGS) +clean: + rm -rf $(TARGET) \ No newline at end of file diff --git a/src/libmsettings/makefile b/src/libmsettings/makefile new file mode 100644 index 0000000..b1418a2 --- /dev/null +++ b/src/libmsettings/makefile @@ -0,0 +1,32 @@ +ifeq (,$(CROSS_COMPILE)) +$(error missing CROSS_COMPILE for this toolchain) +endif +ifeq (,$(PREFIX)) +$(error missing PREFIX for this toolchain) +endif + +TARGET=msettings + +.PHONY: build +.PHONY: clean + +CC = $(CROSS_COMPILE)gcc + +SYSROOT := $(shell $(CC) --print-sysroot) + +INCLUDEDIR = $(SYSROOT)/usr/include +CFLAGS = -I$(INCLUDEDIR) +LDFLAGS = -ldl -lrt -s + +OPTM=-Ofast + +build: + $(CC) -c -Werror -fpic "$(TARGET).c" -Wl,--no-as-needed $(LDFLAGS) + $(CC) -shared -o "lib$(TARGET).so" "$(TARGET).o" $(LDFLAGS) + cp "$(TARGET).h" "$(PREFIX)/include" + cp "lib$(TARGET).so" "$(PREFIX)/lib" +clean: + rm -f *.o + rm -f "lib$(TARGET).so" + rm -f $(PREFIX)/include/$(TARGET).h + rm -f $(PREFIX)/lib/lib$(TARGET).so \ No newline at end of file diff --git a/src/libmsettings/msettings.c b/src/libmsettings/msettings.c new file mode 100644 
index 0000000..b32b547 --- /dev/null +++ b/src/libmsettings/msettings.c @@ -0,0 +1,142 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "msettings.h" + +/////////////////////////////////////// + +typedef struct Settings { + int version; // future proofing + int brightness; + int headphones; + int speaker; + int unused[3]; // for future use + int jack; // NOTE: doesn't really need to be persisted but still needs to be shared +} Settings; +static Settings DefaultSettings = { + .version = 1, + .brightness = 2, + .headphones = 4, + .speaker = 8, + .jack = 0, +}; +static Settings* settings; + +#define SHM_KEY "/SharedSettings" +// static char SettingsPath[256]; +static char* SettingsPath = "/mnt/sdcard/.userdata/rg35xx/msettings.bin"; +static int shm_fd = -1; +static int is_host = 0; +static int shm_size = sizeof(Settings); + +void InitSettings(void) { + // sprintf(SettingsPath, "%s/msettings.bin", getenv("USERDATA_PATH")); + + shm_fd = shm_open(SHM_KEY, O_RDWR | O_CREAT | O_EXCL, 0644); // see if it exists + if (shm_fd==-1 && errno==EEXIST) { // already exists + puts("Settings client"); + shm_fd = shm_open(SHM_KEY, O_RDWR, 0644); + settings = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); + } + else { // host + puts("Settings host"); + is_host = 1; + // we created it so set initial size and populate + ftruncate(shm_fd, shm_size); + settings = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); + + int fd = open(SettingsPath, O_RDONLY); + if (fd>=0) { + read(fd, settings, shm_size); + // TODO: use settings->version for future proofing + close(fd); + } + else { + // load defaults + memcpy(settings, &DefaultSettings, shm_size); + } + } + printf("brightness: %i\nspeaker: %i \n", settings->brightness, settings->speaker); + + SetVolume(GetVolume()); + SetBrightness(GetBrightness()); +} +void QuitSettings(void) { + munmap(settings, shm_size); + if (is_host) shm_unlink(SHM_KEY); +} 
+static inline void SaveSettings(void) { + int fd = open(SettingsPath, O_CREAT|O_WRONLY, 0644); + if (fd>=0) { + write(fd, settings, shm_size); + close(fd); + sync(); + } +} + +int GetBrightness(void) { // 0-10 + return settings->brightness; +} +void SetBrightness(int value) { + int raw; + switch (value) { + case 0: raw=8; break; + case 1: raw=16; break; + case 2: raw=32; break; + case 3: raw=64; break; + case 4: raw=128; break; + case 5: raw=192; break; + case 6: raw=256; break; + case 7: raw=384; break; + case 8: raw=512; break; + case 9: raw=768; break; + case 10: raw=1024; break; + } + SetRawBrightness(raw); + settings->brightness = value; + SaveSettings(); +} + +int GetVolume(void) { // 0-20 + return settings->jack ? settings->headphones : settings->speaker; +} +void SetVolume(int value) { + if (settings->jack) settings->headphones = value; + else settings->speaker = value; + + int raw = value * 2; + SetRawVolume(raw); + SaveSettings(); +} + +void SetRawBrightness(int val) { // 0 - 1024 + int fd = open("/sys/class/backlight/backlight.2/brightness", O_WRONLY); + if (fd>=0) { + dprintf(fd,"%d",val); + close(fd); + } +} +void SetRawVolume(int val) { // 0 - 40 + int fd = open("/sys/class/volume/value", O_WRONLY); + if (fd>=0) { + dprintf(fd,"%d",val); + close(fd); + } +} + +int GetJack(void) { + // return /sys/class/switch/h2w/state==1` + // access("/dev/dsp1", F_OK)==0 + return settings->jack; +} +void SetJack(int value) { // monitored and set by thread in keymon + settings->jack = value; + SetVolume(GetVolume()); +} \ No newline at end of file diff --git a/src/libmsettings/msettings.h b/src/libmsettings/msettings.h new file mode 100644 index 0000000..e8276ee --- /dev/null +++ b/src/libmsettings/msettings.h @@ -0,0 +1,19 @@ +#ifndef __msettings_h__ +#define __msettings_h__ + +void InitSettings(void); +void QuitSettings(void); + +int GetBrightness(void); +int GetVolume(void); + +void SetRawBrightness(int value); // 0-1024 +void SetRawVolume(int value); // 0-40 + 
+void SetBrightness(int value); // 0-10 +void SetVolume(int value); // 0-20 + +int GetJack(void); +void SetJack(int value); // 0-1 + +#endif // __msettings_h__ diff --git a/src/minarch/main.c b/src/minarch/main.c new file mode 100644 index 0000000..4f6c8d3 --- /dev/null +++ b/src/minarch/main.c @@ -0,0 +1,1277 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "libretro.h" +#include "defines.h" +#include "scaler_neon.h" + +/////////////////////////////// + +enum { + LOG_DEBUG = 0, + LOG_INFO, + LOG_WARN, + LOG_ERROR, +}; +#define LOG_debug(fmt, ...) LOG_note(LOG_DEBUG, fmt, ##__VA_ARGS__) +#define LOG_info(fmt, ...) LOG_note(LOG_INFO, fmt, ##__VA_ARGS__) +#define LOG_warn(fmt, ...) LOG_note(LOG_WARN, fmt, ##__VA_ARGS__) +#define LOG_error(fmt, ...) LOG_note(LOG_ERROR, fmt, ##__VA_ARGS__) +void LOG_note(int level, const char* fmt, ...) { + char buf[1024] = {0}; + va_list args; + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + switch(level) { +#ifdef DEBUG + case LOG_DEBUG: + printf("DEBUG: %s", buf); + break; +#endif + case LOG_INFO: + printf("INFO: %s", buf); + break; + case LOG_WARN: + fprintf(stderr, "WARN: %s", buf); + break; + case LOG_ERROR: + fprintf(stderr, "ERROR: %s", buf); + break; + default: + break; + } + fflush(stdout); +} + +/////////////////////////////// + +static struct GFX_Context { + int fb; + int pitch; + int buffer; + int buffer_size; + int map_size; + void* map; + struct fb_var_screeninfo vinfo; + struct fb_fix_screeninfo finfo; + + SDL_Surface* screen; +} gfx; +SDL_Surface* GFX_init(void) { + SDL_Init(SDL_INIT_VIDEO); + + // we're drawing to the (triple-buffered) framebuffer directly + // but we still need to set video mode to initialize input events + SDL_SetVideoMode(SCREEN_WIDTH, SCREEN_HEIGHT, SCREEN_DEPTH, SDL_SWSURFACE); + SDL_ShowCursor(0); + + // open framebuffer + gfx.fb = 
open("/dev/fb0", O_RDWR); + + // configure framebuffer + ioctl(gfx.fb, FBIOGET_VSCREENINFO, &gfx.vinfo); + gfx.vinfo.bits_per_pixel = SCREEN_DEPTH; + gfx.vinfo.xres = SCREEN_WIDTH; + gfx.vinfo.yres = SCREEN_HEIGHT; + gfx.vinfo.xres_virtual = SCREEN_WIDTH; + gfx.vinfo.yres_virtual = SCREEN_HEIGHT * SCREEN_BUFFER_COUNT; + gfx.vinfo.xoffset = 0; + gfx.vinfo.activate = FB_ACTIVATE_VBL; + ioctl(gfx.fb, FBIOPUT_VSCREENINFO, &gfx.vinfo); + + // get fixed screen info + ioctl(gfx.fb, FBIOGET_FSCREENINFO, &gfx.finfo); + gfx.map_size = gfx.finfo.smem_len; + gfx.map = mmap(0, gfx.map_size, PROT_READ | PROT_WRITE, MAP_SHARED, gfx.fb, 0); + + // buffer tracking + gfx.buffer = 0; + gfx.buffer_size = SCREEN_PITCH * SCREEN_HEIGHT; + + // return screen + gfx.screen = SDL_CreateRGBSurfaceFrom(gfx.map, SCREEN_WIDTH,SCREEN_HEIGHT, SCREEN_DEPTH,SCREEN_PITCH, 0,0,0,0); + return gfx.screen; +} +void GFX_clear(SDL_Surface* screen) { + memset(screen->pixels, 0, gfx.buffer_size); +} +void GFX_clearAll(void) { + memset(gfx.map, 0, gfx.map_size); +} +void GFX_flip(SDL_Surface* screen) { + // TODO: this would be moved to a thread + // I'm not clear on why that would be necessary + // if it's non-blocking and the pan will wait + // until the next vblank... 
+ gfx.vinfo.yoffset = gfx.buffer * SCREEN_HEIGHT; + ioctl(gfx.fb, FBIOPAN_DISPLAY, &gfx.vinfo); + + gfx.buffer += 1; + if (gfx.buffer>=SCREEN_BUFFER_COUNT) gfx.buffer -= SCREEN_BUFFER_COUNT; + screen->pixels = gfx.map + (gfx.buffer * gfx.buffer_size); +} +void GFX_quit(void) { + GFX_clearAll(); + munmap(gfx.map, gfx.map_size); + close(gfx.fb); + SDL_Quit(); +} + +/////////////////////////////// + +// based on picoarch's audio +// implementation, rewritten +// to understand it better + +#define MAX_SAMPLE_RATE 48000 +#define BATCH_SIZE 100 + +typedef struct SND_Frame { + int16_t left; + int16_t right; +} SND_Frame; +typedef int (*SND_Resampler)(const SND_Frame frame); +static struct SND_Context { + double frame_rate; + + int sample_rate_in; + int sample_rate_out; + + int buffer_seconds; // current_audio_buffer_size + SND_Frame* buffer; // buf + size_t frame_count; // buf_len + + int frame_in; // buf_w + int frame_out; // buf_r + int frame_filled; // max_buf_w + + SND_Resampler resample; +} snd; +void SND_audioCallback(void* userdata, uint8_t* stream, int len) { // plat_sound_callback + if (snd.frame_count==0) return; + + int16_t *out = (int16_t *)stream; + len /= (sizeof(int16_t) * 2); + + while (snd.frame_out!=snd.frame_in && len>0) { + *out++ = snd.buffer[snd.frame_out].left; + *out++ = snd.buffer[snd.frame_out].right; + + snd.frame_filled = snd.frame_out; + + snd.frame_out += 1; + len -= 1; + + if (snd.frame_out>=snd.frame_count) snd.frame_out = 0; + } + + while (len>0) { + *out++ = 0; + *out++ = 0; + len -= 1; + } +} +void SND_resizeBuffer(void) { // plat_sound_resize_buffer + snd.frame_count = snd.buffer_seconds * snd.sample_rate_in / snd.frame_rate; + if (snd.frame_count==0) return; + + SDL_LockAudio(); + + int buffer_bytes = snd.frame_count * sizeof(SND_Frame); + snd.buffer = realloc(snd.buffer, buffer_bytes); + + memset(snd.buffer, 0, buffer_bytes); + + snd.frame_in = 0; + snd.frame_out = 0; + snd.frame_filled = snd.frame_count - 1; + + SDL_UnlockAudio(); +} 
+int SND_resampleNone(SND_Frame frame) { // audio_resample_passthrough + snd.buffer[snd.frame_in++] = frame; + if (snd.frame_in >= snd.frame_count) snd.frame_in = 0; + return 1; +} +int SND_resampleNear(SND_Frame frame) { // audio_resample_nearest + static int diff = 0; + int consumed = 0; + + if (diff < snd.sample_rate_out) { + snd.buffer[snd.frame_in++] = frame; + if (snd.frame_in >= snd.frame_count) snd.frame_in = 0; + diff += snd.sample_rate_in; + } + + if (diff >= snd.sample_rate_out) { + consumed++; + diff -= snd.sample_rate_out; + } + + return consumed; +} +void SND_selectResampler(void) { // plat_sound_select_resampler + if (snd.sample_rate_in==snd.sample_rate_out) { + snd.resample = SND_resampleNone; + } + else { + snd.resample = SND_resampleNear; + } +} +size_t SND_batchSamples(const SND_Frame* frames, size_t frame_count) { // plat_sound_write / plat_sound_write_resample + if (snd.frame_count==0) return 0; + + SDL_LockAudio(); + + int consumed = 0; + while (frame_count > 0) { + int tries = 0; + int amount = MIN(BATCH_SIZE, frame_count); + + while (tries < 10 && snd.frame_in==snd.frame_filled) { + tries++; + SDL_UnlockAudio(); + SDL_Delay(1); + SDL_LockAudio(); + } + + while (amount && snd.frame_in != snd.frame_filled) { + consumed = snd.resample(*frames); + frames += consumed; + amount -= consumed; + frame_count -= consumed; + } + } + SDL_UnlockAudio(); + + return consumed; +} + +void SND_init(double sample_rate, double frame_rate) { // plat_sound_init + SDL_InitSubSystem(SDL_INIT_AUDIO); + + snd.frame_rate = frame_rate; + + SDL_AudioSpec spec_in; + SDL_AudioSpec spec_out; + + spec_in.freq = MIN(sample_rate, MAX_SAMPLE_RATE); // TODO: always MAX_SAMPLE_RATE on Miyoo Mini? use #ifdef PLATFORM_MIYOOMINI? 
+ spec_in.format = AUDIO_S16; + spec_in.channels = 2; + spec_in.samples = 512; + spec_in.callback = SND_audioCallback; + + SDL_OpenAudio(&spec_in, &spec_out); + + snd.buffer_seconds = 5; + snd.sample_rate_in = sample_rate; + snd.sample_rate_out = spec_out.freq; + + SND_selectResampler(); + SND_resizeBuffer(); + + SDL_PauseAudio(0); +} +void SND_quit(void) { // plat_sound_finish + SDL_PauseAudio(1); + SDL_CloseAudio(); + + if (snd.buffer) { + free(snd.buffer); + snd.buffer = NULL; + } +} + +/////////////////////////////// + +enum { + BTN_NONE = 0, + BTN_UP = 1 << 0, + BTN_DOWN = 1 << 1, + BTN_LEFT = 1 << 2, + BTN_RIGHT = 1 << 3, + BTN_A = 1 << 4, + BTN_B = 1 << 5, + BTN_X = 1 << 6, + BTN_Y = 1 << 7, + BTN_START = 1 << 8, + BTN_SELECT = 1 << 9, + BTN_L1 = 1 << 10, + BTN_R1 = 1 << 11, + BTN_L2 = 1 << 12, + BTN_R2 = 1 << 13, + BTN_MENU = 1 << 14, + BTN_VOL_UP = 1 << 15, + BTN_VOL_DN = 1 << 16, + BTN_POWER = 1 << 17, +}; +static struct PAD_Context { + int is_pressed; + int just_pressed; + int just_released; +} pad; +void PAD_poll(void) { + // reset transient state + pad.just_pressed = 0; + pad.just_released = 0; + + // the actual poll + SDL_Event event; + while (SDL_PollEvent(&event)) { + int btn = BTN_NONE; + if (event.type==SDL_KEYDOWN || event.type==SDL_KEYUP) { + uint8_t code = event.key.keysym.scancode; + if (code==CODE_UP) btn = BTN_UP; + else if (code==CODE_DOWN) btn = BTN_DOWN; + else if (code==CODE_LEFT) btn = BTN_LEFT; + else if (code==CODE_RIGHT) btn = BTN_RIGHT; + else if (code==CODE_A) btn = BTN_A; + else if (code==CODE_B) btn = BTN_B; + else if (code==CODE_X) btn = BTN_X; + else if (code==CODE_Y) btn = BTN_Y; + else if (code==CODE_START) btn = BTN_START; + else if (code==CODE_SELECT) btn = BTN_SELECT; + else if (code==CODE_MENU) btn = BTN_MENU; + else if (code==CODE_L1) btn = BTN_L1; + else if (code==CODE_L2) btn = BTN_L2; + else if (code==CODE_R1) btn = BTN_R1; + else if (code==CODE_R2) btn = BTN_R2; + else if (code==CODE_VOL_UP) btn = BTN_VOL_UP; + else 
if (code==CODE_VOL_DN) btn = BTN_VOL_DN; + else if (code==CODE_POWER) btn = BTN_POWER; + } + + if (btn==BTN_NONE) continue; + + if (event.type==SDL_KEYUP) { + pad.is_pressed &= ~btn; // unset + pad.just_released |= btn; // set + } + else if ((pad.is_pressed & btn)==BTN_NONE) { + pad.just_pressed |= btn; // set + pad.is_pressed |= btn; // set + } + } +} + +// TODO: switch to macros? not if I want to move it to a separate file +int PAD_anyPressed(void) { return pad.is_pressed!=BTN_NONE; } +int PAD_justPressed(int btn) { return pad.just_pressed & btn; } +int PAD_isPressed(int btn) { return pad.is_pressed & btn; } +int PAD_justReleased(int btn) { return pad.just_released & btn; } + +// #define PAD_anyPressed() (pad.is_pressed!=BTN_NONE) +// #define PAD_justPressed(btn) (pad.just_pressed & (btn)) +// #define PAD_isPressed(btn) (pad.is_pressed & (btn)) +// #define PAD_justReleased(btn) (pad.just_released & (btn)) + +/////////////////////////////////////// + +static struct Game { + char path[MAX_PATH]; + char name[MAX_PATH]; // TODO: rename to basename? + void* data; + size_t size; +} game; +static void Game_open(char* path) { + strcpy((char*)game.path, path); + strcpy((char*)game.name, strrchr(path, '/')+1); + + FILE *file = fopen(game.path, "r"); + if (file==NULL) { + LOG_error("Error opening game: %s\n\t%s\n", game.path, strerror(errno)); + return; + } + + fseek(file, 0, SEEK_END); + game.size = ftell(file); + + rewind(file); + game.data = malloc(game.size); + fread(game.data, sizeof(uint8_t), game.size, file); + + fclose(file); +} +static void Game_close(void) { + free(game.data); +} + +/////////////////////////////// + +static struct Core { + int initialized; + + const char tag[8]; // eg. GBC + const char name[128]; // eg. gambatte + const char version[128]; // eg. 
Gambatte (v0.5.0-netlink 7e02df6) + + double fps; + double sample_rate; + + void* handle; + void (*init)(void); + void (*deinit)(void); + + void (*get_system_info)(struct retro_system_info *info); + void (*get_system_av_info)(struct retro_system_av_info *info); + void (*set_controller_port_device)(unsigned port, unsigned device); + + void (*reset)(void); + void (*run)(void); + size_t (*serialize_size)(void); + bool (*serialize)(void *data, size_t size); + bool (*unserialize)(const void *data, size_t size); + bool (*load_game)(const struct retro_game_info *game); + bool (*load_game_special)(unsigned game_type, const struct retro_game_info *info, size_t num_info); + void (*unload_game)(void); + unsigned (*get_region)(void); + void *(*get_memory_data)(unsigned id); + size_t (*get_memory_size)(unsigned id); + retro_audio_buffer_status_callback_t audio_buffer_status; +} core; + +/////////////////////////////////////// +// saves and states + +static void SRAM_getPath(char* filename) { + sprintf(filename, SDCARD_PATH "/Saves/%s/%s.sav", core.tag, game.name); +} + +static void SRAM_read(void) { + size_t sram_size = core.get_memory_size(RETRO_MEMORY_SAVE_RAM); + if (!sram_size) return; + + char filename[MAX_PATH]; + SRAM_getPath(filename); + + FILE *sram_file = fopen(filename, "r"); + if (!sram_file) return; + + void* sram = core.get_memory_data(RETRO_MEMORY_SAVE_RAM); + + if (!sram || !fread(sram, 1, sram_size, sram_file)) { + LOG_error("Error reading SRAM data\n"); + } + + fclose(sram_file); +} +static void SRAM_write(void) { + size_t sram_size = core.get_memory_size(RETRO_MEMORY_SAVE_RAM); + if (!sram_size) return; + + char filename[MAX_PATH]; + SRAM_getPath(filename); + + FILE *sram_file = fopen(filename, "w"); + if (!sram_file) { + LOG_error("Error opening SRAM file: %s\n", strerror(errno)); + return; + } + + void *sram = core.get_memory_data(RETRO_MEMORY_SAVE_RAM); + + if (!sram || sram_size != fwrite(sram, 1, sram_size, sram_file)) { + LOG_error("Error writing SRAM 
data to file\n"); + } + + fclose(sram_file); + + sync(); +} + +static int state_slot = 0; +static void State_getPath(char* filename) { + sprintf(filename, SDCARD_PATH "/.userdata/" PLATFORM "/%s-%s/%s.st%i", core.tag, core.name, game.name, state_slot); +} + +static void State_read(void) { // from picoarch + size_t state_size = core.serialize_size(); + if (!state_size) return; + + void *state = calloc(1, state_size); + if (!state) { + LOG_error("Couldn't allocate memory for state\n"); + goto error; + } + + char filename[MAX_PATH]; + State_getPath(filename); + + FILE *state_file = fopen(filename, "r"); + if (!state_file) { + if (state_slot!=8) { // st8 is a default state in MiniUI and may not exist, that's okay + LOG_error("Error opening state file: %s (%s)\n", filename, strerror(errno)); + } + goto error; + } + + if (state_size != fread(state, 1, state_size, state_file)) { + LOG_error("Error reading state data from file: %s (%s)\n", filename, strerror(errno)); + goto error; + } + + if (!core.unserialize(state, state_size)) { + LOG_error("Error restoring save state: %s (%s)\n", filename, strerror(errno)); + goto error; + } + +error: + if (state) free(state); + if (state_file) fclose(state_file); +} +static void State_write(void) { // from picoarch + size_t state_size = core.serialize_size(); + if (!state_size) return; + + void *state = calloc(1, state_size); + if (!state) { + LOG_error("Couldn't allocate memory for state\n"); + goto error; + } + + char filename[MAX_PATH]; + State_getPath(filename); + + FILE *state_file = fopen(filename, "w"); + if (!state_file) { + LOG_error("Error opening state file: %s (%s)\n", filename, strerror(errno)); + goto error; + } + + if (!core.serialize(state, state_size)) { + LOG_error("Error creating save state: %s (%s)\n", filename, strerror(errno)); + goto error; + } + + if (state_size != fwrite(state, 1, state_size, state_file)) { + LOG_error("Error writing state data to file: %s (%s)\n", filename, strerror(errno)); + goto error; + } 
+ +error: + if (state) free(state); + if (state_file) fclose(state_file); + + sync(); +} + +/////////////////////////////// + +// callbacks +static struct retro_disk_control_ext_callback disk_control_ext; + +static char sys_dir[MAX_PATH]; // TODO: move this somewhere else, maybe core.userdata? + +// TODO: tmp, naive options +static struct { + char key[128]; + char value[128]; +} tmp_options[128]; +static bool environment_callback(unsigned cmd, void *data) { // copied from picoarch initially + // printf("environment_callback: %i\n", cmd); fflush(stdout); + + switch(cmd) { + case RETRO_ENVIRONMENT_GET_OVERSCAN: { /* 2 */ + bool *out = (bool *)data; + if (out) + *out = true; + break; + } + case RETRO_ENVIRONMENT_GET_CAN_DUPE: { /* 3 */ + bool *out = (bool *)data; + if (out) + *out = true; + break; + } + case RETRO_ENVIRONMENT_SET_MESSAGE: { /* 6 */ + const struct retro_message *message = (const struct retro_message*)data; + if (message) LOG_info("%s\n", message->msg); + break; + } + // TODO: RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL 8 + case RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY: { /* 9 */ + puts("RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY"); + + const char **out = (const char **)data; + if (out) + // TODO: set this once somewhere else + // TODO: core.tag isn't available at this point + // TODO: it only becomes available after we open the game... + sprintf(sys_dir, SDCARD_PATH "/.userdata/%s/%s-%s", PLATFORM, core.tag, core.name); + *out = sys_dir; + break; + } + case RETRO_ENVIRONMENT_SET_PIXEL_FORMAT: { /* 10 */ + const enum retro_pixel_format *format = (enum retro_pixel_format *)data; + + if (*format != RETRO_PIXEL_FORMAT_RGB565) { // TODO: pull from platform.h? 
+ /* 565 is only supported format */ + return false; + } + break; + } + case RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS: { /* 11 */ + const struct retro_input_descriptor *vars = (const struct retro_input_descriptor *)data; + if (vars) { + // TODO: create an array of char* description indexed by id + for (int i=0; vars[i].description; i++) { + // vars[i].id == RETRO_DEVICE_ID_JOYPAD_*, vars[i].description = name + printf("%i %s\n", vars[i].id, vars[i].description); + } + return false; + } + } break; + case RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE: { /* 13 */ + const struct retro_disk_control_callback *var = + (const struct retro_disk_control_callback *)data; + + if (var) { + memset(&disk_control_ext, 0, sizeof(struct retro_disk_control_ext_callback)); + memcpy(&disk_control_ext, var, sizeof(struct retro_disk_control_callback)); + } + break; + } + // TODO: this is called whether using variables or options + case RETRO_ENVIRONMENT_GET_VARIABLE: { /* 15 */ + struct retro_variable *var = (struct retro_variable *)data; + if (var && var->key) { + printf("get key: %s\n", var->key); + for (int i=0; i<128; i++) { + if (!strcmp(tmp_options[i].key, var->key)) { + var->value = tmp_options[i].value; + break; + } + } + // var->value = options_get_value(var->key); + } + break; + } + // TODO: I think this is where the core reports its variables (the precursor to options) + // TODO: this is called if RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION sets out to 0 + case RETRO_ENVIRONMENT_SET_VARIABLES: { /* 16 */ + const struct retro_variable *vars = (const struct retro_variable *)data; + // options_free(); + if (vars) { + // options_init_variables(vars); + // load_config(); + + for (int i=0; vars[i].key; i++) { + // value appears to be NAME; DEFAULT|VALUE|VALUE|ETC + printf("set var key: %s to value: %s\n", vars[i].key, vars[i].value); + } + } + break; + } + case RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE: { /* 17 */ + bool *out = (bool *)data; + if (out) + *out = false; // options_changed(); 
+ break; + } + // case RETRO_ENVIRONMENT_GET_RUMBLE_INTERFACE: { /* 23 */ + // struct retro_rumble_interface *iface = + // (struct retro_rumble_interface*)data; + // + // PA_INFO("Setup rumble interface.\n"); + // iface->set_rumble_state = pa_set_rumble_state; + // break; + // } + case RETRO_ENVIRONMENT_GET_LOG_INTERFACE: { /* 27 */ + struct retro_log_callback *log_cb = (struct retro_log_callback *)data; + if (log_cb) + log_cb->log = (void (*)(enum retro_log_level, const char*, ...))LOG_note; // same difference + break; + } + case RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY: { /* 31 */ + const char **out = (const char **)data; + if (out) + *out = NULL; // save_dir; + break; + } + // RETRO_ENVIRONMENT_GET_LANGUAGE 39 + case RETRO_ENVIRONMENT_GET_INPUT_BITMASKS: { /* 52 */ + bool *out = (bool *)data; + if (out) + *out = true; + break; + } + case RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION: { /* 52 */ + unsigned *out = (unsigned *)data; + if (out) + *out = 1; + break; + } + // TODO: options and variables are separate concepts use for the same thing...I think. 
+ // TODO: not used by gambatte + case RETRO_ENVIRONMENT_SET_CORE_OPTIONS: { /* 53 */ + puts("RETRO_ENVIRONMENT_SET_CORE_OPTIONS"); + // options_free(); + if (data) { + // options_init(*(const struct retro_core_option_definition **)data); + // load_config(); + + const struct retro_core_option_definition *vars = *(const struct retro_core_option_definition **)data; + for (int i=0; vars[i].key; i++) { + const struct retro_core_option_definition *var = &vars[i]; + // printf("set key: %s to value: %s (%s)\n", var->key, var->default_value, var->desc); + printf("set option key: %s to value: %s\n", var->key, var->default_value); + } + } + break; + } + + // TODO: used by gambatte, fceumm (probably others) + case RETRO_ENVIRONMENT_SET_CORE_OPTIONS_INTL: { /* 54 */ + puts("RETRO_ENVIRONMENT_SET_CORE_OPTIONS_INTL"); + + const struct retro_core_options_intl *options = (const struct retro_core_options_intl *)data; + + if (options && options->us) { + // options_free(); + // options_init(options->us); + // load_config(); + + const struct retro_core_option_definition *vars = options->us; + for (int i=0; vars[i].key; i++) { + const struct retro_core_option_definition *var = &vars[i]; + // printf("set key: %s to value: %s (%s)\n", var->key, var->default_value, var->desc); + printf("set core (intl) key: %s to value: %s\n", var->key, var->default_value); + strcpy(tmp_options[i].key, var->key); + strcpy(tmp_options[i].value, var->default_value); + } + } + break; + } + // TODO: not used by gambatte + case RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY: { /* 55 */ + puts("RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY"); + + const struct retro_core_option_display *display = + (const struct retro_core_option_display *)data; + + if (display) + printf("visible: %i (%s)\n", display->visible, display->key); + // options_set_visible(display->key, display->visible); + break; + } + case RETRO_ENVIRONMENT_GET_DISK_CONTROL_INTERFACE_VERSION: { /* 57 */ + unsigned *out = (unsigned *)data; + if (out) + *out 
= 1; + break; + } + case RETRO_ENVIRONMENT_SET_DISK_CONTROL_EXT_INTERFACE: { /* 58 */ + const struct retro_disk_control_ext_callback *var = + (const struct retro_disk_control_ext_callback *)data; + + if (var) { + memcpy(&disk_control_ext, var, sizeof(struct retro_disk_control_ext_callback)); + } + break; + } + // TODO: RETRO_ENVIRONMENT_GET_MESSAGE_INTERFACE_VERSION 59 + case RETRO_ENVIRONMENT_SET_AUDIO_BUFFER_STATUS_CALLBACK: { /* 62 */ + const struct retro_audio_buffer_status_callback *cb = + (const struct retro_audio_buffer_status_callback *)data; + if (cb) { + core.audio_buffer_status = cb->callback; + } else { + core.audio_buffer_status = NULL; + } + break; + } + // TODO: not used by gambatte + case RETRO_ENVIRONMENT_SET_MINIMUM_AUDIO_LATENCY: { /* 63 */ + puts("RETRO_ENVIRONMENT_SET_MINIMUM_AUDIO_LATENCY"); + + const unsigned *latency_ms = (const unsigned *)data; + if (latency_ms) { + unsigned frames = *latency_ms * core.fps / 1000; + if (frames < 30) + // audio_buffer_size_override = frames; + printf("audio_buffer_size_override = %i\n", frames); + // else + // PA_WARN("Audio buffer change out of range (%d), ignored\n", frames); + } + break; + } + + // TODO: RETRO_ENVIRONMENT_SET_FASTFORWARDING_OVERRIDE 64 + // TODO: RETRO_ENVIRONMENT_SET_CORE_OPTIONS_UPDATE_DISPLAY_CALLBACK 69 + // TODO: UNKNOWN 70 + // TODO: UNKNOWN 65572 + // TODO: UNKNOWN 65578 + // TODO: UNKNOWN 65581 + // TODO: UNKNOWN 65587 + default: + LOG_debug("Unsupported environment cmd: %u\n", cmd); + return false; + } + + return true; +} + +/////////////////////////////// + +// from gambatte-dms +//from RGB565 +#define cR(A) (((A) & 0xf800) >> 11) +#define cG(A) (((A) & 0x7e0) >> 5) +#define cB(A) ((A) & 0x1f) +//to RGB565 +#define Weight2_3(A, B) (((((cR(A) << 1) + (cR(B) * 3)) / 5) & 0x1f) << 11 | ((((cG(A) << 1) + (cG(B) * 3)) / 5) & 0x3f) << 5 | ((((cB(A) << 1) + (cB(B) * 3)) / 5) & 0x1f)) +#define Weight3_2(A, B) (((((cR(B) << 1) + (cR(A) * 3)) / 5) & 0x1f) << 11 | ((((cG(B) << 1) + (cG(A) 
* 3)) / 5) & 0x3f) << 5 | ((((cB(B) << 1) + (cB(A) * 3)) / 5) & 0x1f)) + +// TODO: flesh out +static void scale1x(int w, int h, int pitch, const void *src, void *dst) { + // pitch of src image not src buffer! + // eg. gb has a 160 pixel wide image but + // gambatte uses a 256 pixel wide buffer + // (only matters when using memcpy) + int src_pitch = w * SCREEN_BPP; + int src_stride = pitch / SCREEN_BPP; + int dst_stride = SCREEN_PITCH / SCREEN_BPP; + int cpy_pitch = MIN(src_pitch, SCREEN_PITCH); + + uint16_t* restrict src_row = (uint16_t*)src; + uint16_t* restrict dst_row = (uint16_t*)dst; + for (int y=0; ypixels); + GFX_flip(gfx.screen); +} + +static void audio_sample_callback(int16_t left, int16_t right) { + SND_batchSamples(&(const SND_Frame){left,right}, 1); +} +static size_t audio_sample_batch_callback(const int16_t *data, size_t frames) { + return SND_batchSamples((const SND_Frame*)data, frames); +}; + +static uint32_t buttons = 0; +static void input_poll_callback(void) { + PAD_poll(); + + // TODO: support remapping + + buttons = 0; + if (PAD_isPressed(BTN_UP)) buttons |= 1 << RETRO_DEVICE_ID_JOYPAD_UP; + if (PAD_isPressed(BTN_DOWN)) buttons |= 1 << RETRO_DEVICE_ID_JOYPAD_DOWN; + if (PAD_isPressed(BTN_LEFT)) buttons |= 1 << RETRO_DEVICE_ID_JOYPAD_LEFT; + if (PAD_isPressed(BTN_RIGHT)) buttons |= 1 << RETRO_DEVICE_ID_JOYPAD_RIGHT; + if (PAD_isPressed(BTN_A)) buttons |= 1 << RETRO_DEVICE_ID_JOYPAD_A; + if (PAD_isPressed(BTN_B)) buttons |= 1 << RETRO_DEVICE_ID_JOYPAD_B; + if (PAD_isPressed(BTN_X)) buttons |= 1 << RETRO_DEVICE_ID_JOYPAD_X; + if (PAD_isPressed(BTN_Y)) buttons |= 1 << RETRO_DEVICE_ID_JOYPAD_Y; + if (PAD_isPressed(BTN_START)) buttons |= 1 << RETRO_DEVICE_ID_JOYPAD_START; + if (PAD_isPressed(BTN_SELECT)) buttons |= 1 << RETRO_DEVICE_ID_JOYPAD_SELECT; + if (PAD_isPressed(BTN_L1)) buttons |= 1 << RETRO_DEVICE_ID_JOYPAD_L; + if (PAD_isPressed(BTN_L2)) buttons |= 1 << RETRO_DEVICE_ID_JOYPAD_L2; + if (PAD_isPressed(BTN_R1)) buttons |= 1 << 
RETRO_DEVICE_ID_JOYPAD_R; + if (PAD_isPressed(BTN_R2)) buttons |= 1 << RETRO_DEVICE_ID_JOYPAD_R2; +} +static int16_t input_state_callback(unsigned port, unsigned device, unsigned index, unsigned id) { // copied from picoarch + // id == RETRO_DEVICE_ID_JOYPAD_MASK or RETRO_DEVICE_ID_JOYPAD_* + if (port == 0 && device == RETRO_DEVICE_JOYPAD && index == 0) { + if (id == RETRO_DEVICE_ID_JOYPAD_MASK) return buttons; + return (buttons >> id) & 1; + } + return 0; +} + +/////////////////////////////////////// + +void Core_getName(char* in_name, char* out_name) { + strcpy(out_name, basename(in_name)); + char* tmp = strrchr(out_name, '_'); + tmp[0] = '\0'; +} +void Core_open(const char* core_path, const char* tag_name) { + LOG_info("inside Core_open\n"); + core.handle = dlopen(core_path, RTLD_LAZY); + LOG_info("after dlopen\n"); + + if (!core.handle) LOG_error("%s\n", dlerror()); + + core.init = dlsym(core.handle, "retro_init"); + core.deinit = dlsym(core.handle, "retro_deinit"); + core.get_system_info = dlsym(core.handle, "retro_get_system_info"); + core.get_system_av_info = dlsym(core.handle, "retro_get_system_av_info"); + core.set_controller_port_device = dlsym(core.handle, "retro_set_controller_port_device"); + core.reset = dlsym(core.handle, "retro_reset"); + core.run = dlsym(core.handle, "retro_run"); + core.serialize_size = dlsym(core.handle, "retro_serialize_size"); + core.serialize = dlsym(core.handle, "retro_serialize"); + core.unserialize = dlsym(core.handle, "retro_unserialize"); + core.load_game = dlsym(core.handle, "retro_load_game"); + core.load_game_special = dlsym(core.handle, "retro_load_game_special"); + core.unload_game = dlsym(core.handle, "retro_unload_game"); + core.get_region = dlsym(core.handle, "retro_get_region"); + core.get_memory_data = dlsym(core.handle, "retro_get_memory_data"); + core.get_memory_size = dlsym(core.handle, "retro_get_memory_size"); + + void (*set_environment_callback)(retro_environment_t); + void 
(*set_video_refresh_callback)(retro_video_refresh_t); + void (*set_audio_sample_callback)(retro_audio_sample_t); + void (*set_audio_sample_batch_callback)(retro_audio_sample_batch_t); + void (*set_input_poll_callback)(retro_input_poll_t); + void (*set_input_state_callback)(retro_input_state_t); + + set_environment_callback = dlsym(core.handle, "retro_set_environment"); + set_video_refresh_callback = dlsym(core.handle, "retro_set_video_refresh"); + set_audio_sample_callback = dlsym(core.handle, "retro_set_audio_sample"); + set_audio_sample_batch_callback = dlsym(core.handle, "retro_set_audio_sample_batch"); + set_input_poll_callback = dlsym(core.handle, "retro_set_input_poll"); + set_input_state_callback = dlsym(core.handle, "retro_set_input_state"); + + struct retro_system_info info = {}; + core.get_system_info(&info); + + Core_getName((char*)core_path, (char*)core.name); + sprintf((char*)core.version, "%s (%s)", info.library_name, info.library_version); + strcpy((char*)core.tag, tag_name); + + set_environment_callback(environment_callback); + set_video_refresh_callback(video_refresh_callback); + set_audio_sample_callback(audio_sample_callback); + set_audio_sample_batch_callback(audio_sample_batch_callback); + set_input_poll_callback(input_poll_callback); + set_input_state_callback(input_state_callback); +} +void Core_init(void) { + core.init(); + core.initialized = 1; +} +void Core_load(void) { + LOG_info("inside Core_load\n"); + + struct retro_game_info game_info; + game_info.path = game.path; + game_info.data = game.data; + game_info.size = game.size; + + core.load_game(&game_info); + LOG_info("after core.load_game\n"); + + SRAM_read(); + LOG_info("after SRAM_read\n"); + + // NOTE: must be called after core.load_game! 
+ struct retro_system_av_info av_info = {}; + core.get_system_av_info(&av_info); + LOG_info("after core.get_system_av_info\n"); + + double a = av_info.geometry.aspect_ratio; + int w = av_info.geometry.base_width; + int h = av_info.geometry.base_height; + // char r[8]; + // getRatio(a, r); + // LOG_info("after getRatio\n"); + + core.fps = av_info.timing.fps; + core.sample_rate = av_info.timing.sample_rate; + + printf("%s\n%s\n", core.tag, core.version); + // printf("%dx%d (%s)\n", w,h,r); + printf("%f\n%f\n", core.fps, core.sample_rate); + fflush(stdout); +} +void Core_unload(void) { + SND_quit(); +} +void Core_quit(void) { + if (core.initialized) { + SRAM_write(); + core.unload_game(); + core.deinit(); + core.initialized = 0; + } +} +void Core_close(void) { + if (core.handle) dlclose(core.handle); +} + +int main(int argc , char* argv[]) { + // system("touch /tmp/wait"); + + // char* core_path = "/mnt/sdcard/.system/rg35xx/cores/gambatte_libretro.so"; + // char* rom_path = "/mnt/sdcard/Roms/Game Boy Color (GBC)/Legend of Zelda, The - Link's Awakening DX (USA, Europe) (Rev 2) (SGB Enhanced) (GB Compatible).gbc"; + // char* rom_path = "/mnt/sdcard/Roms/Game Boy Color (GBC)/Dragon Warrior I & II (USA) (SGB Enhanced).gbc"; + // char* tag_name = "GBC"; + // char* rom_path = "/mnt/sdcard/Roms/Game Boy (GB)/Super Mario Land (World) (Rev A).gb"; + // char* rom_path = "/mnt/sdcard/Roms/Game Boy (GB)/Dr. 
Mario (World).gb"; + // char* tag_name = "GB"; + + // char* core_path = "/mnt/sdcard/.system/rg35xx/cores/gpsp_libretro.so"; + // char* rom_path = "/mnt/sdcard/Roms/Game Boy Advance (GBA)/Metroid Zero Mission.gba"; + // char* tag_name = "GBA"; + + // char* core_path = "/mnt/sdcard/.system/rg35xx/cores/fceumm_libretro.so"; + // char* rom_path = "/mnt/sdcard/Roms/Nintendo (FC)/Castlevania 3 - Dracula's Curse (U).nes"; + // char* rom_path = "/mnt/sdcard/Roms/Nintendo (FC)/Mega Man 2 (U).nes"; + // char* tag_name = "FC"; + + // char* core_path = "/mnt/sdcard/.system/rg35xx/cores/picodrive_libretro.so"; + // char* rom_path = "/mnt/sdcard/Roms/Genesis (MD)/Sonic The Hedgehog (USA, Europe).md"; + // char* tag_name = "MD"; + + char* core_path = "/mnt/sdcard/.system/rg35xx/cores/snes9x2005_plus_libretro.so"; + // char* rom_path = "/mnt/sdcard/Roms/Super Nintendo (SFC)/Super Mario World (USA).sfc"; + // char* rom_path = "/mnt/sdcard/Roms/Super Nintendo (SFC)/Super Mario World 2 - Yoshi's Island (USA, Asia) (Rev 1).sfc"; + char* rom_path = "/mnt/sdcard/Roms/Super Nintendo (SFC)/Final Fantasy III (USA) (Rev 1).sfc"; + char* tag_name = "SFC"; + + // char* core_path = "/mnt/sdcard/.system/rg35xx/cores/pcsx_rearmed_libretro.so"; + // char* rom_path = "/mnt/sdcard/Roms/PlayStation (PS)/Castlevania - Symphony of the Night (USA)/Castlevania - Symphony of the Night (USA).cue"; + // char* rom_path = "/mnt/sdcard/Roms/PlayStation (PS)/Final Fantasy VII (USA)/Final Fantasy VII (USA).m3u"; + // char* tag_name = "PS"; + + // char* core_path = "/mnt/sdcard/.system/rg35xx/cores/pokemini_libretro.so"; + // char* rom_path = "/mnt/sdcard/Roms/Pokémon mini (PKM)/Pokemon Tetris (Europe) (En,Ja,Fr).min"; + // char* tag_name = "PKM"; + + // char core_path[MAX_PATH]; strcpy(core_path, argv[1]); + // char rom_path[MAX_PATH]; strcpy(rom_path, argv[2]); + // char tag_name[MAX_PATH]; strcpy(tag_name, argv[3]); + + LOG_info("core_path: %s\n", core_path); + LOG_info("rom_path: %s\n", rom_path); + 
LOG_info("tag_name: %s\n", tag_name); + + SDL_Surface* screen = GFX_init(); + Core_open(core_path, tag_name); LOG_info("after Core_open\n"); + Core_init(); LOG_info("after Core_init\n"); + Game_open(rom_path); LOG_info("after Game_open\n"); + Core_load(); LOG_info("after Core_load\n"); + SND_init(core.sample_rate, core.fps); LOG_info("after SND_init\n"); + State_read(); LOG_info("after State_read\n"); + + while (1) { + if (PAD_justPressed(BTN_POWER)) { + system("rm /tmp/minui_exec"); + break; + } + + // still not working + // if (PAD_justPressed(BTN_L1)) State_read(); + // else if (PAD_justPressed(BTN_R1)) State_write(); + core.run(); + } + + Game_close(); + Core_unload(); + + Core_quit(); + Core_close(); LOG_info("after Core_close\n"); + + SDL_FreeSurface(screen); + GFX_quit(); + + return EXIT_SUCCESS; +} diff --git a/src/minarch/makefile b/src/minarch/makefile new file mode 100644 index 0000000..61ea2fa --- /dev/null +++ b/src/minarch/makefile @@ -0,0 +1,15 @@ +ifeq (,$(CROSS_COMPILE)) +$(error missing CROSS_COMPILE for this toolchain) +endif + +TARGET = minarch.elf + +CC = $(CROSS_COMPILE)gcc +CFLAGS = -marm -mtune=cortex-a9 -mfpu=neon-vfpv4 -mfloat-abi=hard -march=armv7-a -fomit-frame-pointer +CFLAGS += -I. -I../common -I./libretro-common/include -DPLATFORM=\"$(UNION_PLATFORM)\" -Ofast +LDFLAGS = -ldl -lSDL -lSDL_image -lSDL_ttf -lmsettings + +all: + $(CC) main.c ../common/scaler_neon.c -o $(TARGET) $(CFLAGS) $(LDFLAGS) +clean: + rm -f $(TARGET) \ No newline at end of file