From a0ab070ea0222c9bf791a254fe4bf0d787dc1546 Mon Sep 17 00:00:00 2001 From: Zachary Canann Date: Mon, 18 May 2026 10:28:42 -0700 Subject: [PATCH 1/3] Restore Bink SDK source and matching tools --- config/GQPE78/splits.txt | 15 +- config/GQPE78/symbols.txt | 264 +- configure.py | 10 +- src/SB/Core/gc/iFMV.cpp | 52 +- src/SB/Core/gc/ngcrad3d.c | 193 +- src/bink/include/bink.h | 351 +- src/bink/include/binkngc.h | 23 +- src/bink/include/binkread.h | 6 - src/bink/include/binktextures.h | 40 + src/bink/include/popmal.h | 10 +- src/bink/include/rad3d.h | 99 +- src/bink/include/radbase.h | 368 +- src/bink/include/radcb.h | 64 +- src/bink/shared/time/radcb.c | 346 +- src/bink/src/sdk/binkbits.h | 116 + src/bink/src/sdk/bitplane.c | 1736 ++++++++ src/bink/src/sdk/bitplane.h | 27 + src/bink/src/sdk/dct.c | 1335 ++++++ src/bink/src/sdk/dct.h | 16 + src/bink/src/sdk/decode/binkacd.c | 483 +++ src/bink/src/sdk/decode/binkacd.h | 38 + src/bink/src/sdk/decode/binkread.c | 3424 +++++++++++---- src/bink/src/sdk/decode/expand.c | 1278 +++++- src/bink/src/sdk/decode/expand.h | 27 + src/bink/src/sdk/decode/ngc/binkngc.c | 319 +- src/bink/src/sdk/decode/ngc/ngcfile.c | 584 +++ src/bink/src/sdk/decode/ngc/ngcfile.h | 8 + src/bink/src/sdk/decode/ngc/ngcrgb.c | 1570 +++++++ src/bink/src/sdk/decode/ngc/ngcrgb.h | 61 + src/bink/src/sdk/decode/ngc/ngcsnd.c | 1042 +++++ src/bink/src/sdk/decode/ngc/ngcsnd.h | 8 + src/bink/src/sdk/decode/ngc/ngcyuy2.c | 410 ++ src/bink/src/sdk/decode/yuv.cpp | 3890 ++++++++++++++++++ src/bink/src/sdk/decode/yuv.h | 48 + src/bink/src/sdk/fft.c | 2428 +++++++++++ src/bink/src/sdk/fft.h | 9 + src/bink/src/sdk/popmal.c | 84 +- src/bink/src/sdk/varbits.c | 58 + src/bink/src/sdk/varbits.h | 367 ++ src/dolphin/include/dolphin/ax.h | 11 + src/dolphin/include/dolphin/dvd/dvd.h | 4 +- src/dolphin/include/dolphin/dvd/dvdfs.h | 5 +- src/dolphin/include/dolphin/os/OSInterrupt.h | 3 + tools/asm_shape_search.py | 325 ++ tools/bink_firstdiff_clusters.py | 126 + 
tools/bink_flag_matrix.py | 444 ++ tools/bink_match.py | 1727 ++++++++ 47 files changed, 21946 insertions(+), 1906 deletions(-) delete mode 100644 src/bink/include/binkread.h create mode 100644 src/bink/include/binktextures.h create mode 100644 src/bink/src/sdk/binkbits.h create mode 100644 src/bink/src/sdk/bitplane.h create mode 100644 src/bink/src/sdk/dct.h create mode 100644 src/bink/src/sdk/decode/binkacd.h create mode 100644 src/bink/src/sdk/decode/expand.h create mode 100644 src/bink/src/sdk/decode/ngc/ngcfile.h create mode 100644 src/bink/src/sdk/decode/ngc/ngcrgb.h create mode 100644 src/bink/src/sdk/decode/ngc/ngcsnd.h create mode 100644 src/bink/src/sdk/decode/yuv.h create mode 100644 src/bink/src/sdk/fft.h create mode 100644 src/bink/src/sdk/varbits.h create mode 100644 tools/asm_shape_search.py create mode 100644 tools/bink_firstdiff_clusters.py create mode 100644 tools/bink_flag_matrix.py create mode 100644 tools/bink_match.py diff --git a/config/GQPE78/splits.txt b/config/GQPE78/splits.txt index 0c0a2ff90..bcbf66aea 100644 --- a/config/GQPE78/splits.txt +++ b/config/GQPE78/splits.txt @@ -2400,7 +2400,6 @@ bink/src/sdk/decode/ngc/ngcfile.c: bink/src/sdk/decode/yuv.cpp: .text start:0x8019CE8C end:0x801A414C .text start:0x801A414C end:0x801A414C - .rodata start:0x80274E00 end:0x80274E60 .data start:0x802AFFC0 end:0x802B0240 .data start:0x802B0240 end:0x802B0240 .bss start:0x80362C88 end:0x80363C88 @@ -2410,6 +2409,7 @@ bink/src/sdk/decode/yuv.cpp: bink/src/sdk/decode/binkacd.c: .text start:0x801A414C end:0x801A4E30 .text start:0x801A4E30 end:0x801A4E30 + .rodata start:0x80274E00 end:0x80274E60 .data start:0x802B0240 end:0x802B0380 .sdata start:0x803CAE30 end:0x803CAE30 @@ -2443,27 +2443,24 @@ bink/src/sdk/decode/ngc/ngcrgb.c: bink/src/sdk/decode/ngc/ngcyuy2.c: .text start:0x801ABC7C end:0x801AC988 .text start:0x801AC988 end:0x801AC988 - .rodata start:0x80275300 end:0x80275480 - .sdata2 start:0x803CFC30 end:0x803CFC40 bink/src/sdk/varbits.c: .text 
start:0x801AC988 end:0x801ACB6C .text start:0x801ACB6C end:0x801ACB6C - .rodata start:0x80275480 end:0x802754E0 + .rodata start:0x80275300 end:0x80275480 + .sdata2 start:0x803CFC30 end:0x803CFC40 bink/src/sdk/fft.c: .text start:0x801ACB6C end:0x801B0E50 .text start:0x801B0E50 end:0x801B0E50 - .rodata start:0x802754E0 end:0x802754E0 + .rodata start:0x80275480 end:0x802754E0 .sdata2 start:0x803CFC40 end:0x803CFC40 bink/src/sdk/dct.c: .text start:0x801B0E50 end:0x801B1F24 .text start:0x801B1F24 end:0x801B1F24 - .rodata start:0x802754E0 end:0x802794E0 - .rodata start:0x802794E0 end:0x80279920 - .sdata2 start:0x803CFC40 end:0x803CFC60 - .sdata2 start:0x803CFC60 end:0x803CFC70 + .rodata start:0x802754E0 end:0x80279920 + .sdata2 start:0x803CFC40 end:0x803CFC70 bink/src/sdk/bitplane.c: .text start:0x801B1F24 end:0x801B5350 diff --git a/config/GQPE78/symbols.txt b/config/GQPE78/symbols.txt index 975586644..795ba4946 100644 --- a/config/GQPE78/symbols.txt +++ b/config/GQPE78/symbols.txt @@ -7839,9 +7839,9 @@ gcc2_compiled. 
= .text:0x8019CE8C; // type:label scope:local checkzoombufs__FUl = .text:0x8019CE8C; // type:function size:0x70 scope:local zoom2heven__Fl = .text:0x8019CEFC; // type:function size:0xB0 scope:local zoom2hodd__Fl = .text:0x8019CFAC; // type:function size:0xB0 scope:local -setevenodd__FUlPUlUlUlP5BLITST1 = .text:0x8019D05C; // type:function size:0x504 scope:local -YUV_blit__FPvUlUlUlT0UlUlUlUlUlUlUlT0P5BLITS = .text:0x8019D560; // type:function size:0x438 scope:local -YUV_blit_mask__FPvUlUlUlPUcUlT0UlUlUlUlUlUlUlT0P5BLITS = .text:0x8019D998; // type:function size:0xC34 scope:local +setup_scaling__FUlPUlUlUlP5BLITST1 = .text:0x8019D05C; // type:function size:0x504 scope:local +YUV_blit = .text:0x8019D560; // type:function size:0x438 scope:local +YUV_blit_mask = .text:0x8019D998; // type:function size:0xC34 scope:local YUV_init = .text:0x8019E5CC; // type:function size:0x6FC scope:global dounaligned32rowm2w__FUlUl = .text:0x8019ECC8; // type:function size:0x60 scope:local dounaligned32colm2w__FUll = .text:0x8019ED28; // type:function size:0x88 scope:local @@ -7965,7 +7965,7 @@ RADCB_idle_on_callbacks = .text:0x801A5508; // type:function size:0x4 scope:glob RADCB_callback_size = .text:0x801A550C; // type:function size:0x8 scope:global gcc2_compiled. 
= .text:0x801A5514; // type:label scope:local OpenReadBundle = .text:0x801A5514; // type:function size:0x58 scope:local -mergesort = .text:0x801A556C; // type:function size:0xD4 scope:local +simpmergesort = .text:0x801A556C; // type:function size:0xD4 scope:local ReadHuffTable = .text:0x801A5640; // type:function size:0x51C scope:local StartReadHuff4Bundle = .text:0x801A5B5C; // type:function size:0x34 scope:local StartReadHuff8Bundle = .text:0x801A5B90; // type:function size:0x84 scope:local @@ -11070,79 +11070,79 @@ default_adjust = .rodata:0x80274C90; // type:object size:0x10 scope:local @811 = .rodata:0x80274CB0; // type:object size:0x10 scope:local @814 = .rodata:0x80274CC0; // type:object size:0x10 scope:local @stringBase0 = .rodata:0x80274CD0; // type:object size:0x80 scope:local data:string_table -lbl_80274D50 = .rodata:0x80274D50; // type:label align:4 data:float -lbl_80274D54 = .rodata:0x80274D54; // type:label align:4 data:float -lbl_80274D58 = .rodata:0x80274D58; // type:label align:4 data:float -lbl_80274D60 = .rodata:0x80274D60; // type:label align:8 data:double -lbl_80274D68 = .rodata:0x80274D68; // type:label align:4 data:float -lbl_80274D70 = .rodata:0x80274D70; // type:label align:8 data:double -lbl_80274D78 = .rodata:0x80274D78; // type:label align:4 data:float -lbl_80274D7C = .rodata:0x80274D7C; // type:label align:4 data:float -lbl_80274D80 = .rodata:0x80274D80; // type:label -lbl_80274D94 = .rodata:0x80274D94; // type:label -lbl_80274DA8 = .rodata:0x80274DA8; // type:label -lbl_80274DDC = .rodata:0x80274DDC; // type:label -lbl_80274DF0 = .rodata:0x80274DF0; // type:label align:8 data:double -lbl_80274DF8 = .rodata:0x80274DF8; // type:label align:8 data:double -lbl_80274E00 = .rodata:0x80274E00; // type:label align:8 data:double -lbl_80274E08 = .rodata:0x80274E08; // type:label align:4 data:float -lbl_80274E10 = .rodata:0x80274E10; // type:label align:8 data:double -lbl_80274E18 = .rodata:0x80274E18; // type:label align:8 data:double 
-lbl_80274E20 = .rodata:0x80274E20; // type:label align:4 data:float -lbl_80274E24 = .rodata:0x80274E24; // type:label align:4 data:float -lbl_80274E28 = .rodata:0x80274E28; // type:label align:8 data:double -lbl_80274E30 = .rodata:0x80274E30; // type:label align:8 data:double -lbl_80274E38 = .rodata:0x80274E38; // type:label align:4 data:float -lbl_80274E40 = .rodata:0x80274E40; // type:label align:8 data:double -lbl_80274E48 = .rodata:0x80274E48; // type:label align:8 data:double -lbl_80274E50 = .rodata:0x80274E50; // type:label align:8 data:double -lbl_80274E58 = .rodata:0x80274E58; // type:label align:4 data:float -huff4decode00 = .rodata:0x80274E60; // type:object size:0x10 scope:local data:string -huff4decode01 = .rodata:0x80274E70; // type:object size:0x20 scope:local -huff4decode02 = .rodata:0x80274E90; // type:object size:0x20 scope:local data:string -huff4decode03 = .rodata:0x80274EB0; // type:object size:0x20 scope:local data:string -huff4decode04 = .rodata:0x80274ED0; // type:object size:0x20 scope:local data:string -huff4decode05 = .rodata:0x80274EF0; // type:object size:0x20 scope:local data:string -huff4decode06 = .rodata:0x80274F10; // type:object size:0x20 scope:local data:string -huff4decode07 = .rodata:0x80274F30; // type:object size:0x40 scope:local -huff4decode08 = .rodata:0x80274F70; // type:object size:0x40 scope:local -huff4decode09 = .rodata:0x80274FB0; // type:object size:0x40 scope:local -huff4decode10 = .rodata:0x80274FF0; // type:object size:0x40 scope:local data:string -huff4decode11 = .rodata:0x80275030; // type:object size:0x40 scope:local -huff4decode12 = .rodata:0x80275070; // type:object size:0x40 scope:local data:string -huff4decode13 = .rodata:0x802750B0; // type:object size:0x80 scope:local -huff4decode14 = .rodata:0x80275130; // type:object size:0x80 scope:local -huff4decode15 = .rodata:0x802751B0; // type:object size:0x80 scope:local -huff4reads = .rodata:0x80275230; // type:object size:0x10 scope:local -mask2 = 
.rodata:0x80275240; // type:object size:0x40 scope:local -mask1 = .rodata:0x80275280; // type:object size:0x40 scope:local -mask4 = .rodata:0x802752C0; // type:object size:0x10 scope:local -mask3 = .rodata:0x802752D0; // type:object size:0x10 scope:local -VarBitsLens = .rodata:0x80275300; // type:object size:0x84 scope:global -_bitlevels = .rodata:0x802753C0; // type:object size:0x81 scope:global -lbl_80275480 = .rodata:0x80275480; // type:label align:4 data:float -lbl_80275484 = .rodata:0x80275484; // type:label align:4 data:float -lbl_80275488 = .rodata:0x80275488; // type:label align:8 data:double -lbl_80275490 = .rodata:0x80275490; // type:label align:8 data:double -lbl_80275498 = .rodata:0x80275498; // type:label align:4 data:float -lbl_8027549C = .rodata:0x8027549C; // type:label align:4 data:float -lbl_802754A0 = .rodata:0x802754A0; // type:label align:4 data:float -lbl_802754A4 = .rodata:0x802754A4; // type:label align:4 data:float -lbl_802754A8 = .rodata:0x802754A8; // type:label align:8 data:double -lbl_802754B0 = .rodata:0x802754B0; // type:label align:4 data:float -lbl_802754B8 = .rodata:0x802754B8; // type:label align:8 data:double -lbl_802754C0 = .rodata:0x802754C0; // type:label align:4 data:float -lbl_802754C4 = .rodata:0x802754C4; // type:label align:4 data:float -lbl_802754C8 = .rodata:0x802754C8; // type:label align:4 data:float -lbl_802754CC = .rodata:0x802754CC; // type:label align:4 data:float -lbl_802754D0 = .rodata:0x802754D0; // type:label align:4 data:float -lbl_802754D4 = .rodata:0x802754D4; // type:label align:4 data:float -ifiquantlevels8 = .rodata:0x802754E0; // type:object size:0x1000 scope:local -ifquantlevels8 = .rodata:0x802764E0; // type:object size:0x1000 scope:local -ifimquantlevels8 = .rodata:0x802774E0; // type:object size:0x1000 scope:local -ifmquantlevels8 = .rodata:0x802784E0; // type:object size:0x1000 scope:local -zigzag = .rodata:0x802794E0; // type:object size:0x40 scope:global -patterns = .rodata:0x80279520; // 
type:object size:0x400 scope:global +BINK_NGC_PAN_ONE = .rodata:0x80274D50; // type:object size:0x4 scope:global align:4 data:float +BINK_NGC_PAN_EXPONENT = .rodata:0x80274D54; // type:object size:0x4 scope:global align:4 data:float +BINK_NGC_MIX_SCALE = .rodata:0x80274D58; // type:object size:0x4 scope:global align:4 data:float +BINK_NGC_U32_TO_F64_BIAS = .rodata:0x80274D60; // type:object size:0x8 scope:global align:8 data:double +BINK_NGC_AX_SAMPLE_RATE = .rodata:0x80274D68; // type:object size:0x4 scope:global align:4 data:float +BINK_NGC_S32_TO_F64_BIAS = .rodata:0x80274D70; // type:object size:0x8 scope:global align:8 data:double +BINK_NGC_PAN_TO_FLOAT = .rodata:0x80274D78; // type:object size:0x4 scope:global align:4 data:float +BINK_NGC_PAN_CENTER = .rodata:0x80274D7C; // type:object size:0x4 scope:global align:4 data:float +BINK_ERROR_OPENING_FILE = .rodata:0x80274D80; // type:object size:0x14 scope:global data:string +BINK_ERROR_NOT_BINK = .rodata:0x80274D94; // type:object size:0x14 scope:global data:string +BINK_ERROR_NO_COMPRESSED_FRAMES = .rodata:0x80274DA8; // type:object size:0x34 scope:global data:string +BINK_ERROR_OUT_OF_MEMORY = .rodata:0x80274DDC; // type:object size:0x14 scope:global data:string +BINK_NGC_SOUND_U32_TO_F64_BIAS = .rodata:0x80274DF0; // type:object size:0x8 scope:global align:8 data:double +BINK_NGC_SOUND_SIGN_BIAS = .rodata:0x80274DF8; // type:object size:0x8 scope:global align:8 data:double +BINKAC_FXP_TO_FLOAT_BIAS = .rodata:0x80274E00; // type:label align:8 data:double +BINKAC_SAMPLE_ZERO = .rodata:0x80274E08; // type:label align:4 data:float +BINKAC_VARBITS_U32_TO_F64_BIAS = .rodata:0x80274E10; // type:label align:8 data:double +BINKAC_QUANT_U32_TO_F64_BIAS = .rodata:0x80274E18; // type:label align:8 data:double +BINKAC_QUANT_INDEX_SCALE_CONST = .rodata:0x80274E20; // type:label align:4 data:float +BINKAC_QUANT_POWER_SCALE_CONST = .rodata:0x80274E24; // type:label align:4 data:float +BINKAC_QUANT_POWER_BASE_CONST = 
.rodata:0x80274E28; // type:label align:8 data:double +BINKAC_OPEN_U32_TO_F64_BIAS = .rodata:0x80274E30; // type:label align:8 data:double +BINKAC_RSQRT_ZERO = .rodata:0x80274E38; // type:label align:4 data:float +BINKAC_RSQRT_NEWTON_HALF_CONST = .rodata:0x80274E40; // type:label align:8 data:double +BINKAC_RSQRT_NEWTON_THREE_CONST = .rodata:0x80274E48; // type:label align:8 data:double +BINKAC_U32_LIMIT_AS_F64 = .rodata:0x80274E50; // type:label align:8 data:double +BINKAC_TRANSFORM_ROOT_SCALE_CONST = .rodata:0x80274E58; // type:label align:4 data:float +huff4decode00 = .rodata:0x80274E60; // type:object size:0x10 scope:local data:byte +huff4decode01 = .rodata:0x80274E70; // type:object size:0x20 scope:local data:byte +huff4decode02 = .rodata:0x80274E90; // type:object size:0x20 scope:local data:byte +huff4decode03 = .rodata:0x80274EB0; // type:object size:0x20 scope:local data:byte +huff4decode04 = .rodata:0x80274ED0; // type:object size:0x20 scope:local data:byte +huff4decode05 = .rodata:0x80274EF0; // type:object size:0x20 scope:local data:byte +huff4decode06 = .rodata:0x80274F10; // type:object size:0x20 scope:local data:byte +huff4decode07 = .rodata:0x80274F30; // type:object size:0x40 scope:local data:byte +huff4decode08 = .rodata:0x80274F70; // type:object size:0x40 scope:local data:byte +huff4decode09 = .rodata:0x80274FB0; // type:object size:0x40 scope:local data:byte +huff4decode10 = .rodata:0x80274FF0; // type:object size:0x40 scope:local data:byte +huff4decode11 = .rodata:0x80275030; // type:object size:0x40 scope:local data:byte +huff4decode12 = .rodata:0x80275070; // type:object size:0x40 scope:local data:byte +huff4decode13 = .rodata:0x802750B0; // type:object size:0x80 scope:local data:byte +huff4decode14 = .rodata:0x80275130; // type:object size:0x80 scope:local data:byte +huff4decode15 = .rodata:0x802751B0; // type:object size:0x80 scope:local data:byte +BINK_HUFF4_BITS_TO_PEEK = .rodata:0x80275230; // type:object size:0x10 scope:local data:byte 
+mask2 = .rodata:0x80275240; // type:object size:0x40 scope:local data:byte +mask1 = .rodata:0x80275280; // type:object size:0x40 scope:local data:byte +mask4 = .rodata:0x802752C0; // type:object size:0x10 scope:local data:byte +mask3 = .rodata:0x802752D0; // type:object size:0x10 scope:local data:byte +VarBitsLens = .rodata:0x80275300; // type:object size:0x84 scope:global align:32 data:4byte +_bitlevels = .rodata:0x802753C0; // type:object size:0x81 scope:global align:64 data:byte +BINK_RDFT_INVERSE_SCALE_BITS = .rodata:0x80275480; // type:object size:0x4 scope:global align:4 data:float +BINK_FFT_TRIG_ONE_BITS = .rodata:0x80275484; // type:object size:0x4 scope:global align:4 data:float +BINK_FFT_INT_TO_FLOAT_BIAS = .rodata:0x80275488; // type:object size:0x8 scope:global align:8 data:double +BINK_FFT_HALF_SECANT_SCALE_BITS = .rodata:0x80275490; // type:object size:0x8 scope:global align:8 data:double +BINK_FFT_SIX_BITS = .rodata:0x80275498; // type:object size:0x4 scope:global align:4 data:float +BINK_FFT_THREE_BITS = .rodata:0x8027549C; // type:object size:0x4 scope:global align:4 data:float +BINK_FFT_HALF_RECIP_SCALE_BITS = .rodata:0x802754A0; // type:object size:0x4 scope:global align:4 data:float +BINK_DCT_TRIG_ONE_BITS = .rodata:0x802754A4; // type:object size:0x4 scope:global align:4 data:float +BINK_DCT_INT_TO_FLOAT_BIAS = .rodata:0x802754A8; // type:object size:0x8 scope:global align:8 data:double +BINK_DCT_CENTER_SCALE_BITS = .rodata:0x802754B0; // type:object size:0x8 scope:global align:4 data:double +BINK_DCT_HALF_SCALE_BITS = .rodata:0x802754B8; // type:object size:0x8 scope:global align:8 data:double +BINK_CFT_ROT_ONE_BITS = .rodata:0x802754C0; // type:object size:0x4 scope:global align:4 data:float +BINK_CFT_ROT_ZERO_BITS = .rodata:0x802754C4; // type:object size:0x4 scope:global align:4 data:4byte +BINK_CFT_INV_ROT_ONE_BITS = .rodata:0x802754C8; // type:object size:0x4 scope:global align:4 data:float +BINK_CFT_INV_ROT_ZERO_BITS = 
.rodata:0x802754CC; // type:object size:0x4 scope:global align:4 data:4byte +BINK_RFT_HALF_SCALE_BITS = .rodata:0x802754D0; // type:object size:0x4 scope:global align:4 data:float +BINK_RFT_INV_HALF_SCALE_BITS = .rodata:0x802754D4; // type:object size:0xC scope:global align:4 data:4byte +ifiquantlevels8 = .rodata:0x802754E0; // type:object size:0x1000 scope:local data:4byte +ifquantlevels8 = .rodata:0x802764E0; // type:object size:0x1000 scope:local data:4byte +ifimquantlevels8 = .rodata:0x802774E0; // type:object size:0x1000 scope:local data:4byte +ifmquantlevels8 = .rodata:0x802784E0; // type:object size:0x1000 scope:local data:4byte +zigzag = .rodata:0x802794E0; // type:object size:0x40 scope:global data:byte +patterns = .rodata:0x80279520; // type:object size:0x400 scope:global data:byte ...rodata.0 = .rodata:0x80279920; // type:label scope:local ClampRegion = .rodata:0x80279920; // type:object size:0xA scope:local data:byte fix_pool_sizes = .rodata:0x80279930; // type:object size:0x18 scope:local data:4byte @@ -11943,36 +11943,36 @@ __vt__Q24xhud11text_widget = .data:0x802A1B60; // type:object size:0x28 scope:gl comboReward = .data:0x802A1B88; // type:object size:0x380 scope:local data:4byte @925 = .data:0x802A1F08; // type:object size:0x2C scope:local TrackNums = .data:0x802A1F34; // type:object size:0x20 scope:local data:4byte -LogoData = .data:0x802A1F54; // type:object size:0x3A00 scope:global noreloc -ytable = .data:0x802A5960; // type:object size:0x410 scope:global -ytable_x4 = .data:0x802A5D80; // type:object size:0x410 scope:global -clamp_ytable = .data:0x802A61A0; // type:object size:0x410 scope:global -clamptable = .data:0x802A65C0; // type:object size:0xC10 scope:global -clamp_a4 = .data:0x802A71E0; // type:object size:0x410 scope:global -clamp_r = .data:0x802A7600; // type:object size:0xC10 scope:global -clamp_g = .data:0x802A8220; // type:object size:0xC10 scope:global -clamp_b = .data:0x802A8E40; // type:object size:0xC10 scope:global -clamp_rh = 
.data:0x802A9A60; // type:object size:0xC10 scope:global -clamp_gh = .data:0x802AA680; // type:object size:0xC10 scope:global -clamp_bh = .data:0x802AB2A0; // type:object size:0xC10 scope:global -clamp_rr = .data:0x802ABEC0; // type:object size:0xC10 scope:global -clamp_gg = .data:0x802ACAE0; // type:object size:0xC10 scope:global -clamp_bb = .data:0x802AD700; // type:object size:0xC10 scope:global -mono16 = .data:0x802AE320; // type:object size:0x400 scope:global -mono16x2 = .data:0x802AE720; // type:object size:0x400 scope:global -mono32 = .data:0x802AEB20; // type:object size:0x400 scope:global +LogoData = .data:0x802A1F54; // type:object size:0x3A00 scope:global data:2byte noreloc +ytable = .data:0x802A5960; // type:object size:0x410 scope:global data:4byte +ytable_x4 = .data:0x802A5D80; // type:object size:0x410 scope:global data:4byte +clamp_ytable = .data:0x802A61A0; // type:object size:0x410 scope:global data:4byte +clamptable = .data:0x802A65C0; // type:object size:0xC10 scope:global data:4byte +clamp_a4 = .data:0x802A71E0; // type:object size:0x410 scope:global data:4byte +clamp_r = .data:0x802A7600; // type:object size:0xC10 scope:global data:4byte +clamp_g = .data:0x802A8220; // type:object size:0xC10 scope:global data:4byte +clamp_b = .data:0x802A8E40; // type:object size:0xC10 scope:global data:4byte +clamp_rh = .data:0x802A9A60; // type:object size:0xC10 scope:global data:4byte +clamp_gh = .data:0x802AA680; // type:object size:0xC10 scope:global data:4byte +clamp_bh = .data:0x802AB2A0; // type:object size:0xC10 scope:global data:4byte +clamp_rr = .data:0x802ABEC0; // type:object size:0xC10 scope:global data:4byte +clamp_gg = .data:0x802ACAE0; // type:object size:0xC10 scope:global data:4byte +clamp_bb = .data:0x802AD700; // type:object size:0xC10 scope:global data:4byte +mono16 = .data:0x802AE320; // type:object size:0x400 scope:global data:4byte +mono16x2 = .data:0x802AE720; // type:object size:0x400 scope:global data:4byte +mono32 = 
.data:0x802AEB20; // type:object size:0x400 scope:global data:4byte S = .data:0x802AEF20; // type:object size:0x4C scope:global data:4byte YUVTables = .data:0x802AEF80; // type:object size:0x1000 scope:global data:4byte RGBshift = .data:0x802AFF80; // type:object size:0x30 scope:global data:4byte -blits32 = .data:0x802AFFC0; // type:object size:0x80 scope:local -blits32a = .data:0x802B0040; // type:object size:0x80 scope:local -blits16 = .data:0x802B00C0; // type:object size:0x80 scope:local -blits16a4 = .data:0x802B0140; // type:object size:0x80 scope:local -blitsyuy2 = .data:0x802B01C0; // type:object size:0x80 scope:local -rlelens = .data:0x802B0240; // type:object size:0x10 scope:local -bandtopfreq = .data:0x802B0250; // type:object size:0x64 scope:local -invertbins = .data:0x802B02B8; // type:object size:0xC0 scope:local -huff4decodes = .data:0x802B0380; // type:object size:0x40 scope:local +blits32 = .data:0x802AFFC0; // type:object size:0x80 scope:local data:4byte +blits32a = .data:0x802B0040; // type:object size:0x80 scope:local data:4byte +blits16 = .data:0x802B00C0; // type:object size:0x80 scope:local data:4byte +blits16a4 = .data:0x802B0140; // type:object size:0x80 scope:local data:4byte +blitsyuy2 = .data:0x802B01C0; // type:object size:0x80 scope:local data:4byte +bink_rlelens_snd = .data:0x802B0240; // type:object size:0x10 scope:local data:byte +bink_bandtopfreq = .data:0x802B0250; // type:object size:0x64 scope:local data:4byte +bink_invertbins = .data:0x802B02B8; // type:object size:0xC0 scope:local align:8 data:double +huff4decodes = .data:0x802B0380; // type:object size:0x40 scope:local data:4byte ...data.0 = .data:0x802B03C0; // type:label scope:local @1 = .data:0x802B03C0; // type:object size:0x44 scope:local data:string ...data.0 = .data:0x802B0408; // type:label scope:local @@ -12679,10 +12679,10 @@ nozey_npc_cinematics$1033 = .bss:0x80362B10; // type:object size:0x14 scope:loca mat_fake$1292 = .bss:0x80362B24; // type:object size:0x40 
scope:local sUnderCamPos = .bss:0x80362B68; // type:object size:0xC scope:local sHideText = .bss:0x80362B74; // type:object size:0x14 scope:local data:4byte -binkerr = .bss:0x80362B88; // type:label size:0x100 scope:local data:byte -origYUVTables = .bss:0x80362C88; // type:label size:0x1000 scope:local data:4byte -ptrs = .bss:0x80363C88; // type:label size:0x80 scope:local -amt = .bss:0x80363D08; // type:label size:0x80 scope:local +binkerr = .bss:0x80362B88; // type:object size:0x100 scope:local data:byte +origYUVTables = .bss:0x80362C88; // type:object size:0x1000 scope:local data:4byte +ptrs = .bss:0x80363C88; // type:object size:0x80 scope:local data:4byte +amt = .bss:0x80363D08; // type:object size:0x80 scope:local data:4byte __AXStackHead = .bss:0x80363D88; // type:object size:0x80 scope:local data:4byte ...bss.0 = .bss:0x80363D88; // type:label scope:local __AXStackTail = .bss:0x80363E08; // type:object size:0x80 scope:local data:4byte @@ -13867,13 +13867,13 @@ iFileSyncAsyncReadActive = .sbss:0x803CBAC4; // type:object size:0x4 scope:globa fopcount$503 = .sbss:0x803CBAC8; // type:object size:0x4 scope:local data:4byte init$504 = .sbss:0x803CBACC; // type:object size:0x1 scope:local data:byte frame_num = .sbss:0x803CBAD0; // type:object size:0x4 scope:local data:4byte -fuckingSurfaceType = .sbss:0x803CBAD4; // type:object size:0x4 scope:global data:4byte +Rad_surface_type = .sbss:0x803CBAD4; // type:object size:0x4 scope:global data:4byte Bink = .sbss:0x803CBAD8; // type:object size:0x4 scope:local data:4byte Image = .sbss:0x803CBADC; // type:object size:0x4 scope:local data:4byte Paused = .sbss:0x803CBAE0; // type:object size:0x4 scope:local data:4byte pixels = .sbss:0x803CBAE4; // type:object size:0x4 scope:local data:4byte vol = .sbss:0x803CBAE8; // type:object size:0x4 scope:local align:4 data:float -ip = .sbss:0x803CBAEC; // type:object size:0x4 scope:global data:4byte +track_id = .sbss:0x803CBAEC; // type:object size:0x4 scope:global data:4byte oof = 
.sbss:0x803CBAF0; // type:object size:0x4 scope:global data:4byte mXFBs__4iFMV = .sbss:0x803CBAF4; // type:object size:0x8 scope:global data:4byte mCurrentFrameBuffer__4iFMV = .sbss:0x803CBAFC; // type:object size:0x4 scope:global data:4byte @@ -14300,17 +14300,17 @@ comboCounter = .sbss:0x803CC1E0; // type:object size:0x4 scope:local data:4byte comboTimer = .sbss:0x803CC1E4; // type:object size:0x4 scope:local align:4 data:float credits_data = .sbss:0x803CC1E8; // type:object size:0x4 scope:local data:4byte credits_parentID = .sbss:0x803CC1EC; // type:object size:0x4 scope:local data:4byte -ForceRateDiv = .sbss:0x803CC1F0; // type:label scope:local data:4byte -EVEN = .sbss:0x803CC1F4; // type:label scope:local data:4byte -ODD = .sbss:0x803CC1F8; // type:label scope:local data:4byte -EVENx = .sbss:0x803CC1FC; // type:label scope:local data:4byte -ODDx = .sbss:0x803CC200; // type:label scope:local data:4byte -dounalignedrow = .sbss:0x803CC204; // type:label scope:local data:4byte -dounalignedcol = .sbss:0x803CC208; // type:label scope:local data:4byte -align = .sbss:0x803CC20C; // type:label scope:local data:4byte -alignm1 = .sbss:0x803CC210; // type:label scope:local data:4byte -alignshift = .sbss:0x803CC214; // type:label scope:local data:4byte -testing = .sbss:0x803CC218; // type:label scope:local data:4byte +ForceRateDiv = .sbss:0x803CC1F0; // type:object size:0x4 scope:local data:4byte +EVEN = .sbss:0x803CC1F4; // type:object size:0x4 scope:local data:4byte +ODD = .sbss:0x803CC1F8; // type:object size:0x4 scope:local data:4byte +EVENx = .sbss:0x803CC1FC; // type:object size:0x4 scope:local data:4byte +ODDx = .sbss:0x803CC200; // type:object size:0x4 scope:local data:4byte +dounalignedrow = .sbss:0x803CC204; // type:object size:0x4 scope:local data:4byte +dounalignedcol = .sbss:0x803CC208; // type:object size:0x4 scope:local data:4byte +align = .sbss:0x803CC20C; // type:object size:0x4 scope:local data:4byte +alignm1 = .sbss:0x803CC210; // type:object size:0x4 
scope:local data:4byte +alignshift = .sbss:0x803CC214; // type:object size:0x4 scope:local data:4byte +testing = .sbss:0x803CC218; // type:object size:0x8 scope:local data:4byte __AIS_Callback = .sbss:0x803CC220; // type:object size:0x4 scope:local data:4byte __AID_Callback = .sbss:0x803CC224; // type:object size:0x4 scope:local data:4byte __CallbackStack = .sbss:0x803CC228; // type:object size:0x4 scope:local data:4byte @@ -17594,17 +17594,17 @@ uv_slice_dray$1697 = .sdata2:0x803CFB80; // type:object size:0x8 scope:local ali @879 = .sdata2:0x803CFC08; // type:object size:0x4 scope:local align:4 data:float @880 = .sdata2:0x803CFC0C; // type:object size:0x4 scope:local align:4 data:float @927 = .sdata2:0x803CFC10; // type:object size:0x4 scope:local align:4 data:float -huff4decode00align = .sdata2:0x803CFC18; // type:object size:0x8 scope:local -huff4decodesalign = .sdata2:0x803CFC20; // type:object size:0x8 scope:local -rlelens = .sdata2:0x803CFC28; // type:object size:0x4 scope:local -VarBitsLensalign = .sdata2:0x803CFC30; // type:object size:0x8 scope:global -_bitlevelsalign = .sdata2:0x803CFC38; // type:object size:0x8 scope:global -ifiquantlevels8align = .sdata2:0x803CFC40; // type:object size:0x8 scope:local -ifquantlevels8align = .sdata2:0x803CFC48; // type:object size:0x8 scope:local -ifimquantlevels8align = .sdata2:0x803CFC50; // type:object size:0x8 scope:local -ifmquantlevels8align = .sdata2:0x803CFC58; // type:object size:0x8 scope:local -zigzagalign = .sdata2:0x803CFC60; // type:object size:0x8 scope:global -patternsalign = .sdata2:0x803CFC68; // type:object size:0x8 scope:global +BINK_HUFF4_DECODE0_ALIGN = .sdata2:0x803CFC18; // type:object size:0x8 scope:global align:8 data:double +BINK_HUFF4_DECODE_TABLES_ALIGN = .sdata2:0x803CFC20; // type:object size:0x8 scope:global align:8 data:double +BINK_HUFF4_RLE_LENGTHS_PACKED = .sdata2:0x803CFC28; // type:object size:0x4 scope:global data:4byte +VarBitsLensalign = .sdata2:0x803CFC30; // type:object size:0x8 
scope:global align:8 data:double +_bitlevelsalign = .sdata2:0x803CFC38; // type:object size:0x8 scope:global align:8 data:double +ifiquantlevels8align = .sdata2:0x803CFC40; // type:object size:0x8 scope:local align:8 data:double +ifquantlevels8align = .sdata2:0x803CFC48; // type:object size:0x8 scope:local align:8 data:double +ifimquantlevels8align = .sdata2:0x803CFC50; // type:object size:0x8 scope:local align:8 data:double +ifmquantlevels8align = .sdata2:0x803CFC58; // type:object size:0x8 scope:local align:8 data:double +zigzagalign = .sdata2:0x803CFC60; // type:object size:0x8 scope:global align:8 data:double +patternsalign = .sdata2:0x803CFC68; // type:object size:0x8 scope:global align:8 data:double @239 = .sdata2:0x803CFC70; // type:object size:0x4 scope:local align:4 data:float __GXData = .sdata2:0x803CFC78; // type:object size:0x4 scope:global data:4byte @289 = .sdata2:0x803CFC7C; // type:object size:0x4 scope:local align:4 data:float diff --git a/configure.py b/configure.py index 8f1608937..c4ae5b196 100644 --- a/configure.py +++ b/configure.py @@ -286,7 +286,7 @@ "-D__GEKKO__", "-I src/bink/include", "-I src/PowerPC_EABI_Support/include", - "-G4", + "-G8", ] # Renderware library flags @@ -641,17 +641,17 @@ def MatchingFor(*versions): "progress_category": "bink", "objects": [ Object(NonMatching, "bink/src/sdk/decode/ngc/binkngc.c"), - Object(NonMatching, "bink/src/sdk/decode/ngc/ngcsnd.c"), + Object(NonMatching, "bink/src/sdk/decode/ngc/ngcsnd.c", extra_cflags=["-G0"]), Object(NonMatching, "bink/src/sdk/decode/binkread.c"), Object(NonMatching, "bink/src/sdk/decode/ngc/ngcfile.c"), Object(NonMatching, "bink/src/sdk/decode/yuv.cpp"), Object(NonMatching, "bink/src/sdk/decode/binkacd.c"), - Object(NonMatching, "bink/shared/time/radcb.c"), + Object(Matching, "bink/shared/time/radcb.c"), Object(NonMatching, "bink/src/sdk/decode/expand.c"), - Object(NonMatching, "bink/src/sdk/popmal.c"), + Object(Matching, "bink/src/sdk/popmal.c"), Object(NonMatching, 
"bink/src/sdk/decode/ngc/ngcrgb.c"), Object(NonMatching, "bink/src/sdk/decode/ngc/ngcyuy2.c"), - Object(NonMatching, "bink/src/sdk/varbits.c"), + Object(Matching, "bink/src/sdk/varbits.c"), Object(NonMatching, "bink/src/sdk/fft.c"), Object(NonMatching, "bink/src/sdk/dct.c"), Object(NonMatching, "bink/src/sdk/bitplane.c"), diff --git a/src/SB/Core/gc/iFMV.cpp b/src/SB/Core/gc/iFMV.cpp index 9838215ab..8d7084f03 100644 --- a/src/SB/Core/gc/iFMV.cpp +++ b/src/SB/Core/gc/iFMV.cpp @@ -21,17 +21,17 @@ void RwGameCubeGetXFBs(void*, void*); } // .bss -static U32 Bink_surface_type[5]; +static u32 Bink_surface_type[RAD3DSURFACECOUNT]; // .sbss static S32 frame_num; -U32 fuckingSurfaceType; +u32 Rad_surface_type; static HBINK Bink; static HRAD3DIMAGE Image; static S32 Paused; static void* pixels; static volatile F32 vol; -S32 ip; +S32 track_id; s32 oof; void* iFMV::mXFBs[2]; void* iFMV::mCurrentFrameBuffer; @@ -70,33 +70,31 @@ U32 iFMVPlay(char* filename, U32 buttons, F32 time, bool skippable, bool lockCon static void Setup_surface_array() { - Bink_surface_type[0] = BINKSURFACE32; - Bink_surface_type[1] = BINKSURFACE32A; - Bink_surface_type[2] = BINKSURFACE565; - Bink_surface_type[3] = BINKSURFACE4444; - Bink_surface_type[4] = BINKSURFACEYUY2; + Bink_surface_type[RAD3DSURFACE32] = BINKSURFACE32; + Bink_surface_type[RAD3DSURFACE32A] = BINKSURFACE32A; + Bink_surface_type[RAD3DSURFACE565] = BINKSURFACE565; + Bink_surface_type[RAD3DSURFACE4444] = BINKSURFACE4444; + Bink_surface_type[RAD3DSURFACEYUY2] = BINKSURFACEYUY2; } -// WIP. 
void Decompress_frame(HBINK bnk, HRAD3DIMAGE rad_image, long flags) { - struct Result + struct RAD3DLockInfo { - S32 unk_0; - S32 unk_4; - U32 unk_8; - U32 unk_c; + S32 copy_status; + S32 do_frame_status; + u32 surface_type; + u32 buffer_pitch; }; - Result result; - result.unk_4 = BinkDoFrame(bnk); - if (Lock_RAD_3D_image(rad_image, &pixels, &result.unk_c, &result.unk_8) != 0) + RAD3DLockInfo lock_info; + + lock_info.do_frame_status = BinkDoFrame(bnk); + if (Lock_RAD_3D_image(rad_image, &pixels, &lock_info.buffer_pitch, &lock_info.surface_type) != 0) { - S32 mask = flags * -1; - mask = mask | flags; - mask = mask >> 0x1f; - mask = mask & 0x80000000; - mask |= Bink_surface_type[result.unk_8]; - result.unk_0 = BinkCopyToBuffer(bnk, pixels, result.unk_c, bnk->Height, NULL, NULL, mask); + u32 copy_flags = + Bink_surface_type[lock_info.surface_type] | (flags != 0 ? BINKCOPYALL : 0); + lock_info.copy_status = + BinkCopyToBuffer(bnk, pixels, lock_info.buffer_pitch, bnk->Height, NULL, NULL, copy_flags); Unlock_RAD_3D_image(rad_image); } } @@ -241,18 +239,18 @@ static void PlayFMV(char* fname, size_t buttons, F32 time) if (Bink != NULL) { - if (Bink->Width != 0) + if (Bink->NumTracks != 0) { - for (ip = 0; ip <= Bink->Width; ++ip) + for (track_id = 0; track_id <= Bink->NumTracks; ++track_id) { vol = gSnd.categoryVolFader[SND_CAT_CUTSCENE]; vol = vol * vol; vol = vol * 32768.0f; - BinkSetVolume(Bink, ip, vol); + BinkSetVolume(Bink, track_id, vol); } } - Image = Open_RAD_3D_image(NULL, Bink->Width, Bink->Height, fuckingSurfaceType); + Image = Open_RAD_3D_image(NULL, Bink->Width, Bink->Height, Rad_surface_type); if (Image != NULL) { if (frame_num != 0) diff --git a/src/SB/Core/gc/ngcrad3d.c b/src/SB/Core/gc/ngcrad3d.c index b891c4d23..cfed0cbf6 100644 --- a/src/SB/Core/gc/ngcrad3d.c +++ b/src/SB/Core/gc/ngcrad3d.c @@ -1,14 +1,19 @@ #include "ngcrad3d.h" #include +#include +#include #include "iFMV.h" #include -static int D3D_surface_type[5]; -static unsigned int Pixel_info[5]; 
+static int GX_texture_format[RAD3DSURFACECOUNT]; +static u32 Surface_info[RAD3DSURFACECOUNT]; static int Built_tables; +#define RAD3D_BYTES_PER_PIXEL_MASK 0xff +#define RAD3D_ALPHA_PIXELS 0x80000000 + static void Setup_surface_array() { if (Built_tables) @@ -16,52 +21,82 @@ static void Setup_surface_array() return; } - D3D_surface_type[0] = 6; - D3D_surface_type[1] = 6; - D3D_surface_type[2] = 4; - D3D_surface_type[3] = 5; - D3D_surface_type[4] = -1; + GX_texture_format[RAD3DSURFACE32] = GX_TF_RGBA8; + GX_texture_format[RAD3DSURFACE32A] = GX_TF_RGBA8; + GX_texture_format[RAD3DSURFACE565] = GX_TF_RGB565; + GX_texture_format[RAD3DSURFACE4444] = GX_TF_RGB5A3; + GX_texture_format[RAD3DSURFACEYUY2] = -1; - Pixel_info[0] = 0x00000004; - Pixel_info[1] = 0x80000004; - Pixel_info[2] = 0x00000002; - Pixel_info[3] = 0x80000002; - Pixel_info[4] = 0x00000002; + Surface_info[RAD3DSURFACE32] = 4; + Surface_info[RAD3DSURFACE32A] = RAD3D_ALPHA_PIXELS | 4; + Surface_info[RAD3DSURFACE565] = 2; + Surface_info[RAD3DSURFACE4444] = RAD3D_ALPHA_PIXELS | 2; + Surface_info[RAD3DSURFACEYUY2] = 2; Built_tables = 1; } -// TODO: -// Defining this struct locally because i believe this isnt 100% right. Or if it is right why isnt this in the bink.h or other headers? 
-// inestigate this - struct RAD3DIMAGE { - int a; - int b; - unsigned int c; - int d; - int e; - void* f; - int g; - int h; + u32 width; + u32 height; + u32 alpha_pixels; + u32 bytes_per_pixel; + u32 surface_format; + void* pixels; + u32 buffer_size; + GXTexObj texobj; }; +HRAD3DIMAGE Open_RAD_3D_image(HRAD3D rad_3d, u32 width, u32 height, u32 rad3d_surface_format) +{ + RAD3DIMAGE* image; + u32 bytes_per_pixel; + + Setup_surface_array(); + + bytes_per_pixel = Surface_info[rad3d_surface_format] & RAD3D_BYTES_PER_PIXEL_MASK; + image = (RAD3DIMAGE*)iFMVmalloc(sizeof(RAD3DIMAGE)); + if (image == 0) + { + image = 0; + } + else + { + image->width = width; + image->height = height; + image->alpha_pixels = Surface_info[rad3d_surface_format] >> 31; + image->bytes_per_pixel = bytes_per_pixel; + image->surface_format = rad3d_surface_format; + image->buffer_size = GXGetTexBufferSize((u16)width, (u16)height, + (GXTexFmt)GX_texture_format[rad3d_surface_format], + GX_FALSE, 0); + image->pixels = iFMVmalloc(image->buffer_size); + GXInitTexObj(&image->texobj, image->pixels, (u16)width, (u16)height, + (GXTexFmt)GX_texture_format[rad3d_surface_format], + GX_CLAMP, GX_CLAMP, GX_FALSE); + GXInitTexObjLOD(&image->texobj, GX_LINEAR, GX_NEAR, 0.0f, 0.0f, 0.0f, + GX_FALSE, GX_FALSE, GX_ANISO_1); + } + + return image; +} + void Close_RAD_3D_image(struct RAD3DIMAGE* image) { if (image != 0) { - if (image->f != 0) + if (image->pixels != 0) { - iFMVfree(image->f); - image->f = 0; + iFMVfree(image->pixels); + image->pixels = 0; } iFMVfree(image); } } -S32 Lock_RAD_3D_image(HRAD3DIMAGE rad_image, void* out_pixel_buffer, U32* out_buffer_pitch, - U32* arg3) +s32 Lock_RAD_3D_image(HRAD3DIMAGE rad_image, void* out_pixel_buffer, u32* out_buffer_pitch, + u32* out_surface_format) { if (rad_image == 0) { @@ -70,35 +105,105 @@ S32 Lock_RAD_3D_image(HRAD3DIMAGE rad_image, void* out_pixel_buffer, U32* out_bu if (out_pixel_buffer != 0) { - *(void**)(out_pixel_buffer) = rad_image->f; + 
*(void**)(out_pixel_buffer) = rad_image->pixels; } if (out_buffer_pitch != 0) { - *out_buffer_pitch = rad_image->a * rad_image->d; + *out_buffer_pitch = rad_image->width * rad_image->bytes_per_pixel; } - if (arg3 != 0) + if (out_surface_format != 0) { - *arg3 = rad_image->e; + *out_surface_format = rad_image->surface_format; } return 1; } -static void GXColor4u8(int r3, int r4, int r5, int r6) +void Unlock_RAD_3D_image(HRAD3DIMAGE rad_image) { - int ptr = 0xcc010000; - *((char*)(ptr)-0x8000) = r3; - *((char*)(ptr)-0x8000) = r4; - *((char*)(ptr)-0x8000) = r5; - *((char*)(ptr)-0x8000) = r6; + if (rad_image != 0) + { + DCStoreRange(rad_image->pixels, rad_image->buffer_size); + } +} + +static void Submit_vertices(f32 dest_x, f32 dest_y, f32 scale_x, f32 scale_y, long width, + long height, f32 alpha_level) +{ + s16 x0; + s16 y0; + s16 x1; + s16 y1; + u8 alpha; + + GXSetNumChans(0); + GXSetNumTexGens(1); + GXSetZMode(GX_TRUE, GX_ALWAYS, GX_TRUE); + GXBegin(GX_QUADS, GX_VTXFMT0, 4); + + x0 = (s16)dest_x; + y0 = (s16)(dest_y + (scale_y * (f32)height)); + alpha = (u8)((s32)(alpha_level * 255.0f) & 0xff); + GXPosition3s16((int)x0, (int)y0, 0); + GXColor4u8(0xff, 0xff, 0xff, (int)alpha); + GXTexCoord2f32(0.0f, 1.0f); + + GXPosition3s16((int)x0, (int)(s16)dest_y, 0); + GXColor4u8(0xff, 0xff, 0xff, (int)alpha); + GXTexCoord2f32(0.0f, 0.0f); + + x1 = (s16)(dest_x + (scale_x * (f32)width)); + GXPosition3s16((int)x1, (int)(s16)dest_y, 0); + GXColor4u8(0xff, 0xff, 0xff, (int)alpha); + GXTexCoord2f32(1.0f, 0.0f); + + GXPosition3s16((int)x1, (int)y0, 0); + GXColor4u8(0xff, 0xff, 0xff, (int)alpha); + GXTexCoord2f32(1.0f, 1.0f); + GXEnd(); } -static void GXPosition3s16(int r3, int r4, int r5) +void Blit_RAD_3D_image(HRAD3DIMAGE rad_image, f32 x_offset, f32 y_offset, f32 x_scale, + f32 y_scale, f32 alpha_level) { - int ptr = 0xcc010000; - *(short*)((char*)(ptr)-0x8000) = r3; - *(short*)((char*)(ptr)-0x8000) = r4; - *(short*)((char*)(ptr)-0x8000) = r5; + Mtx tex_mtx; + + if (rad_image != 
0) + { + if (alpha_level >= 1.0f) + { + if (rad_image->alpha_pixels == 0) + { + GXSetBlendMode(GX_BM_NONE, GX_BL_SRCALPHA, GX_BL_INVSRCALPHA, GX_LO_CLEAR); + } + else + { + GXSetBlendMode(GX_BM_BLEND, GX_BL_SRCALPHA, GX_BL_INVSRCALPHA, GX_LO_CLEAR); + GXSetTevAlphaIn(GX_TEVSTAGE0, GX_CA_ZERO, GX_CA_TEXA, GX_CA_KONST, GX_CA_ZERO); + } + } + else + { + GXSetBlendMode(GX_BM_BLEND, GX_BL_SRCALPHA, GX_BL_INVSRCALPHA, GX_LO_CLEAR); + if (rad_image->alpha_pixels == 0) + { + GXSetTevAlphaIn(GX_TEVSTAGE0, GX_CA_ZERO, GX_CA_RASA, GX_CA_KONST, GX_CA_ZERO); + } + else + { + GXSetTevAlphaIn(GX_TEVSTAGE0, GX_CA_ZERO, GX_CA_TEXA, GX_CA_RASA, GX_CA_ZERO); + } + } + + GXLoadTexObj(&rad_image->texobj, GX_TEXMAP0); + PSMTXScale(tex_mtx, 1.0f / (f32)rad_image->width, 1.0f / (f32)rad_image->height, 1.0f); + PSMTXScale(tex_mtx, 1.0f, 1.0f, 1.0f); + GXLoadTexMtxImm(tex_mtx, GX_TEXMTX0, GX_MTX2x4); + GXSetNumTexGens(1); + GXSetTexCoordGen(GX_TEXCOORD0, GX_TG_MTX2x4, GX_TG_TEX0, GX_TEXMTX0); + Submit_vertices(0.0f, 0.0f, 1.0f, 1.0f, (long)640.0f, (long)480.0f, alpha_level); + GXSetBlendMode(GX_BM_NONE, GX_BL_SRCALPHA, GX_BL_INVSRCALPHA, GX_LO_CLEAR); + } } diff --git a/src/bink/include/bink.h b/src/bink/include/bink.h index 67582f244..cf2136d0c 100644 --- a/src/bink/include/bink.h +++ b/src/bink/include/bink.h @@ -7,16 +7,43 @@ #define BINKVERSION "1.5y" #define BINKDATE "2003-09-23" -#ifndef __RADRES__ - #ifndef __RADBASEH__ #include "radbase.h" #endif +#include "radcb.h" RADDEFSTART typedef struct BINK PTR4* HBINK; +#define BINKMAXFRAMEBUFFERS 2 + +typedef struct BINKPLANE +{ + s32 Allocate; + void PTR4* Buffer; + u32 BufferPitch; +} BINKPLANE; + +typedef struct BINKFRAMEPLANESET +{ + BINKPLANE YPlane; + BINKPLANE cRPlane; + BINKPLANE cBPlane; + BINKPLANE APlane; +} BINKFRAMEPLANESET; + +typedef struct BINKFRAMEBUFFERS +{ + s32 TotalFrames; + u32 YABufferWidth; + u32 YABufferHeight; + u32 cRcBBufferWidth; + u32 cRcBBufferHeight; + u32 FrameNum; + BINKFRAMEPLANESET 
Frames[BINKMAXFRAMEBUFFERS]; +} BINKFRAMEBUFFERS; + struct BINKIO; typedef s32(RADLINK PTR4* BINKIOOPEN)(struct BINKIO PTR4* Bnkio, const char PTR4* name, u32 flags); typedef u32(RADLINK PTR4* BINKIOREADHEADER)(struct BINKIO PTR4* Bnkio, s32 Offset, void PTR4* Dest, @@ -28,17 +55,17 @@ typedef void(RADLINK PTR4* BINKIOSETINFO)(struct BINKIO PTR4* Bnkio, void PTR4* u32 FileSize, u32 simulate); typedef u32(RADLINK PTR4* BINKIOIDLE)(struct BINKIO PTR4* Bnkio); typedef void(RADLINK PTR4* BINKIOCLOSE)(struct BINKIO PTR4* Bnkio); +typedef s32(RADLINK PTR4* BINKIOBGCONTROL)(struct BINKIO PTR4* Bnkio, u32 Control); typedef void(RADLINK PTR4* BINKCBSUSPEND)(struct BINKIO PTR4* Bnkio); typedef s32(RADLINK PTR4* BINKCBTRYSUSPEND)(struct BINKIO PTR4* Bnkio); typedef void(RADLINK PTR4* BINKCBRESUME)(struct BINKIO PTR4* Bnkio); typedef void(RADLINK PTR4* BINKCBIDLE)(struct BINKIO PTR4* Bnkio); -// Unsorted -// Pulled from previous bink.h to maintain the build -extern void RADSetAudioMemory(void* (*malloc)(size_t), void (*free)(void*)); -extern void RADSetMemory(void* (*malloc)(size_t), void (*free)(void*)); -// +void RADSetAudioMemory(RADMEMALLOC malloc_fn, RADMEMFREE free_fn); + +#define BINKIO_DATA_SIZE (128 + 32) +#define BINK_CALLBACK_CONTROL_WORDS RADCB_CALLBACK_STORAGE_WORDS typedef struct BINKIO { @@ -61,14 +88,19 @@ typedef struct BINKIO volatile u32 BufHighUsed; volatile u32 CurBufSize; volatile u32 CurBufUsed; - volatile u8 iodata[128 + 32]; + union + { + volatile u32 Suspended; + // Platform IO drivers overlay their private state here. 
+ volatile u8 iodata[BINKIO_DATA_SIZE]; + }; // filled in by the caller BINKCBSUSPEND suspend_callback; BINKCBTRYSUSPEND try_suspend_callback; BINKCBRESUME resume_callback; BINKCBIDLE idle_on_callback; - volatile u32 callback_control[16]; // buffer for background IO callback + volatile RADCB_CALLBACK_STORAGE callback_control; // RADCB_CALLBACK storage for background IO } BINKIO; struct BINKSND; @@ -88,7 +120,9 @@ typedef s32(RADLINK PTR4* BINKSNDONOFF)(struct BINKSND PTR4* BnkSnd, s32 status) typedef s32(RADLINK PTR4* BINKSNDPAUSE)(struct BINKSND PTR4* BnkSnd, s32 status); typedef void(RADLINK PTR4* BINKSNDCLOSE)(struct BINKSND PTR4* BnkSnd); -typedef BINKSNDOPEN(RADLINK PTR4* BINKSNDSYSOPEN)(u32 param); +typedef BINKSNDOPEN(RADLINK PTR4* BINKSNDSYSOPEN)(UINTa param); + +#define BINKSND_DATA_SIZE 256 typedef struct BINKSND { @@ -108,7 +142,7 @@ typedef struct BINKSND u8 PTR4* sndend; // end of the sound buffer u8 PTR4* sndwritepos; // current write position u8 PTR4* sndreadpos; // current read position - u32 sndcomp; // sound compression handle + UINTa sndcomp; // sound compression handle u32 sndamt; // amount of sound currently in the buffer u32 sndconvert8; // convert back to 8-bit sound at runtime u32 sndendframe; // frame number that the sound ends on @@ -118,13 +152,13 @@ typedef struct BINKSND u32 BestSizeIn16; u32 BestSizeMask; u32 SoundDroppedOut; - s32 NoThreadService; s32 OnOff; + s32 NoThreadService; u32 Latency; - u32 VideoScale; u32 freq; s32 bits, chans; - u8 snddata[256]; + // Platform sound drivers overlay their private playback state here. 
+ u8 snddata[BINKSND_DATA_SIZE]; } BINKSND; typedef struct BINKRECT @@ -136,15 +170,15 @@ typedef struct BINKRECT typedef struct BUNDLEPOINTERS { - void* typeptr; - void* type16ptr; - void* colorptr; - void* bits2ptr; - void* motionXptr; - void* motionYptr; - void* dctptr; - void* mdctptr; - void* patptr; + void PTR4* typeptr; // 8x8 block types + void PTR4* type16ptr; // 16x16 subblock types + void PTR4* colorptr; // color values + void PTR4* bits2ptr; // two-color pattern bits + void PTR4* motionXptr; // motion X offsets + void PTR4* motionYptr; // motion Y offsets + void PTR4* dctptr; // intra DC values + void PTR4* mdctptr; // inter/motion DC values + void PTR4* patptr; // run lengths } BUNDLEPOINTERS; typedef struct BINK @@ -170,8 +204,8 @@ typedef struct BINK s32 NumRects; u32 PlaneNum; // which set of planes is current - void PTR4* YPlane[2]; // pointer to the uncompressed Y (Cr and Cr follow) - void PTR4* APlane[2]; // decompressed alpha plane (if present) + void PTR4* YPlane[BINKMAXFRAMEBUFFERS]; // pointer to the uncompressed Y (Cr and Cr follow) + void PTR4* APlane[BINKMAXFRAMEBUFFERS]; // decompressed alpha plane (if present) u32 YWidth; // widths and heights of the video planes u32 YHeight; u32 UVWidth; @@ -197,7 +231,7 @@ typedef struct BINK void PTR4* compframe; // compressed frame data void PTR4* preloadptr; // preloaded compressed frame data - u32* frameoffsets; // offsets of each of the frames + u32 PTR4* frameoffsets; // offsets of each of the frames BINKIO bio; // IO structure u8 PTR4* ioptr; // io buffer ptr @@ -251,9 +285,6 @@ typedef struct BINK u32 lastblitflags; // flags used on last blit u32 lastdecompframe; // last frame number decompressed - u32 lastresynctime; // last loop point that we did a resync on - u32 doresync; // should we do a resync in the next doframe? 
- u32 playingtracks; // how many tracks are playing u32 soundskips; // number of sound stops BINKSND PTR4* bsnd; // SND structures @@ -265,10 +296,10 @@ typedef struct BINK u32 skipped_in_a_row; // how many frames have we skipped in a row u32 big_sound_skip_adj; // adjustment for large skips u32 big_sound_skip_reduce; // amount to reduce large skips by each frame - u32 last_time_almost_empty; // time of last almost empty IO buffer u32 last_read_count; // counter to keep track of the last bink IO u32 last_sound_count; // counter to keep track of the last bink sound - u32 snd_callback_buffer[16]; // buffer for background sound callback + u32 last_time_almost_empty; // time of last almost empty IO buffer + RADCB_CALLBACK_STORAGE snd_callback_buffer; // RADCB_CALLBACK storage for background sound } BINK; typedef struct BINKSUMMARY @@ -308,13 +339,11 @@ typedef struct BINKSUMMARY typedef struct BINKREALTIME { - // TODO: marked these as volatile to get matches in radcb - u32 FrameNum; // Current frame number - volatile u32 FrameRate; // frame rate + u32 FrameRate; // frame rate u32 FrameRateDiv; // frame rate divisor u32 Frames; // frames in this sample period - volatile u32 FramesTime; // time is ms for these frames + u32 FramesTime; // time is ms for these frames u32 FramesVideoDecompTime; // time decompressing these frames u32 FramesAudioDecompTime; // time decompressing these frames u32 FramesReadTime; // time reading these frames @@ -324,7 +353,6 @@ typedef struct BINKREALTIME u32 ReadBufferSize; // size of read buffer u32 ReadBufferUsed; // amount of read buffer currently used u32 FramesDataRate; // data rate for these frames - // last offset it 0x34 } BINKREALTIME; #define BINKMARKER1 'fKIB' @@ -360,7 +388,7 @@ typedef struct BINKHDR #define BINKNOMMX 0x00040000L // Don't use MMX #define BINKNOSKIP 0x00080000L // Don't skip frames if falling behind #define BINKALPHA 0x00100000L // Decompress alpha plane (if present) -#define BINKNOFILLIOBUF 0x00200000L // Fill the 
IO buffer in SmackOpen +#define BINKNOFILLIOBUF 0x00200000L // Don't fill the IO buffer #define BINKSIMULATE 0x00400000L // Simulate the speed (call BinkSim first) #define BINKFILEHANDLE 0x00800000L // Use when passing in a file handle #define BINKIOSIZE 0x01000000L // Set an io size (call BinkIOSize first) @@ -368,6 +396,10 @@ typedef struct BINKHDR #define BINKFROMMEMORY 0x04000000L // Use when passing in a pointer to the file #define BINKNOTHREADEDIO 0x08000000L // Don't use a background thread for IO +#define BINKBGIOSUSPEND 1 +#define BINKBGIORESUME 2 +#define BINKBGIOWAIT 0x80000000 + #define BINKSURFACEFAST 0x00000000L #define BINKSURFACESLOW 0x08000000L #define BINKSURFACEDIRECT 0x04000000L @@ -388,6 +420,7 @@ typedef struct BINKHDR //#define BINKRBINVERT 0x00010000L // use reversed R and B planes #define BINKSURFACE8P 0 +#define BINKSURFACEP8 BINKSURFACE8P #define BINKSURFACE24 1 #define BINKSURFACE24R 2 #define BINKSURFACE32 3 @@ -405,25 +438,6 @@ typedef struct BINKHDR #define BINKSURFACEYV12 15 #define BINKSURFACEMASK 15 -#ifdef __RADXBOX__ - -#define BINKSURFACESALL 32 -#define BINKCONVERTERSMONO 64 -#define BINKCONVERTERS2X 256 - -#define BINKCONVERTERSALL (BINKSURFACESALL | BINKCONVERTERSMONO | BINKCONVERTERS2X) - -#define BinkLoad() BinkLoadUnload(1) -#define BinkUnload() BinkLoadUnload(0) - -#define BinkLoadConverter(val) BinkLoadUnloadConverter(val, 1) -#define BinkUnloadConverter(val) BinkLoadUnloadConverter(val, 0) - -RADEXPFUNC void RADEXPLINK BinkLoadUnload(s32 inout); -RADEXPFUNC void RADEXPLINK BinkLoadUnloadConverter(u32 surfaces, s32 inout); - -#endif - #define BINKGOTOQUICK 1 #define BINKGOTOQUICKSOUND 2 @@ -434,12 +448,6 @@ RADEXPFUNC void RADEXPLINK BinkLoadUnloadConverter(u32 surfaces, s32 inout); //======================================================================= -#ifdef __RADMAC__ -#pragma export on - -RADEXPFUNC HBINK RADEXPLINK BinkMacOpen(void /*FSSpec*/* fsp, u32 flags); -#endif - RADEXPFUNC void PTR4* RADEXPLINK 
BinkLogoAddress(void); RADEXPFUNC void RADEXPLINK BinkSetError(const char PTR4* err); @@ -452,9 +460,9 @@ RADEXPFUNC void RADEXPLINK BinkNextFrame(HBINK bnk); RADEXPFUNC s32 RADEXPLINK BinkWait(HBINK bnk); RADEXPFUNC void RADEXPLINK BinkClose(HBINK bnk); RADEXPFUNC s32 RADEXPLINK BinkPause(HBINK bnk, s32 pause); -RADEXPFUNC s32 RADEXPLINK BinkCopyToBuffer(HBINK bnk, void* dest, s32 destpitch, u32 destheight, +RADEXPFUNC s32 RADEXPLINK BinkCopyToBuffer(HBINK bnk, void PTR4* dest, s32 destpitch, u32 destheight, u32 destx, u32 desty, u32 flags); -RADEXPFUNC s32 RADEXPLINK BinkCopyToBufferRect(HBINK bnk, void* dest, s32 destpitch, u32 destheight, +RADEXPFUNC s32 RADEXPLINK BinkCopyToBufferRect(HBINK bnk, void PTR4* dest, s32 destpitch, u32 destheight, u32 destx, u32 desty, u32 srcx, u32 srcy, u32 srcw, u32 srch, u32 flags); RADEXPFUNC s32 RADEXPLINK BinkGetRects(HBINK bnk, u32 flags); @@ -463,7 +471,6 @@ RADEXPFUNC u32 RADEXPLINK BinkGetKeyFrame(HBINK bnk, u32 frame, s32 flags); RADEXPFUNC s32 RADEXPLINK BinkSetVideoOnOff(HBINK bnk, s32 onoff); RADEXPFUNC s32 RADEXPLINK BinkSetSoundOnOff(HBINK bnk, s32 onoff); -RADEXPFUNC void RADEXPLINK BinkFreeGlocalMemory(void); RADEXPFUNC void RADEXPLINK BinkSetVolume(HBINK bnk, u32 trackid, s32 volume); RADEXPFUNC void RADEXPLINK BinkSetPan(HBINK bnk, u32 trackid, s32 pan); RADEXPFUNC void RADEXPLINK BinkSetMixBins(HBINK bnk, u32 trackid, u32 PTR4* mix_bins, u32 total); @@ -481,7 +488,7 @@ typedef struct BINKTRACK u32 MaxSize; HBINK bink; - u32 sndcomp; + UINTa sndcomp; s32 trackindex; } BINKTRACK; @@ -496,46 +503,15 @@ RADEXPFUNC u32 RADEXPLINK BinkGetTrackID(HBINK bnk, u32 trackindex); RADEXPFUNC void RADEXPLINK BinkGetSummary(HBINK bnk, BINKSUMMARY PTR4* sum); RADEXPFUNC void RADEXPLINK BinkGetRealtime(HBINK bink, BINKREALTIME PTR4* run, u32 frames); +#define BINKNOSOUND 0xffffffff + RADEXPFUNC void RADEXPLINK BinkSetSoundTrack(u32 total_tracks, u32 PTR4* tracks); RADEXPFUNC void RADEXPLINK BinkSetIO(BINKIOOPEN io); RADEXPFUNC 
void RADEXPLINK BinkSetFrameRate(u32 forcerate, u32 forceratediv); RADEXPFUNC void RADEXPLINK BinkSetSimulate(u32 sim); RADEXPFUNC void RADEXPLINK BinkSetIOSize(u32 iosize); -RADEXPFUNC s32 RADEXPLINK BinkSetSoundSystem(BINKSNDSYSOPEN open, u32 param); - -#ifdef __RADWIN__ - -RADEXPFUNC BINKSNDOPEN RADEXPLINK BinkOpenDirectSound(u32 param); // don't call directly -#define BinkSoundUseDirectSound(lpDS) BinkSetSoundSystem(BinkOpenDirectSound, (u32)lpDS) - -RADEXPFUNC BINKSNDOPEN RADEXPLINK BinkOpenWaveOut(u32 param); // don't call directly -#define BinkSoundUseWaveOut() BinkSetSoundSystem(BinkOpenWaveOut, 0) - -#endif - -#ifndef __RADMAC__ - -RADEXPFUNC BINKSNDOPEN RADEXPLINK BinkOpenMiles(u32 param); // don't call directly -#define BinkSoundUseMiles(hdigdriver) BinkSetSoundSystem(BinkOpenMiles, (u32)hdigdriver) - -#endif - -#ifdef __RADMAC__ - -RADEXPFUNC BINKSNDOPEN RADEXPLINK BinkOpenSoundManager(u32 param); // don't call directly -#define BinkSoundUseSoundManager() BinkSetSoundSystem(BinkOpenSoundManager, 0) - -#endif - -#ifdef __RADLINUX__ - -RADEXPFUNC BINKSNDOPEN RADEXPLINK BinkOpenSDLMixer(u32 param); // don't call directly -#define BinkSoundUseSDLMixer() BinkSetSoundSystem(BinkOpenSDLMixer, 0) - -#endif - -#ifdef __RADNGC__ +RADEXPFUNC s32 RADEXPLINK BinkSetSoundSystem(BINKSNDSYSOPEN open, UINTa param); typedef void PTR4*(RADLINK PTR4* RADARAMALLOC)(u32 num_bytes); typedef void(RADLINK PTR4* RADARAMFREE)(void PTR4* ptr); @@ -546,192 +522,21 @@ typedef struct RADARAMCALLBACKS RADARAMFREE aram_free; } RADARAMCALLBACKS; -RADEXPFUNC BINKSNDOPEN RADEXPLINK BinkOpenAX(u32 param); // don't call directly +RADEXPFUNC BINKSNDOPEN RADEXPLINK BinkOpenNGCSound(u32 param); // don't call directly +#define BinkOpenAX BinkOpenNGCSound #define BinkSoundUseAX(functions) \ BinkSetSoundSystem(BinkOpenAX, (u32)functions) // takes a pointer to RADARAMCALLBACKS RADEXPFUNC BINKSNDOPEN RADEXPLINK BinkOpenMusyXSound(u32 param); // don't call directly #define BinkSoundUseMusyX() 
BinkSetSoundSystem(BinkOpenMusyXSound, 0) -#endif - -#if defined(__RADXBOX__) || defined(__RADWIN__) - -RADEXPFUNC s32 RADEXPLINK BinkDX8SurfaceType(void* lpD3Ds); - -#endif - -#if defined(__RADWIN__) - -RADEXPFUNC s32 RADEXPLINK BinkDX9SurfaceType(void* lpD3Ds); - -#endif - -// The BinkBuffer API isn't implemented on DOS, Xbox or GameCube -#if !defined(__RADDOS__) && !defined(__RADXBOX__) && !defined(__RADNGC__) - -//========================================================================= -typedef struct BINKBUFFER* HBINKBUFFER; - -#define BINKBUFFERSTRETCHXINT 0x80000000 -#define BINKBUFFERSTRETCHX 0x40000000 -#define BINKBUFFERSHRINKXINT 0x20000000 -#define BINKBUFFERSHRINKX 0x10000000 -#define BINKBUFFERSTRETCHYINT 0x08000000 -#define BINKBUFFERSTRETCHY 0x04000000 -#define BINKBUFFERSHRINKYINT 0x02000000 -#define BINKBUFFERSHRINKY 0x01000000 -#define BINKBUFFERSCALES 0xff000000 -#define BINKBUFFERRESOLUTION 0x00800000 - -#ifdef __RADMAC__ - -//#include -//#include -//#include - -typedef struct BINKBUFFER -{ - u32 Width; - u32 Height; - u32 WindowWidth; - u32 WindowHeight; - u32 SurfaceType; - void* Buffer; - s32 BufferPitch; - u32 ScreenWidth; - u32 ScreenHeight; - u32 ScreenDepth; - u32 ScaleFlags; - - s32 destx, desty; - s32 wndx, wndy; - u32 wnd; - - s32 noclipping; - u32 type; - s32 issoftcur; - u32 cursorcount; - -} BINKBUFFER; - -#define BINKBUFFERAUTO 0 -#define BINKBUFFERDIRECT 1 -#define BINKBUFFERGWORLD 2 -#define BINKBUFFERTYPEMASK 31 - -RADEXPFUNC HBINKBUFFER RADEXPLINK BinkBufferOpen(void* /*WindowPtr*/ wnd, u32 width, u32 height, - u32 bufferflags); -RADEXPFUNC s32 RADEXPLINK BinkGDSurfaceType(void* /*GDHandle*/ gd); -RADEXPFUNC s32 RADEXPLINK BinkIsSoftwareCursor(void* /*GDHandle*/ gd); -RADEXPFUNC s32 RADEXPLINK BinkCheckCursor(void* /*WindowPtr*/ wp, s32 x, s32 y, s32 w, s32 h); - -#else - -typedef struct BINKBUFFER -{ - u32 Width; - u32 Height; - u32 WindowWidth; - u32 WindowHeight; - u32 SurfaceType; - void* Buffer; - s32 BufferPitch; - s32 
ClientOffsetX; - s32 ClientOffsetY; - u32 ScreenWidth; - u32 ScreenHeight; - u32 ScreenDepth; - u32 ExtraWindowWidth; - u32 ExtraWindowHeight; - u32 ScaleFlags; - u32 StretchWidth; - u32 StretchHeight; - - s32 surface; - void* ddsurface; - void* ddclipper; - s32 destx, desty; - s32 wndx, wndy; - u32 wnd; - s32 minimized; - s32 ddoverlay; - s32 ddoffscreen; - s32 lastovershow; - - s32 issoftcur; - u32 cursorcount; - void* buffertop; - u32 type; - s32 noclipping; - - s32 loadeddd; - s32 loadedwin; - - void* dibh; - void* dibbuffer; - s32 dibpitch; - void* dibinfo; - u32 dibdc; - u32 diboldbitmap; - -} BINKBUFFER; - -#define BINKBUFFERAUTO 0 -#define BINKBUFFERPRIMARY 1 -#define BINKBUFFERDIBSECTION 2 -#define BINKBUFFERYV12OVERLAY 3 -#define BINKBUFFERYUY2OVERLAY 4 -#define BINKBUFFERUYVYOVERLAY 5 -#define BINKBUFFERYV12OFFSCREEN 6 -#define BINKBUFFERYUY2OFFSCREEN 7 -#define BINKBUFFERUYVYOFFSCREEN 8 -#define BINKBUFFERRGBOFFSCREENVIDEO 9 -#define BINKBUFFERRGBOFFSCREENSYSTEM 10 -#define BINKBUFFERLAST 10 -#define BINKBUFFERTYPEMASK 31 - -RADEXPFUNC HBINKBUFFER RADEXPLINK BinkBufferOpen(void* /*HWND*/ wnd, u32 width, u32 height, - u32 bufferflags); -RADEXPFUNC s32 RADEXPLINK BinkBufferSetHWND(HBINKBUFFER buf, void* /*HWND*/ newwnd); -RADEXPFUNC s32 RADEXPLINK BinkDDSurfaceType(void PTR4* lpDDS); -RADEXPFUNC s32 RADEXPLINK BinkIsSoftwareCursor(void PTR4* lpDDSP, void* /*HCURSOR*/ cur); -RADEXPFUNC s32 RADEXPLINK BinkCheckCursor(void* /*HWND*/ wnd, s32 x, s32 y, s32 w, s32 h); -RADEXPFUNC s32 RADEXPLINK BinkBufferSetDirectDraw(void PTR4* lpDirectDraw, void PTR4* lpPrimary); - -#endif - -RADEXPFUNC void RADEXPLINK BinkBufferClose(HBINKBUFFER buf); -RADEXPFUNC s32 RADEXPLINK BinkBufferLock(HBINKBUFFER buf); -RADEXPFUNC s32 RADEXPLINK BinkBufferUnlock(HBINKBUFFER buf); -RADEXPFUNC void RADEXPLINK BinkBufferSetResolution(s32 w, s32 h, s32 bits); -RADEXPFUNC void RADEXPLINK BinkBufferCheckWinPos(HBINKBUFFER buf, s32 PTR4* NewWindowX, - s32 PTR4* NewWindowY); -RADEXPFUNC s32 
RADEXPLINK BinkBufferSetOffset(HBINKBUFFER buf, s32 destx, s32 desty); -RADEXPFUNC void RADEXPLINK BinkBufferBlit(HBINKBUFFER buf, BINKRECT PTR4* rects, u32 numrects); -RADEXPFUNC s32 RADEXPLINK BinkBufferSetScale(HBINKBUFFER buf, u32 w, u32 h); -RADEXPFUNC char PTR4* RADEXPLINK BinkBufferGetDescription(HBINKBUFFER buf); -RADEXPFUNC char PTR4* RADEXPLINK BinkBufferGetError(); -RADEXPFUNC void RADEXPLINK BinkRestoreCursor(s32 checkcount); -RADEXPFUNC s32 RADEXPLINK BinkBufferClear(HBINKBUFFER buf, u32 RGB); - -#endif - typedef void PTR4*(RADLINK PTR4* BINKMEMALLOC)(u32 bytes); typedef void(RADLINK PTR4* BINKMEMFREE)(void PTR4* ptr); RADEXPFUNC void RADEXPLINK BinkSetMemory(BINKMEMALLOC a, BINKMEMFREE f); -#ifdef __RADMAC__ - -#pragma export off - -#endif - RADDEFEND -#endif - // @cdep pre $set(INCs,$INCs -I$clipfilename($file)) $ignore(TakeCPP) #endif diff --git a/src/bink/include/binkngc.h b/src/bink/include/binkngc.h index 6660b550b..d6bc84d85 100644 --- a/src/bink/include/binkngc.h +++ b/src/bink/include/binkngc.h @@ -3,8 +3,25 @@ #include "bink.h" -void radfree(void* ptr); -typedef void *(*RADMEMALLOC)(u32 size); -typedef void (*RADMEMFREE)(void* mem); +u32 mult64anddiv(u32 left, u32 right, u32 divisor); +u32 mult64andshift(u32 left, u32 right, u32 shift); +void radmemset16(void PTR4* dest, u16 value, u32 size); +/* Milliseconds since the first call, derived from the GameCube OS timebase. */ +u32 RADTimerRead(void); +/* Cycle timers store either the low timebase word or a full 64-bit timebase snapshot. 
*/ +void RADCycleTimerStartAddr(u32 PTR4* dest); +u32 RADCycleTimerDeltaAddr(u32 PTR4* dest); +void RADCycleTimerStartAddr64(u64 PTR4* dest); +void RADCycleTimerDeltaAddr64(u64 PTR4* dest); +#define RADCycleTimerStart(var) RADCycleTimerStartAddr(&(var)) +#define RADCycleTimerDelta(var) RADCycleTimerDeltaAddr(&(var)) +#define RADCycleTimerStart64(var) RADCycleTimerStartAddr64(&(var)) +#define RADCycleTimerDelta64(var) RADCycleTimerDeltaAddr64(&(var)) +void RADSetAudioMemory(RADMEMALLOC malloc_fn, RADMEMFREE free_fn); +/* Audio memory is supplied by the client; there is no ARAM heap fallback here. */ +void PTR4* radaudiomalloc(u32 size); +void radaudiofree(void PTR4* ptr); +u32 div64(u32 high, u32 low, u32 divisor); +void ReadTimeBase(u32 PTR4* dest); #endif diff --git a/src/bink/include/binkread.h b/src/bink/include/binkread.h deleted file mode 100644 index 6735fc490..000000000 --- a/src/bink/include/binkread.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef BINKREAD_H -#define BINKREAD_H - -#include "bink.h" -void BinkCloseTrack(HBINKTRACK bnkt); -#endif diff --git a/src/bink/include/binktextures.h b/src/bink/include/binktextures.h new file mode 100644 index 000000000..9cffbaaa2 --- /dev/null +++ b/src/bink/include/binktextures.h @@ -0,0 +1,40 @@ +#ifndef _BINKTEXTURES_H_ +#define _BINKTEXTURES_H_ +#include "bink.h" +#include "dolphin/gx.h" + +/* + * RAD's high level API for using 3D hardware to do color conversion. + * + * Playback allocates platform textures, registers the resulting + * BINKFRAMEBUFFERS with Bink, waits for the GPU before decoding, syncs the + * decoded texture memory after BinkDoFrame, and then draws the frame. + */ + +typedef struct BINKFRAMETEXTURES { + GXTexObj Ytexture; + GXTexObj cRtexture; + GXTexObj cBtexture; + GXTexObj Atexture; +} BINKFRAMETEXTURES; + +typedef struct BINKTEXTURESET { + /* GPU texture resources for each Bink frame buffer. 
*/ + BINKFRAMETEXTURES textures[BINKMAXFRAMEBUFFERS]; + + /* Bink's view of the frame buffers backed by those textures. */ + BINKFRAMEBUFFERS bink_buffers; + + /* GameCube texture memory and deswizzle state. */ + void* base_ptr; + u32 framesize; + u32 YAdeswizzle_width; + u32 YAdeswizzle_height; + u32 cRcBdeswizzle_width; + u32 cRcBdeswizzle_height; + GXTexObj YAdeswizzle; + GXTexObj cRcBdeswizzle; + s32 drawing[BINKMAXFRAMEBUFFERS]; +} BINKTEXTURESET; + +#endif diff --git a/src/bink/include/popmal.h b/src/bink/include/popmal.h index 527d08d61..6ccaaf0ed 100644 --- a/src/bink/include/popmal.h +++ b/src/bink/include/popmal.h @@ -3,7 +3,13 @@ #include "bink.h" -u32 popmalloctotal(); -u32 popmalloc(HBINK bnk); +#define Round32(num) (((num) + 31) & ~31) +#define PushMallocBytesForXPtrs(count) (((count) * sizeof(void PTR4*)) + ((count) * sizeof(u64)) + 64) + +void pushmalloc(void PTR4* PTR4* ptr, u32 amount); +u32 popmalloctotal(void); +void PTR4* popmalloc(u32 amount); + +#define popfree(ptr, memfree) memfree(ptr) #endif diff --git a/src/bink/include/rad3d.h b/src/bink/include/rad3d.h index e13581f5b..927c48b60 100644 --- a/src/bink/include/rad3d.h +++ b/src/bink/include/rad3d.h @@ -5,36 +5,13 @@ #include "radbase.h" #endif -#ifdef GEKKO - #define RAD3DSURFACE32 0 #define RAD3DSURFACE32A 1 #define RAD3DSURFACE565 2 #define RAD3DSURFACE4444 3 // actually RGB4A3 -#define RADSURFACEYUY2 4 -#define RAD3DSURFACECOUNT (RADSURFACEYUY2 + 1) - -#else - -#define RAD3DSURFACE32 0 -#define RAD3DSURFACE32A 1 -#define RAD3DSURFACE555 2 -#define RAD3DSURFACE565 3 -#define RAD3DSURFACE5551 4 -#define RAD3DSURFACE4444 5 - -#ifdef _XBOX -#define RAD3DSURFACEYUY2 6 +#define RAD3DSURFACEYUY2 4 +#define RADSURFACEYUY2 RAD3DSURFACEYUY2 #define RAD3DSURFACECOUNT (RAD3DSURFACEYUY2 + 1) -#else -#define RAD3DSURFACE32R 6 -#define RAD3DSURFACE32RA 7 -#define RAD3DSURFACE24 8 -#define RAD3DSURFACE24R 9 -#define RAD3DSURFACECOUNT (RAD3DSURFACE24R + 1) -#endif - -#endif #ifdef __cplusplus #define 
RADCFUNC extern "C" @@ -42,47 +19,11 @@ #define RADCFUNC #endif -#ifdef GEKKO - #define HRAD3D int -#elif _XBOX - -#define HRAD3D LPDIRECT3DDEVICE8 - -#else - -// -// Define the handle types. -// - -struct RAD3D; -typedef struct RAD3D* HRAD3D; - -// -// Functions to open a RAD 3D handle (to OpenGL or Direct3D). -// - -typedef void* HWND; - -RADCFUNC HRAD3D Open_RAD_3D(HWND window); - -RADCFUNC void Close_RAD_3D(HRAD3D rad_3D); - -RADCFUNC void Resize_RAD_3D(HRAD3D rad_3d, U32 width, U32 height); - -RADCFUNC char* Describe_RAD_3D(void); - -#endif - RADCFUNC void Start_RAD_3D_frame(HRAD3D rad_3D); -RADCFUNC void End_RAD_3D_frame(HRAD3D rad_3D -#ifdef GEKKO - , - S32 swap -#endif -); +RADCFUNC void End_RAD_3D_frame(HRAD3D rad_3D, s32 swap); // // Define the handle types. @@ -91,38 +32,20 @@ RADCFUNC void End_RAD_3D_frame(HRAD3D rad_3D struct RAD3DIMAGE; typedef struct RAD3DIMAGE* HRAD3DIMAGE; -// -// Functions to open a 3D image handle (GL or D3D texture array). -// - -RADCFUNC HRAD3DIMAGE Open_RAD_3D_image(HRAD3D rad_3d, U32 width, U32 height, -#if defined(_XBOX) || defined(GEKKO) - U32 rad3d_surface_format -#else - S32 alpha_pixels, U32 maximum_texture_size RADDEFAULT(256) -#endif -); +RADCFUNC HRAD3DIMAGE Open_RAD_3D_image(HRAD3D rad_3d, u32 width, u32 height, + u32 rad3d_surface_format); RADCFUNC void Close_RAD_3D_image(HRAD3DIMAGE rad_image); -// RADCFUNC S32 Lock_RAD_3D_image(HRAD3DIMAGE rad_image, void* out_pixel_buffer, U32* out_buffer_pitch, -// U32* out_surface_type - -// #if !(defined(_XBOX) || defined(GEKKO)) -// , -// U32* src_x, U32* src_y, U32* src_w, U32* src_h -// #endif -// ); - -RADCFUNC S32 Lock_RAD_3D_image(HRAD3DIMAGE rad_image, void* out_pixel_buffer, U32* out_buffer_pitch, - U32* arg3); +RADCFUNC s32 Lock_RAD_3D_image(HRAD3DIMAGE rad_image, void* out_pixel_buffer, + u32* out_buffer_pitch, u32* out_surface_format); RADCFUNC void Unlock_RAD_3D_image(HRAD3DIMAGE rad_image); -RADCFUNC void Blit_RAD_3D_image(HRAD3DIMAGE rad_image, F32 x_offset, F32 
y_offset, F32 x_scale, - F32 y_scale, F32 alpha_level); +RADCFUNC void Blit_RAD_3D_image(HRAD3DIMAGE rad_image, f32 x_offset, f32 y_offset, f32 x_scale, + f32 y_scale, f32 alpha_level); -RADCFUNC void Draw_lines_RAD_3D_image(HRAD3DIMAGE rad_image, F32 x_offset, F32 y_offset, - F32 x_scale, F32 y_scale); +RADCFUNC void Draw_lines_RAD_3D_image(HRAD3DIMAGE rad_image, f32 x_offset, f32 y_offset, + f32 x_scale, f32 y_scale); #endif diff --git a/src/bink/include/radbase.h b/src/bink/include/radbase.h index e98456921..ebdd277f5 100644 --- a/src/bink/include/radbase.h +++ b/src/bink/include/radbase.h @@ -1,300 +1,15 @@ -// __RAD16__ means 16 bit code (Win16) -// __RAD32__ means 32 bit code (DOS, Win386, Win32s, Mac) - -// __RADDOS__ means DOS code (16 or 32 bit) -// __RADWIN__ means Windows code (Win16, Win386, Win32s) -// __RADWINEXT__ means Windows 386 extender (Win386) -// __RADNT__ means Win32s code -// __RADMAC__ means Macintosh -// __RADCARBON__ means Carbon -// __RADMACH__ means MachO -// __RADXBOX__ means the XBox console -// __RADNGC__ means the Nintendo GameCube -// __RADNTBUILDLINUX__ means building Linux on NT -// __RADLINUX__ means actually building on Linux (most likely with GCC) - -// __RADX86__ means Intel x86 -// __RADMMX__ means Intel x86 MMX instructions are allowed -// __RAD68K__ means 68K -// __RADPPC__ means PowerPC - -// __RADLITTLEENDIAN__ means processor is little-endian (x86) -// __RADBIGENDIAN__ means processor is big-endian (680x0, PPC) - #ifndef __RADBASEH__ #define __RADBASEH__ #define RADCOPYRIGHT "Copyright (C) 1994-2003, RAD Game Tools, Inc." 
-#ifndef __RADRES__ - -#define __RADNGC__ -#define __RAD32__ -#define __RADPPC__ -#define __RADBIGENDIAN__ - -#if defined(GEKKO) - -#define __RADNGC__ -#define __RAD32__ -#define __RADPPC__ -#define __RADBIGENDIAN__ -#define RADINLINE inline - -#elif (defined(__MWERKS__) && !defined(__INTEL__)) || defined(__MRC__) || defined(THINK_C) || \ - defined(powerc) || defined(macintosh) || defined(__powerc) || defined(__APPLE__) || \ - defined(__MACH__) -#define __RADMAC__ -#if defined(powerc) || defined(__powerc) || defined(__ppc__) -#define __RADPPC__ -#else -#define __RAD68K__ -#endif - -#define __RAD32__ -#define __RADBIGENDIAN__ - -#if defined(__MWERKS__) -#if (defined(__cplusplus) || !__option(only_std_keywords)) -#define RADINLINE inline -#endif -#elif defined(__MRC__) -#if defined(__cplusplus) #define RADINLINE inline -#endif -#elif defined(__GNUC__) || defined(__GNUG__) -#define RADINLINE inline -#define __RADMACH__ -#endif - -#ifdef __MACH__ -#define __RADMACH__ -#endif - -#ifdef TARGET_API_MAC_CARBON -#if TARGET_API_MAC_CARBON -#ifndef __RADCARBON__ -#define __RADCARBON__ -#endif -#endif -#endif - -#elif defined(linux) - -#define __RADLINUX__ -#define __RADX86__ -#define __RADMMX__ -#define __RAD32__ -#define __RADLITTLEENDIAN__ -#define RADINLINE inline - -#else - -#define __RADX86__ -#define __RADMMX__ - -#ifdef __MWERKS__ -#define _WIN32 -#endif - -#ifdef __DOS__ -#define __RADDOS__ -#endif - -#ifdef __386__ -#define __RAD32__ -#endif - -#ifdef _Windows //For Borland -#ifdef __WIN32__ -#define WIN32 -#else -#define __WINDOWS__ -#endif -#endif - -#ifdef _WINDOWS //For MS -#ifndef _WIN32 -#define __WINDOWS__ -#endif -#endif - -#ifdef _WIN32 -#ifdef _XBOX -#define __RADXBOX__ -#else -#define __RADNT__ -#endif -#define __RADWIN__ -#define __RAD32__ -#else -#ifdef __NT__ -#ifdef _XBOX -#define __RADXBOX__ -#else -#define __RADNT__ -#endif -#define __RADWIN__ -#define __RAD32__ -#else -#ifdef __WINDOWS_386__ -#define __RADWIN__ -#define __RADWINEXT__ -#define 
__RAD32__ -#else -#ifdef __WINDOWS__ -#define __RADWIN__ -#define __RAD16__ -#else -#ifdef WIN32 -#ifdef _XBOX -#define __RADXBOX__ -#else -#define __RADNT__ -#endif -#define __RADWIN__ -#define __RAD32__ -#endif -#endif -#endif -#endif -#endif - -#define __RADLITTLEENDIAN__ -#ifdef __WATCOMC__ -#define RADINLINE -#else -#define RADINLINE __inline -#endif -#endif - -#if (!defined(__RADDOS__) && !defined(__RADWIN__) && !defined(__RADMAC__) && \ - !defined(__RADNGC__) && !defined(__RADXBOX__) && !defined(__RADLINUX__)) -#error \ - "RAD.H did not detect your platform. Define __DOS__, __WINDOWS__, WIN32, macintosh, or powerc." -#endif - -#ifdef __RADFINAL__ -#define RADTODO(str) \ - { \ - char __str[0] = str; \ - } -#else -#define RADTODO(str) -#endif - -#ifdef __RADNGC__ #define RADLINK #define RADEXPLINK #define RADEXPFUNC RADDEFFUNC -#define RADASMLINK -#define PTR4 - -#elif defined(__RADLINUX__) - -#define RADLINK __attribute__((cdecl)) -#define RADEXPLINK __attribute__((cdecl)) -#define RADEXPFUNC RADDEFFUNC -#define RADASMLINK #define PTR4 -#elif defined(__RADMAC__) - -// this define is for CodeWarrior 11's stupid new libs (even though -// we don't use longlong's). 
- -#define __MSL_LONGLONG_SUPPORT__ - -#define RADLINK -#define RADEXPLINK - -#ifdef __CFM68K__ -#ifdef __RADINDLL__ -#define RADEXPFUNC RADDEFFUNC __declspec(export) -#else -#define RADEXPFUNC RADDEFFUNC __declspec(import) -#endif -#else -#define RADEXPFUNC RADDEFFUNC -#endif -#define RADASMLINK - -#else - -#ifdef __RADNT__ -#ifndef _WIN32 -#define _WIN32 -#endif -#ifndef WIN32 -#define WIN32 -#endif -#endif - -#ifdef __RADWIN__ -#ifdef __RAD32__ -#ifdef __RADXBOX__ - -#define RADLINK __stdcall -#define RADEXPLINK __stdcall -#define RADEXPFUNC RADDEFFUNC - -#elif defined(__RADNTBUILDLINUX__) - -#define RADLINK __cdecl -#define RADEXPLINK __cdecl -#define RADEXPFUNC RADDEFFUNC - -#else -#ifdef __RADNT__ - -#define RADLINK __stdcall -#define RADEXPLINK __stdcall - -#ifdef __RADINEXE__ -#define RADEXPFUNC RADDEFFUNC -#else -#ifndef __RADINDLL__ -#define RADEXPFUNC RADDEFFUNC __declspec(dllimport) -#ifdef __BORLANDC__ -#if __BORLANDC__ <= 0x460 -#undef RADEXPFUNC -#define RADEXPFUNC RADDEFFUNC -#endif -#endif -#else -#define RADEXPFUNC RADDEFFUNC __declspec(dllexport) -#endif -#endif -#else -#define RADLINK __pascal -#define RADEXPLINK __far __pascal -#define RADEXPFUNC RADDEFFUNC -#endif -#endif -#else -#define RADLINK __pascal -#define RADEXPLINK __far __pascal __export -#define RADEXPFUNC RADDEFFUNC -#endif -#else -#define RADLINK __pascal -#define RADEXPLINK __pascal -#define RADEXPFUNC RADDEFFUNC -#endif - -#define RADASMLINK __cdecl - -#endif - -#ifndef __RADXBOX__ -#ifdef __RADWIN__ -#ifndef _WINDOWS -#define _WINDOWS -#endif -#endif -#endif - #ifndef RADDEFFUNC #ifdef __cplusplus @@ -315,85 +30,20 @@ #endif -#ifdef __RADNGC__ #define RAD_ATTRIBUTE_ALIGN(num) __attribute__((aligned(num))) -#else -#ifdef __RADX86__ -#ifdef __WATCOMC__ -#define RAD_ATTRIBUTE_ALIGN(num) -#else -#define RAD_ATTRIBUTE_ALIGN(num) __declspec(align(num)) -#endif -#else -#define RAD_ATTRIBUTE_ALIGN(num) -#endif -#endif - -#ifdef __RADX86__ -#ifdef __WATCOMC__ -#define RAD_ALIGN_TYPE 
double -#define RAD_ALIGN_DEF 0.0 -#else -#define RAD_ALIGN_TYPE double __declspec(align(8)) -#define RAD_ALIGN_DEF 0.0 -#endif -#else -#define RAD_ALIGN_TYPE double -#define RAD_ALIGN_DEF 0.0 -#endif - -#define RAD_ALIGN_ADD_TYPE(var) RAD_ALIGN_TYPE var##align = RAD_ALIGN_DEF - -// Each commented out line is required for our build - -// #define S8 signed char -// #define U8 unsigned char -// #define U32 unsigned long -// #define S32 signed long -// #define F32 float -// #define F64 double -#if defined(__MWERKS__) || defined(__MRC__) || defined(GEKKO) -// #define U64 unsigned long long -// #define S64 signed long long -// #else -// #define U64 unsigned __int64 -// #define S64 signed __int64 -#endif - -#ifdef __RAD32__ -#define PTR4 -#define U16 unsigned short -#define S16 signed short -#else -#define PTR4 __far -#define U16 unsigned int -#define S16 signed int -#endif - -#ifndef RAD_NO_LOWERCASE_TYPES - -#ifdef __RADNGC__ +#define INTADDR signed long +#define UINTADDR unsigned long +#define SINTa signed long +#define UINTa unsigned long #include -#else +typedef void PTR4* (RADLINK PTR4* RADMEMALLOC)(u32 bytes); +typedef void (RADLINK PTR4* RADMEMFREE)(void PTR4* ptr); -#define u8 U8 -#define s8 S8 -#define u16 U16 -#define s16 S16 -#define u32 U32 -#define s32 S32 -#define u64 U64 -#define s64 S64 -#define f32 F32 -#define f64 F64 - -#endif - -#endif - -#endif +RADEXPFUNC void RADEXPLINK RADSetMemory(RADMEMALLOC a, RADMEMFREE f); +RADEXPFUNC void PTR4* RADEXPLINK radmalloc(u32 numbytes); +RADEXPFUNC void RADEXPLINK radfree(void PTR4* ptr); #endif diff --git a/src/bink/include/radcb.h b/src/bink/include/radcb.h index 853e3fa00..8b6aa2874 100644 --- a/src/bink/include/radcb.h +++ b/src/bink/include/radcb.h @@ -1,21 +1,53 @@ -#ifndef RADCD_H +#ifndef RADCB_H #define RADCB_H -#include "bink.h" +#include "radbase.h" -void RADCB_unregister_2_callbacks(BINKIO* io); -u32 RADCB_registered_count(BINKIO io); -u32 RADCB_try_to_suspend_handler(BINKIO* io); -void 
RADCB_suspend_handler(BINKIO* io); -void RADCB_resume_handler(BINKIO* io); -void RADCB_try_to_suspend_callback(HBINK bnk, HBINK bnk1); -void RADCB_suspend_callback(HBINK bnk, HBINK bnk1); -void RADCB_suspend_2_callbacks(BINKIO* io, BINKIO* io2, BINKIO* io3, BINKIO* io4); -void RADCB_resume_callback(HBINK bnk, HBINK bnk1); -void RADCB_suspend_2_handlers(BINKIO* io, BINKIO* io2); -void RADCB_free_handler(RADARAMCALLBACKS* callback); -void RADCB_free_2_handlers(BINKIO* io, BINKIO* io2); -void RADCB_idle_on_callbacks(); -u32 RADCB_callback_size(); +typedef struct RADCB_HANDLER RADCB_HANDLER; +typedef struct RADCB_CALLBACK RADCB_CALLBACK; +typedef u32(RADLINK PTR4* RADCB_POLL)(RADCB_CALLBACK PTR4* callback, u32 count); +typedef void(RADLINK PTR4* RADCB_RUN)(RADCB_CALLBACK PTR4* callback, u32 count); + +struct RADCB_CALLBACK +{ + RADCB_CALLBACK PTR4* next; + u32 unused; + u32 result; /* Last nonzero poll result for this callback. */ + u32 state; + volatile u32 suspend_count; + RADCB_POLL poll; + RADCB_RUN run; +}; + +#define RADCB_CALLBACK_STORAGE_WORDS 16 + +typedef union RADCB_CALLBACK_STORAGE +{ + RADCB_CALLBACK callback; + u32 words[RADCB_CALLBACK_STORAGE_WORDS]; +} RADCB_CALLBACK_STORAGE; + +RADCB_HANDLER PTR4* RADCB_allocate_handler(u32 rate); +void RADCB_register_callback(RADCB_HANDLER PTR4* handler, RADCB_CALLBACK PTR4* callback, + RADCB_POLL poll, RADCB_RUN run); +u32 RADCB_unregister_callback(RADCB_HANDLER PTR4* handler, RADCB_CALLBACK PTR4* callback, + u32 flags); +u32 RADCB_unregister_2_callbacks(RADCB_HANDLER PTR4* handler1, RADCB_CALLBACK PTR4* callback1, + RADCB_HANDLER PTR4* handler2, RADCB_CALLBACK PTR4* callback2, + u32 flags); +u32 RADCB_registered_count(RADCB_HANDLER PTR4* handler); +u32 RADCB_try_to_suspend_handler(RADCB_HANDLER PTR4* handler); +void RADCB_suspend_handler(RADCB_HANDLER PTR4* handler); +void RADCB_resume_handler(RADCB_HANDLER PTR4* handler); +u32 RADCB_try_to_suspend_callback(RADCB_HANDLER PTR4* handler, RADCB_CALLBACK PTR4* 
callback); +void RADCB_suspend_callback(RADCB_HANDLER PTR4* handler, RADCB_CALLBACK PTR4* callback); +void RADCB_suspend_2_callbacks(RADCB_HANDLER PTR4* handler1, RADCB_CALLBACK PTR4* callback1, + RADCB_HANDLER PTR4* handler2, RADCB_CALLBACK PTR4* callback2); +void RADCB_resume_callback(RADCB_HANDLER PTR4* handler, RADCB_CALLBACK PTR4* callback); +void RADCB_suspend_2_handlers(RADCB_HANDLER PTR4* handler1, RADCB_HANDLER PTR4* handler2); +void RADCB_free_handler(RADCB_HANDLER PTR4* handler); +void RADCB_free_2_handlers(RADCB_HANDLER PTR4* handler1, RADCB_HANDLER PTR4* handler2); +void RADCB_idle_on_callbacks(void); +u32 RADCB_callback_size(void); #endif diff --git a/src/bink/shared/time/radcb.c b/src/bink/shared/time/radcb.c index ecb5c1e71..34d9dcc4d 100644 --- a/src/bink/shared/time/radcb.c +++ b/src/bink/shared/time/radcb.c @@ -1,184 +1,306 @@ #include "binkngc.h" +#include "radcb.h" #include +#include -// TODO: review the args here and make sure these make some sort of sense. -// Considering there are multiple different structs that these functions could access and still match try to be mindful of which you use -// Using BINKREALTIME seems to make the most sense at this moment +typedef struct RAD_LOW_MEM RAD_LOW_MEM; -void RADCB_register_callback(BINKSUMMARY* bnk, BINKSUMMARY* bnkReal, HBINK bnk3, HBINK bnk4) +#define RAD_LOW_MEM_BUS_CLOCK_OFFSET 0xf8 +#define RADCB_MILLISECONDS_PER_SECOND 1000 +#define RADCB_BUS_CLOCK_DIVISOR 4000 +#define RADCB_STATE_STOPPED 0 +#define RADCB_STATE_RUNNING 1 +#define RADCB_STATE_SUSPENDING 2 +#define RADCB_UNREGISTER_FREE_EMPTY_HANDLER 1 + +struct RAD_LOW_MEM +{ + u8 pad[RAD_LOW_MEM_BUS_CLOCK_OFFSET]; + u32 bus_clock; +}; + +struct RADCB_HANDLER +{ + u32 state; + volatile u32 suspend_count; + OSAlarm alarm; /* Periodic timer that drives the callback list. */ + RADCB_CALLBACK PTR4* callbacks; + u32 count; /* Tick counter passed to poll/run callbacks. 
*/ + u32 registered_count; + u32 pad; +}; + +#define RADCB_HANDLER_ALARM_OFFSET ((u32)&((RADCB_HANDLER PTR4*)0)->alarm) +#define RADCB_HANDLER_FROM_ALARM(alarm) ((RADCB_HANDLER PTR4*)((u8 PTR4*)(alarm) - RADCB_HANDLER_ALARM_OFFSET)) + +BOOL OSDisableInterrupts(void); +BOOL OSEnableInterrupts(void); +BOOL OSRestoreInterrupts(BOOL level); +void RADCB_free_handler(RADCB_HANDLER PTR4* handler); + +static u32 remove_cb_from_list(RADCB_HANDLER PTR4* handler, RADCB_CALLBACK PTR4* callback) { + RADCB_CALLBACK PTR4* cb; + + if (handler == 0) { + return 0; + } + + cb = handler->callbacks; + if (cb == callback) { + handler->callbacks = cb->next; + return 1; + } + + while (cb->next != 0) { + if (cb->next == callback) { + cb->next = callback->next; + return 1; + } + cb = cb->next; + } + + return 0; } -u32 RADCB_unregister_callback(BINKREALTIME* bnk, BINKREALTIME* bnk1, u32 tmp) +static void call_callbacks(RADCB_HANDLER PTR4* handler) { - int iVar1; - u32 uVar2; + RADCB_CALLBACK PTR4* cb; + + ++handler->count; - uVar2 = 0; - if ((((bnk != 0) && (bnk1 != 0)) && (bnk1->FramesAudioDecompTime != 0)) && - bnk1->FramesVideoDecompTime != 0) - { - OSDisableInterrupts(); - if (((remove_cb_from_list(bnk, bnk1) != 0) && (bnk = bnk + -1, (tmp & 1) != 0)) && - (bnk == 0)) - { - uVar2 = 1; - RADCB_free_handler(bnk); + if (handler->state == RADCB_STATE_RUNNING) { + cb = handler->callbacks; + while (cb != 0) { + if (cb->state == RADCB_STATE_RUNNING) { + cb->result = cb->poll(cb, handler->count); + if (cb->result != 0) { + cb->run(cb, handler->count); + } + } + cb = cb->next; } - OSRestoreInterrupts(); } - return uVar2; } -u32 RADCB_unregister_2_callbacks(BINKIO* io, BINKIO* io2, BINKIO* io3, BINKIO* io4, u32 tmp) +static void RAD_callback_timer(OSAlarm* alarm, OSContext* context) { - return RADCB_unregister_callback(io, io2, tmp) | RADCB_unregister_callback(io3, io4, tmp) << 1; + RADCB_HANDLER PTR4* handler = RADCB_HANDLER_FROM_ALARM(alarm); + BOOL enabled; + + if (handler != 0 && 
handler->callbacks != 0) { + enabled = OSEnableInterrupts(); + call_callbacks(handler); + OSRestoreInterrupts(enabled); + } } -u32 RADCB_registered_count(BINKIO io) +RADCB_HANDLER PTR4* RADCB_allocate_handler(u32 rate) { - return io.ThreadTime; + OSAlarm PTR4* alarm; + OSTime now; + u32 period; + volatile RAD_LOW_MEM PTR4* low_mem; + RADCB_HANDLER PTR4* handler = (RADCB_HANDLER PTR4*)radmalloc(sizeof(*handler)); + volatile RADCB_HANDLER PTR4* init; + + if (handler != 0) { + alarm = &handler->alarm; + init = handler; + memset(handler, 0, sizeof(*handler)); + init->state = RADCB_STATE_STOPPED; + init->count = 1; + init->registered_count = 0; + init->suspend_count = 1; + init->callbacks = 0; + + now = OSGetTime(); + low_mem = (volatile RAD_LOW_MEM PTR4*)0x80000000; + period = (RADCB_MILLISECONDS_PER_SECOND / rate) * + (low_mem->bus_clock / RADCB_BUS_CLOCK_DIVISOR); + OSSetPeriodicAlarm(alarm, now, period, RAD_callback_timer); + } + + return handler; } -u32 RADCB_try_to_suspend_handler(BINKREALTIME* bnk) +void RADCB_register_callback(RADCB_HANDLER PTR4* handler, RADCB_CALLBACK PTR4* callback, + RADCB_POLL poll, RADCB_RUN run) { - u32 tmpReg; + BOOL enabled; - if ((bnk != 0)) - { - OSDisableInterrupts(); - if (bnk->FrameNum == 1) - { - bnk->FrameNum = 2; - } - bnk->FrameRate = bnk->FrameRate + 1; - OSRestoreInterrupts(); - tmpReg = 1; - if (bnk->FrameNum == 1) - { - tmpReg = 0; + enabled = OSDisableInterrupts(); + + callback->state = RADCB_STATE_RUNNING; + callback->poll = poll; + callback->run = run; + callback->suspend_count = 0; + callback->next = handler->callbacks; + handler->callbacks = callback; + ++handler->registered_count; + + OSRestoreInterrupts(enabled); +} + +u32 RADCB_unregister_callback(RADCB_HANDLER PTR4* handler, RADCB_CALLBACK PTR4* callback, + u32 flags) +{ + u32 freed = 0; + BOOL enabled; + + if (handler != 0 && callback != 0 && callback->run != 0 && callback->poll != 0) { + enabled = OSDisableInterrupts(); + if (remove_cb_from_list(handler, 
callback) != 0) { + --handler->registered_count; + if ((flags & RADCB_UNREGISTER_FREE_EMPTY_HANDLER) != 0 && handler->callbacks == 0) { + freed = 1; + RADCB_free_handler(handler); + } } + OSRestoreInterrupts(enabled); } - else - { - tmpReg = 1; + + return freed; +} + +u32 RADCB_unregister_2_callbacks(RADCB_HANDLER PTR4* handler1, RADCB_CALLBACK PTR4* callback1, + RADCB_HANDLER PTR4* handler2, RADCB_CALLBACK PTR4* callback2, + u32 flags) +{ + return RADCB_unregister_callback(handler1, callback1, flags) | + (RADCB_unregister_callback(handler2, callback2, flags) << 1); +} + +u32 RADCB_registered_count(RADCB_HANDLER PTR4* handler) +{ + return handler->registered_count; +} + +u32 RADCB_try_to_suspend_handler(RADCB_HANDLER PTR4* handler) +{ + u32 suspended; + BOOL enabled; + + if (handler != 0) { + enabled = OSDisableInterrupts(); + if (handler->state == RADCB_STATE_RUNNING) { + handler->state = RADCB_STATE_SUSPENDING; + } + ++handler->suspend_count; + OSRestoreInterrupts(enabled); + + suspended = 1; + if (handler->state == RADCB_STATE_RUNNING) { + suspended = 0; + } + } else { + suspended = 1; } - return tmpReg; + + return suspended; } -void RADCB_suspend_handler(BINKIO* io) +void RADCB_suspend_handler(RADCB_HANDLER PTR4* handler) { - while (!RADCB_try_to_suspend_handler(io)) - { + while (RADCB_try_to_suspend_handler(handler) == 0) { OSYieldThread(); } } -void RADCB_resume_handler(BINKREALTIME* bnk) +void RADCB_resume_handler(RADCB_HANDLER PTR4* handler) { - if (bnk != NULL) - { - OSDisableInterrupts(); - bnk->FrameRate = bnk->FrameRate + -1; - if (bnk->FrameRate == 0) - { - bnk->FrameNum = 1; + BOOL enabled; + + if (handler != 0) { + enabled = OSDisableInterrupts(); + --handler->suspend_count; + if (handler->suspend_count == 0) { + handler->state = RADCB_STATE_RUNNING; } - OSRestoreInterrupts(); + OSRestoreInterrupts(enabled); } } -HBINK RADCB_try_to_suspend_callback(HBINK bnk, BINKREALTIME* bnk1) +u32 RADCB_try_to_suspend_callback(RADCB_HANDLER PTR4* handler, 
RADCB_CALLBACK PTR4* callback) { - u32 tmpReg; + u32 suspended; + BOOL enabled; - if ((((bnk != 0) && (bnk1 != 0)) && (bnk1->FramesAudioDecompTime != 0)) && - bnk1->FramesVideoDecompTime != 0) - { - OSDisableInterrupts(); - if (bnk1->Frames == 1) - { - bnk1->Frames = 2; + if (handler != 0 && callback != 0 && callback->run != 0 && callback->poll != 0) { + enabled = OSDisableInterrupts(); + if (callback->state == RADCB_STATE_RUNNING) { + callback->state = RADCB_STATE_SUSPENDING; } - bnk1->FramesTime = bnk1->FramesTime + 1; - OSRestoreInterrupts(); - tmpReg = 1; - if (bnk1->Frames == 1) - { - tmpReg = 0; + ++callback->suspend_count; + OSRestoreInterrupts(enabled); + + suspended = 1; + if (callback->state == RADCB_STATE_RUNNING) { + suspended = 0; } + } else { + suspended = 1; } - else - { - tmpReg = 1; - } - return tmpReg; + + return suspended; } -void RADCB_suspend_callback(HBINK bnk, BINKREALTIME* bnk1) +void RADCB_suspend_callback(RADCB_HANDLER PTR4* handler, RADCB_CALLBACK PTR4* callback) { - if ((((bnk != 0) && (bnk1 != 0)) && (bnk1->FramesAudioDecompTime != 0)) && - bnk1->FramesVideoDecompTime != 0) - { - while (RADCB_try_to_suspend_callback(bnk, bnk1) == 0) - { + if (handler != 0 && callback != 0 && callback->run != 0 && callback->poll != 0) { + while (RADCB_try_to_suspend_callback(handler, callback) == 0) { OSYieldThread(); } } } -void RADCB_suspend_2_callbacks(BINKIO* io, BINKIO* io2, BINKIO* io3, BINKIO* io4) +void RADCB_suspend_2_callbacks(RADCB_HANDLER PTR4* handler1, RADCB_CALLBACK PTR4* callback1, + RADCB_HANDLER PTR4* handler2, RADCB_CALLBACK PTR4* callback2) { - RADCB_suspend_callback(io, io2); - RADCB_suspend_callback(io3, io4); + RADCB_suspend_callback(handler1, callback1); + RADCB_suspend_callback(handler2, callback2); } -// TODO: -// Make sure that this function is correct. 
-// To get this match I had to mark FramesTime as volatile in the header -void RADCB_resume_callback(HBINK bnk, BINKREALTIME* bnk1) +void RADCB_resume_callback(RADCB_HANDLER PTR4* handler, RADCB_CALLBACK PTR4* callback) { - if ((((bnk != 0) && (bnk1 != 0)) && (bnk1->FramesAudioDecompTime != 0)) && - bnk1->FramesVideoDecompTime != 0) - { - OSDisableInterrupts(); - bnk1->FramesTime--; - if (bnk1->FramesTime == 0) - { - bnk1->Frames = 1; - } + BOOL enabled; - OSRestoreInterrupts(); + if (handler != 0 && callback != 0 && callback->run != 0 && callback->poll != 0) { + enabled = OSDisableInterrupts(); + --callback->suspend_count; + if (callback->suspend_count == 0) { + callback->state = RADCB_STATE_RUNNING; + } + OSRestoreInterrupts(enabled); } } -void RADCB_suspend_2_handlers(BINKIO* io, BINKIO* io2) +void RADCB_suspend_2_handlers(RADCB_HANDLER PTR4* handler1, RADCB_HANDLER PTR4* handler2) { - RADCB_suspend_handler(io); - RADCB_suspend_handler(io2); + RADCB_suspend_handler(handler1); + RADCB_suspend_handler(handler2); } -void RADCB_free_handler(RADARAMCALLBACKS* callback) +void RADCB_free_handler(RADCB_HANDLER PTR4* handler) { - if (callback != NULL) - { - RADCB_suspend_handler(callback); - OSCancelAlarm(callback + 1); - radfree(callback); + if (handler != 0) { + RADCB_suspend_handler(handler); + OSCancelAlarm(&handler->alarm); + radfree(handler); } } -void RADCB_free_2_handlers(BINKIO* io, BINKIO* io2) +void RADCB_free_2_handlers(RADCB_HANDLER PTR4* handler1, RADCB_HANDLER PTR4* handler2) { - RADCB_free_handler(io); - RADCB_free_handler(io2); + RADCB_free_handler(handler1); + RADCB_free_handler(handler2); } -void RADCB_idle_on_callbacks() +void RADCB_idle_on_callbacks(void) { } -u32 RADCB_callback_size() +u32 RADCB_callback_size(void) { - return 28; + return sizeof(RADCB_CALLBACK); } diff --git a/src/bink/src/sdk/binkbits.h b/src/bink/src/sdk/binkbits.h new file mode 100644 index 000000000..31e12482e --- /dev/null +++ b/src/bink/src/sdk/binkbits.h @@ -0,0 +1,116 @@ 
+#ifndef BINK_SDK_BINKBITS_H +#define BINK_SDK_BINKBITS_H + +#include "bink.h" + +typedef struct BINKVARBITS +{ + void PTR4* cur; /* Next word in the packed stream. */ + u32 bits; /* Low-order bit reservoir. */ + u32 bitlen; /* Valid bits currently in the reservoir. */ +} BINKVARBITS; + +#define BINKBITSLOCALS(name) \ + void PTR4* name##cur; \ + u32 name##bits; \ + u32 name##bitlen + +/* Local BinkBits state uses the same cur/bits/bitlen fields as BINKVARBITS. */ +#define BINK_LOAD32(ptr) (*(const u32 PTR4*)(ptr)) + +#define BinkVarBitsOpen(vb, pointer) \ + do { \ + (vb).bits = BINK_LOAD32(pointer); \ + (vb).cur = ((u8 PTR4*)(pointer)) + 4; \ + (vb).bitlen = 32; \ + } while (0) + +#define BinkBitsGet(v, type, vb, len, mask) \ + do { \ + if (vb##bitlen < (len)) { \ + register u32 nb = BINK_LOAD32((u32 PTR4*)vb##cur); \ + (v) = (type)((vb##bits | (nb << vb##bitlen)) & (mask)); \ + vb##bits = nb >> ((len)-vb##bitlen); \ + vb##bitlen = vb##bitlen + 32 - (len); \ + vb##cur = ((u8 PTR4*)vb##cur) + 4; \ + } else { \ + (v) = (type)(vb##bits & (mask)); \ + vb##bits >>= (len); \ + vb##bitlen -= (len); \ + } \ + } while (0) + +#define MAX_AT_LEAST_BITS 32 + +#define BinkBitsAtLeastStart(vb, len) \ + do { \ + if (vb##bitlen < (len)) { \ + u32 nb = BINK_LOAD32((u32 PTR4*)vb##cur); \ + vb##bits |= nb << vb##bitlen; \ + } \ + } while (0) + +#define BinkBitsAtLeastEnd(vb) \ + do { \ + if ((s32)vb##bitlen <= 0) { \ + vb##bits = BINK_LOAD32((u32 PTR4*)vb##cur) >> (-(s32)vb##bitlen); \ + vb##cur = ((u8 PTR4*)vb##cur) + 4; \ + vb##bitlen += 32; \ + } \ + } while (0) + +#define BINKBITSCOPY(name, from) \ + do { \ + name##cur = from##cur; \ + name##bits = from##bits; \ + name##bitlen = from##bitlen; \ + } while (0) + +#define BinkBitsInAtLeastPeek(vb) (vb##bits) +#define BinkBitsInAtLeastUse(vb, bl) \ + do { \ + vb##bits >>= (bl); \ + vb##bitlen -= (bl); \ + } while (0) + +#define BinkBitsPeek(v, type, vb, len) \ + do { \ + BinkBitsAtLeastStart(vb, len); \ + (v) = 
(type)BinkBitsInAtLeastPeek(vb); \ + } while (0) + +#define BinkBitsUse(vb, bl) \ + do { \ + BinkBitsInAtLeastUse(vb, bl); \ + BinkBitsAtLeastEnd(vb); \ + } while (0) + +#define BinkVarBitsUse(vb, len) \ + do { \ + (vb).bits >>= (len); \ + (vb).bitlen -= (len); \ + } while (0) + +/* Copy between the struct form used by VarBits and the local BinkBits form. */ +#define VarBitsCopyToBinkBits(local, vb) \ + do { \ + local##cur = (vb).cur; \ + local##bits = (vb).bits; \ + local##bitlen = (vb).bitlen; \ + } while (0) + +#define BinkBitsCopyToVarBits(vb, local) \ + do { \ + (vb).cur = local##cur; \ + (vb).bits = local##bits; \ + (vb).bitlen = local##bitlen; \ + } while (0) + +/* Return the consumed byte count rounded up to a 32-bit word boundary. */ +#define BinkBitsSizeBytesRoundedToU32(local, base) \ + ((((((u8 PTR4*)local##cur) - ((u8 PTR4*)(base))) - (local##bitlen / 8)) + 3) & ~3) + +#define BinkVarBitsSizeBytesRoundedToU32(vb, base) \ + ((((((u8 PTR4*)(vb).cur) - ((u8 PTR4*)(base))) - ((vb).bitlen / 8)) + 3) & ~3) + +#endif diff --git a/src/bink/src/sdk/bitplane.c b/src/bink/src/sdk/bitplane.c index e69de29bb..e5460afb7 100644 --- a/src/bink/src/sdk/bitplane.c +++ b/src/bink/src/sdk/bitplane.c @@ -0,0 +1,1736 @@ +#include "bink.h" +#include "bitplane.h" +#include "dct.h" +#include "varbits.h" + +#define BP_BITS_PER_WORD BPBITSTYPELEN +#define BP_WORD_TOP_BIT (BP_BITS_PER_WORD - 1) +#define BP_S32_SIGN_SHIFT BP_WORD_TOP_BIT +#define BP_S16_SIGN_SHIFT 15 +#define BP_BLOCK_COEFFS 64 +#define BP_AC_COEFFS (BP_BLOCK_COEFFS - 1) +#define BP_LOSSY_OUTPUT_COEFFS 32 +#define BP_TREE_NODES 68 +#define BP_TREE_GROUPS 16 +#define BP_TREE_LAST_GROUP (BP_TREE_GROUPS - 1) +#define BP_TREE_HIGH_GROUPS 8 +#define BP_TREE_CHILD_COUNT 4 +#define BP_TREE_ADDED_CHILD_COUNT (BP_TREE_CHILD_COUNT - 1) +#define BP_TREE_CHILD1_INDEX 1 +#define BP_TREE_CHILD2_INDEX 2 +#define BP_TREE_CHILD3_INDEX 3 +#define BP_TREE_CHILD1_BASE (BP_TREE_CHILD_COUNT * 1) +#define BP_TREE_CHILD2_BASE 
(BP_TREE_CHILD_COUNT * 2) +#define BP_TREE_CHILD3_BASE (BP_TREE_CHILD_COUNT * 3) +#define BP_TREE_GROUP_INDEX(index) ((index) >> 2) +#define BP_NEXT_TREE_GROUP(ptr) ((ptr) + BP_TREE_CHILD_COUNT) +#define BP_FIRST_LOSSLESS_TREE_GROUP_INDEX BP_TREE_CHILD_COUNT +#define BP_FIRST_LOSSLESS_TREE_GROUP_END_INDEX (BP_FIRST_LOSSLESS_TREE_GROUP_INDEX + BP_TREE_CHILD3_INDEX) +#define BP_FIRST_LOSSY_TREE_GROUP_END_INDEX BP_TREE_CHILD3_INDEX +#define BP_TREE_GROUP1_INDEX 1 +#define BP_TREE_GROUP6_INDEX 6 +#define BP_TREE_GROUP11_INDEX 11 +#define BP_TREE_HIGH_GROUP0_INDEX 2 +#define BP_TREE_HIGH_GROUP1_INDEX 7 +#define BP_TREE_HIGH_GROUP2_INDEX 12 +#define BP_TREE_HIGH_GROUP0_CHILD0_INDEX 3 +#define BP_TREE_HIGH_GROUP1_CHILD0_INDEX 8 +#define BP_TREE_HIGH_GROUP2_CHILD0_INDEX 13 +#define BP_BYTE_MASK 0xff +#define BP_U16_MASK 0xffff +#define BP_SIGN_BIT 0x80 +#define BP_NEGATIVE_COEFF_SIGN 0xffff +#define BP_POSITIVE_COEFF_SIGN 1 +#define BP_ABS_COEFF(value, sign) (((sign) ^ (value)) - (sign)) +#define BP_TREE_KIND_MASK 0x300 +/* Write-side tree nodes pack kind, coefficient/group index, and bit depth. */ +#define BP_TREE_INDEX_SHIFT 10 +#define BP_TREE_GROUP_SHIFT 12 +#define BP_TREE_HIGH_GROUP_SHIFT 14 +#define BP_TREE_BASE_MASK 0xfc00 +#define BP_TREE_INDEX_STRIDE 0x400 +#define BP_COEFF1_INDEX 1 +#define BP_COEFF2_INDEX 2 +#define BP_COEFF3_INDEX 3 +#define BP_GROUP1_NODE_BASE (1 << BP_TREE_GROUP_SHIFT) +#define BP_GROUP6_NODE_BASE (6 << BP_TREE_GROUP_SHIFT) +#define BP_GROUP11_NODE_BASE (11 << BP_TREE_GROUP_SHIFT) +#define BP_COEFF1_LEAF_BASE (7 << 8) +#define BP_COEFF2_LEAF_BASE (11 << 8) +#define BP_COEFF3_LEAF_BASE (15 << 8) +#define BP_READ_TREE_KIND_MASK 3 +/* Read-side nodes pack the same logical tree into byte-sized entries. 
*/ +#define BP_READ_TREE_INDEX_SHIFT 2 +#define BP_READ_TREE_BASE_MASK 0xfc +#define BP_READ_TREE_CHILD_COUNT BP_TREE_CHILD_COUNT +#define BP_READ_TREE_CHILD1_BASE (BP_READ_TREE_CHILD_COUNT * 1) +#define BP_READ_TREE_CHILD2_BASE (BP_READ_TREE_CHILD_COUNT * 2) +#define BP_READ_TREE_CHILD3_BASE (BP_READ_TREE_CHILD_COUNT * 3) +#define BP_READ_TREE_NODE(index, kind) (((index) << BP_READ_TREE_INDEX_SHIFT) + (kind)) +#define BP_READ_TREE_INDEX(node) ((node) >> BP_READ_TREE_INDEX_SHIFT) +#define BP_READ_TREE_BASE(node) ((node) & BP_READ_TREE_BASE_MASK) +#define BP_READ_TREE_GROUP_BASE(group) ((group) * BP_READ_TREE_CHILD_COUNT) +#define BP_READ_TREE_GROUP(group) BP_READ_TREE_NODE(BP_READ_TREE_GROUP_BASE(group), BP_READ_TREE_HIGH_NODE) +#define BP_READ_TREE_GROUP1_ROOT BP_READ_TREE_GROUP(1) +#define BP_READ_TREE_GROUP6_ROOT BP_READ_TREE_GROUP(6) +#define BP_READ_TREE_GROUP11_ROOT BP_READ_TREE_GROUP(11) +#define BP_READ_TREE_BRANCH(index) BP_READ_TREE_NODE(index, BP_READ_TREE_BRANCH_NODE) +#define BP_READ_TREE_COEFF(index) BP_READ_TREE_NODE(index, BP_READ_TREE_COEFF_NODE) +#define BP_READ_TREE_COEFF1_ROOT BP_READ_TREE_COEFF(BP_COEFF1_INDEX) +#define BP_READ_TREE_COEFF2_ROOT BP_READ_TREE_COEFF(BP_COEFF2_INDEX) +#define BP_READ_TREE_COEFF3_ROOT BP_READ_TREE_COEFF(BP_COEFF3_INDEX) +#define BP_READ_TREE_BRANCH_FROM_NODE(node) (BP_READ_TREE_BASE(node) + BP_READ_TREE_BRANCH_NODE) +#define BP_READ_TREE_GROUP_FROM_INDEX(index) BP_READ_TREE_NODE((index) + BP_READ_TREE_CHILD_COUNT, BP_READ_TREE_GROUP_NODE) +#define BP_LOSSLESS_ROOT_NODES 6 +#define BP_LOSSY_ROOT_NODES 4 +#define BP_LOSSLESS_LEVEL_BITS 4 +#define BP_LOSSLESS_LEVEL_MASK 0xf +#define BP_LOSSY_LEVEL_BITS 3 +#define BP_LOSSY_LEVEL_MASK 7 +#define BP_ZIGZAG_COEFF(vals, index) ((vals)[zigzag[index]]) + +typedef enum BPWriteTreeKind +{ + BP_TREE_GROUP_NODE = 0x100, + BP_TREE_AFTER_GROUP_NODE = BP_TREE_GROUP_NODE + 1, + BP_TREE_BRANCH_NODE = 0x200, + BP_TREE_COEFF_NODE = 0x300 +} BPWriteTreeKind; + +typedef enum 
BPReadTreeKind +{ + BP_READ_TREE_HIGH_NODE = 0, + BP_READ_TREE_GROUP_NODE = 1, + BP_READ_TREE_BRANCH_NODE = 2, + BP_READ_TREE_COEFF_NODE = 3 +} BPReadTreeKind; + /* Two adjacent s16 values; ReadBPLossless casts through this type so one assignment moves a pair of coefficients. */ +typedef struct BPCOEFFPAIR +{ + s16 first; + s16 second; +} BPCOEFFPAIR; + /* One storage area viewed either as s8 bytes or as s16 words. */ +typedef union BPLOSSYBLOCK +{ + s8 bytes[BP_BLOCK_COEFFS]; + s16 words[BP_LOSSY_OUTPUT_COEFFS]; +} BPLOSSYBLOCK; + +void readlossy(void PTR4* out, BPBITSTREAM PTR4* bits, s32 limit); + /* Field accessors for a bit-stream pointer; BP_STREAM casts its argument to BPBITSTREAM*. */ +#define BP_STREAM(bits) ((BPBITSTREAM*)(bits)) +#define BP_STREAM_CUR(bits) (BP_STREAM(bits)->cur) +#define BP_STREAM_BITS(bits) (BP_STREAM(bits)->bits) +#define BP_STREAM_BITLEN(bits) (BP_STREAM(bits)->bitlen) + /* PUT_BP_BIT: sets bit number bitlen of the pending word when `bit` is non-zero and bumps bitlen; once bitlen reaches BP_BITS_PER_WORD the word is stored through cur, cur advances and the accumulator is cleared. */ +#define PUT_BP_BIT(bits, bit) \ + do { \ + u32 _bitcount; \ + if (bit) { \ + BP_STREAM_BITS(bits) |= 1 << BP_STREAM_BITLEN(bits); \ + } \ + _bitcount = BP_STREAM_BITLEN(bits); \ + BP_STREAM_BITLEN(bits) = _bitcount + 1; \ + if (_bitcount + 1 == BP_BITS_PER_WORD) { \ + *BP_STREAM_CUR(bits) = BP_STREAM_BITS(bits); \ + BP_STREAM_BITLEN(bits) = 0; \ + BP_STREAM_BITS(bits) = 0; \ + BP_STREAM_CUR(bits) = BP_STREAM_CUR(bits) + 1; \ + } \ + } while (0) + /* PUT_BP_BITS: ORs (value & mask) into the pending word at bit position bitlen and adds `size` to bitlen; when that reaches BP_BITS_PER_WORD the word is stored, cur advances, and the bits that did not fit become the start of the next pending word. */ +#define PUT_BP_BITS(bits, value, size, mask) \ + do { \ + BPBITSTYPE _value = (value) & (mask); \ + u32 _bitcount = BP_STREAM_BITLEN(bits) + (size); \ + BPBITSTYPE _bitbuf = BP_STREAM_BITS(bits) | (_value << BP_STREAM_BITLEN(bits)); \ + BP_STREAM_BITLEN(bits) = _bitcount; \ + BP_STREAM_BITS(bits) = _bitbuf; \ + if (_bitcount >= BP_BITS_PER_WORD) { \ + *BP_STREAM_CUR(bits) = _bitbuf; \ + _bitcount = BP_STREAM_BITLEN(bits) - BP_BITS_PER_WORD; \ + BP_STREAM_CUR(bits) = BP_STREAM_CUR(bits) + 1; \ + BP_STREAM_BITLEN(bits) = _bitcount; \ + BP_STREAM_BITS(bits) = 0; \ + if (_bitcount != 0) { \ + BP_STREAM_BITS(bits) = _value >> ((size) - _bitcount); \ + } \ + } \ + } while (0) + /* LenBPLossless: walks the group/branch/coeff work list the way WriteBPLossless does, but only accumulates a count in `len` and returns it; no bit stream is touched. vals is read through BP_ZIGZAG_COEFF for scan indices 1..BP_AC_COEFFS (index 0 is not examined). NOTE(review): presumably the count is the encoded size in bits - the seed value 4 equals BP_LOSSLESS_LEVEL_BITS; confirm. */ +u32 LenBPLossless(s16 PTR4* vals) +{ + u16 entry; + u16 kind; + u32 sign; + u32 bits; + u32 maxbits; + s32 len; + s32 count; + u8 PTR4* group_ptr; + u8 PTR4* len_ptr; + u32 value; + u16 PTR4* cur; + u16 PTR4* restart; + u16
PTR4* end; + u16 PTR4* roots; + s32 total; + u16 tree_nodes[BP_TREE_NODES]; + u8 lens[BP_BLOCK_COEFFS]; + u8 groups[BP_TREE_GROUPS]; + u8 hi_groups[BP_TREE_HIGH_GROUPS]; + s32 active; + + /* Lossless bitplanes code AC coefficient magnitudes by zigzag bit depth. lens[] receives getbitlevelvar() of each value after BP_ABS_COEFF, and maxbits tracks the largest level seen. */ + count = BP_AC_COEFFS; + maxbits = 0; + len = 1; + do { + value = (u32)BP_ZIGZAG_COEFF(vals, len); + sign = (s32)value >> BP_S32_SIGN_SHIFT; + bits = getbitlevelvar(BP_ABS_COEFF(value, sign) & BP_U16_MASK) & BP_BYTE_MASK; + if (maxbits < bits) { + maxbits = bits; + } + lens[len] = (u8)bits; + len++; + count--; + } while (count != 0); + + /* Each four-coefficient subtree inherits the deepest child bit depth. Two of the four levels are read through len_ptr and two through group_ptr. */ + len = BP_FIRST_LOSSLESS_TREE_GROUP_INDEX; + count = BP_TREE_LAST_GROUP; + active = maxbits != 0; + group_ptr = lens + BP_FIRST_LOSSLESS_TREE_GROUP_END_INDEX; + len_ptr = lens; + do { + len_ptr = BP_NEXT_TREE_GROUP(len_ptr); + bits = *len_ptr; + if (*len_ptr < len_ptr[1]) { + bits = len_ptr[1]; + } + if (bits < group_ptr[-1]) { + bits = group_ptr[-1]; + } + if (bits < *group_ptr) { + bits = *group_ptr; + } + groups[BP_TREE_GROUP_INDEX(len)] = (u8)bits; + group_ptr = BP_NEXT_TREE_GROUP(group_ptr); + len += BP_TREE_CHILD_COUNT; + count--; + } while (count != 0); + /* Raise each of the three high groups to the maximum of itself and three further groups[] entries, and keep a snapshot in hi_groups[]. */ + if (groups[BP_TREE_HIGH_GROUP0_INDEX] < groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX]) { + groups[BP_TREE_HIGH_GROUP0_INDEX] = groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP0_INDEX] < groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]) { + groups[BP_TREE_HIGH_GROUP0_INDEX] = groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP0_INDEX] < groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]) { + groups[BP_TREE_HIGH_GROUP0_INDEX] = groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]; + } + hi_groups[0] = groups[BP_TREE_HIGH_GROUP0_INDEX]; + if (groups[BP_TREE_HIGH_GROUP1_INDEX] < groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX]) { +
groups[BP_TREE_HIGH_GROUP1_INDEX] = groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP1_INDEX] < groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]) { + groups[BP_TREE_HIGH_GROUP1_INDEX] = groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP1_INDEX] < groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]) { + groups[BP_TREE_HIGH_GROUP1_INDEX] = groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]; + } + hi_groups[1] = groups[BP_TREE_HIGH_GROUP1_INDEX]; + if (groups[BP_TREE_HIGH_GROUP2_INDEX] < groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX]) { + groups[BP_TREE_HIGH_GROUP2_INDEX] = groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP2_INDEX] < groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]) { + groups[BP_TREE_HIGH_GROUP2_INDEX] = groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP2_INDEX] < groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]) { + groups[BP_TREE_HIGH_GROUP2_INDEX] = groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]; + } + hi_groups[2] = groups[BP_TREE_HIGH_GROUP2_INDEX]; + /* The count starts at 4, then six roots are seeded BP_TREE_CHILD_COUNT entries into tree_nodes; later *--restart pushes grow the list toward lower addresses. NOTE(review): the headroom in front of roots is only BP_TREE_CHILD_COUNT entries while one entry can be pushed per coefficient; confirm the array layout this relies on. */ + len = 4; + if (groups[BP_TREE_GROUP1_INDEX] < groups[BP_TREE_HIGH_GROUP0_INDEX]) { + groups[BP_TREE_GROUP1_INDEX] = groups[BP_TREE_HIGH_GROUP0_INDEX]; + } + roots = tree_nodes + BP_TREE_CHILD_COUNT; + roots[0] = groups[BP_TREE_GROUP1_INDEX] | BP_GROUP1_NODE_BASE; + if (groups[BP_TREE_GROUP6_INDEX] < groups[BP_TREE_HIGH_GROUP1_INDEX]) { + groups[BP_TREE_GROUP6_INDEX] = groups[BP_TREE_HIGH_GROUP1_INDEX]; + } + roots[1] = groups[BP_TREE_GROUP6_INDEX] | BP_GROUP6_NODE_BASE; + if (groups[BP_TREE_GROUP11_INDEX] < groups[BP_TREE_HIGH_GROUP2_INDEX]) { + groups[BP_TREE_GROUP11_INDEX] = groups[BP_TREE_HIGH_GROUP2_INDEX]; + } + roots[2] = groups[BP_TREE_GROUP11_INDEX] | BP_GROUP11_NODE_BASE; + roots[3] = lens[BP_COEFF1_INDEX] + BP_COEFF1_LEAF_BASE; + roots[4] =
lens[BP_COEFF2_INDEX] + BP_COEFF2_LEAF_BASE; + roots[5] = lens[BP_COEFF3_INDEX] + BP_COEFF3_LEAF_BASE; + cur = roots; + end = roots + BP_LOSSLESS_ROOT_NODES; + + /* Expand pending group/branch/coeff nodes one bitplane level at a time. Every non-zero entry adds 1 to the count. An entry whose stored level equals maxbits is expanded: a group is rewritten in place as a branch and appends three more branches at `end`; a branch (or a high node, which is first rewritten as a group) adds 4 for its children plus maxbits for each child at this level, and queues the other children in front of the list; a coeff node adds maxbits and is cleared. */ + for (; 1 < maxbits; maxbits = (maxbits - 1) & BP_BYTE_MASK) { + total = len; + restart = cur; + if (cur < end) { + do { + entry = *cur; + len = total; + if ((entry == 0) || (len = total + 1, (entry & BP_BYTE_MASK) != maxbits)) { + cur++; + } else { + kind = entry & BP_TREE_KIND_MASK; + if (kind == BP_TREE_GROUP_NODE) { + kind = entry >> BP_TREE_INDEX_SHIFT; + bits = (u32)(entry >> BP_TREE_GROUP_SHIFT); + *cur = (u16)groups[bits] + (entry & BP_TREE_BASE_MASK) + BP_TREE_BRANCH_NODE; + *end = (u16)groups[bits + BP_TREE_CHILD1_INDEX] + (kind + BP_TREE_CHILD1_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_BRANCH_NODE; + end[1] = (u16)groups[bits + BP_TREE_CHILD2_INDEX] + (kind + BP_TREE_CHILD2_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_BRANCH_NODE; + end[2] = (u16)groups[bits + BP_TREE_CHILD3_INDEX] + (kind + BP_TREE_CHILD3_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_BRANCH_NODE; + end += BP_TREE_ADDED_CHILD_COUNT; + } else if ((kind < BP_TREE_AFTER_GROUP_NODE) && ((entry & BP_TREE_KIND_MASK) == 0)) { + *cur = (u16)hi_groups[entry >> BP_TREE_HIGH_GROUP_SHIFT] + ((entry >> BP_TREE_INDEX_SHIFT) + BP_TREE_CHILD1_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_GROUP_NODE; +handle_children: + kind = entry >> BP_TREE_INDEX_SHIFT; + len = total + 5; + if (lens[kind] == maxbits) { + len += maxbits; + } else { + *--restart = (u16)lens[kind] | (entry & BP_TREE_BASE_MASK) + BP_TREE_COEFF_NODE; + } + if (lens[kind + BP_TREE_CHILD1_INDEX] == maxbits) { + len += maxbits; + } else { + *--restart = (u16)lens[kind + BP_TREE_CHILD1_INDEX] | (kind + BP_TREE_CHILD1_INDEX) * BP_TREE_INDEX_STRIDE + BP_TREE_COEFF_NODE; + } + if (lens[kind + BP_TREE_CHILD2_INDEX] == maxbits) { + len += maxbits; + } else { + *--restart = (u16)lens[kind + BP_TREE_CHILD2_INDEX] | (kind +
BP_TREE_CHILD2_INDEX) * BP_TREE_INDEX_STRIDE + BP_TREE_COEFF_NODE; + } + if (lens[kind + BP_TREE_CHILD3_INDEX] == maxbits) { + len += maxbits; + } else { + *--restart = (u16)lens[kind + BP_TREE_CHILD3_INDEX] | (kind + BP_TREE_CHILD3_INDEX) * BP_TREE_INDEX_STRIDE + BP_TREE_COEFF_NODE; + } + } else if (kind == BP_TREE_BRANCH_NODE) { + *cur = 0; + cur++; + goto handle_children; + } else { + if (kind == BP_TREE_COEFF_NODE) { + *cur = 0; + len += maxbits; + } + cur++; + } + } + total = len; + } while (cur < end); + } + cur = restart; + } + /* Same walk specialised for level 1, where a coefficient at this level adds exactly 1 to the count; skipped when active is 0, i.e. when no scanned coefficient had a non-zero bit level. maxbits is reused below as a scratch group index. */ + if (active && (total = len, restart = cur, cur < end)) { + do { + entry = *cur; + len = total; + if ((entry == 0) || (len = total + 1, (entry & BP_BYTE_MASK) != 1)) { + cur++; + } else { + kind = entry & BP_TREE_KIND_MASK; + if (kind == BP_TREE_GROUP_NODE) { + kind = entry >> BP_TREE_INDEX_SHIFT; + maxbits = (u32)(entry >> BP_TREE_GROUP_SHIFT); + *cur = (u16)groups[maxbits] + (entry & BP_TREE_BASE_MASK) + BP_TREE_BRANCH_NODE; + *end = (u16)groups[maxbits + BP_TREE_CHILD1_INDEX] + (kind + BP_TREE_CHILD1_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_BRANCH_NODE; + end[1] = (u16)groups[maxbits + BP_TREE_CHILD2_INDEX] + (kind + BP_TREE_CHILD2_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_BRANCH_NODE; + end[2] = (u16)groups[maxbits + BP_TREE_CHILD3_INDEX] + (kind + BP_TREE_CHILD3_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_BRANCH_NODE; + end += BP_TREE_ADDED_CHILD_COUNT; + } else if ((kind < BP_TREE_AFTER_GROUP_NODE) && ((entry & BP_TREE_KIND_MASK) == 0)) { + *cur = (u16)hi_groups[entry >> BP_TREE_HIGH_GROUP_SHIFT] + ((entry >> BP_TREE_INDEX_SHIFT) + BP_TREE_CHILD1_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_GROUP_NODE; +handle_final_children: + kind = entry >> BP_TREE_INDEX_SHIFT; + len = total + 5; + if (lens[kind] == 1) { + len = total + 6; + } else { + *--restart = (u16)lens[kind] | (entry & BP_TREE_BASE_MASK) + BP_TREE_COEFF_NODE; + } + if (lens[kind + BP_TREE_CHILD1_INDEX] == 1) { + len++; + } else { + *--restart = (u16)lens[kind + BP_TREE_CHILD1_INDEX] |
(kind + BP_TREE_CHILD1_INDEX) * BP_TREE_INDEX_STRIDE + BP_TREE_COEFF_NODE; + } + if (lens[kind + BP_TREE_CHILD2_INDEX] == 1) { + len++; + } else { + *--restart = (u16)lens[kind + BP_TREE_CHILD2_INDEX] | (kind + BP_TREE_CHILD2_INDEX) * BP_TREE_INDEX_STRIDE + BP_TREE_COEFF_NODE; + } + if (lens[kind + BP_TREE_CHILD3_INDEX] == 1) { + len++; + } else { + *--restart = (u16)lens[kind + BP_TREE_CHILD3_INDEX] | (kind + BP_TREE_CHILD3_INDEX) * BP_TREE_INDEX_STRIDE + BP_TREE_COEFF_NODE; + } + } else if (kind == BP_TREE_BRANCH_NODE) { + *cur = 0; + cur++; + goto handle_final_children; + } else { + if (kind == BP_TREE_COEFF_NODE) { + *cur = 0; + len = total + 2; + } + cur++; + } + } + total = len; + } while (cur < end); + } + + return len; +} + /* WriteBPLossless: appends one block to `bits`. It first writes the largest bit level in a BP_LOSSLESS_LEVEL_BITS-wide field, then for each level from that maximum downward writes one flag per pending node and, for every coefficient whose level equals the current one, (level - 1) magnitude bits followed by a sign bit. Returns once the level reaches 0. */ +void WriteBPLossless(BPBITSTREAM PTR4* bits, s16 PTR4* vals) +{ + u16 entry; + u16 sign; + u16 kind; + s32 i; + s32 count; + u8 PTR4* group_ptr; + u8 PTR4* len_ptr; + u32 maxbits; + u32 lenbits; + u32 bit_count; + BPBITSTYPE bit_buf; + u16 PTR4* cur; + s16 PTR4* ordered_cur; + u16 PTR4* restart; + u16 PTR4* end; + u16 PTR4* roots; + u16 tree_nodes[BP_TREE_NODES]; + u8 lens[BP_BLOCK_COEFFS]; + u8 groups[BP_TREE_GROUPS]; + u8 hi_groups[BP_TREE_HIGH_GROUPS]; + u16 absvals[BP_BLOCK_COEFFS]; + s16 ordered[BP_BLOCK_COEFFS]; + + /* Put coefficients in scan order before building bit-depth tables. */ + count = BP_BLOCK_COEFFS; + i = 0; + ordered_cur = ordered; + do { + *ordered_cur = BP_ZIGZAG_COEFF(vals, i); + i++; + ordered_cur++; + count--; + } while (count != 0); + /* absvals[] holds BP_ABS_COEFF of each scan-ordered value, with `sign` produced by shifting the s16 right by BP_S16_SIGN_SHIFT (BP_ABS_COEFF is defined outside this view; presumably the magnitude). */ + count = BP_BLOCK_COEFFS; + i = 0; + do { + entry = (u16)ordered[i]; + sign = (s16)entry >> BP_S16_SIGN_SHIFT; + absvals[i] = BP_ABS_COEFF(entry, sign); + i++; + count--; + } while (count != 0); + + /* The writer uses the same grouped bit-depth tree measured by LenBPLossless.
*/ + maxbits = 0; + count = BP_AC_COEFFS; + i = 1; + cur = absvals; /* cur is advanced before each read, so levels are taken from absvals[1] onward */ + do { + cur++; + lenbits = getbitlevelvar((u32)*cur) & BP_BYTE_MASK; + if (maxbits < lenbits) { + maxbits = lenbits; + } + lens[i] = (u8)lenbits; + i++; + count--; + } while (count != 0); + /* groups[]: for each group, the largest of four lens[] entries (two read through len_ptr, two through group_ptr). */ + i = BP_FIRST_LOSSLESS_TREE_GROUP_INDEX; + count = BP_TREE_LAST_GROUP; + group_ptr = lens + BP_FIRST_LOSSLESS_TREE_GROUP_END_INDEX; + len_ptr = lens; + do { + len_ptr = BP_NEXT_TREE_GROUP(len_ptr); + lenbits = *len_ptr; + if (*len_ptr < len_ptr[1]) { + lenbits = len_ptr[1]; + } + if (lenbits < group_ptr[-1]) { + lenbits = group_ptr[-1]; + } + if (lenbits < *group_ptr) { + lenbits = *group_ptr; + } + groups[BP_TREE_GROUP_INDEX(i)] = (u8)lenbits; + group_ptr = BP_NEXT_TREE_GROUP(group_ptr); + i += BP_TREE_CHILD_COUNT; + count--; + } while (count != 0); + /* Raise each of the three high groups to the maximum of itself and three further groups[] entries; hi_groups[] keeps a snapshot of each result. */ + if (groups[BP_TREE_HIGH_GROUP0_INDEX] < groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX]) { + groups[BP_TREE_HIGH_GROUP0_INDEX] = groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP0_INDEX] < groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]) { + groups[BP_TREE_HIGH_GROUP0_INDEX] = groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP0_INDEX] < groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]) { + groups[BP_TREE_HIGH_GROUP0_INDEX] = groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]; + } + hi_groups[0] = groups[BP_TREE_HIGH_GROUP0_INDEX]; + if (groups[BP_TREE_HIGH_GROUP1_INDEX] < groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX]) { + groups[BP_TREE_HIGH_GROUP1_INDEX] = groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP1_INDEX] < groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]) { + groups[BP_TREE_HIGH_GROUP1_INDEX] = groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP1_INDEX] < groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]) { +
groups[BP_TREE_HIGH_GROUP1_INDEX] = groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]; + } + hi_groups[1] = groups[BP_TREE_HIGH_GROUP1_INDEX]; + if (groups[BP_TREE_HIGH_GROUP2_INDEX] < groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX]) { + groups[BP_TREE_HIGH_GROUP2_INDEX] = groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP2_INDEX] < groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]) { + groups[BP_TREE_HIGH_GROUP2_INDEX] = groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP2_INDEX] < groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]) { + groups[BP_TREE_HIGH_GROUP2_INDEX] = groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]; + } + /* Header: maxbits in a BP_LOSSLESS_LEVEL_BITS-wide field, stored with the same word-spill sequence as PUT_BP_BITS but written out inline (the hi_groups[2] store is interleaved, and bit_buf is reused for the leftover bit count). */ + bit_count = BP_STREAM_BITLEN(bits) + BP_LOSSLESS_LEVEL_BITS; + lenbits = maxbits & VarBitsLens[BP_LOSSLESS_LEVEL_BITS]; + hi_groups[2] = groups[BP_TREE_HIGH_GROUP2_INDEX]; + bit_buf = BP_STREAM_BITS(bits) | (lenbits << BP_STREAM_BITLEN(bits)); + BP_STREAM_BITLEN(bits) = bit_count; + BP_STREAM_BITS(bits) = bit_buf; + if (bit_count >= BP_BITS_PER_WORD) { + *BP_STREAM_CUR(bits) = bit_buf; + bit_buf = BP_STREAM_BITLEN(bits) - BP_BITS_PER_WORD; + BP_STREAM_CUR(bits) = BP_STREAM_CUR(bits) + 1; + BP_STREAM_BITLEN(bits) = bit_buf; + BP_STREAM_BITS(bits) = 0; + if (bit_buf != 0) { + BP_STREAM_BITS(bits) = lenbits >> (BP_LOSSLESS_LEVEL_BITS - bit_buf); + } + } + /* Seed the work list: three group roots, each first raised to its high-group level, followed by the BP_COEFF1/2/3 leaves. */ + if (groups[BP_TREE_GROUP1_INDEX] < groups[BP_TREE_HIGH_GROUP0_INDEX]) { + groups[BP_TREE_GROUP1_INDEX] = groups[BP_TREE_HIGH_GROUP0_INDEX]; + } + roots = tree_nodes + BP_TREE_CHILD_COUNT; + roots[0] = groups[BP_TREE_GROUP1_INDEX] | BP_GROUP1_NODE_BASE; + if (groups[BP_TREE_GROUP6_INDEX] < groups[BP_TREE_HIGH_GROUP1_INDEX]) { + groups[BP_TREE_GROUP6_INDEX] = groups[BP_TREE_HIGH_GROUP1_INDEX]; + } + roots[1] = groups[BP_TREE_GROUP6_INDEX] | BP_GROUP6_NODE_BASE; + if (groups[BP_TREE_GROUP11_INDEX] < groups[BP_TREE_HIGH_GROUP2_INDEX]) { + groups[BP_TREE_GROUP11_INDEX] =
groups[BP_TREE_HIGH_GROUP2_INDEX]; + } + roots[2] = groups[BP_TREE_GROUP11_INDEX] | BP_GROUP11_NODE_BASE; + roots[3] = lens[BP_COEFF1_INDEX] + BP_COEFF1_LEAF_BASE; + roots[4] = lens[BP_COEFF2_INDEX] + BP_COEFF2_LEAF_BASE; + roots[5] = lens[BP_COEFF3_INDEX] + BP_COEFF3_LEAF_BASE; + /* One pass of the outer loop per bit level, from maxbits down to 1; the function returns from inside the loop when the level reaches 0. */ + cur = roots; + end = roots + BP_LOSSLESS_ROOT_NODES; + do { + if (maxbits == 0) { + return; + } + lenbits = maxbits - 1; /* number of magnitude bits written for a coefficient at this level */ + restart = cur; + /* Active children at lower bit depths are pushed before the current cursor. */ + if (cur < end) { + do { + entry = *cur; + if (entry == 0) { +next_lossless_node: + cur++; + } else { + sign = (entry & BP_BYTE_MASK) != maxbits; /* `sign` is reused as a not-at-this-level flag; the stream bit is its inverse */ + PUT_BP_BIT(bits, !sign); + if (sign) { + goto next_lossless_node; + } + kind = entry & BP_TREE_KIND_MASK; + if (kind == BP_TREE_GROUP_NODE) { + kind = entry >> BP_TREE_INDEX_SHIFT; + count = (u32)(entry >> BP_TREE_GROUP_SHIFT); + *cur = (u16)groups[count] + (entry & BP_TREE_BASE_MASK) + BP_TREE_BRANCH_NODE; + *end = (u16)groups[count + BP_TREE_CHILD1_INDEX] + (kind + BP_TREE_CHILD1_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_BRANCH_NODE; + end[1] = (u16)groups[count + BP_TREE_CHILD2_INDEX] + (kind + BP_TREE_CHILD2_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_BRANCH_NODE; + end[2] = (u16)groups[count + BP_TREE_CHILD3_INDEX] + (kind + BP_TREE_CHILD3_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_BRANCH_NODE; + end += BP_TREE_ADDED_CHILD_COUNT; + } else if (kind < BP_TREE_AFTER_GROUP_NODE) { + if ((entry & BP_TREE_KIND_MASK) == 0) { + *cur = (u16)hi_groups[entry >> BP_TREE_HIGH_GROUP_SHIFT] + ((entry >> BP_TREE_INDEX_SHIFT) + BP_TREE_CHILD1_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_GROUP_NODE; +handle_lossless_children: + kind = entry >> BP_TREE_INDEX_SHIFT; + PUT_BP_BIT(bits, lens[kind] != maxbits); + if (lens[kind] == maxbits) { + PUT_BP_BITS(bits, absvals[kind], lenbits, VarBitsLens[lenbits]); + PUT_BP_BIT(bits, ordered[kind] < 0); + } else { + *--restart = (u16)lens[kind] | (entry & BP_TREE_BASE_MASK) + BP_TREE_COEFF_NODE; + } + /* Children 1..3 repeat the same steps: flag bit, then either magnitude plus sign or a queued coeff node. */ + i = kind +
BP_TREE_CHILD1_INDEX; + PUT_BP_BIT(bits, lens[i] != maxbits); + if (lens[i] == maxbits) { + PUT_BP_BITS(bits, absvals[i], lenbits, VarBitsLens[lenbits]); + PUT_BP_BIT(bits, ordered[i] < 0); + } else { + *--restart = (u16)lens[i] | i * BP_TREE_INDEX_STRIDE + BP_TREE_COEFF_NODE; + } + + i = kind + BP_TREE_CHILD2_INDEX; + PUT_BP_BIT(bits, lens[i] != maxbits); + if (lens[i] == maxbits) { + PUT_BP_BITS(bits, absvals[i], lenbits, VarBitsLens[lenbits]); + PUT_BP_BIT(bits, ordered[i] < 0); + } else { + *--restart = (u16)lens[i] | i * BP_TREE_INDEX_STRIDE + BP_TREE_COEFF_NODE; + } + + i = kind + BP_TREE_CHILD3_INDEX; + PUT_BP_BIT(bits, lens[i] != maxbits); + if (lens[i] == maxbits) { + PUT_BP_BITS(bits, absvals[i], lenbits, VarBitsLens[lenbits]); + PUT_BP_BIT(bits, ordered[i] < 0); + } else { + *--restart = (u16)lens[i] | i * BP_TREE_INDEX_STRIDE + BP_TREE_COEFF_NODE; + } + } + } else { + if (kind == BP_TREE_BRANCH_NODE) { + *cur = 0; + cur++; + goto handle_lossless_children; + } + if (kind == BP_TREE_COEFF_NODE) { + kind = entry >> BP_TREE_INDEX_SHIFT; + PUT_BP_BITS(bits, absvals[kind], lenbits, VarBitsLens[lenbits]); + PUT_BP_BIT(bits, ordered[kind] < 0); + *cur = 0; + } + goto next_lossless_node; + } + } + } while (cur < end); + } + maxbits = lenbits & BP_BYTE_MASK; + cur = restart; + } while (1); +} + /* ReadBPLossless: decodes one block in the layout WriteBPLossless produces. It works on a local copy of *bits and writes the advanced position back before returning. coeffs[] is zero-filled first, so positions never signalled in the stream stay 0. out[1] and out[2..63] are written; out[0] is not. */ +void ReadBPLossless(s16 PTR4* out, BPBITSTREAM PTR4* bits) +{ + u32 code; + u32 PTR4* words; + BPBITSTYPE bitbuf; + u32 bitcount; + s32 shift; + u32 level; + u32 maxlevel; + u32 active; + u32 highbit; + u32 value; + u32 mask; + u8 PTR4* cur; + u8 PTR4* next; + u8 PTR4* tree_end; + u8 node; + u8 kind; + u32 base; + u8 tree[BP_BLOCK_COEFFS]; + u16 coeffs[BP_BLOCK_COEFFS]; + BPBITSTREAM bitcopy; + + bitcopy = *bits; + words = bitcopy.cur; + bitbuf = bitcopy.bits; + bitcount = bitcopy.bitlen; + memset(coeffs, 0, sizeof(coeffs)); + + /* The stream starts with the maximum active lossless bitplane level.
*/ + if (bitcount < BP_LOSSLESS_LEVEL_BITS) { + shift = BP_LOSSLESS_LEVEL_BITS - bitcount; + code = bitbuf & BP_BYTE_MASK; + bitbuf = *words; + words++; + code |= bitbuf << bitcount; + bitbuf >>= shift; + bitcount = bitcount + BP_BITS_PER_WORD - BP_LOSSLESS_LEVEL_BITS; + } else { + code = bitbuf & BP_BYTE_MASK; + bitbuf >>= BP_LOSSLESS_LEVEL_BITS; + bitcount -= BP_LOSSLESS_LEVEL_BITS; + } + + maxlevel = code & BP_LOSSLESS_LEVEL_MASK; + active = maxlevel != 0; + /* Root nodes mirror WriteBPLossless: three grouped roots plus coeffs 1..3. */ + tree[0] = BP_READ_TREE_GROUP1_ROOT; + tree[1] = BP_READ_TREE_GROUP6_ROOT; + tree[2] = BP_READ_TREE_GROUP11_ROOT; + tree[3] = BP_READ_TREE_COEFF1_ROOT; + tree[4] = BP_READ_TREE_COEFF2_ROOT; + tree[5] = BP_READ_TREE_COEFF3_ROOT; + + cur = tree; /* NOTE(review): the list starts at tree[0] and the child readers push with next-- before storing, so the first push lands below tree[0]; confirm the stack layout this relies on */ + tree_end = tree + BP_LOSSLESS_ROOT_NODES; + highbit = (u16)(1 << (maxlevel - 1)); /* NOTE(review): evaluated even when maxlevel is 0 (shift count of -1); both passes are skipped in that case, so the value is unused */ + + /* Non-final planes read lower magnitude bits plus a sign for new coeffs. */ + while (1 < maxlevel) { + level = (maxlevel - 1) & BP_BYTE_MASK; + next = cur; + highbit = (s16)highbit >> 1; /* NOTE(review): halved before first use, so the first plane ORs in 1 << (maxlevel - 2) while reading maxlevel - 1 bits; check against the bit WriteBPLossless leaves implicit */ + if (cur < tree_end) { + mask = VarBitsLens[level]; + do { + node = *cur; + if (node == 0) { +next_lossless_read_node: + cur++; + } else { + if (bitcount == 0) { + code = *words; + bitcount = BP_WORD_TOP_BIT; + words++; + bitbuf = code >> 1; + if ((code & 1) == 0) { + goto next_lossless_read_node; + } + } else { + bitcount = bitcount - 1; + code = bitbuf & 1; + bitbuf >>= 1; + if (code == 0) { + goto next_lossless_read_node; + } + } + /* A set flag bit means this node is handled on the current plane; dispatch on the kind held in its low bits. */ + kind = node & BP_READ_TREE_KIND_MASK; + if (kind == BP_READ_TREE_GROUP_NODE) { + kind = BP_READ_TREE_INDEX(node); + *cur = BP_READ_TREE_BRANCH_FROM_NODE(node); + *tree_end = BP_READ_TREE_NODE(kind + BP_READ_TREE_CHILD1_BASE, BP_READ_TREE_BRANCH_NODE); + tree_end[1] = BP_READ_TREE_NODE(kind + BP_READ_TREE_CHILD2_BASE, BP_READ_TREE_BRANCH_NODE); + tree_end[2] = BP_READ_TREE_NODE(kind + BP_READ_TREE_CHILD3_BASE, BP_READ_TREE_BRANCH_NODE); + tree_end += BP_TREE_ADDED_CHILD_COUNT; + } else {
+ if (kind < BP_READ_TREE_BRANCH_NODE) { + if ((node & BP_READ_TREE_KIND_MASK) != BP_READ_TREE_HIGH_NODE) { + goto next_lossless_read_node; + } + *cur = BP_READ_TREE_GROUP_FROM_INDEX(BP_READ_TREE_INDEX(node)); + } else { + if (kind != BP_READ_TREE_BRANCH_NODE) { + if (kind == BP_READ_TREE_COEFF_NODE) { + value = bitbuf & mask; + if (bitcount < level) { + code = *words++; + value |= code << bitcount; + bitbuf = code >> (level - bitcount); + bitcount = bitcount + BP_BITS_PER_WORD - level; + } else { + bitbuf >>= level; + bitcount = bitcount - level; + } + value = (value & mask) | highbit; + if (bitcount == 0) { + code = *words; + bitcount = BP_WORD_TOP_BIT; + words++; + bitbuf = code >> 1; + } else { + bitcount = bitcount - 1; + code = bitbuf & 1; + bitbuf >>= 1; + } + if ((code & 1) != 0) { + value = -value; + } + coeffs[BP_READ_TREE_INDEX(node)] = (u16)value; + *cur = 0; + } + goto next_lossless_read_node; + } + *cur = 0; + cur++; + } + /* Four children follow. A set flag queues the child as a coeff node in front of the list for a later plane; a clear flag means its `level` magnitude bits and its sign bit follow immediately. */ + base = BP_READ_TREE_INDEX(node); +#define READ_LOSSLESS_CHILD(slot, label) \ + do { \ + if (bitcount == 0) { \ + code = *words; \ + bitcount = BP_WORD_TOP_BIT; \ + bitbuf = code >> 1; \ + words++; \ + if ((code & 1) != 0) { \ + next--; \ + *next = BP_READ_TREE_COEFF(slot); \ + goto label; \ + } \ + } else { \ + bitcount = bitcount - 1; \ + code = bitbuf & 1; \ + bitbuf >>= 1; \ + if (code != 0) { \ + next--; \ + *next = BP_READ_TREE_COEFF(slot); \ + goto label; \ + } \ + } \ + value = bitbuf & mask; \ + if (bitcount < level) { \ + code = *words++; \ + value |= code << bitcount; \ + bitbuf = code >> (level - bitcount); \ + bitcount = bitcount + BP_BITS_PER_WORD - level; \ + } else { \ + bitbuf >>= level; \ + bitcount = bitcount - level; \ + } \ + value = (value & mask) | highbit; \ + if (bitcount == 0) { \ + code = *words; \ + bitcount = BP_WORD_TOP_BIT; \ + words++; \ + bitbuf = code >> 1; \ + } else { \ + bitcount = bitcount - 1; \ + code = bitbuf & 1; \ + bitbuf >>= 1; \ + } \ + if ((code & 1) != 0) { \ + value = -value; \ + } \
+ coeffs[slot] = (u16)value; \ + } while (0) + READ_LOSSLESS_CHILD(base, after_lossless_child0); +after_lossless_child0: + READ_LOSSLESS_CHILD(base + BP_TREE_CHILD1_INDEX, after_lossless_child1); +after_lossless_child1: + READ_LOSSLESS_CHILD(base + BP_TREE_CHILD2_INDEX, after_lossless_child2); +after_lossless_child2: + READ_LOSSLESS_CHILD(base + BP_TREE_CHILD3_INDEX, after_lossless_child3); +after_lossless_child3: +#undef READ_LOSSLESS_CHILD + ; + } + } + } while (cur < tree_end); + } + maxlevel = level; + cur = next; + } + + /* Level one coeffs need only a sign bit; their magnitude is implicit. coeffs[] receives BP_NEGATIVE_COEFF_SIGN when the sign bit is set and BP_POSITIVE_COEFF_SIGN otherwise. The pass is skipped when the header level was 0. */ + if (active && cur < tree_end) { + next = cur; + do { + node = *cur; + if (node == 0) { +next_lossless_final_node: + cur++; + } else { + if (bitcount == 0) { + code = *words; + bitcount = BP_WORD_TOP_BIT; + words++; + bitbuf = code >> 1; + if ((code & 1) == 0) { + goto next_lossless_final_node; + } + } else { + bitcount = bitcount - 1; + code = bitbuf & 1; + bitbuf >>= 1; + if (code == 0) { + goto next_lossless_final_node; + } + } + kind = node & BP_READ_TREE_KIND_MASK; + if (kind == BP_READ_TREE_GROUP_NODE) { + kind = BP_READ_TREE_INDEX(node); + *cur = BP_READ_TREE_BRANCH_FROM_NODE(node); + *tree_end = BP_READ_TREE_NODE(kind + BP_READ_TREE_CHILD1_BASE, BP_READ_TREE_BRANCH_NODE); + tree_end[1] = BP_READ_TREE_NODE(kind + BP_READ_TREE_CHILD2_BASE, BP_READ_TREE_BRANCH_NODE); + tree_end[2] = BP_READ_TREE_NODE(kind + BP_READ_TREE_CHILD3_BASE, BP_READ_TREE_BRANCH_NODE); + tree_end += BP_TREE_ADDED_CHILD_COUNT; + } else { + if (kind < BP_READ_TREE_BRANCH_NODE) { + if ((node & BP_READ_TREE_KIND_MASK) != BP_READ_TREE_HIGH_NODE) { + goto next_lossless_final_node; + } + *cur = BP_READ_TREE_GROUP_FROM_INDEX(BP_READ_TREE_INDEX(node)); + } else { + if (kind != BP_READ_TREE_BRANCH_NODE) { + if (kind == BP_READ_TREE_COEFF_NODE) { + if (bitcount == 0) { + code = *words; + bitcount = BP_WORD_TOP_BIT; + words++; + bitbuf = code >> 1; + } else { + bitcount = bitcount - 1; + code = bitbuf &
1; + bitbuf >>= 1; + } + coeffs[BP_READ_TREE_INDEX(node)] = (code & 1) ? BP_NEGATIVE_COEFF_SIGN : BP_POSITIVE_COEFF_SIGN; + *cur = 0; + } + goto next_lossless_final_node; + } + *cur = 0; + cur++; + } + /* Final-plane children: one flag bit, then only a sign bit when the flag is clear. */ + base = BP_READ_TREE_INDEX(node); +#define READ_LOSSLESS_FINAL_CHILD(slot, label) \ + do { \ + if (bitcount == 0) { \ + code = *words; \ + bitcount = BP_WORD_TOP_BIT; \ + bitbuf = code >> 1; \ + words++; \ + if ((code & 1) != 0) { \ + next--; \ + *next = BP_READ_TREE_COEFF(slot); \ + goto label; \ + } \ + } else { \ + bitcount = bitcount - 1; \ + code = bitbuf & 1; \ + bitbuf >>= 1; \ + if (code != 0) { \ + next--; \ + *next = BP_READ_TREE_COEFF(slot); \ + goto label; \ + } \ + } \ + if (bitcount == 0) { \ + code = *words; \ + bitcount = BP_WORD_TOP_BIT; \ + words++; \ + bitbuf = code >> 1; \ + } else { \ + bitcount = bitcount - 1; \ + code = bitbuf & 1; \ + bitbuf >>= 1; \ + } \ + coeffs[slot] = (code & 1) ? BP_NEGATIVE_COEFF_SIGN : BP_POSITIVE_COEFF_SIGN; \ + } while (0) + READ_LOSSLESS_FINAL_CHILD(base, after_lossless_final0); +after_lossless_final0: + READ_LOSSLESS_FINAL_CHILD(base + BP_TREE_CHILD1_INDEX, after_lossless_final1); +after_lossless_final1: + READ_LOSSLESS_FINAL_CHILD(base + BP_TREE_CHILD2_INDEX, after_lossless_final2); +after_lossless_final2: + READ_LOSSLESS_FINAL_CHILD(base + BP_TREE_CHILD3_INDEX, after_lossless_final3); +after_lossless_final3: +#undef READ_LOSSLESS_FINAL_CHILD + ; + } + } + } while (cur < tree_end); + } + /* Publish the advanced read position back to the caller's stream. */ + bitcopy.cur = words; + bitcopy.bits = bitbuf; + bitcopy.bitlen = bitcount; + *bits = bitcopy; + (void)tree; + + /* Scatter scan-order coefficients back into the 8x8 block.
*/ + out[1] = coeffs[1]; /* out[0] is left untouched; every other entry is moved as an adjacent pair through BPCOEFFPAIR */ +#define COPY_BP_COEFF_PAIR(out_index, coeff_index) \ + (*(BPCOEFFPAIR PTR4*)(out + (out_index)) = *(BPCOEFFPAIR PTR4*)(coeffs + (coeff_index))) + COPY_BP_COEFF_PAIR(2, 4); + COPY_BP_COEFF_PAIR(4, 8); + COPY_BP_COEFF_PAIR(6, 12); + COPY_BP_COEFF_PAIR(8, 2); + COPY_BP_COEFF_PAIR(10, 6); + COPY_BP_COEFF_PAIR(12, 10); + COPY_BP_COEFF_PAIR(14, 14); + COPY_BP_COEFF_PAIR(16, 24); + COPY_BP_COEFF_PAIR(18, 44); + COPY_BP_COEFF_PAIR(20, 16); + COPY_BP_COEFF_PAIR(22, 20); + COPY_BP_COEFF_PAIR(24, 26); + COPY_BP_COEFF_PAIR(26, 46); + COPY_BP_COEFF_PAIR(28, 18); + COPY_BP_COEFF_PAIR(30, 22); + COPY_BP_COEFF_PAIR(32, 28); + COPY_BP_COEFF_PAIR(34, 32); + COPY_BP_COEFF_PAIR(36, 48); + COPY_BP_COEFF_PAIR(38, 52); + COPY_BP_COEFF_PAIR(40, 30); + COPY_BP_COEFF_PAIR(42, 34); + COPY_BP_COEFF_PAIR(44, 50); + COPY_BP_COEFF_PAIR(46, 54); + COPY_BP_COEFF_PAIR(48, 36); + COPY_BP_COEFF_PAIR(50, 40); + COPY_BP_COEFF_PAIR(52, 56); + COPY_BP_COEFF_PAIR(54, 60); + COPY_BP_COEFF_PAIR(56, 38); + COPY_BP_COEFF_PAIR(58, 42); + COPY_BP_COEFF_PAIR(60, 58); + COPY_BP_COEFF_PAIR(62, 62); +#undef COPY_BP_COEFF_PAIR +} + /* WriteBPLossy: encodes BP_BLOCK_COEFFS byte coefficients read from vals in zigzag order, taking the sign from the top bit of each byte. Returns 0 without writing anything when every coefficient has bit level 0. Otherwise it writes (max level - 1) in a BP_LOSSY_LEVEL_BITS-wide field, then the per-plane residual bits, node flags and sign bits, and returns 1. */ +u32 WriteBPLossy(BPBITSTREAM PTR4* bits, char PTR4* vals) +{ + u16 entry; + s32 i; + s32 count; + u8 PTR4* group_ptr; + u8 PTR4* len_ptr; + u32 maxbits; + u32 lenbits; + u32 bit_count; + BPBITSTYPE bit_buf; + u16 PTR4* cur; + u16 PTR4* insert; + u16 PTR4* next_node; + u16 PTR4* roots; + u16 mask; + s16 node_entry; + u16 tree_nodes[BP_TREE_NODES]; + u8 lens[BP_BLOCK_COEFFS]; + u8 groups[BP_TREE_GROUPS]; + u8 hi_groups[BP_TREE_HIGH_GROUPS]; + u8 ordered[BP_BLOCK_COEFFS]; + u8 absvals[BP_BLOCK_COEFFS]; + u8 temp[BP_BLOCK_COEFFS]; + + /* Lossy bitplanes scan all 64 byte coefficients, including DC.
*/ + count = BP_BLOCK_COEFFS; + i = 0; + do { + ordered[i] = BP_ZIGZAG_COEFF(vals, i); + i++; + count--; + } while (count != 0); + + i = 0; + count = BP_BLOCK_COEFFS; + do { + entry = (s8)ordered[i] >> 7; + absvals[i] = (entry ^ ordered[i]) - entry; + i++; + count--; + } while (count != 0); + + maxbits = 0; + i = 0; + count = BP_BLOCK_COEFFS; + do { + lenbits = getbitlevelvar((u32)absvals[i]) & BP_BYTE_MASK; + if (maxbits < lenbits) { + maxbits = lenbits; + } + lens[i] = (u8)lenbits; + i++; + count--; + } while (count != 0); + + if (maxbits == 0) { + return 0; + } + + /* Group tables use the deepest bit depth of each four-coefficient branch. */ + count = BP_TREE_GROUPS; + i = 0; + group_ptr = lens + BP_FIRST_LOSSY_TREE_GROUP_END_INDEX; + len_ptr = lens; + do { + lenbits = *len_ptr; + if (*len_ptr < len_ptr[1]) { + lenbits = len_ptr[1]; + } + if (lenbits < group_ptr[-1]) { + lenbits = group_ptr[-1]; + } + if (lenbits < *group_ptr) { + lenbits = *group_ptr; + } + groups[BP_TREE_GROUP_INDEX(i)] = (u8)lenbits; + group_ptr = BP_NEXT_TREE_GROUP(group_ptr); + len_ptr = BP_NEXT_TREE_GROUP(len_ptr); + i += BP_TREE_CHILD_COUNT; + count--; + } while (count != 0); + + if (groups[BP_TREE_HIGH_GROUP0_INDEX] < groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX]) { + groups[BP_TREE_HIGH_GROUP0_INDEX] = groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP0_INDEX] < groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]) { + groups[BP_TREE_HIGH_GROUP0_INDEX] = groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP0_INDEX] < groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]) { + groups[BP_TREE_HIGH_GROUP0_INDEX] = groups[BP_TREE_HIGH_GROUP0_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]; + } + hi_groups[0] = groups[BP_TREE_HIGH_GROUP0_INDEX]; + if (groups[BP_TREE_HIGH_GROUP1_INDEX] < groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX]) { + groups[BP_TREE_HIGH_GROUP1_INDEX] = groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX]; + } + 
if (groups[BP_TREE_HIGH_GROUP1_INDEX] < groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]) { + groups[BP_TREE_HIGH_GROUP1_INDEX] = groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP1_INDEX] < groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]) { + groups[BP_TREE_HIGH_GROUP1_INDEX] = groups[BP_TREE_HIGH_GROUP1_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]; + } + hi_groups[1] = groups[BP_TREE_HIGH_GROUP1_INDEX]; + if (groups[BP_TREE_HIGH_GROUP2_INDEX] < groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX]) { + groups[BP_TREE_HIGH_GROUP2_INDEX] = groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP2_INDEX] < groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]) { + groups[BP_TREE_HIGH_GROUP2_INDEX] = groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX + BP_TREE_CHILD1_INDEX]; + } + if (groups[BP_TREE_HIGH_GROUP2_INDEX] < groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]) { + groups[BP_TREE_HIGH_GROUP2_INDEX] = groups[BP_TREE_HIGH_GROUP2_CHILD0_INDEX + BP_TREE_CHILD2_INDEX]; + } + + bit_count = BP_STREAM_BITLEN(bits) + BP_LOSSY_LEVEL_BITS; + lenbits = (maxbits - 1) & VarBitsLens[BP_LOSSY_LEVEL_BITS]; + hi_groups[2] = groups[BP_TREE_HIGH_GROUP2_INDEX]; + bit_buf = BP_STREAM_BITS(bits) | (lenbits << BP_STREAM_BITLEN(bits)); + BP_STREAM_BITLEN(bits) = bit_count; + BP_STREAM_BITS(bits) = bit_buf; + if (bit_count >= BP_BITS_PER_WORD) { + *BP_STREAM_CUR(bits) = bit_buf; + bit_buf = BP_STREAM_BITLEN(bits) - BP_BITS_PER_WORD; + BP_STREAM_CUR(bits) = BP_STREAM_CUR(bits) + 1; + BP_STREAM_BITLEN(bits) = bit_buf; + BP_STREAM_BITS(bits) = 0; + if (bit_buf != 0) { + BP_STREAM_BITS(bits) = lenbits >> (BP_LOSSY_LEVEL_BITS - bit_buf); + } + } + + if (groups[BP_TREE_GROUP1_INDEX] < groups[BP_TREE_HIGH_GROUP0_INDEX]) { + groups[BP_TREE_GROUP1_INDEX] = groups[BP_TREE_HIGH_GROUP0_INDEX]; + } + roots = tree_nodes + BP_TREE_CHILD_COUNT; + roots[0] = groups[BP_TREE_GROUP1_INDEX] | 
BP_GROUP1_NODE_BASE; + if (groups[BP_TREE_GROUP6_INDEX] < groups[BP_TREE_HIGH_GROUP1_INDEX]) { + groups[BP_TREE_GROUP6_INDEX] = groups[BP_TREE_HIGH_GROUP1_INDEX]; + } + roots[1] = groups[BP_TREE_GROUP6_INDEX] | BP_GROUP6_NODE_BASE; + if (groups[BP_TREE_GROUP11_INDEX] < groups[BP_TREE_HIGH_GROUP2_INDEX]) { + groups[BP_TREE_GROUP11_INDEX] = groups[BP_TREE_HIGH_GROUP2_INDEX]; + } + roots[2] = groups[BP_TREE_GROUP11_INDEX] | BP_GROUP11_NODE_BASE; + roots[3] = groups[0] + BP_TREE_BRANCH_NODE; + + cur = roots; + next_node = roots + BP_LOSSY_ROOT_NODES; + mask = (u16)(1 << (maxbits - 1)); + i = 0; + for (; maxbits != 0; maxbits = (maxbits - 1) & BP_BYTE_MASK) { + count = 0; + /* Coefficients introduced on earlier planes emit one residual bit here. */ + if (0 < i) { + do { + PUT_BP_BIT(bits, (temp[count] & mask) != 0); + count++; + } while (count < i); + } + + insert = cur; + if (cur < next_node) { + do { + node_entry = *cur; + if (node_entry == 0) { +next_lossy_node: + cur++; + } else { + count = (node_entry & BP_BYTE_MASK) != maxbits; + PUT_BP_BIT(bits, !count); + if (count) { + goto next_lossy_node; + } + + lenbits = node_entry & BP_TREE_KIND_MASK; + if (lenbits == BP_TREE_GROUP_NODE) { + lenbits = node_entry >> BP_TREE_INDEX_SHIFT; + count = (u32)(node_entry >> BP_TREE_GROUP_SHIFT); + *cur = (u16)groups[count] + (node_entry & BP_TREE_BASE_MASK) + BP_TREE_BRANCH_NODE; + next_node[0] = (u16)groups[count + BP_TREE_CHILD1_INDEX] + (lenbits + BP_TREE_CHILD1_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_BRANCH_NODE; + next_node[1] = (u16)groups[count + BP_TREE_CHILD2_INDEX] + (lenbits + BP_TREE_CHILD2_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_BRANCH_NODE; + next_node[2] = (u16)groups[count + BP_TREE_CHILD3_INDEX] + (lenbits + BP_TREE_CHILD3_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_BRANCH_NODE; + next_node += BP_TREE_ADDED_CHILD_COUNT; + } else if (lenbits < BP_TREE_AFTER_GROUP_NODE) { + if ((node_entry & BP_TREE_KIND_MASK) == 0) { + *cur = (u16)hi_groups[node_entry >> 
BP_TREE_HIGH_GROUP_SHIFT] + ((node_entry >> BP_TREE_INDEX_SHIFT) + BP_TREE_CHILD1_BASE) * BP_TREE_INDEX_STRIDE + BP_TREE_GROUP_NODE; +handle_lossy_children: + lenbits = node_entry >> BP_TREE_INDEX_SHIFT; + PUT_BP_BIT(bits, lens[lenbits] != maxbits); + if (lens[lenbits] == maxbits) { + temp[i] = absvals[lenbits]; + i++; + PUT_BP_BIT(bits, (ordered[lenbits] & BP_SIGN_BIT) != 0); + } else { + --insert; + *insert = (u16)lens[lenbits] | (node_entry & BP_TREE_BASE_MASK) + BP_TREE_COEFF_NODE; + } + + PUT_BP_BIT(bits, lens[lenbits + BP_TREE_CHILD1_INDEX] != maxbits); + entry = lens[lenbits + BP_TREE_CHILD1_INDEX]; + if (entry == maxbits) { + temp[i] = absvals[lenbits + BP_TREE_CHILD1_INDEX]; + i++; + PUT_BP_BIT(bits, (ordered[lenbits + BP_TREE_CHILD1_INDEX] & BP_SIGN_BIT) != 0); + } else { + --insert; + *insert = (u16)entry | (lenbits + BP_TREE_CHILD1_INDEX) * BP_TREE_INDEX_STRIDE + BP_TREE_COEFF_NODE; + } + + PUT_BP_BIT(bits, lens[lenbits + BP_TREE_CHILD2_INDEX] != maxbits); + entry = lens[lenbits + BP_TREE_CHILD2_INDEX]; + if (entry == maxbits) { + temp[i] = absvals[lenbits + BP_TREE_CHILD2_INDEX]; + i++; + PUT_BP_BIT(bits, (ordered[lenbits + BP_TREE_CHILD2_INDEX] & BP_SIGN_BIT) != 0); + } else { + --insert; + *insert = (u16)entry | (lenbits + BP_TREE_CHILD2_INDEX) * BP_TREE_INDEX_STRIDE + BP_TREE_COEFF_NODE; + } + + PUT_BP_BIT(bits, lens[lenbits + BP_TREE_CHILD3_INDEX] != maxbits); + entry = lens[lenbits + BP_TREE_CHILD3_INDEX]; + if (entry == maxbits) { + temp[i] = absvals[lenbits + BP_TREE_CHILD3_INDEX]; + i++; + PUT_BP_BIT(bits, (ordered[lenbits + BP_TREE_CHILD3_INDEX] & BP_SIGN_BIT) != 0); + } else { + --insert; + *insert = (u16)entry | (lenbits + BP_TREE_CHILD3_INDEX) * BP_TREE_INDEX_STRIDE + BP_TREE_COEFF_NODE; + } + } else { + goto next_lossy_node; + } + } else { + if (lenbits == BP_TREE_BRANCH_NODE) { + *cur = 0; + cur++; + goto handle_lossy_children; + } + if (lenbits == BP_TREE_COEFF_NODE) { + temp[i] = absvals[node_entry >> BP_TREE_INDEX_SHIFT]; + i++; + 
PUT_BP_BIT(bits, (ordered[node_entry >> BP_TREE_INDEX_SHIFT] & BP_SIGN_BIT) != 0); + *cur = 0; + } + goto next_lossy_node; + } + } + } while (cur < next_node); + } + cur = insert; + mask = (s16)mask >> 1; + } + + return 1; +} + +#pragma dont_inline on +void readlossy(void PTR4* out, BPBITSTREAM PTR4* bits, s32 limit) +{ /* Bit-plane decoder behind ReadBPLossy and ReadBPLossyWithMotion. Zeroes BP_BLOCK_COEFFS bytes at out, then fills them with signed 8-bit values one bit plane at a time, highest plane first. Works on a local copy of *bits and stores the advanced state back at done. limit bounds the work: decoding stops right after the store made while sample_count (stores already done) equals limit. */ + s32 reached_limit; + s8 sample; + u32 old_bitcount; + u32 levels_remaining; + u8 node_kind; + u8 PTR4* tree_end; + s32 bit_value; + s32 active_count; + u8 PTR4* node_ptr; + u8 node; + u32 code; + u32 bit; + s32 delta; + s32 scan; /* refinement-loop index, then reused to hold -bit_value */ + BPBITSTYPE word; + u8 PTR4* next_node_ptr; + s32 sample_count; + u8 tree[BP_TREE_NODES]; /* work list of pending nodes; a 0 entry is skipped */ + u8 order[BP_BLOCK_COEFFS]; /* dest indices of the coefficients introduced so far, in introduction order */ + BPBITSTREAM bitcopy; + u32 PTR4* words; + BPBITSTYPE bitbuf; + u32 bitcount; + s8 PTR4* dest; + BPBITSTREAM PTR4* bitstate; + + dest = (s8 PTR4*)out; + bitcopy = *bits; + bitstate = &bitcopy; + words = bitstate->cur; + bitbuf = bitstate->bits; + bitcount = bitstate->bitlen; + sample_count = 0; + memset(dest, 0, BP_BLOCK_COEFFS); + + /* Lossy blocks store max level minus one in the stream header.
*/ + word = bitbuf; + old_bitcount = bitcount; + if (bitcount < BP_LOSSY_LEVEL_BITS) { /* header straddles two words: low part from bitbuf, rest from the next word */ + bit_value = BP_LOSSY_LEVEL_BITS - bitcount; /* borrowed as a temporary: header bits still owed by the next word */ + code = bitbuf & BP_BYTE_MASK; + word = *words; + bitcount = bitcount + BP_BITS_PER_WORD - BP_LOSSY_LEVEL_BITS; + words = words + 1; + bitbuf = word >> bit_value; + code = code | word << old_bitcount; + } else { + bitcount = bitcount - BP_LOSSY_LEVEL_BITS; + bitbuf = bitbuf >> BP_LOSSY_LEVEL_BITS; + code = word & BP_BYTE_MASK; + } + levels_remaining = (code & BP_LOSSY_LEVEL_MASK) + 1; + tree[0] = BP_READ_TREE_GROUP1_ROOT; + tree[1] = BP_READ_TREE_GROUP6_ROOT; + tree[2] = BP_READ_TREE_GROUP11_ROOT; + bit_value = (s32)(s8)(1 << (code & BP_LOSSY_LEVEL_MASK)); /* magnitude contributed by the first (highest) plane */ + tree[3] = BP_READ_TREE_BRANCH(0); + tree_end = tree + BP_LOSSY_ROOT_NODES; + active_count = 0; + node_ptr = tree; + do { + if (levels_remaining == 0) { + goto done; + } + scan = 0; + /* Active coefficients receive one refinement bit at each lower plane. */ + if (0 < active_count) { + do { + word = bitbuf; + if (bitcount == 0) { + word = *words; + bitcount = BP_WORD_TOP_BIT; + words = words + 1; + bitbuf = word >> 1; + } else { + bitcount = bitcount - 1; + bitbuf = bitbuf >> 1; + } + if ((word & 1) != 0) { + sample = dest[(u32)order[scan]]; + delta = bit_value; + if (sample < 0) { /* refinement keeps the sign: the magnitude grows away from zero */ + delta = -bit_value; + } + dest[(u32)order[scan]] = sample + (s8)delta; + reached_limit = sample_count == limit; + sample_count = sample_count + 1; + if (reached_limit) { + goto done; + } + } + scan = scan + 1; + } while (scan < active_count); + } + next_node_ptr = node_ptr; + if (node_ptr < tree_end) { + scan = -bit_value; /* from here scan is the value stored for a negative coefficient on this plane */ +read_node: + node = *node_ptr; + if (node == 0) { /* consumed slot: step over it without reading a bit */ +next_node: + node_ptr = node_ptr + 1; + } else { + if (bitcount == 0) { + word = *words; + bitcount = BP_WORD_TOP_BIT; + words = words + 1; + bitbuf = word >> 1; + if ((word & 1) != 0) { + goto decode_node; + } + goto next_node; + } + bitcount = bitcount - 1; + code = bitbuf & 1; + bitbuf = bitbuf >> 1; + if (code == 0) { /* 0: the node is left untouched for a later plane */ + goto
next_node; + } +decode_node: + node_kind = node & BP_READ_TREE_KIND_MASK; + if (node_kind == BP_READ_TREE_GROUP_NODE) { /* rewrite this slot and append three new nodes at tree_end */ + node_kind = BP_READ_TREE_INDEX(node); /* node_kind now holds the node's index field, not its kind */ + *node_ptr = BP_READ_TREE_BRANCH_FROM_NODE(node); + *tree_end = BP_READ_TREE_NODE(node_kind + BP_READ_TREE_CHILD1_BASE, BP_READ_TREE_BRANCH_NODE); + tree_end[1] = BP_READ_TREE_NODE(node_kind + BP_READ_TREE_CHILD2_BASE, BP_READ_TREE_BRANCH_NODE); + tree_end[2] = BP_READ_TREE_NODE(node_kind + BP_READ_TREE_CHILD3_BASE, BP_READ_TREE_BRANCH_NODE); + tree_end = tree_end + BP_TREE_ADDED_CHILD_COUNT; + goto node_done; + } + if (node_kind < BP_READ_TREE_BRANCH_NODE) { + if ((node & BP_READ_TREE_KIND_MASK) != BP_READ_TREE_HIGH_NODE) { + goto next_node; + } + *node_ptr = BP_READ_TREE_GROUP_FROM_INDEX(BP_READ_TREE_INDEX(node)); /* node_ptr stays put, so the rewritten slot is read again after the four children below */ + } else { + if (node_kind != BP_READ_TREE_BRANCH_NODE) { + if (node_kind != BP_READ_TREE_COEFF_NODE) { + goto next_node; + } + order[active_count] = BP_READ_TREE_INDEX(node); + /* Deferred coeff nodes carry the dest index of their coefficient.
*/ + word = bitbuf; + active_count = active_count + 1; + if (bitcount == 0) { + word = *words; + bitcount = BP_WORD_TOP_BIT; + words = words + 1; + bitbuf = word >> 1; + } else { + bitcount = bitcount - 1; + bitbuf = bitbuf >> 1; + } + delta = scan; + if ((word & 1) == 0) { /* sign bit: 0 selects +bit_value, 1 keeps -bit_value */ + delta = bit_value; + } + dest[(u32)BP_READ_TREE_INDEX(node)] = (s8)delta; + reached_limit = sample_count == limit; + sample_count = sample_count + 1; + if (reached_limit) { + goto done; + } + *node_ptr = 0; + goto next_node; + } + *node_ptr = 0; + node_ptr = node_ptr + 1; /* this slot is consumed; its four children are decoded below */ + } + code = (u32)BP_READ_TREE_INDEX(node); /* code is reused as the dest index of the first of the four children */ + if (bitcount == 0) { + word = *words; + bitcount = BP_WORD_TOP_BIT; + bitbuf = word >> 1; + words = words + 1; + if ((word & 1) != 0) { +push_0: + next_node_ptr = next_node_ptr + -1; /* NOTE(review): deferred nodes are stored below the start of this plane's list; next_node_ptr begins at tree on the first plane, so these stores land before tree[0]. Presumably this relies on the stack slot under tree (order?) being free - confirm. */ + *next_node_ptr = BP_READ_TREE_BASE(node) + BP_READ_TREE_COEFF_NODE; + goto after_0; + } + } else { + bitcount = bitcount - 1; + word = bitbuf >> 1; + bit = bitbuf & 1; + bitbuf = word; + if (bit != 0) { + goto push_0; + } + } + order[active_count] = BP_READ_TREE_INDEX(node); + /* A 0 bit introduces this child on the current plane; a 1 bit deferred it at push_0.
*/ + active_count = active_count + 1; + if (bitcount == 0) { + bitbuf = *words; + bitcount = BP_WORD_TOP_BIT; + words = words + 1; + } else { + bitcount = bitcount - 1; + } + bit = bitbuf & 1; + bitbuf = bitbuf >> 1; + delta = scan; + if (bit == 0) { + delta = bit_value; + } + dest[code] = (s8)delta; + reached_limit = sample_count == limit; + sample_count = sample_count + 1; + if (reached_limit) { + goto done; + } +after_0: + node = (u8)(code + BP_TREE_CHILD1_INDEX); /* node is reused as the dest index of the second child */ + if (bitcount == 0) { + word = *words; + bitcount = BP_WORD_TOP_BIT; + bitbuf = word >> 1; + words = words + 1; + if ((word & 1) != 0) { +push_1: + next_node_ptr = next_node_ptr + -1; + *next_node_ptr = BP_READ_TREE_COEFF(node); + goto after_1; + } + } else { + bitcount = bitcount - 1; + word = bitbuf >> 1; + bit = bitbuf & 1; + bitbuf = word; + if (bit != 0) { + goto push_1; + } + } + order[active_count] = node; + /* The bit was 0 here; a 1 bit would have deferred this child at push_1. */ + active_count = active_count + 1; + if (bitcount == 0) { + bitbuf = *words; + bitcount = BP_WORD_TOP_BIT; + words = words + 1; + } else { + bitcount = bitcount - 1; + } + bit = bitbuf & 1; + bitbuf = bitbuf >> 1; + delta = scan; + if (bit == 0) { + delta = bit_value; + } + dest[code + BP_TREE_CHILD1_INDEX] = (s8)delta; + reached_limit = sample_count == limit; + sample_count = sample_count + 1; + if (reached_limit) { + goto done; + } +after_1: + node = (u8)(code + BP_TREE_CHILD2_INDEX); + if (bitcount == 0) { + word = *words; + bitcount = BP_WORD_TOP_BIT; + bitbuf = word >> 1; + words = words + 1; + if ((word & 1) != 0) { +push_2: + next_node_ptr = next_node_ptr + -1; + *next_node_ptr = BP_READ_TREE_COEFF(node); + goto after_2; + } + } else { + bitcount = bitcount - 1; + word = bitbuf >> 1; + bit = bitbuf & 1; + bitbuf = word; + if (bit != 0) { + goto push_2; + } + } + order[active_count] = node; + active_count = active_count + 1; + if (bitcount == 0) { + bitbuf = *words; + bitcount = BP_WORD_TOP_BIT; + words = words + 1; + } else
{ + bitcount = bitcount - 1; + } + bit = bitbuf & 1; + bitbuf = bitbuf >> 1; + delta = scan; + if (bit == 0) { + delta = bit_value; + } + dest[code + BP_TREE_CHILD2_INDEX] = (s8)delta; + reached_limit = sample_count == limit; + sample_count = sample_count + 1; + if (reached_limit) { + goto done; + } +after_2: + word = bitbuf; + node = (u8)(code + BP_TREE_CHILD3_INDEX); + if (bitcount == 0) { + word = *words; + bitcount = BP_WORD_TOP_BIT; + words = words + 1; + bitbuf = word >> 1; + } else { + bitcount = bitcount - 1; + bitbuf = bitbuf >> 1; + } + if ((word & 1) == 0) { + order[active_count] = node; + word = bitbuf; + /* The sign bit follows the first nonzero magnitude bit. */ + active_count = active_count + 1; + if (bitcount == 0) { + word = *words; + bitcount = BP_WORD_TOP_BIT; + words = words + 1; + bitbuf = word >> 1; + } else { + bitcount = bitcount - 1; + bitbuf = bitbuf >> 1; + } + delta = scan; + if ((word & 1) == 0) { + delta = bit_value; + } + dest[code + BP_TREE_CHILD3_INDEX] = (s8)delta; + reached_limit = sample_count == limit; + sample_count = sample_count + 1; + if (reached_limit) { + goto done; + } + } else { + next_node_ptr = next_node_ptr + -1; + *next_node_ptr = BP_READ_TREE_COEFF(node); + } + } +node_done: + if (tree_end <= node_ptr) { + goto level_done; + } + goto read_node; + } +level_done: + bit_value = bit_value >> 1; /* the next plane is worth half as much */ + levels_remaining = (levels_remaining - 1) & BP_BYTE_MASK; + node_ptr = next_node_ptr; /* resume from the most recently deferred node */ + } while (1); + +done: + bitcopy.bitlen = bitcount; + bitcopy.cur = words; + bitcopy.bits = bitbuf; + *bits = bitcopy; /* hand the advanced stream state back to the caller */ +} +#pragma dont_inline reset + +void ReadBPLossy(s16 PTR4* out, BPBITSTREAM PTR4* bits, s32 limit) +{ /* Decodes one lossy block with readlossy and copies its 32 words to out in a fixed shuffled order. NOTE(review): coeffs.words presumably aliases coeffs.bytes (union) - confirm against BPLOSSYBLOCK. */ + BPLOSSYBLOCK coeffs; + + readlossy(coeffs.bytes, bits, limit); + out[0] = coeffs.words[0]; + out[1] = coeffs.words[2]; + out[2] = coeffs.words[4]; + out[3] = coeffs.words[6]; + out[4] = coeffs.words[1]; + out[5] = coeffs.words[3]; + out[6] = coeffs.words[5]; + out[7] = coeffs.words[7]; + out[8] = coeffs.words[12]; + out[9]
= coeffs.words[22]; + out[10] = coeffs.words[8]; + out[11] = coeffs.words[10]; + out[12] = coeffs.words[13]; + out[13] = coeffs.words[23]; + out[14] = coeffs.words[9]; + out[15] = coeffs.words[11]; + out[16] = coeffs.words[14]; + out[17] = coeffs.words[16]; + out[18] = coeffs.words[24]; + out[19] = coeffs.words[26]; + out[20] = coeffs.words[15]; + out[21] = coeffs.words[17]; + out[22] = coeffs.words[25]; + out[23] = coeffs.words[27]; + out[24] = coeffs.words[18]; + out[25] = coeffs.words[20]; + out[26] = coeffs.words[28]; + out[27] = coeffs.words[30]; + out[28] = coeffs.words[19]; + out[29] = coeffs.words[21]; + out[30] = coeffs.words[29]; + out[31] = coeffs.words[31]; +} + +void ReadBPLossyWithMotion(char PTR4* out, s32 pitch, BPBITSTREAM PTR4* bits, s32 limit, + char PTR4* prev) +{ /* Decodes one lossy block with readlossy and stores temp + prev as eight rows of eight bytes: rows of out are pitch bytes apart, prev is read as 64 consecutive bytes. The temp byte pairs follow the same order as the words in ReadBPLossy. */ + char temp[BP_BLOCK_COEFFS]; + char PTR4* dst = out; + char PTR4* src = prev; + + readlossy(temp, bits, limit); + dst[0] = temp[0] + src[0]; + dst[1] = temp[1] + src[1]; + dst[2] = temp[4] + src[2]; + dst[3] = temp[5] + src[3]; + dst[4] = temp[8] + src[4]; + dst[5] = temp[9] + src[5]; + dst[6] = temp[12] + src[6]; + dst[7] = temp[13] + src[7]; + dst += pitch; + dst[0] = temp[2] + src[8]; + dst[1] = temp[3] + src[9]; + dst[2] = temp[6] + src[10]; + dst[3] = temp[7] + src[11]; + dst[4] = temp[10] + src[12]; + dst[5] = temp[11] + src[13]; + dst[6] = temp[14] + src[14]; + dst[7] = temp[15] + src[15]; + dst += pitch; + dst[0] = temp[24] + src[16]; + dst[1] = temp[25] + src[17]; + dst[2] = temp[44] + src[18]; + dst[3] = temp[45] + src[19]; + dst[4] = temp[16] + src[20]; + dst[5] = temp[17] + src[21]; + dst[6] = temp[20] + src[22]; + dst[7] = temp[21] + src[23]; + dst = dst + pitch; + dst[0] = temp[26] + src[24]; + dst[1] = temp[27] + src[25]; + dst[2] = temp[46] + src[26]; + dst[3] = temp[47] + src[27]; + dst[4] = temp[18] + src[28]; + dst[5] = temp[19] + src[29]; + dst[6] = temp[22] + src[30]; + dst[7] = temp[23] + src[31]; + dst = dst + pitch; + dst[0] = temp[28] + src[32]; + dst[1] =
temp[29] + src[33]; + dst[2] = temp[32] + src[34]; + dst[3] = temp[33] + src[35]; + dst[4] = temp[48] + src[36]; + dst[5] = temp[49] + src[37]; + dst[6] = temp[52] + src[38]; + dst[7] = temp[53] + src[39]; + dst = dst + pitch; + dst[0] = temp[30] + src[40]; + dst[1] = temp[31] + src[41]; + dst[2] = temp[34] + src[42]; + dst[3] = temp[35] + src[43]; + dst[4] = temp[50] + src[44]; + dst[5] = temp[51] + src[45]; + dst[6] = temp[54] + src[46]; + dst[7] = temp[55] + src[47]; + dst = dst + pitch; + dst[0] = temp[36] + src[48]; + dst[1] = temp[37] + src[49]; + dst[2] = temp[40] + src[50]; + dst[3] = temp[41] + src[51]; + dst[4] = temp[56] + src[52]; + dst[5] = temp[57] + src[53]; + dst[6] = temp[60] + src[54]; + dst[7] = temp[61] + src[55]; + dst = dst + pitch; + dst[0] = temp[38] + src[56]; + dst[1] = temp[39] + src[57]; + dst[2] = temp[42] + src[58]; + dst[3] = temp[43] + src[59]; + dst[4] = temp[58] + src[60]; + dst[5] = temp[59] + src[61]; + dst[6] = temp[62] + src[62]; + dst[7] = temp[63] + src[63]; +} diff --git a/src/bink/src/sdk/bitplane.h b/src/bink/src/sdk/bitplane.h new file mode 100644 index 000000000..365f73f44 --- /dev/null +++ b/src/bink/src/sdk/bitplane.h @@ -0,0 +1,27 @@ +#ifndef BINK_SDK_BITPLANE_H +#define BINK_SDK_BITPLANE_H + +#include "bink.h" + +typedef u32 BPBITSTYPE; + +#define BPBITSTYPELEN 32 +#define BPBITSTYPEBYTES 4 + +typedef struct BPBITSTREAM +{ + u32 PTR4* cur; /* next word of the stream; readlossy reads *cur and advances it */ + u32 PTR4* init; + BPBITSTYPE bits; /* buffered bits; readlossy consumes them from bit 0 upward */ + u32 bitlen; /* Number of valid low-order bits currently buffered.
*/ +} BPBITSTREAM; + +u32 LenBPLossless(s16 PTR4* vals); +void WriteBPLossless(BPBITSTREAM PTR4* bits, s16 PTR4* vals); +u32 WriteBPLossy(BPBITSTREAM PTR4* bits, char PTR4* vals); +void ReadBPLossless(s16 PTR4* out, BPBITSTREAM PTR4* bits); +void ReadBPLossy(s16 PTR4* out, BPBITSTREAM PTR4* bits, s32 limit); +void ReadBPLossyWithMotion(char PTR4* out, s32 pitch, BPBITSTREAM PTR4* bits, s32 limit, + char PTR4* prev); + +#endif diff --git a/src/bink/src/sdk/dct.c b/src/bink/src/sdk/dct.c index e69de29bb..43f02d724 100644 --- a/src/bink/src/sdk/dct.c +++ b/src/bink/src/sdk/dct.c @@ -0,0 +1,1335 @@ +#include "bink.h" +#include "dct.h" + +#define DCT_BLOCK_WIDTH 8 +#define DCT_BLOCK_COEFFS 64 +#define DCT_PATTERN_COUNT 16 +#define DCT_PATTERN_BYTES (DCT_PATTERN_COUNT * DCT_BLOCK_COEFFS) +#define DCT_ROW0 (DCT_BLOCK_WIDTH * 0) +#define DCT_ROW1 (DCT_BLOCK_WIDTH * 1) +#define DCT_ROW2 (DCT_BLOCK_WIDTH * 2) +#define DCT_ROW3 (DCT_BLOCK_WIDTH * 3) +#define DCT_ROW4 (DCT_BLOCK_WIDTH * 4) +#define DCT_ROW5 (DCT_BLOCK_WIDTH * 5) +#define DCT_ROW6 (DCT_BLOCK_WIDTH * 6) +#define DCT_ROW7 (DCT_BLOCK_WIDTH * 7) +#define DCT_COL0 0 +#define DCT_COL1 1 +#define DCT_COL2 2 +#define DCT_COL3 3 +#define DCT_COL4 4 +#define DCT_COL5 5 +#define DCT_COL6 6 +#define DCT_COL7 7 +#define DCT_QUANT_LEVELS 16 +#define DCT_FIXED_SHIFT 11 /* right shift applied by DCT_FIXED_MUL and DCT_DEQUANT */ +#define DCT_OUTPUT_SHIFT 8 +#define DCT_BYTE_ROUND 0x7f +#define DCT_INPUT_SCALE 0x100 +#define DCT_BYTE_MASK 0xff +#define DCT_BYTE_PAIR_MASK 0xff0000 +#define DCT_FIX_0_382683433 0x310 /* each DCT_FIX_x is x * 2^DCT_FIXED_SHIFT, rounded: 0.382683433 * 2048 = 783.7 */ +#define DCT_FIX_0_541196100 0x454 +#define DCT_FIX_0_707106781 0x5a8 +#define DCT_FIX_1_082392200 0x8a9 +#define DCT_FIX_1_306562965 0xa74 +#define DCT_FIX_1_414213562 0xb50 +#define DCT_FIX_1_847759065 0xec8 +#define DCT_FIX_NEG_2_613125930 (-0x14e8) +#define DCT_ADVANCE_U32_BYTES(ptr, bytes) ((u32 PTR4*)((u8 PTR4*)(ptr) + (bytes))) +#define DCT_FIXED_MUL(value, scale) ((value) * (scale) >> DCT_FIXED_SHIFT) +#define DCT_BYTE_SAMPLE(value) ((u8)(((value) + DCT_BYTE_ROUND) >>
DCT_OUTPUT_SHIFT)) +#define DCT_AC_MASK(coeffs) \ + ((u16)((coeffs)[DCT_ROW1] | (coeffs)[DCT_ROW2] | (coeffs)[DCT_ROW3] | (coeffs)[DCT_ROW4] | \ + (coeffs)[DCT_ROW5] | (coeffs)[DCT_ROW6] | (coeffs)[DCT_ROW7])) +#define DCT_DEQUANT(coeffs, quant, index) ((coeffs)[index] * (quant)[index] >> DCT_FIXED_SHIFT) + +/* 8x8 inverse DCT dequant tables for the four Bink block/format variants. */ +static const s32 ifiquantlevels8[DCT_QUANT_LEVELS][DCT_BLOCK_COEFFS] = { + { + 0x00010000, 0x00016315, 0x00014e7b, 0x00016577, 0x00010000, 0x0000eeda, 0x0000be80, 0x0000611e, + 0x0001e83d, 0x0002a535, 0x0002f1e6, 0x0002724c, 0x00024102, 0x00017f9b, 0x0001083c, 0x0000a552, + 0x0002346f, 0x00030ee5, 0x0002c628, 0x00027f20, 0x00021f88, 0x0001dc53, 0x00014819, 0x0000a743, + 0x0001fbfa, 0x0002c096, 0x000297b5, 0x00023f32, 0x00027fad, 0x0001f697, 0x00015a31, 0x00009688, + 0x0001d000, 0x00028396, 0x0002346f, 0x0001fbfa, 0x00025000, 0x0001ab6b, 0x00012669, 0x00008d43, + 0x00019247, 0x0001f9aa, 0x0001dc53, 0x000231b8, 0x0001d122, 0x000159b3, 0x0000ee1f, 0x000075ed, + 0x00012f12, 0x0001e06c, 0x0001c48c, 0x00019748, 0x0001490c, 0x00010288, 0x0000e0f1, 0x000072ad, + 0x0000cb10, 0x000119a8, 0x00014e86, 0x000122af, 0x0000f735, 0x0000ef51, 0x0000a4d8, 0x00006517, + }, + { + 0x00015555, 0x0001d971, 0x0001bdf9, 0x0001dc9f, 0x00015555, 0x00013e78, 0x0000fe00, 0x0000817d, + 0x00028afc, 0x000386f1, 0x0003ed33, 0x00034311, 0x00030158, 0x0001ff7a, 0x0001604f, 0x0000dc6d, + 0x0002f095, 0x000413dc, 0x0003b2e0, 0x0003542a, 0x0002d4b5, 0x00027b19, 0x0001b577, 0x0000df04, + 0x0002a54e, 0x0003ab73, 0x000374f1, 0x0002feee, 0x000354e7, 0x00029e1f, 0x0001cd96, 0x0000c8b6, + 0x00026aab, 0x00035a1e, 0x0002f095, 0x0002a54e, 0x00031555, 0x000239e4, 0x0001888c, 0x0000bc59, + 0x0002185e, 0x0002a238, 0x00027b19, 0x0002ecf5, 0x00026c2d, 0x0001ccee, 0x00013d7e, 0x00009d3c, + 0x00019418, 0x00028090, 0x00025b66, 0x00021f0b, 0x0001b6bb, 0x000158b5, 0x00012bec, 0x000098e6, + 0x00010ec0, 0x0001778a, 0x0001be09, 0x00018394, 
0x0001499c, 0x00013f17, 0x0000dbcb, 0x000086c9, + }, + { + 0x0001aaab, 0x00024fce, 0x00022d78, 0x000253c7, 0x0001aaab, 0x00018e16, 0x00013d80, 0x0000a1dc, + 0x00032dbb, 0x000468ad, 0x0004e87f, 0x000413d5, 0x0003c1ae, 0x00027f58, 0x0001b863, 0x00011388, + 0x0003acba, 0x000518d3, 0x00049f98, 0x00042935, 0x000389e2, 0x000319df, 0x000222d4, 0x000116c5, + 0x00034ea1, 0x0004964f, 0x0004522d, 0x0003bea9, 0x00042a21, 0x000345a7, 0x000240fc, 0x0000fae3, + 0x00030555, 0x000430a5, 0x0003acba, 0x00034ea1, 0x0003daab, 0x0002c85d, 0x0001eaaf, 0x0000eb6f, + 0x00029e76, 0x00034ac5, 0x000319df, 0x0003a833, 0x00030738, 0x0002402a, 0x00018cde, 0x0000c48a, + 0x0001f91e, 0x000320b4, 0x0002f23f, 0x0002a6ce, 0x00022469, 0x0001aee2, 0x000176e7, 0x0000bf20, + 0x00015270, 0x0001d56d, 0x00022d8b, 0x0001e479, 0x00019c02, 0x00018edd, 0x000112be, 0x0000a87b, + }, + { + 0x00020000, 0x0002c62a, 0x00029cf6, 0x0002caef, 0x00020000, 0x0001ddb4, 0x00017d01, 0x0000c23c, + 0x0003d07a, 0x00054a69, 0x0005e3cc, 0x0004e499, 0x00048204, 0x0002ff36, 0x00021077, 0x00014aa3, + 0x000468df, 0x00061dca, 0x00058c50, 0x0004fe3f, 0x00043f0f, 0x0003b8a6, 0x00029032, 0x00014e86, + 0x0003f7f5, 0x0005812c, 0x00052f69, 0x00047e65, 0x0004ff5a, 0x0003ed2e, 0x0002b461, 0x00012d11, + 0x0003a000, 0x0005072c, 0x000468df, 0x0003f7f5, 0x0004a000, 0x000356d6, 0x00024cd2, 0x00011a85, + 0x0003248d, 0x0003f353, 0x0003b8a6, 0x00046370, 0x0003a243, 0x0002b365, 0x0001dc3e, 0x0000ebd9, + 0x00025e24, 0x0003c0d8, 0x00038919, 0x00032e91, 0x00029218, 0x00020510, 0x0001c1e2, 0x0000e559, + 0x00019620, 0x0002334f, 0x00029d0d, 0x0002455e, 0x0001ee69, 0x0001dea2, 0x000149b0, 0x0000ca2d, + }, + { + 0x0002aaab, 0x0003b2e3, 0x00037bf2, 0x0003b93e, 0x0002aaab, 0x00027cf0, 0x0001fc01, 0x000102fa, + 0x000515f8, 0x00070de2, 0x0007da65, 0x00068621, 0x000602b1, 0x0003fef3, 0x0002c09f, 0x0001b8da, + 0x0005e129, 0x000827b8, 0x000765c0, 0x0006a855, 0x0005a96a, 0x0004f632, 0x00036aed, 0x0001be09, + 0x00054a9c, 0x000756e5, 0x0006e9e2, 0x0005fddb, 0x0006a9ce, 
0x00053c3e, 0x00039b2d, 0x0001916b, + 0x0004d555, 0x0006b43b, 0x0005e129, 0x00054a9c, 0x00062aab, 0x000473c8, 0x00031118, 0x000178b2, + 0x000430bc, 0x0005446f, 0x0004f632, 0x0005d9eb, 0x0004d85a, 0x000399dc, 0x00027afd, 0x00013a77, + 0x00032830, 0x00050121, 0x0004b6cc, 0x00043e16, 0x00036d76, 0x0002b16a, 0x000257d8, 0x000131cc, + 0x00021d80, 0x0002ef14, 0x00037c11, 0x00030728, 0x00029337, 0x00027e2e, 0x0001b796, 0x00010d91, + }, + { + 0x00038000, 0x0004daca, 0x000492ae, 0x0004e322, 0x00038000, 0x000343fb, 0x00029ac1, 0x000153e8, + 0x0006acd5, 0x00094238, 0x000a4ea5, 0x0008900c, 0x0007e388, 0x00053e9f, 0x00039cd0, 0x0002429e, + 0x0007b786, 0x000ab421, 0x0009b58c, 0x0008bcef, 0x00076e5b, 0x00068322, 0x00047c57, 0x0002496b, + 0x0006f1ed, 0x0009a20d, 0x000912f8, 0x0007dd30, 0x0008bede, 0x0006df11, 0x0004bbab, 0x00020edd, + 0x00065800, 0x0008cc8e, 0x0007b786, 0x0006f1ed, 0x00081800, 0x0005d7f7, 0x00040670, 0x0001ee69, + 0x00057ff7, 0x0006e9d2, 0x00068322, 0x0007ae04, 0x00065bf6, 0x0004b9f1, 0x0003416c, 0x00019cbc, + 0x000424bf, 0x0006917b, 0x00062feb, 0x0005917d, 0x00047faa, 0x000388dc, 0x0003134c, 0x0001915c, + 0x0002c6b8, 0x0003d9cb, 0x000492d7, 0x0003f964, 0x00036138, 0x0003459c, 0x000240f5, 0x000161cf, + }, + { + 0x00040000, 0x00058c54, 0x000539ec, 0x000595dd, 0x00040000, 0x0003bb68, 0x0002fa01, 0x00018477, + 0x0007a0f4, 0x000a94d3, 0x000bc798, 0x0009c932, 0x00090409, 0x0005fe6d, 0x000420ee, 0x00029547, + 0x0008d1be, 0x000c3b94, 0x000b18a0, 0x0009fc7f, 0x00087e1f, 0x0007714c, 0x00052064, 0x00029d0d, + 0x0007efea, 0x000b0258, 0x000a5ed3, 0x0008fcc9, 0x0009feb5, 0x0007da5d, 0x000568c3, 0x00025a21, + 0x00074000, 0x000a0e59, 0x0008d1be, 0x0007efea, 0x00094000, 0x0006adac, 0x000499a5, 0x0002350b, + 0x0006491a, 0x0007e6a7, 0x0007714c, 0x0008c6e0, 0x00074487, 0x000566ca, 0x0003b87b, 0x0001d7b3, + 0x0004bc48, 0x000781b1, 0x00071232, 0x00065d22, 0x00052430, 0x00040a20, 0x000383c5, 0x0001cab3, + 0x00032c3f, 0x0004669f, 0x00053a1a, 0x00048abc, 0x0003dcd3, 0x0003bd45, 
0x00029361, 0x0001945a, + }, + { + 0x00050000, 0x0006ef69, 0x00068867, 0x0006fb55, 0x00050000, 0x0004aa42, 0x0003b881, 0x0001e595, + 0x00098931, 0x000d3a07, 0x000eb97e, 0x000c3b7e, 0x000b450b, 0x00077e08, 0x0005292a, 0x00033a99, + 0x000b062d, 0x000f4a78, 0x000ddec8, 0x000c7b9f, 0x000a9da7, 0x00094d9f, 0x0006687d, 0x00034450, + 0x0009ebe4, 0x000dc2ee, 0x000cf687, 0x000b3bfb, 0x000c7e62, 0x0009d0f4, 0x0006c2f4, 0x0002f0aa, + 0x00091000, 0x000c91ef, 0x000b062d, 0x0009ebe4, 0x000b9000, 0x00085917, 0x0005c00e, 0x0002c24d, + 0x0007db61, 0x0009e050, 0x00094d9f, 0x000af898, 0x000915a8, 0x0006c07d, 0x0004a69a, 0x00024d9f, + 0x0005eb59, 0x0009621d, 0x0008d6be, 0x0007f46a, 0x00066d3c, 0x00050ca7, 0x000464b6, 0x00023d5f, + 0x0003f74f, 0x00058046, 0x000688a0, 0x0005ad6b, 0x0004d407, 0x0004ac96, 0x00033839, 0x0001f971, + }, + { + 0x00060000, 0x0008527e, 0x0007d6e1, 0x000860cc, 0x00060000, 0x0005991c, 0x00047702, 0x000246b3, + 0x000b716e, 0x000fdf3c, 0x0011ab63, 0x000eadcb, 0x000d860d, 0x0008fda3, 0x00063165, 0x0003dfea, + 0x000d3a9c, 0x0012595d, 0x0010a4f0, 0x000efabe, 0x000cbd2e, 0x000b29f1, 0x0007b096, 0x0003eb93, + 0x000be7df, 0x00108384, 0x000f8e3c, 0x000d7b2e, 0x000efe0f, 0x000bc78b, 0x00081d24, 0x00038732, + 0x000ae000, 0x000f1585, 0x000d3a9c, 0x000be7df, 0x000de000, 0x000a0482, 0x0006e677, 0x00034f90, + 0x00096da8, 0x000bd9fa, 0x000b29f1, 0x000d2a50, 0x000ae6ca, 0x00081a2f, 0x000594b9, 0x0002c38c, + 0x00071a6b, 0x000b4289, 0x000a9b4a, 0x00098bb2, 0x0007b649, 0x00060f2f, 0x000545a7, 0x0002b00c, + 0x0004c25f, 0x000699ee, 0x0007d727, 0x0006d01a, 0x0005cb3c, 0x00059be7, 0x0003dd11, 0x00025e87, + }, + { + 0x00080000, 0x000b18a8, 0x000a73d7, 0x000b2bbb, 0x00080000, 0x000776cf, 0x0005f402, 0x000308ef, + 0x000f41e8, 0x001529a5, 0x00178f2f, 0x00139264, 0x00120812, 0x000bfcd9, 0x000841dc, 0x00052a8e, + 0x0011a37b, 0x00187727, 0x00163140, 0x0013f8fe, 0x0010fc3e, 0x000ee297, 0x000a40c8, 0x00053a1a, + 0x000fdfd4, 0x001604b0, 0x0014bda5, 0x0011f992, 0x0013fd69, 0x000fb4b9, 0x000ad186, 
0x0004b442, + 0x000e8000, 0x00141cb1, 0x0011a37b, 0x000fdfd4, 0x00128000, 0x000d5b58, 0x00093349, 0x00046a15, + 0x000c9235, 0x000fcd4d, 0x000ee297, 0x00118dc0, 0x000e890d, 0x000acd94, 0x000770f7, 0x0003af65, + 0x0009788f, 0x000f0362, 0x000e2463, 0x000cba43, 0x000a4861, 0x0008143f, 0x00070789, 0x00039565, + 0x0006587f, 0x0008cd3d, 0x000a7434, 0x00091577, 0x0007b9a6, 0x00077a89, 0x000526c2, 0x000328b4, + }, + { + 0x000c0000, 0x0010a4fd, 0x000fadc3, 0x0010c198, 0x000c0000, 0x000b3237, 0x0008ee03, 0x00048d66, + 0x0016e2db, 0x001fbe78, 0x002356c7, 0x001d5b96, 0x001b0c1a, 0x0011fb46, 0x000c62ca, 0x0007bfd5, + 0x001a7539, 0x0024b2bb, 0x002149e1, 0x001df57d, 0x00197a5d, 0x001653e3, 0x000f612c, 0x0007d727, + 0x0017cfbd, 0x00210709, 0x001f1c78, 0x001af65b, 0x001dfc1e, 0x00178f16, 0x00103a49, 0x00070e64, + 0x0015c000, 0x001e2b0a, 0x001a7539, 0x0017cfbd, 0x001bc000, 0x00140904, 0x000dccee, 0x00069f20, + 0x0012db4f, 0x0017b3f4, 0x001653e3, 0x001a54a0, 0x0015cd94, 0x0010345e, 0x000b2972, 0x00058718, + 0x000e34d7, 0x00168513, 0x00153695, 0x00131765, 0x000f6c91, 0x000c1e5e, 0x000a8b4e, 0x00056018, + 0x000984be, 0x000d33dc, 0x000fae4e, 0x000da033, 0x000b9678, 0x000b37ce, 0x0007ba22, 0x0004bd0e, + }, + { + 0x00110000, 0x00179466, 0x0016362a, 0x0017bced, 0x00110000, 0x000fdc79, 0x000ca685, 0x000672fb, + 0x00206c0c, 0x002cf87f, 0x00321044, 0x00299714, 0x00265125, 0x0019794e, 0x00118bf4, 0x000afa6d, + 0x00257b66, 0x0033fd33, 0x002f28a9, 0x002a711b, 0x00241804, 0x001fa181, 0x0015c9a9, 0x000b1b77, + 0x0021bba2, 0x002ec9f7, 0x002c12ff, 0x00263256, 0x002a7a80, 0x0021600a, 0x0016fd3c, 0x0009ff0d, + 0x001ed000, 0x002abcf9, 0x00257b66, 0x0021bba2, 0x00275000, 0x001c621b, 0x00138cfb, 0x0009616e, + 0x001ab6b0, 0x00219444, 0x001fa181, 0x00254d38, 0x001ee33c, 0x0016f4db, 0x000fd00c, 0x0007d4b7, + 0x00142030, 0x001fe730, 0x001e0d52, 0x001b0bcf, 0x0015d9ce, 0x00112b06, 0x000ef004, 0x00079d77, + 0x000d7c0e, 0x0012b423, 0x001636ee, 0x00134d9e, 0x00106a80, 0x000fe464, 0x000af25b, 0x0006b67f, + }, + { 
+ 0x00160000, 0x001e83cf, 0x001cbe90, 0x001eb842, 0x00160000, 0x001486ba, 0x00105f06, 0x00085891, + 0x0029f53d, 0x003a3286, 0x0040c9c2, 0x0035d293, 0x00319630, 0x0020f756, 0x0016b51e, 0x000e3506, + 0x00308193, 0x004347ac, 0x003d0771, 0x0036ecba, 0x002eb5aa, 0x0028ef20, 0x001c3225, 0x000e5fc7, + 0x002ba786, 0x003c8ce5, 0x00390986, 0x00316e52, 0x0036f8e1, 0x002b30fe, 0x001dc030, 0x000cefb7, + 0x0027e000, 0x00374ee7, 0x00308193, 0x002ba786, 0x0032e000, 0x0024bb33, 0x00194d09, 0x000c23bb, + 0x00229212, 0x002b7494, 0x0028ef20, 0x003045d0, 0x0027f8e4, 0x001db557, 0x001476a6, 0x000a2256, + 0x001a0b89, 0x0029494d, 0x0026e410, 0x00230039, 0x001c470a, 0x001637ad, 0x001354b9, 0x0009dad6, + 0x0011735d, 0x00183469, 0x001cbf8f, 0x0018fb09, 0x00153e87, 0x001490fa, 0x000e2a94, 0x0008aff0, + }, + { + 0x001c0000, 0x0026d64d, 0x00249572, 0x0027190e, 0x001c0000, 0x001a1fd6, 0x0014d607, 0x000a9f44, + 0x003566aa, 0x004a11c2, 0x00527525, 0x0044805e, 0x003f1c3e, 0x0029f4f9, 0x001ce683, 0x001214f0, + 0x003dbc30, 0x0055a109, 0x004dac61, 0x0045e778, 0x003b72d9, 0x00341911, 0x0023e2bb, 0x00124b5b, + 0x00378f64, 0x004d1069, 0x004897c2, 0x003ee97f, 0x0045f6f0, 0x0036f889, 0x0025dd54, 0x001076e9, + 0x0032c000, 0x0046646c, 0x003dbc30, 0x00378f64, 0x0040c000, 0x002ebfb5, 0x00203380, 0x000f734b, + 0x002bffb9, 0x00374e8e, 0x00341911, 0x003d7020, 0x0032dfae, 0x0025cf86, 0x001a0b5f, 0x000ce5e2, + 0x002125f5, 0x00348bd6, 0x00317f5b, 0x002c8beb, 0x0023fd53, 0x001c46dc, 0x00189a60, 0x000c8ae2, + 0x001635bc, 0x001ece57, 0x002496b6, 0x001fcb22, 0x001b09c4, 0x001a2ce1, 0x001207a5, 0x000b0e77, + }, + { + 0x00220000, 0x002f28cc, 0x002c6c53, 0x002f79da, 0x00220000, 0x001fb8f1, 0x00194d09, 0x000ce5f7, + 0x0040d818, 0x0059f0fe, 0x00642089, 0x00532e29, 0x004ca24b, 0x0032f29c, 0x002317e8, 0x0015f4db, + 0x004af6cc, 0x0067fa67, 0x005e5152, 0x0054e237, 0x00483007, 0x003f4303, 0x002b9351, 0x001636ee, + 0x00437743, 0x005d93ee, 0x005825fe, 0x004c64ad, 0x0054f4ff, 0x0042c014, 0x002dfa79, 0x0013fe1a, + 0x003da000, 
0x005579f1, 0x004af6cc, 0x00437743, 0x004ea000, 0x0038c437, 0x002719f7, 0x0012c2db, + 0x00356d61, 0x00432888, 0x003f4303, 0x004a9a70, 0x003dc678, 0x002de9b5, 0x001fa018, 0x000fa96e, + 0x00284060, 0x003fce60, 0x003c1aa5, 0x0036179d, 0x002bb39b, 0x0022560c, 0x001de007, 0x000f3aee, + 0x001af81b, 0x00256845, 0x002c6ddd, 0x00269b3c, 0x0020d500, 0x001fc8c8, 0x0015e4b7, 0x000d6cfe, + }, + { + 0x002c0000, 0x003d079e, 0x00397d20, 0x003d7083, 0x002c0000, 0x00290d75, 0x0020be0c, 0x0010b121, + 0x0053ea79, 0x0074650c, 0x00819383, 0x006ba525, 0x00632c61, 0x0041eeac, 0x002d6a3b, 0x001c6a0c, + 0x00610326, 0x00868f57, 0x007a0ee2, 0x006dd974, 0x005d6b54, 0x0051de40, 0x0038644b, 0x001cbf8f, + 0x00574f0b, 0x007919ca, 0x0072130c, 0x0062dca3, 0x006df1c2, 0x005661fb, 0x003b8060, 0x0019df6d, + 0x004fc000, 0x006e9dce, 0x00610326, 0x00574f0b, 0x0065c000, 0x00497665, 0x00329a12, 0x00184776, + 0x00452423, 0x0056e928, 0x0051de40, 0x00608ba0, 0x004ff1c9, 0x003b6aae, 0x0028ed4d, 0x001444ac, + 0x00341713, 0x0052929a, 0x004dc821, 0x00460071, 0x00388e14, 0x002c6f5a, 0x0026a973, 0x0013b5ad, + 0x0022e6ba, 0x003068d2, 0x00397f1e, 0x0031f611, 0x002a7d0f, 0x002921f4, 0x001c5528, 0x00115fdf, + }, +}; +static const double ifiquantlevels8align = 0.0; +static const s32 ifquantlevels8[DCT_QUANT_LEVELS][DCT_BLOCK_COEFFS] = { + { + 0x00080000, 0x0005c487, 0x00061f79, 0x0005baae, 0x00080000, 0x0008930a, 0x000ac027, 0x0015168b, + 0x000431d6, 0x00030631, 0x0002b770, 0x0003451f, 0x00038ca2, 0x000556bc, 0x0007c031, 0x000c635e, + 0x0003a0df, 0x00029dae, 0x0002e245, 0x00033453, 0x0003c499, 0x00044cb2, 0x00063df7, 0x000c3e85, + 0x0004081b, 0x0002e81c, 0x000315f1, 0x00038f7e, 0x0003339d, 0x0004132c, 0x0005ea73, 0x000d9ae4, + 0x000469ee, 0x00032ea3, 0x0003a0df, 0x0004081b, 0x0003759f, 0x0004caa4, 0x0006f4ce, 0x000e7f80, + 0x0005174e, 0x00040cd5, 0x00044cb2, 0x0003a55d, 0x0004672f, 0x0005ec9c, 0x000899c7, 0x00115df8, + 0x0006c1ed, 0x0004434e, 0x00048686, 0x00050748, 0x0006395a, 0x0007ebf4, 0x00091ac5, 0x0011dbf8, + 
0x000a15e9, 0x00074574, 0x00061f42, 0x00070ba4, 0x000848db, 0x00088ec4, 0x000c6c80, 0x00144274, + }, + { + 0x00060000, 0x00045365, 0x0004979b, 0x00044c02, 0x00060000, 0x00066e48, 0x0008101d, 0x000fd0e8, + 0x00032561, 0x000244a5, 0x00020994, 0x000273d7, 0x0002a979, 0x0004010d, 0x0005d025, 0x00094a87, + 0x0002b8a7, 0x0001f642, 0x000229b4, 0x0002673e, 0x0002d373, 0x00033986, 0x0004ae79, 0x00092ee4, + 0x00030615, 0x00022e15, 0x00025075, 0x0002ab9f, 0x000266b6, 0x00030e61, 0x00046fd6, 0x000a342b, + 0x00034f73, 0x000262fa, 0x0002b8a7, 0x00030615, 0x00029837, 0x000397fb, 0x0005379b, 0x000adfa0, + 0x0003d17b, 0x000309a0, 0x00033986, 0x0002bc06, 0x00034d63, 0x00047175, 0x00067355, 0x000d067a, + 0x00051172, 0x0003327b, 0x000364e5, 0x0003c576, 0x0004ab04, 0x0005f0f7, 0x0006d414, 0x000d64fa, + 0x0007906f, 0x00057417, 0x00049772, 0x000548bb, 0x000636a4, 0x00066b13, 0x00095160, 0x000f31d7, + }, + { + 0x0004cccd, 0x000375ea, 0x0003ac7c, 0x00037002, 0x0004cccd, 0x00052506, 0x0006734b, 0x000ca720, + 0x0002844d, 0x0001d084, 0x0001a143, 0x0001f646, 0x0002212e, 0x0003340b, 0x0004a684, 0x00076ed2, + 0x00022d53, 0x000191cf, 0x0001baf6, 0x0001ec32, 0x000242c2, 0x0002946b, 0x0003bec7, 0x000758b6, + 0x00026b44, 0x0001be77, 0x0001d9f7, 0x000222e5, 0x0001ebc5, 0x000271e7, 0x00038cac, 0x000829bc, + 0x0002a5f5, 0x0001e8c8, 0x00022d53, 0x00026b44, 0x0002135f, 0x0002dffc, 0x00042c7c, 0x0008b2e6, + 0x00030dfc, 0x00026e1a, 0x0002946b, 0x00023005, 0x0002a44f, 0x00038df7, 0x00052911, 0x000a6b95, + 0x00040df4, 0x00028ec9, 0x0002b71d, 0x0003045f, 0x0003bc03, 0x0004c0c5, 0x00057676, 0x000ab72f, + 0x00060d26, 0x00045cdf, 0x0003ac5b, 0x00043a2f, 0x0004f883, 0x00052276, 0x0007744d, 0x000c27df, + }, + { + 0x00040000, 0x0002e243, 0x00030fbc, 0x0002dd57, 0x00040000, 0x00044985, 0x00056014, 0x000a8b45, + 0x000218eb, 0x00018319, 0x00015bb8, 0x0001a290, 0x0001c651, 0x0002ab5e, 0x0003e019, 0x000631af, + 0x0001d070, 0x00014ed7, 0x00017123, 0x00019a29, 0x0001e24d, 0x00022659, 0x00031efb, 0x00061f42, + 0x0002040e, 
0x0001740e, 0x00018af8, 0x0001c7bf, 0x000199cf, 0x00020996, 0x0002f53a, 0x0006cd72, + 0x000234f7, 0x00019751, 0x0001d070, 0x0002040e, 0x0001bad0, 0x00026552, 0x00037a67, 0x00073fc0, + 0x00028ba7, 0x0002066b, 0x00022659, 0x0001d2af, 0x00023397, 0x0002f64e, 0x00044ce3, 0x0008aefc, + 0x000360f6, 0x000221a7, 0x00024343, 0x000283a4, 0x00031cad, 0x0003f5fa, 0x00048d63, 0x0008edfc, + 0x00050af5, 0x0003a2ba, 0x00030fa1, 0x000385d2, 0x0004246e, 0x00044762, 0x00063640, 0x000a213a, + }, + { + 0x00030000, 0x000229b2, 0x00024bcd, 0x00022601, 0x00030000, 0x00033724, 0x0004080f, 0x0007e874, + 0x000192b0, 0x00012253, 0x000104ca, 0x000139ec, 0x000154bd, 0x00020087, 0x0002e812, 0x0004a543, + 0x00015c54, 0x0000fb21, 0x000114da, 0x0001339f, 0x000169b9, 0x00019cc3, 0x0002573d, 0x00049772, + 0x0001830a, 0x0001170a, 0x0001283a, 0x000155cf, 0x0001335b, 0x00018730, 0x000237eb, 0x00051a16, + 0x0001a7b9, 0x0001317d, 0x00015c54, 0x0001830a, 0x00014c1c, 0x0001cbfd, 0x00029bcd, 0x00056fd0, + 0x0001e8bd, 0x000184d0, 0x00019cc3, 0x00015e03, 0x0001a6b2, 0x000238bb, 0x000339ab, 0x0006833d, + 0x000288b9, 0x0001993d, 0x0001b272, 0x0001e2bb, 0x00025582, 0x0002f87b, 0x00036a0a, 0x0006b27d, + 0x0003c838, 0x0002ba0c, 0x00024bb9, 0x0002a45d, 0x00031b52, 0x0003358a, 0x0004a8b0, 0x000798eb, + }, + { + 0x00024925, 0x0001a5dd, 0x0001bfd9, 0x0001a30d, 0x00024925, 0x00027327, 0x00031254, 0x00060671, + 0x000132d0, 0x0000dd33, 0x0000c6b2, 0x0000ef2d, 0x0001039c, 0x0001867f, 0x000236e9, 0x00038a1b, + 0x00010964, 0x0000bf56, 0x0000d2ef, 0x0000ea61, 0x00011399, 0x00013a7c, 0x0001c890, 0x00037f94, + 0x000126e3, 0x0000d49a, 0x0000e1b3, 0x0001046d, 0x0000ea2d, 0x00012a0d, 0x0001b0b3, 0x0003e31d, + 0x000142d6, 0x0000e8c1, 0x00010964, 0x000126e3, 0x0000fd09, 0x00015e78, 0x0001fccd, 0x0004246e, + 0x0001745f, 0x0001283d, 0x00013a7c, 0x00010aad, 0x0001420d, 0x0001b151, 0x00027514, 0x0004f647, + 0x0001ee44, 0x000137cd, 0x00014b02, 0x00016fcc, 0x0001c73e, 0x0002436a, 0x000299ef, 0x00051a47, + 0x0002e1b0, 0x000213d8, 
0x0001bfca, 0x00020353, 0x00025df5, 0x000271ef, 0x00038cb7, 0x0005c9d8, + }, + { + 0x00020000, 0x00017122, 0x000187de, 0x00016eab, 0x00020000, 0x000224c3, 0x0002b00a, 0x000545a3, + 0x00010c76, 0x0000c18c, 0x0000addc, 0x0000d148, 0x0000e328, 0x000155af, 0x0001f00c, 0x000318d7, + 0x0000e838, 0x0000a76b, 0x0000b891, 0x0000cd15, 0x0000f126, 0x0001132d, 0x00018f7e, 0x00030fa1, + 0x00010207, 0x0000ba07, 0x0000c57c, 0x0000e3e0, 0x0000cce7, 0x000104cb, 0x00017a9d, 0x000366b9, + 0x00011a7c, 0x0000cba9, 0x0000e838, 0x00010207, 0x0000dd68, 0x000132a9, 0x0001bd34, 0x00039fe0, + 0x000145d4, 0x00010335, 0x0001132d, 0x0000e957, 0x000119cc, 0x00017b27, 0x00022672, 0x0004577e, + 0x0001b07b, 0x000110d4, 0x000121a2, 0x000141d2, 0x00018e57, 0x0001fafd, 0x000246b1, 0x000476fe, + 0x0002857a, 0x0001d15d, 0x000187d1, 0x0001c2e9, 0x00021237, 0x000223b1, 0x00031b20, 0x0005109d, + }, + { + 0x0001999a, 0x0001274e, 0x0001397f, 0x00012556, 0x0001999a, 0x0001b702, 0x0002266e, 0x000437b5, + 0x0000d6c4, 0x00009ad7, 0x00008b16, 0x0000a76d, 0x0000b5ba, 0x00011159, 0x00018cd7, 0x00027a46, + 0x0000b9c6, 0x000085f0, 0x000093a7, 0x0000a411, 0x0000c0eb, 0x0000dc24, 0x00013f98, 0x000272e7, + 0x0000ce6c, 0x000094d2, 0x00009dfd, 0x0000b64c, 0x0000a3ec, 0x0000d0a2, 0x00012ee4, 0x0002b894, + 0x0000e1fc, 0x0000a2ed, 0x0000b9c6, 0x0000ce6c, 0x0000b120, 0x0000f554, 0x00016429, 0x0002e64d, + 0x000104a9, 0x0000cf5e, 0x0000dc24, 0x0000baac, 0x0000e170, 0x00012f52, 0x0001b85b, 0x00037932, + 0x000159fc, 0x0000da43, 0x0000e7b4, 0x00010175, 0x00013eac, 0x00019597, 0x0001d227, 0x00039265, + 0x00020462, 0x0001744a, 0x00013974, 0x000168ba, 0x0001a82c, 0x0001b627, 0x00027c1a, 0x00040d4a, + }, + { + 0x00015555, 0x0000f616, 0x0001053f, 0x0000f472, 0x00015555, 0x00016dd7, 0x0001cab1, 0x000383c2, + 0x0000b2f9, 0x00008108, 0x000073e8, 0x00008b85, 0x00009770, 0x0000e3ca, 0x00014ab3, 0x00021090, + 0x00009ad0, 0x00006f9d, 0x00007b0c, 0x000088b8, 0x0000a0c4, 0x0000b773, 0x00010a54, 0x00020a6b, + 0x0000ac05, 0x00007c05, 0x000083a8, 
0x000097ea, 0x0000889a, 0x0000addd, 0x0000fc69, 0x0002447b, + 0x0000bc52, 0x000087c6, 0x00009ad0, 0x0000ac05, 0x0000939b, 0x0000cc71, 0x000128cd, 0x00026a95, + 0x0000d938, 0x0000acce, 0x0000b773, 0x00009b90, 0x0000bbdd, 0x0000fcc5, 0x00016ef6, 0x0002e4ff, + 0x00012052, 0x0000b5e2, 0x0000c116, 0x0000d68c, 0x0001098f, 0x000151fe, 0x00018476, 0x0002f9ff, + 0x0001ae52, 0x0001363e, 0x00010536, 0x00012c9b, 0x0001617a, 0x00016d21, 0x00021215, 0x00036069, + }, + { + 0x00010000, 0x0000b891, 0x0000c3ef, 0x0000b756, 0x00010000, 0x00011261, 0x00015805, 0x0002a2d1, + 0x0000863b, 0x000060c6, 0x000056ee, 0x000068a4, 0x00007194, 0x0000aad8, 0x0000f806, 0x00018c6c, + 0x0000741c, 0x000053b6, 0x00005c49, 0x0000668a, 0x00007893, 0x00008996, 0x0000c7bf, 0x000187d1, + 0x00008103, 0x00005d03, 0x000062be, 0x000071f0, 0x00006674, 0x00008265, 0x0000bd4e, 0x0001b35d, + 0x00008d3e, 0x000065d4, 0x0000741c, 0x00008103, 0x00006eb4, 0x00009954, 0x0000de9a, 0x0001cff0, + 0x0000a2ea, 0x0000819b, 0x00008996, 0x000074ac, 0x00008ce6, 0x0000bd94, 0x00011339, 0x00022bbf, + 0x0000d83e, 0x0000886a, 0x000090d1, 0x0000a0e9, 0x0000c72b, 0x0000fd7e, 0x00012359, 0x00023b7f, + 0x000142bd, 0x0000e8af, 0x0000c3e8, 0x0000e174, 0x0001091b, 0x000111d9, 0x00018d90, 0x0002884e, + }, + { + 0x0000aaab, 0x00007b0b, 0x0000829f, 0x00007a39, 0x0000aaab, 0x0000b6ec, 0x0000e559, 0x0001c1e1, + 0x0000597d, 0x00004084, 0x000039f4, 0x000045c3, 0x00004bb8, 0x000071e5, 0x0000a559, 0x00010848, + 0x00004d68, 0x000037ce, 0x00003d86, 0x0000445c, 0x00005062, 0x00005bba, 0x0000852a, 0x00010536, + 0x00005602, 0x00003e02, 0x000041d4, 0x00004bf5, 0x0000444d, 0x000056ee, 0x00007e34, 0x0001223e, + 0x00005e29, 0x000043e3, 0x00004d68, 0x00005602, 0x000049cd, 0x00006638, 0x00009467, 0x0001354b, + 0x00006c9c, 0x00005667, 0x00005bba, 0x00004dc8, 0x00005def, 0x00007e62, 0x0000b77b, 0x0001727f, + 0x00009029, 0x00005af1, 0x0000608b, 0x00006b46, 0x000084c8, 0x0000a8ff, 0x0000c23b, 0x00017cff, + 0x0000d729, 0x00009b1f, 0x0000829b, 0x0000964e, 
0x0000b0bd, 0x0000b690, 0x0001090b, 0x0001b034, + }, + { + 0x00007878, 0x000056db, 0x00005c34, 0x00005646, 0x00007878, 0x0000811f, 0x0000a1e4, 0x00013d90, + 0x00003f2b, 0x00002d8a, 0x000028e8, 0x0000313e, 0x00003573, 0x00005065, 0x000074b8, 0x0000ba8d, + 0x000036a4, 0x00002765, 0x00002b6d, 0x00003041, 0x000038be, 0x000040bf, 0x00005dff, 0x0000b862, + 0x00003cb6, 0x00002bc5, 0x00002e78, 0x0000359e, 0x00003036, 0x00003d5d, 0x00005916, 0x0000cce0, + 0x00004277, 0x00002feb, 0x000036a4, 0x00003cb6, 0x00003418, 0x00004828, 0x000068c1, 0x0000da53, + 0x00004caa, 0x00003cfd, 0x000040bf, 0x000036e7, 0x0000424e, 0x00005936, 0x00008184, 0x00010587, + 0x000065c3, 0x00004032, 0x00004426, 0x00004bb9, 0x00005dba, 0x0000774b, 0x0000891b, 0x00010cf0, + 0x000097e1, 0x00006d7f, 0x00005c31, 0x00006a19, 0x00007cc2, 0x000080de, 0x0000bb17, 0x00013116, + }, + { + 0x00005d17, 0x0000431d, 0x00004740, 0x000042ab, 0x00005d17, 0x000063c6, 0x00007d19, 0x0000f563, + 0x000030d0, 0x00002331, 0x00001f9c, 0x0000260d, 0x0000294d, 0x00003e20, 0x00005a31, 0x00009027, + 0x00002a39, 0x00001e71, 0x0000218f, 0x0000254a, 0x00002bd8, 0x00003208, 0x000048a2, 0x00008e7a, + 0x00002eea, 0x000021d3, 0x000023e8, 0x0000296e, 0x00002541, 0x00002f6b, 0x000044d7, 0x00009e50, + 0x0000335c, 0x00002507, 0x00002a39, 0x00002eea, 0x00002841, 0x000037c2, 0x000050f2, 0x0000a8b4, + 0x00003b3e, 0x00002f21, 0x00003208, 0x00002a6d, 0x0000333c, 0x000044f0, 0x00006415, 0x0000ca17, + 0x00004ea2, 0x0000319b, 0x000034a9, 0x00003a83, 0x0000486d, 0x00005c2e, 0x000069f2, 0x0000cfd1, + 0x0000755c, 0x0000549d, 0x0000473d, 0x000051fc, 0x00006067, 0x00006395, 0x00009091, 0x0000ebbf, + }, + { + 0x00004925, 0x000034bc, 0x000037fb, 0x00003462, 0x00004925, 0x00004e65, 0x0000624b, 0x0000c0ce, + 0x0000265a, 0x00001ba6, 0x000018d6, 0x00001de6, 0x00002073, 0x000030d0, 0x000046dd, 0x00007143, + 0x0000212d, 0x000017eb, 0x00001a5e, 0x00001d4c, 0x00002273, 0x00002750, 0x00003912, 0x00006ff2, + 0x000024dc, 0x00001a93, 0x00001c36, 0x0000208e, 0x00001d46, 
0x00002542, 0x00003616, 0x00007c64, + 0x0000285b, 0x00001d18, 0x0000212d, 0x000024dc, 0x00001fa1, 0x00002bcf, 0x00003f9a, 0x0000848e, + 0x00002e8c, 0x00002508, 0x00002750, 0x00002156, 0x00002842, 0x0000362a, 0x00004ea3, 0x00009ec9, + 0x00003dc8, 0x000026fa, 0x00002960, 0x00002df9, 0x000038e8, 0x0000486d, 0x0000533e, 0x0000a349, + 0x00005c36, 0x0000427b, 0x000037f9, 0x0000406a, 0x00004bbf, 0x00004e3e, 0x00007197, 0x0000b93b, + }, + { + 0x00003c3c, 0x00002b6d, 0x00002e1a, 0x00002b23, 0x00003c3c, 0x0000408f, 0x000050f2, 0x00009ec8, + 0x00001f95, 0x000016c5, 0x00001474, 0x0000189f, 0x00001ab9, 0x00002833, 0x00003a5c, 0x00005d47, + 0x00001b52, 0x000013b2, 0x000015b7, 0x00001821, 0x00001c5f, 0x00002060, 0x00002f00, 0x00005c31, + 0x00001e5b, 0x000015e3, 0x0000173c, 0x00001acf, 0x0000181b, 0x00001eae, 0x00002c8b, 0x00006670, + 0x0000213c, 0x000017f6, 0x00001b52, 0x00001e5b, 0x00001a0c, 0x00002414, 0x00003460, 0x00006d29, + 0x00002655, 0x00001e7f, 0x00002060, 0x00001b74, 0x00002127, 0x00002c9b, 0x000040c2, 0x000082c4, + 0x000032e1, 0x00002019, 0x00002213, 0x000025dc, 0x00002edd, 0x00003ba5, 0x0000448d, 0x00008678, + 0x00004bf0, 0x000036c0, 0x00002e19, 0x0000350c, 0x00003e61, 0x0000406f, 0x00005d8b, 0x0000988b, + }, + { + 0x00002e8c, 0x0000218f, 0x000023a0, 0x00002155, 0x00002e8c, 0x000031e3, 0x00003e8d, 0x00007ab2, + 0x00001868, 0x00001198, 0x00000fce, 0x00001307, 0x000014a7, 0x00001f10, 0x00002d18, 0x00004814, + 0x0000151c, 0x00000f38, 0x000010c7, 0x000012a5, 0x000015ec, 0x00001904, 0x00002451, 0x0000473d, + 0x00001775, 0x000010e9, 0x000011f4, 0x000014b7, 0x000012a1, 0x000017b5, 0x0000226b, 0x00004f28, + 0x000019ae, 0x00001284, 0x0000151c, 0x00001775, 0x00001421, 0x00001be1, 0x00002879, 0x0000545a, + 0x00001d9f, 0x00001790, 0x00001904, 0x00001536, 0x0000199e, 0x00002278, 0x0000320a, 0x0000650b, + 0x00002751, 0x000018cd, 0x00001a55, 0x00001d42, 0x00002436, 0x00002e17, 0x000034f9, 0x000067e9, + 0x00003aae, 0x00002a4e, 0x0000239f, 0x000028fe, 0x00003034, 0x000031ca, 
0x00004849, 0x000075e0, + }, +}; +static const double ifquantlevels8align = 0.0; +static const s32 ifimquantlevels8[DCT_QUANT_LEVELS][DCT_BLOCK_COEFFS] = { + { + 0x00010000, 0x00017946, 0x00016363, 0x000152a7, 0x00012000, 0x0000e248, 0x0000a486, 0x000053e0, + 0x0001a5a9, 0x000248dc, 0x000243ec, 0x000209ea, 0x0001bbda, 0x00015cbc, 0x0000f036, 0x00008095, + 0x0001b701, 0x000260eb, 0x00023d97, 0x00020437, 0x0001b701, 0x00016959, 0x0000f8e7, 0x00007ee4, + 0x00019de9, 0x00023e1b, 0x00021ccc, 0x0001e6b4, 0x0001b0b9, 0x000153fd, 0x0000ea30, 0x00007763, + 0x00017000, 0x0001fe6e, 0x0001e0d1, 0x0001b0b9, 0x00018000, 0x00012db5, 0x0000cfd2, 0x00006e5c, + 0x00012db5, 0x0001a27b, 0x00018a33, 0x0001718d, 0x000146d9, 0x000100ce, 0x0000b0e4, 0x00005a2d, + 0x0000d87a, 0x00014449, 0x00013178, 0x000112ea, 0x0000e9cc, 0x0000b7b1, 0x00008337, 0x000042e5, + 0x00007b9a, 0x0000ab71, 0x0000ad08, 0x00009bb9, 0x0000846f, 0x00006b85, 0x00004a10, 0x00002831, + }, + { + 0x00015555, 0x0001f708, 0x0001d9d9, 0x0001c389, 0x00018000, 0x00012db5, 0x0000db5d, 0x00006fd5, + 0x00023237, 0x00030bd0, 0x0003053b, 0x0002b7e3, 0x00024fce, 0x0001d0fa, 0x00014048, 0x0000ab71, + 0x00024957, 0x00032be4, 0x0002fcc9, 0x0002b04a, 0x00024957, 0x0001e1cc, 0x00014bde, 0x0000a92f, + 0x000227e1, 0x0002fd7a, 0x0002d110, 0x000288f1, 0x000240f7, 0x0001c551, 0x00013840, 0x00009f2f, + 0x0001eaab, 0x0002a893, 0x00028116, 0x000240f7, 0x00020000, 0x00019247, 0x00011518, 0x00009325, + 0x00019247, 0x00022df9, 0x00020d99, 0x0001ecbc, 0x0001b3cc, 0x00015668, 0x0000ebda, 0x0000783d, + 0x000120a3, 0x0001b061, 0x0001974b, 0x00016e8e, 0x000137bb, 0x0000f4ed, 0x0000aef4, 0x00005931, + 0x0000a4ce, 0x0000e497, 0x0000e6b5, 0x0000cfa2, 0x0000b093, 0x00008f5c, 0x000062bf, 0x00003597, + }, + { + 0x0001aaab, 0x000274cb, 0x0002504f, 0x0002346c, 0x0001e000, 0x00017922, 0x00011235, 0x00008bca, + 0x0002bec4, 0x0003cec4, 0x0003c689, 0x000365dc, 0x0002e3c1, 0x00024539, 0x0001905a, 0x0000d64d, + 0x0002dbad, 0x0003f6dd, 0x0003bbfb, 0x00035c5c, 
0x0002dbad, 0x00025a40, 0x00019ed6, 0x0000d37b, + 0x0002b1d9, 0x0003bcd8, 0x00038554, 0x00032b2d, 0x0002d134, 0x000236a5, 0x00018650, 0x0000c6fb, + 0x00026555, 0x000352b8, 0x0003215c, 0x0002d134, 0x00028000, 0x0001f6d8, 0x00015a5e, 0x0000b7ef, + 0x0001f6d8, 0x0002b977, 0x00029100, 0x000267eb, 0x000220c0, 0x0001ac02, 0x000126d1, 0x0000964c, + 0x000168cc, 0x00021c7a, 0x0001fd1e, 0x0001ca31, 0x000185a9, 0x00013228, 0x0000dab2, 0x00006f7d, + 0x0000ce01, 0x00011dbd, 0x00012062, 0x0001038a, 0x0000dcb8, 0x0000b333, 0x00007b6f, 0x000042fc, + }, + { + 0x00020000, 0x0002f28d, 0x0002c6c5, 0x0002a54e, 0x00024000, 0x0001c48f, 0x0001490c, 0x0000a7bf, + 0x00034b52, 0x000491b8, 0x000487d8, 0x000413d5, 0x000377b5, 0x0002b977, 0x0001e06c, 0x0001012a, + 0x00036e03, 0x0004c1d6, 0x00047b2d, 0x0004086e, 0x00036e03, 0x0002d2b3, 0x0001f1ce, 0x0000fdc7, + 0x00033bd1, 0x00047c37, 0x00043998, 0x0003cd69, 0x00036172, 0x0002a7fa, 0x0001d460, 0x0000eec7, + 0x0002e000, 0x0003fcdd, 0x0003c1a1, 0x00036172, 0x00030000, 0x00025b6a, 0x00019fa3, 0x0000dcb8, + 0x00025b6a, 0x000344f5, 0x00031466, 0x0002e31b, 0x00028db3, 0x0002019b, 0x000161c7, 0x0000b45b, + 0x0001b0f5, 0x00028892, 0x000262f1, 0x000225d5, 0x0001d398, 0x00016f63, 0x0001066f, 0x000085c9, + 0x0000f735, 0x000156e2, 0x00015a10, 0x00013772, 0x000108dd, 0x0000d70a, 0x0000941f, 0x00005062, + }, + { + 0x0002aaab, 0x0003ee11, 0x0003b3b2, 0x00038713, 0x00030000, 0x00025b6a, 0x0001b6bb, 0x0000dfaa, + 0x0004646d, 0x000617a0, 0x00060a75, 0x00056fc6, 0x00049f9b, 0x0003a1f4, 0x00028090, 0x000156e2, + 0x000492ae, 0x000657c8, 0x0005f991, 0x00056093, 0x000492ae, 0x0003c399, 0x000297bd, 0x0001525f, + 0x00044fc1, 0x0005faf4, 0x0005a220, 0x000511e1, 0x000481ed, 0x00038aa2, 0x00027080, 0x00013e5e, + 0x0003d555, 0x00055126, 0x0005022d, 0x000481ed, 0x00040000, 0x0003248d, 0x00022a2f, 0x0001264b, + 0x0003248d, 0x00045bf2, 0x00041b33, 0x0003d979, 0x00036799, 0x0002accf, 0x0001d7b5, 0x0000f079, + 0x00024147, 0x000360c3, 0x00032e96, 0x0002dd1c, 0x00026f75, 
0x0001e9d9, 0x00015de9, 0x0000b262, + 0x0001499c, 0x0001c92e, 0x0001cd6a, 0x00019f43, 0x00016127, 0x00011eb8, 0x0000c57f, 0x00006b2d, + }, + { + 0x00038000, 0x00052876, 0x0004dbd9, 0x0004a148, 0x0003f000, 0x000317fb, 0x00023fd5, 0x0001258f, + 0x0005c3cf, 0x0007ff02, 0x0007edba, 0x000722b4, 0x0006117c, 0x0004c491, 0x000348bd, 0x0001c209, + 0x00060085, 0x00085336, 0x0007d78f, 0x00070ec1, 0x00060085, 0x0004f0b9, 0x00036728, 0x0001bc1c, + 0x0005a8ae, 0x0007d960, 0x000764ca, 0x0006a777, 0x0005ea87, 0x0004a5f5, 0x000333a8, 0x0001a1db, + 0x00050800, 0x0006fa82, 0x000692da, 0x0005ea87, 0x00054000, 0x00041ff9, 0x0002d75e, 0x00018242, + 0x00041ff9, 0x0005b8ae, 0x000563b2, 0x00050d6e, 0x000477f9, 0x000382d0, 0x00026b1d, 0x00013b9f, + 0x0002f5ad, 0x00046f00, 0x00042d25, 0x0003c235, 0x0003324a, 0x000282ed, 0x0001cb42, 0x0000ea21, + 0x0001b09c, 0x0002580c, 0x00025d9b, 0x00022108, 0x0001cf83, 0x00017851, 0x00010336, 0x00008cac, + }, + { + 0x00040000, 0x0005e519, 0x00058d8a, 0x00054a9c, 0x00048000, 0x0003891f, 0x00029218, 0x00014f7e, + 0x000696a4, 0x00092370, 0x00090fb0, 0x000827aa, 0x0006ef69, 0x000572ee, 0x0003c0d8, 0x00020254, + 0x0006dc05, 0x000983ac, 0x0008f65a, 0x000810dd, 0x0006dc05, 0x0005a565, 0x0003e39b, 0x0001fb8e, + 0x000677a2, 0x0008f86e, 0x00087330, 0x00079ad1, 0x0006c2e4, 0x00054ff3, 0x0003a8c0, 0x0001dd8d, + 0x0005c000, 0x0007f9b9, 0x00078343, 0x0006c2e4, 0x00060000, 0x0004b6d4, 0x00033f47, 0x0001b970, + 0x0004b6d4, 0x000689eb, 0x000628cc, 0x0005c635, 0x00051b65, 0x00040337, 0x0002c38f, 0x000168b6, + 0x000361ea, 0x00051124, 0x0004c5e1, 0x00044baa, 0x0003a730, 0x0002dec6, 0x00020cdd, 0x00010b93, + 0x0001ee69, 0x0002adc5, 0x0002b41f, 0x00026ee5, 0x000211ba, 0x0001ae14, 0x0001283e, 0x0000a0c4, + }, + { + 0x00050000, 0x00075e60, 0x0006f0ed, 0x00069d43, 0x0005a000, 0x00046b67, 0x0003369e, 0x0001a35e, + 0x00083c4d, 0x000b6c4c, 0x000b539c, 0x000a3194, 0x0008ab44, 0x0006cfaa, 0x0004b10f, 0x000282e8, + 0x00089307, 0x000be497, 0x000b33f1, 0x000a1514, 0x00089307, 0x00070ebf, 
0x0004dc82, 0x00027a72, + 0x0008158b, 0x000b3689, 0x000a8ffc, 0x00098186, 0x0008739c, 0x0006a3f0, 0x000492f0, 0x000254f0, + 0x00073000, 0x0009f827, 0x00096413, 0x0008739c, 0x00078000, 0x0005e489, 0x00040f19, 0x000227cc, + 0x0005e489, 0x00082c66, 0x0007b2ff, 0x000737c2, 0x0006623f, 0x00050405, 0x00037473, 0x0001c2e3, + 0x00043a64, 0x0006556d, 0x0005f75a, 0x00055e94, 0x000490fc, 0x00039677, 0x00029015, 0x00014e78, + 0x00026a04, 0x00035936, 0x00036127, 0x00030a9e, 0x00029629, 0x00021999, 0x0001724e, 0x0000c8f5, + }, + { + 0x00060000, 0x0008d7a6, 0x00085450, 0x0007efea, 0x0006c000, 0x00054dae, 0x0003db24, 0x0001f73e, + 0x0009e1f6, 0x000db528, 0x000d9788, 0x000c3b7e, 0x000a671e, 0x00082c66, 0x0005a145, 0x0003037d, + 0x000a4a08, 0x000e4582, 0x000d7187, 0x000c194b, 0x000a4a08, 0x00087818, 0x0005d569, 0x0002f955, + 0x0009b373, 0x000d74a5, 0x000cacc8, 0x000b683a, 0x000a2455, 0x0007f7ed, 0x00057d20, 0x0002cc54, + 0x0008a000, 0x000bf696, 0x000b44e4, 0x000a2455, 0x00090000, 0x0007123e, 0x0004deea, 0x00029629, + 0x0007123e, 0x0009cee0, 0x00093d32, 0x0008a950, 0x0007a918, 0x000604d2, 0x00042556, 0x00021d11, + 0x000512df, 0x000799b6, 0x000728d2, 0x0006717f, 0x00057ac8, 0x00044e28, 0x0003134c, 0x0001915c, + 0x0002e59e, 0x000404a7, 0x00040e2f, 0x0003a657, 0x00031a97, 0x0002851e, 0x0001bc5d, 0x0000f126, + }, + { + 0x00080000, 0x000bca33, 0x000b1b15, 0x000a9538, 0x00090000, 0x0007123e, 0x00052430, 0x00029efd, + 0x000d2d48, 0x001246e0, 0x00121f5f, 0x00104f53, 0x000dded2, 0x000ae5dd, 0x000781b1, 0x000404a7, + 0x000db80b, 0x00130757, 0x0011ecb4, 0x001021b9, 0x000db80b, 0x000b4acb, 0x0007c736, 0x0003f71d, + 0x000cef44, 0x0011f0dc, 0x0010e661, 0x000f35a3, 0x000d85c7, 0x000a9fe7, 0x00075180, 0x0003bb1a, + 0x000b8000, 0x000ff372, 0x000f0686, 0x000d85c7, 0x000c0000, 0x00096da8, 0x00067e8e, 0x000372e1, + 0x00096da8, 0x000d13d6, 0x000c5198, 0x000b8c6a, 0x000a36cb, 0x0008066e, 0x0005871e, 0x0002d16b, + 0x0006c3d4, 0x000a2248, 0x00098bc3, 0x00089754, 0x00074e60, 0x0005bd8b, 0x000419bb, 
0x00021726, + 0x0003dcd3, 0x00055b8a, 0x0005683e, 0x0004ddc9, 0x00042374, 0x00035c28, 0x0002507c, 0x00014188, + }, + { + 0x000c0000, 0x0011af4c, 0x0010a89f, 0x000fdfd4, 0x000d8000, 0x000a9b5d, 0x0007b649, 0x0003ee7b, + 0x0013c3ec, 0x001b6a50, 0x001b2f0f, 0x001876fd, 0x0014ce3c, 0x001058cb, 0x000b4289, 0x000606fb, + 0x00149410, 0x001c8b03, 0x001ae30e, 0x00183296, 0x00149410, 0x0010f030, 0x000baad2, 0x0005f2ab, + 0x001366e6, 0x001ae949, 0x00195991, 0x0016d074, 0x001448ab, 0x000fefda, 0x000afa40, 0x000598a7, + 0x00114000, 0x0017ed2b, 0x001689c8, 0x001448ab, 0x00120000, 0x000e247c, 0x0009bdd5, 0x00052c51, + 0x000e247c, 0x00139dc1, 0x00127a63, 0x0011529f, 0x000f5230, 0x000c09a5, 0x00084aac, 0x00043a21, + 0x000a25be, 0x000f336d, 0x000e51a4, 0x000ce2fe, 0x000af590, 0x00089c51, 0x00062698, 0x000322b9, + 0x0005cb3c, 0x0008094e, 0x00081c5d, 0x00074cae, 0x0006352e, 0x00050a3b, 0x000378ba, 0x0001e24d, + }, + { + 0x00110000, 0x00190dac, 0x0017998c, 0x00167d16, 0x00132000, 0x000f06c3, 0x000aece7, 0x000591d9, + 0x001c0039, 0x0026d69c, 0x002682ab, 0x0022a891, 0x001d797f, 0x00172876, 0x000ff398, 0x000889e3, + 0x001d2717, 0x00286f9a, 0x002616ff, 0x002247aa, 0x001d2717, 0x0017feef, 0x00108754, 0x00086d1d, + 0x001b7c71, 0x00261fd3, 0x0023e98d, 0x002051fa, 0x001cbc47, 0x001693ca, 0x000f8d30, 0x0007ed98, + 0x00187000, 0x0021e552, 0x001feddc, 0x001cbc47, 0x00198000, 0x00140904, 0x000dccee, 0x0007541e, + 0x00140904, 0x001bca27, 0x001a2d62, 0x00188a62, 0x0015b46f, 0x00110daa, 0x000bbf1f, 0x0005fd04, + 0x000e6022, 0x001588da, 0x001448fe, 0x00124192, 0x000f868b, 0x000c32c8, 0x0008b6ad, 0x00047130, + 0x00083540, 0x000b6284, 0x000b7d84, 0x000a574b, 0x0008cb57, 0x000723d4, 0x0004eb08, 0x0002ab42, + }, + { + 0x00160000, 0x00206c0c, 0x001e8a79, 0x001d1a59, 0x0018c000, 0x0013722a, 0x000e2385, 0x00073537, + 0x00243c86, 0x003242e8, 0x0031d646, 0x002cda25, 0x002624c3, 0x001df820, 0x0014a4a7, 0x000b0ccc, + 0x0025ba1d, 0x00345430, 0x00314aef, 0x002c5cbe, 0x0025ba1d, 0x001f0dae, 0x001563d6, 0x000ae78e, 
+ 0x002391fb, 0x0031565c, 0x002e798a, 0x0029d380, 0x00252fe4, 0x001d37bb, 0x00142021, 0x000a4288, + 0x001fa000, 0x002bdd7a, 0x002951ef, 0x00252fe4, 0x00210000, 0x0019ed8d, 0x0011dc06, 0x00097bea, + 0x0019ed8d, 0x0023f68c, 0x0021e061, 0x001fc224, 0x001c16ae, 0x001611ae, 0x000f3391, 0x0007bfe7, + 0x00129a87, 0x001bde47, 0x001a4058, 0x0017a026, 0x00141787, 0x000fc93e, 0x000b46c1, 0x0005bfa8, + 0x000a9f44, 0x000ebbba, 0x000edeab, 0x000d61e9, 0x000b617f, 0x00093d6d, 0x00065d55, 0x00037437, + }, + { + 0x001c0000, 0x002943b2, 0x0026dec9, 0x00250a43, 0x001f8000, 0x0018bfd8, 0x0011fea9, 0x00092c75, + 0x002e1e7c, 0x003ff810, 0x003f6dce, 0x003915a3, 0x00308be1, 0x00262485, 0x001a45eb, 0x000e1049, + 0x00300425, 0x004299b2, 0x003ebc76, 0x00387608, 0x00300425, 0x002785c6, 0x001b393f, 0x000de0e4, + 0x002d456e, 0x003ecb00, 0x003b2652, 0x00353bba, 0x002f5439, 0x00252fa8, 0x00199d41, 0x000d0edc, + 0x00284000, 0x0037d40f, 0x003496d3, 0x002f5439, 0x002a0000, 0x0020ffcb, 0x0016baf1, 0x000c1213, + 0x0020ffcb, 0x002dc56d, 0x002b1d93, 0x00286b74, 0x0023bfc6, 0x001c1681, 0x001358e8, 0x0009dcf8, + 0x0017ad66, 0x002377fe, 0x0021692a, 0x001e11a5, 0x0019924f, 0x00141767, 0x000e5a0d, 0x00075104, + 0x000d84e2, 0x0012c062, 0x0012ecda, 0x00110840, 0x000e7c16, 0x000bc28a, 0x000819b2, 0x0004655d, + }, + { + 0x00220000, 0x00321b58, 0x002f3318, 0x002cfa2d, 0x00264000, 0x001e0d86, 0x0015d9ce, 0x000b23b2, + 0x00380072, 0x004dad38, 0x004d0556, 0x00455122, 0x003af2fe, 0x002e50eb, 0x001fe730, 0x001113c7, + 0x003a4e2d, 0x0050df33, 0x004c2dfd, 0x00448f54, 0x003a4e2d, 0x002ffddf, 0x00210ea8, 0x0010da39, + 0x0036f8e1, 0x004c3fa5, 0x0047d31b, 0x0040a3f5, 0x0039788e, 0x002d2795, 0x001f1a61, 0x000fdb2f, + 0x0030e000, 0x0043caa5, 0x003fdbb7, 0x0039788e, 0x00330000, 0x00281209, 0x001b99db, 0x000ea83b, + 0x00281209, 0x0037944d, 0x00345ac4, 0x003114c3, 0x002b68df, 0x00221b53, 0x00177e3e, 0x000bfa09, + 0x001cc044, 0x002b11b4, 0x002891fc, 0x00248324, 0x001f0d17, 0x0018658f, 0x00116d5a, 0x0008e260, + 0x00106a80, 
0x0016c509, 0x0016fb08, 0x0014ae97, 0x001196ae, 0x000e47a8, 0x0009d60f, 0x00055684, + }, + { + 0x002c0000, 0x0040d818, 0x003d14f2, 0x003a34b2, 0x00318000, 0x0026e454, 0x001c470a, 0x000e6a6e, + 0x0048790c, 0x006485d0, 0x0063ac8d, 0x0059b44a, 0x004c4986, 0x003bf03f, 0x0029494d, 0x00161998, + 0x004b743a, 0x0068a861, 0x006295de, 0x0058b97b, 0x004b743a, 0x003e1b5c, 0x002ac7ac, 0x0015cf1d, + 0x004723f6, 0x0062acb8, 0x005cf313, 0x0053a701, 0x004a5fc7, 0x003a6f75, 0x00284041, 0x00148510, + 0x003f4000, 0x0057baf3, 0x0052a3de, 0x004a5fc7, 0x00420000, 0x0033db1a, 0x0023b80d, 0x0012f7d4, + 0x0033db1a, 0x0047ed19, 0x0043c0c2, 0x003f8448, 0x00382d5c, 0x002c235d, 0x001e6723, 0x000f7fcf, + 0x0025350d, 0x0037bc8e, 0x003480b0, 0x002f404c, 0x00282f0e, 0x001f927d, 0x00168d83, 0x000b7f50, + 0x00153e87, 0x001d7775, 0x001dbd56, 0x001ac3d2, 0x0016c2ff, 0x00127ad9, 0x000cbaaa, 0x0006e86e, + }, +}; +static const double ifimquantlevels8align = 0.0; +static const s32 ifmquantlevels8[DCT_QUANT_LEVELS][DCT_BLOCK_COEFFS] = { + { + 0x00080000, 0x00056dac, 0x0005c344, 0x00060c29, 0x00071c72, 0x00090cfd, 0x000c72b4, 0x00186af2, + 0x0004db64, 0x0003806f, 0x00038811, 0x0003ec8c, 0x00049d38, 0x0005df69, 0x0008869c, 0x000fed79, + 0x0004aa44, 0x00035d04, 0x0003920d, 0x0003f7a3, 0x0004aa44, 0x0005aaeb, 0x00083a68, 0x001023de, + 0x0004f2ad, 0x00039139, 0x0003c979, 0x00043538, 0x0004bb9b, 0x00060614, 0x0008bec1, 0x00112779, + 0x000590b2, 0x00040326, 0x0004426a, 0x0004bb9b, 0x00055555, 0x0006c9bd, 0x0009dacf, 0x00128eb8, + 0x0006c9bd, 0x0004e4d7, 0x00053202, 0x00058ab7, 0x00064411, 0x0007f997, 0x000b93ee, 0x0016b5f5, + 0x000975e5, 0x000650c0, 0x0006b455, 0x00077316, 0x0008c27f, 0x000b2628, 0x000f9b9b, 0x001e9da9, + 0x001091b6, 0x000bf21b, 0x000bd609, 0x000d26cb, 0x000f76ee, 0x00130c39, 0x001ba72e, 0x0032f4b7, + }, + { + 0x00060000, 0x00041241, 0x00045273, 0x0004891f, 0x00055555, 0x0006c9bd, 0x00095607, 0x00125035, + 0x0003a48b, 0x0002a053, 0x0002a60d, 0x0002f169, 0x000375ea, 0x0004678e, 0x000664f5, 
0x000bf21b, + 0x00037fb3, 0x000285c3, 0x0002ad8a, 0x0002f9ba, 0x00037fb3, 0x00044030, 0x00062bce, 0x000c1ae6, + 0x0003b602, 0x0002aceb, 0x0002d71b, 0x000327ea, 0x00038cb4, 0x0004848f, 0x00068f10, 0x000cdd9a, + 0x00042c86, 0x0003025c, 0x000331d0, 0x00038cb4, 0x00040000, 0x0005174e, 0x0007641b, 0x000deb0a, + 0x0005174e, 0x0003aba1, 0x0003e582, 0x00042809, 0x0004b30d, 0x0005fb31, 0x0008aef3, 0x00110878, + 0x0007186c, 0x0004bc90, 0x00050740, 0x00059650, 0x000691df, 0x00085c9e, 0x000bb4b4, 0x0016f63f, + 0x000c6d49, 0x0008f594, 0x0008e087, 0x0009dd19, 0x000b9933, 0x000e492b, 0x0014bd62, 0x00263789, + }, + { + 0x0004cccd, 0x000341ce, 0x00037529, 0x0003a0e5, 0x00044444, 0x00056e31, 0x00077806, 0x000ea691, + 0x0002ea09, 0x000219dc, 0x00021e71, 0x00025aba, 0x0002c4bb, 0x0003860c, 0x00051d91, 0x00098e7c, + 0x0002cc8f, 0x0002049c, 0x0002246e, 0x00026162, 0x0002cc8f, 0x0003668d, 0x0004efd8, 0x0009af1f, + 0x0002f802, 0x000223ef, 0x000245af, 0x00028655, 0x0002d6f6, 0x00039d3f, 0x00053f40, 0x000a4ae2, + 0x000356d1, 0x0002684a, 0x00028e40, 0x0002d6f6, 0x00033333, 0x000412a5, 0x0005e9af, 0x000b226e, + 0x000412a5, 0x0002efb4, 0x00031e01, 0x0003533a, 0x0003c271, 0x0004c8f4, 0x0006f25c, 0x000da060, + 0x0005ad23, 0x0003ca0d, 0x000405cd, 0x00047840, 0x0005417f, 0x0006b07e, 0x00095d5d, 0x00125e99, + 0x0009f107, 0x00072add, 0x00071a05, 0x0007e414, 0x0009475c, 0x000b6dbc, 0x00109782, 0x001e92d4, + }, + { + 0x00040000, 0x0002b6d6, 0x0002e1a2, 0x00030615, 0x00038e39, 0x0004867e, 0x0006395a, 0x000c3579, + 0x00026db2, 0x0001c038, 0x0001c409, 0x0001f646, 0x00024e9c, 0x0002efb4, 0x0004434e, 0x0007f6bc, + 0x00025522, 0x0001ae82, 0x0001c906, 0x0001fbd2, 0x00025522, 0x0002d576, 0x00041d34, 0x000811ef, + 0x00027957, 0x0001c89d, 0x0001e4bc, 0x00021a9c, 0x00025dcd, 0x0003030a, 0x00045f60, 0x000893bc, + 0x0002c859, 0x00020193, 0x00022135, 0x00025dcd, 0x0002aaab, 0x000364df, 0x0004ed67, 0x0009475c, + 0x000364df, 0x0002726c, 0x00029901, 0x0002c55b, 0x00032209, 0x0003fccc, 0x0005c9f7, 0x000b5afb, + 
0x0004baf3, 0x00032860, 0x00035a2a, 0x0003b98b, 0x0004613f, 0x00059314, 0x0007cdce, 0x000f4ed5, + 0x000848db, 0x0005f90d, 0x0005eb04, 0x00069366, 0x0007bb77, 0x0009861d, 0x000dd397, 0x00197a5b, + }, + { + 0x00030000, 0x00020920, 0x0002293a, 0x0002448f, 0x0002aaab, 0x000364df, 0x0004ab04, 0x0009281b, + 0x0001d245, 0x0001502a, 0x00015306, 0x000178b4, 0x0001baf5, 0x000233c7, 0x0003327b, 0x0005f90d, + 0x0001bfd9, 0x000142e2, 0x000156c5, 0x00017cdd, 0x0001bfd9, 0x00022018, 0x000315e7, 0x00060d73, + 0x0001db01, 0x00015675, 0x00016b8d, 0x000193f5, 0x0001c65a, 0x00024248, 0x00034788, 0x00066ecd, + 0x00021643, 0x0001812e, 0x000198e8, 0x0001c65a, 0x00020000, 0x00028ba7, 0x0003b20d, 0x0006f585, + 0x00028ba7, 0x0001d5d1, 0x0001f2c1, 0x00021404, 0x00025986, 0x0002fd99, 0x00045779, 0x0008843c, + 0x00038c36, 0x00025e48, 0x000283a0, 0x0002cb28, 0x000348f0, 0x00042e4f, 0x0005da5a, 0x000b7b20, + 0x000636a4, 0x00047aca, 0x00047043, 0x0004ee8c, 0x0005cc99, 0x00072495, 0x000a5eb1, 0x00131bc5, + }, + { + 0x00024925, 0x00018d0d, 0x0001a581, 0x0001ba55, 0x00020821, 0x000295ff, 0x00038e7d, 0x0006f9fc, + 0x00016341, 0x00010020, 0x0001024e, 0x00011f03, 0x0001517e, 0x0001ad8c, 0x00026f9a, 0x00048cfe, + 0x00015538, 0x0000f601, 0x00010528, 0x0001222f, 0x00015538, 0x00019e8c, 0x000259d5, 0x00049c88, + 0x000169e8, 0x000104ec, 0x000114fe, 0x000133c7, 0x00015a2c, 0x0001b898, 0x00027fa5, 0x0004e6b5, + 0x0001970e, 0x00012579, 0x0001378c, 0x00015a2c, 0x00018618, 0x0001f07f, 0x0002d0cd, 0x00054d59, + 0x0001f07f, 0x000165f4, 0x00017c01, 0x00019559, 0x0001ca4e, 0x00024750, 0x00034ed6, 0x00067d22, + 0x0002b3f8, 0x0001cdee, 0x0001ea61, 0x000220e2, 0x000280b7, 0x00032f79, 0x0004759a, 0x0008bf55, + 0x0004bbeb, 0x000369be, 0x000361b9, 0x0003c1f1, 0x00046b20, 0x00057135, 0x0007e69f, 0x000e8f10, + }, + { + 0x00020000, 0x00015b6b, 0x000170d1, 0x0001830a, 0x0001c71c, 0x0002433f, 0x00031cad, 0x00061abc, + 0x000136d9, 0x0000e01c, 0x0000e204, 0x0000fb23, 0x0001274e, 0x000177da, 0x000221a7, 0x0003fb5e, + 0x00012a91, 
0x0000d741, 0x0000e483, 0x0000fde9, 0x00012a91, 0x00016abb, 0x00020e9a, 0x000408f7, + 0x00013cab, 0x0000e44e, 0x0000f25e, 0x00010d4e, 0x00012ee7, 0x00018185, 0x00022fb0, 0x000449de, + 0x0001642d, 0x000100c9, 0x0001109b, 0x00012ee7, 0x00015555, 0x0001b26f, 0x000276b4, 0x0004a3ae, + 0x0001b26f, 0x00013936, 0x00014c81, 0x000162ae, 0x00019104, 0x0001fe66, 0x0002e4fc, 0x0005ad7d, + 0x00025d79, 0x00019430, 0x0001ad15, 0x0001dcc5, 0x000230a0, 0x0002c98a, 0x0003e6e7, 0x0007a76a, + 0x0004246e, 0x0002fc87, 0x0002f582, 0x000349b3, 0x0003ddbc, 0x0004c30e, 0x0006e9cb, 0x000cbd2e, + }, + { + 0x0001999a, 0x000115ef, 0x0001270e, 0x000135a2, 0x00016c17, 0x0001cf66, 0x00027d57, 0x0004e230, + 0x0000f8ae, 0x0000b349, 0x0000b4d0, 0x0000c8e9, 0x0000ec3e, 0x00012caf, 0x0001b486, 0x00032f7f, + 0x0000eeda, 0x0000ac34, 0x0000b6cf, 0x0000cb21, 0x0000eeda, 0x0001222f, 0x0001a548, 0x00033a60, + 0x0000fd56, 0x0000b6a5, 0x0000c1e5, 0x0000d772, 0x0000f252, 0x0001346a, 0x0001bfc0, 0x00036e4b, + 0x00011cf0, 0x0000cd6e, 0x0000da15, 0x0000f252, 0x00011111, 0x00015b8c, 0x0001f890, 0x0003b625, + 0x00015b8c, 0x0000fa91, 0x00010a00, 0x00011bbe, 0x000140d0, 0x00019851, 0x000250c9, 0x00048acb, + 0x0001e461, 0x0001435a, 0x00015744, 0x00017d6b, 0x0001c080, 0x00023ad5, 0x00031f1f, 0x00061f88, + 0x00035058, 0x0002639f, 0x00025e02, 0x0002a15c, 0x000317c9, 0x0003cf3f, 0x000587d6, 0x000a30f1, + }, + { + 0x00015555, 0x0000e79d, 0x0000f5e1, 0x00010207, 0x00012f68, 0x0001822a, 0x0002131e, 0x000411d3, + 0x0000cf3b, 0x00009568, 0x000096ae, 0x0000a76d, 0x0000c4df, 0x0000fa91, 0x00016bc5, 0x0002a794, + 0x0000c70b, 0x00008f81, 0x00009857, 0x0000a946, 0x0000c70b, 0x0000f1d2, 0x00015f11, 0x0002b0a5, + 0x0000d31d, 0x00009834, 0x0000a194, 0x0000b389, 0x0000c9ef, 0x00010103, 0x00017520, 0x0002dbe9, + 0x0000ed73, 0x0000ab31, 0x0000b5bc, 0x0000c9ef, 0x0000e38e, 0x000121a0, 0x0001a478, 0x000317c9, + 0x000121a0, 0x0000d0cf, 0x0000ddab, 0x0000ec74, 0x00010b58, 0x00015444, 0x0001edfd, 0x0003c8fe, + 0x000193a6, 0x00010d75, 
0x00011e0e, 0x00013dd9, 0x000175c0, 0x0001dbb1, 0x000299ef, 0x00051a47, + 0x0002c2f4, 0x0001fdaf, 0x0001f901, 0x00023122, 0x000293d2, 0x00032cb4, 0x00049bdd, 0x00087e1e, + }, + { + 0x00010000, 0x0000adb5, 0x0000b869, 0x0000c185, 0x0000e38e, 0x000121a0, 0x00018e57, 0x00030d5e, + 0x00009b6c, 0x0000700e, 0x00007102, 0x00007d91, 0x000093a7, 0x0000bbed, 0x000110d4, 0x0001fdaf, + 0x00009548, 0x00006ba1, 0x00007242, 0x00007ef4, 0x00009548, 0x0000b55d, 0x0001074d, 0x0002047c, + 0x00009e56, 0x00007227, 0x0000792f, 0x000086a7, 0x00009773, 0x0000c0c3, 0x000117d8, 0x000224ef, + 0x0000b216, 0x00008065, 0x0000884d, 0x00009773, 0x0000aaab, 0x0000d938, 0x00013b5a, 0x000251d7, + 0x0000d938, 0x00009c9b, 0x0000a640, 0x0000b157, 0x0000c882, 0x0000ff33, 0x0001727e, 0x0002d6bf, + 0x00012ebd, 0x0000ca18, 0x0000d68b, 0x0000ee63, 0x00011850, 0x000164c5, 0x0001f373, 0x0003d3b5, + 0x00021237, 0x00017e43, 0x00017ac1, 0x0001a4d9, 0x0001eede, 0x00026187, 0x000374e6, 0x00065e97, + }, + { + 0x0000aaab, 0x000073ce, 0x00007af0, 0x00008103, 0x000097b4, 0x0000c115, 0x0001098f, 0x000208e9, + 0x0000679e, 0x00004ab4, 0x00004b57, 0x000053b6, 0x0000626f, 0x00007d49, 0x0000b5e2, 0x000153ca, + 0x00006386, 0x000047c0, 0x00004c2c, 0x000054a3, 0x00006386, 0x000078e9, 0x0000af89, 0x00015852, + 0x0000698e, 0x00004c1a, 0x000050ca, 0x000059c5, 0x000064f8, 0x00008082, 0x0000ba90, 0x00016df5, + 0x000076ba, 0x00005598, 0x00005ade, 0x000064f8, 0x000071c7, 0x000090d0, 0x0000d23c, 0x00018be5, + 0x000090d0, 0x00006867, 0x00006ed6, 0x0000763a, 0x000085ac, 0x0000aa22, 0x0000f6ff, 0x0001e47f, + 0x0000c9d3, 0x000086bb, 0x00008f07, 0x00009eec, 0x0000bae0, 0x0000edd9, 0x00014cf8, 0x00028d23, + 0x0001617a, 0x0000fed8, 0x0000fc81, 0x00011891, 0x000149e9, 0x0001965a, 0x00024dee, 0x00043f0f, + }, + { + 0x00007878, 0x000051bf, 0x000056c8, 0x00005b11, 0x00006b16, 0x0000884b, 0x0000bb74, 0x00016fb4, + 0x00004924, 0x000034bb, 0x0000352e, 0x00003b17, 0x0000457c, 0x00005870, 0x00008064, 0x0000efda, + 0x00004640, 0x000032a6, 0x000035c5, 
0x00003bbe, 0x00004640, 0x00005559, 0x00007be8, 0x0000f30d, + 0x00004a83, 0x000035b8, 0x00003907, 0x00003f5e, 0x00004745, 0x00005ab6, 0x000083b1, 0x00010252, + 0x000053ce, 0x00003c6c, 0x00004024, 0x00004745, 0x00005050, 0x00006638, 0x00009467, 0x00011774, + 0x00006638, 0x000049b2, 0x00004e3c, 0x00005374, 0x00005e5b, 0x00007818, 0x0000ae59, 0x000155ff, + 0x00008e77, 0x00005f1a, 0x000064f6, 0x0000702e, 0x000083e9, 0x0000a7e4, 0x0000eb09, 0x0001cd0a, + 0x0000f983, 0x0000b3e3, 0x0000b23d, 0x0000c60c, 0x0000e8e1, 0x00011ed6, 0x0001a06c, 0x0002ff56, + }, + { + 0x00005d17, 0x00003f2b, 0x0000430f, 0x0000465f, 0x000052bf, 0x00006951, 0x000090da, 0x00011c22, + 0x00003885, 0x000028bf, 0x00002918, 0x00002da9, 0x000035b1, 0x00004456, 0x00006336, 0x0000b957, + 0x00003649, 0x00002723, 0x0000298c, 0x00002e2a, 0x00003649, 0x000041f3, 0x00005fbf, 0x0000bbd0, + 0x00003994, 0x00002983, 0x00002c11, 0x000030f7, 0x00003713, 0x00004618, 0x000065c3, 0x0000c79d, + 0x000040c2, 0x00002eb0, 0x00003190, 0x00003713, 0x00003e10, 0x00004efd, 0x000072ac, 0x0000d7f1, + 0x00004efd, 0x000038f3, 0x00003c74, 0x0000407d, 0x000048ea, 0x00005ccd, 0x000086b9, 0x00010845, + 0x00006e16, 0x0000497d, 0x00004e04, 0x000056b0, 0x000065ee, 0x000081bc, 0x0000b59e, 0x00016442, + 0x0000c0ce, 0x00008b01, 0x000089bb, 0x00009909, 0x0000b3f4, 0x0000dda6, 0x000141c8, 0x000250f1, + }, + { + 0x00004925, 0x000031a2, 0x000034b0, 0x0000374b, 0x00004104, 0x000052c0, 0x000071d0, 0x0000df3f, + 0x00002c68, 0x00002004, 0x0000204a, 0x000023e0, 0x00002a30, 0x000035b1, 0x00004df3, 0x000091a0, + 0x00002aa7, 0x00001ec0, 0x000020a5, 0x00002446, 0x00002aa7, 0x000033d2, 0x00004b3b, 0x00009391, + 0x00002d3d, 0x0000209d, 0x000022a0, 0x00002679, 0x00002b46, 0x00003713, 0x00004ff5, 0x00009cd7, + 0x000032e2, 0x000024af, 0x000026f2, 0x00002b46, 0x000030c3, 0x00003e10, 0x00005a1a, 0x0000a9ab, + 0x00003e10, 0x00002cbf, 0x00002f80, 0x000032ab, 0x0000394a, 0x000048ea, 0x000069db, 0x0000cfa4, + 0x0000567f, 0x000039be, 0x00003d4c, 0x0000441c, 
0x00005017, 0x000065ef, 0x00008eb3, 0x000117eb, + 0x0000977d, 0x00006d38, 0x00006c37, 0x0000783e, 0x00008d64, 0x0000ae27, 0x0000fcd4, 0x0001d1e2, + }, + { + 0x00003c3c, 0x000028df, 0x00002b64, 0x00002d89, 0x0000358b, 0x00004426, 0x00005dba, 0x0000b7da, + 0x00002492, 0x00001a5e, 0x00001a97, 0x00001d8c, 0x000022be, 0x00002c38, 0x00004032, 0x000077ed, + 0x00002320, 0x00001953, 0x00001ae2, 0x00001ddf, 0x00002320, 0x00002aad, 0x00003df4, 0x00007987, + 0x00002541, 0x00001adc, 0x00001c84, 0x00001faf, 0x000023a3, 0x00002d5b, 0x000041d8, 0x00008129, + 0x000029e7, 0x00001e36, 0x00002012, 0x000023a3, 0x00002828, 0x0000331c, 0x00004a33, 0x00008bba, + 0x0000331c, 0x000024d9, 0x0000271e, 0x000029ba, 0x00002f2e, 0x00003c0c, 0x0000572d, 0x0000ab00, + 0x0000473b, 0x00002f8d, 0x0000327b, 0x00003817, 0x000041f5, 0x000053f2, 0x00007585, 0x0000e685, + 0x00007cc2, 0x000059f2, 0x0000591e, 0x00006306, 0x00007470, 0x00008f6b, 0x0000d036, 0x00017fab, + }, + { + 0x00002e8c, 0x00001f95, 0x00002187, 0x0000232f, 0x00002960, 0x000034a9, 0x0000486d, 0x00008e11, + 0x00001c42, 0x00001460, 0x0000148c, 0x000016d5, 0x00001ad9, 0x0000222b, 0x0000319b, 0x00005cab, + 0x00001b24, 0x00001392, 0x000014c6, 0x00001715, 0x00001b24, 0x000020fa, 0x00002fdf, 0x00005de8, + 0x00001cca, 0x000014c1, 0x00001609, 0x0000187b, 0x00001b89, 0x0000230c, 0x000032e1, 0x000063ce, + 0x00002061, 0x00001758, 0x000018c8, 0x00001b89, 0x00001f08, 0x0000277e, 0x00003956, 0x00006bf9, + 0x0000277e, 0x00001c79, 0x00001e3a, 0x0000203e, 0x00002475, 0x00002e66, 0x0000435d, 0x00008423, + 0x0000370b, 0x000024bf, 0x00002702, 0x00002b58, 0x000032f7, 0x000040de, 0x00005acf, 0x0000b221, + 0x00006067, 0x00004581, 0x000044dd, 0x00004c85, 0x000059fa, 0x00006ed3, 0x0000a0e4, 0x00012879, + }, +}; +static const double ifmquantlevels8align = 0.0; +const double zigzagalign = 0.0; +/* Bink DCT and residue 8x8 block scan order. 
*/ +const u8 zigzag[DCT_BLOCK_COEFFS] RAD_ATTRIBUTE_ALIGN(32) = { + 0x00, 0x01, 0x08, 0x09, 0x02, 0x03, 0x0a, 0x0b, + 0x04, 0x05, 0x0c, 0x0d, 0x06, 0x07, 0x0e, 0x0f, + 0x14, 0x15, 0x1c, 0x1d, 0x16, 0x17, 0x1e, 0x1f, + 0x10, 0x11, 0x18, 0x19, 0x20, 0x21, 0x28, 0x29, + 0x22, 0x23, 0x2a, 0x2b, 0x30, 0x31, 0x38, 0x39, + 0x32, 0x33, 0x3a, 0x3b, 0x12, 0x13, 0x1a, 0x1b, + 0x24, 0x25, 0x2c, 0x2d, 0x26, 0x27, 0x2e, 0x2f, + 0x34, 0x35, 0x3c, 0x3d, 0x36, 0x37, 0x3e, 0x3f +}; + +const double patternsalign = 0.0; +/* Sixteen custom scan orders used by run and pattern-coded blocks. */ +const u8 patterns[DCT_PATTERN_BYTES] RAD_ATTRIBUTE_ALIGN(32) = { + 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, + 0x39, 0x31, 0x29, 0x21, 0x19, 0x11, 0x09, 0x01, + 0x02, 0x0a, 0x12, 0x1a, 0x22, 0x2a, 0x32, 0x3a, + 0x3b, 0x33, 0x2b, 0x23, 0x1b, 0x13, 0x0b, 0x03, + 0x04, 0x0c, 0x14, 0x1c, 0x24, 0x2c, 0x34, 0x3c, + 0x3d, 0x35, 0x2d, 0x25, 0x1d, 0x15, 0x0d, 0x05, + 0x06, 0x0e, 0x16, 0x1e, 0x26, 0x2e, 0x36, 0x3e, + 0x3f, 0x37, 0x2f, 0x27, 0x1f, 0x17, 0x0f, 0x07, + 0x3b, 0x3a, 0x39, 0x38, 0x30, 0x31, 0x32, 0x33, + 0x2b, 0x2a, 0x29, 0x28, 0x20, 0x21, 0x22, 0x23, + 0x1b, 0x1a, 0x19, 0x18, 0x10, 0x11, 0x12, 0x13, + 0x0b, 0x0a, 0x09, 0x08, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x0f, 0x0e, 0x0d, 0x0c, + 0x14, 0x15, 0x16, 0x17, 0x1f, 0x1e, 0x1d, 0x1c, + 0x24, 0x25, 0x26, 0x27, 0x2f, 0x2e, 0x2d, 0x2c, + 0x34, 0x35, 0x36, 0x37, 0x3f, 0x3e, 0x3d, 0x3c, + 0x19, 0x11, 0x12, 0x1a, 0x1b, 0x13, 0x0b, 0x03, + 0x02, 0x0a, 0x09, 0x01, 0x00, 0x08, 0x10, 0x18, + 0x20, 0x28, 0x30, 0x38, 0x39, 0x31, 0x29, 0x2a, + 0x32, 0x3a, 0x3b, 0x33, 0x2b, 0x23, 0x22, 0x21, + 0x1d, 0x15, 0x16, 0x1e, 0x1f, 0x17, 0x0f, 0x07, + 0x06, 0x0e, 0x0d, 0x05, 0x04, 0x0c, 0x14, 0x1c, + 0x24, 0x2c, 0x34, 0x3c, 0x3d, 0x35, 0x2d, 0x2e, + 0x36, 0x3e, 0x3f, 0x37, 0x2f, 0x27, 0x26, 0x25, + 0x03, 0x0b, 0x02, 0x0a, 0x01, 0x09, 0x00, 0x08, + 0x10, 0x18, 0x11, 0x19, 0x12, 0x1a, 0x13, 0x1b, + 0x23, 0x2b, 0x22, 0x2a, 0x21, 0x29, 0x20, 0x28, + 
0x30, 0x38, 0x31, 0x39, 0x32, 0x3a, 0x33, 0x3b, + 0x3c, 0x34, 0x3d, 0x35, 0x3e, 0x36, 0x3f, 0x37, + 0x2f, 0x27, 0x2e, 0x26, 0x2d, 0x25, 0x2c, 0x24, + 0x1c, 0x14, 0x1d, 0x15, 0x1e, 0x16, 0x1f, 0x17, + 0x0f, 0x07, 0x0e, 0x06, 0x0d, 0x05, 0x0c, 0x04, + 0x18, 0x19, 0x10, 0x11, 0x08, 0x09, 0x00, 0x01, + 0x02, 0x03, 0x0a, 0x0b, 0x12, 0x13, 0x1a, 0x1b, + 0x1c, 0x1d, 0x14, 0x15, 0x0c, 0x0d, 0x04, 0x05, + 0x06, 0x07, 0x0e, 0x0f, 0x16, 0x17, 0x1e, 0x1f, + 0x27, 0x26, 0x2f, 0x2e, 0x37, 0x36, 0x3f, 0x3e, + 0x3d, 0x3c, 0x35, 0x34, 0x2d, 0x2c, 0x25, 0x24, + 0x23, 0x22, 0x2b, 0x2a, 0x33, 0x32, 0x3b, 0x3a, + 0x39, 0x38, 0x31, 0x30, 0x29, 0x28, 0x21, 0x20, + 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1a, 0x1b, + 0x20, 0x21, 0x22, 0x23, 0x28, 0x29, 0x2a, 0x2b, + 0x30, 0x31, 0x32, 0x33, 0x38, 0x39, 0x3a, 0x3b, + 0x04, 0x05, 0x06, 0x07, 0x0c, 0x0d, 0x0e, 0x0f, + 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, + 0x24, 0x25, 0x26, 0x27, 0x2c, 0x2d, 0x2e, 0x2f, + 0x34, 0x35, 0x36, 0x37, 0x3c, 0x3d, 0x3e, 0x3f, + 0x06, 0x07, 0x0f, 0x0e, 0x0d, 0x05, 0x0c, 0x04, + 0x03, 0x0b, 0x02, 0x0a, 0x09, 0x01, 0x00, 0x08, + 0x10, 0x18, 0x11, 0x19, 0x12, 0x1a, 0x13, 0x1b, + 0x14, 0x1c, 0x15, 0x1d, 0x16, 0x1e, 0x17, 0x1f, + 0x27, 0x2f, 0x26, 0x2e, 0x25, 0x2d, 0x24, 0x2c, + 0x23, 0x2b, 0x22, 0x2a, 0x21, 0x29, 0x20, 0x28, + 0x31, 0x30, 0x38, 0x39, 0x3a, 0x32, 0x3b, 0x33, + 0x3c, 0x34, 0x3d, 0x35, 0x36, 0x37, 0x3f, 0x3e, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, + 0x00, 0x08, 0x09, 0x01, 0x02, 0x03, 0x0b, 0x0a, + 0x12, 0x13, 0x1b, 0x1a, 0x19, 0x11, 0x10, 0x18, + 0x20, 0x28, 0x29, 0x21, 0x22, 0x23, 0x2b, 0x2a, + 
0x32, 0x31, 0x30, 0x38, 0x39, 0x3a, 0x3b, 0x33, + 0x34, 0x3c, 0x3d, 0x3e, 0x3f, 0x37, 0x36, 0x35, + 0x2d, 0x2c, 0x24, 0x25, 0x26, 0x2e, 0x2f, 0x27, + 0x1f, 0x17, 0x16, 0x1e, 0x1d, 0x1c, 0x14, 0x15, + 0x0d, 0x0c, 0x04, 0x05, 0x06, 0x0e, 0x0f, 0x07, + 0x18, 0x19, 0x10, 0x11, 0x08, 0x09, 0x00, 0x01, + 0x02, 0x03, 0x0a, 0x0b, 0x12, 0x13, 0x1a, 0x1b, + 0x1c, 0x1d, 0x14, 0x15, 0x0c, 0x0d, 0x04, 0x05, + 0x06, 0x07, 0x0e, 0x0f, 0x16, 0x17, 0x1e, 0x1f, + 0x26, 0x27, 0x2e, 0x2f, 0x36, 0x37, 0x3e, 0x3f, + 0x3c, 0x3d, 0x34, 0x35, 0x2c, 0x2d, 0x24, 0x25, + 0x22, 0x23, 0x2a, 0x2b, 0x32, 0x33, 0x3a, 0x3b, + 0x38, 0x39, 0x30, 0x31, 0x28, 0x29, 0x20, 0x21, + 0x00, 0x08, 0x01, 0x09, 0x02, 0x0a, 0x03, 0x0b, + 0x13, 0x1b, 0x12, 0x1a, 0x11, 0x19, 0x10, 0x18, + 0x20, 0x28, 0x21, 0x29, 0x22, 0x2a, 0x23, 0x2b, + 0x33, 0x3b, 0x32, 0x3a, 0x31, 0x39, 0x30, 0x38, + 0x3c, 0x34, 0x3d, 0x35, 0x3e, 0x36, 0x3f, 0x37, + 0x2f, 0x27, 0x2e, 0x26, 0x2d, 0x25, 0x2c, 0x24, + 0x1f, 0x17, 0x1e, 0x16, 0x1d, 0x15, 0x1c, 0x14, + 0x0c, 0x04, 0x0d, 0x05, 0x0e, 0x06, 0x0f, 0x07, + 0x00, 0x08, 0x10, 0x18, 0x19, 0x1a, 0x1b, 0x13, + 0x0b, 0x03, 0x02, 0x01, 0x09, 0x11, 0x12, 0x0a, + 0x04, 0x0c, 0x14, 0x1c, 0x1d, 0x1e, 0x1f, 0x17, + 0x0f, 0x07, 0x06, 0x05, 0x0d, 0x15, 0x16, 0x0e, + 0x24, 0x2c, 0x34, 0x3c, 0x3d, 0x3e, 0x3f, 0x37, + 0x2f, 0x27, 0x26, 0x25, 0x2d, 0x35, 0x36, 0x2e, + 0x20, 0x28, 0x30, 0x38, 0x39, 0x3a, 0x3b, 0x33, + 0x2b, 0x23, 0x22, 0x21, 0x29, 0x31, 0x32, 0x2a, + 0x00, 0x08, 0x09, 0x01, 0x02, 0x03, 0x0b, 0x0a, + 0x13, 0x1b, 0x1a, 0x12, 0x11, 0x10, 0x18, 0x19, + 0x21, 0x20, 0x28, 0x29, 0x2a, 0x22, 0x23, 0x2b, + 0x33, 0x3b, 0x3a, 0x32, 0x31, 0x39, 0x38, 0x30, + 0x34, 0x3c, 0x3d, 0x35, 0x36, 0x3e, 0x3f, 0x37, + 0x2f, 0x27, 0x26, 0x2e, 0x2d, 0x2c, 0x24, 0x25, + 0x1d, 0x1c, 0x14, 0x15, 0x16, 0x1e, 0x1f, 0x17, + 0x0e, 0x0f, 0x07, 0x06, 0x05, 0x0d, 0x0c, 0x04, + 0x18, 0x10, 0x08, 0x00, 0x01, 0x02, 0x03, 0x0b, + 0x13, 0x1b, 0x1a, 0x19, 0x11, 0x0a, 0x09, 0x12, + 0x1c, 0x14, 0x0c, 0x04, 0x05, 0x06, 0x07, 0x0f, + 
0x17, 0x1f, 0x1e, 0x1d, 0x15, 0x0e, 0x0d, 0x16, + 0x3c, 0x34, 0x2c, 0x24, 0x25, 0x26, 0x27, 0x2f, + 0x37, 0x3f, 0x3e, 0x3d, 0x35, 0x2e, 0x2d, 0x36, + 0x38, 0x30, 0x28, 0x20, 0x21, 0x22, 0x23, 0x2b, + 0x33, 0x3b, 0x3a, 0x39, 0x31, 0x2a, 0x29, 0x32, + 0x00, 0x08, 0x09, 0x01, 0x02, 0x0a, 0x12, 0x11, + 0x10, 0x18, 0x19, 0x1a, 0x1b, 0x13, 0x0b, 0x03, + 0x07, 0x06, 0x0e, 0x0f, 0x17, 0x16, 0x15, 0x0d, + 0x05, 0x04, 0x0c, 0x14, 0x1c, 0x1d, 0x1e, 0x1f, + 0x3f, 0x3e, 0x36, 0x37, 0x2f, 0x2e, 0x2d, 0x35, + 0x3d, 0x3c, 0x34, 0x2c, 0x24, 0x25, 0x26, 0x27, + 0x38, 0x30, 0x31, 0x39, 0x3a, 0x32, 0x2a, 0x29, + 0x28, 0x20, 0x21, 0x22, 0x23, 0x2b, 0x33, 0x3b, + 0x00, 0x01, 0x08, 0x09, 0x10, 0x11, 0x18, 0x19, + 0x20, 0x21, 0x28, 0x29, 0x30, 0x31, 0x38, 0x39, + 0x3a, 0x3b, 0x32, 0x33, 0x2a, 0x2b, 0x22, 0x23, + 0x1a, 0x1b, 0x12, 0x13, 0x0a, 0x0b, 0x02, 0x03, + 0x04, 0x05, 0x0c, 0x0d, 0x14, 0x15, 0x1c, 0x1d, + 0x24, 0x25, 0x2c, 0x2d, 0x34, 0x35, 0x3c, 0x3d, + 0x3e, 0x3f, 0x36, 0x37, 0x2e, 0x2f, 0x26, 0x27, + 0x1e, 0x1f, 0x16, 0x17, 0x0e, 0x0f, 0x06, 0x07 +}; + /* Two-pass inverse 8x8 DCT. Pass one runs DCT_BLOCK_WIDTH times, stepping in, q and out by one element each time and filling the DCT_ROWn slots of temp from DCT_DEQUANT(in, q, DCT_ROWn). Pass two walks temp DCT_BLOCK_WIDTH entries at a time and stores eight DCT_BYTE_SAMPLE results per iteration into dest, stepping dest by pitch. */ +void fastidct8x8(u8 PTR4* dest, s32 pitch, s16 PTR4* in, const s32 PTR4* q) +{ + s32 temp[DCT_BLOCK_COEFFS]; + s32 PTR4* out; + u8 PTR4* d; + s32 i; + + out = temp; + /* First pass dequantizes columns into a transposed workspace.
*/ + for (i = DCT_BLOCK_WIDTH; i != 0; --i) { + if (DCT_AC_MASK(in) == 0) { /* Shortcut: only the DCT_ROW0 term is dequantized and it is copied to all eight outputs. */ + s32 dc = DCT_DEQUANT(in, q, DCT_ROW0); + + out[DCT_ROW0] = dc; + out[DCT_ROW7] = dc; + out[DCT_ROW6] = dc; + out[DCT_ROW5] = dc; + out[DCT_ROW4] = dc; + out[DCT_ROW3] = dc; + out[DCT_ROW2] = dc; + out[DCT_ROW1] = dc; + } else { + s32 a0 = DCT_DEQUANT(in, q, DCT_ROW2); + s32 a1 = DCT_DEQUANT(in, q, DCT_ROW6); + s32 a2 = DCT_DEQUANT(in, q, DCT_ROW1); + s32 a3 = DCT_DEQUANT(in, q, DCT_ROW3); + s32 a4 = DCT_DEQUANT(in, q, DCT_ROW5); + s32 a5 = DCT_DEQUANT(in, q, DCT_ROW7); + s32 b0 = a0 + a1; + s32 b1 = a4 - a3; + s32 b2 = a2 - a5; + s32 b3 = a2 + a5; + s32 b4 = a4 + a3; + s32 b5 = DCT_DEQUANT(in, q, DCT_ROW4); + s32 b6 = DCT_DEQUANT(in, q, DCT_ROW0); + s32 b7 = b6 - b5; + s32 b8 = DCT_FIXED_MUL(a0 - a1, DCT_FIX_1_414213562) - b0; + s32 b9 = DCT_FIXED_MUL(b1 + b2, DCT_FIX_1_847759065); + s32 c0 = b7 - b8; + s32 c1 = b7 + b8; + s32 c2 = b6 + b5; + s32 c3 = b3 + b4; + s32 c4 = c2 + b0; + s32 c5 = (DCT_FIXED_MUL(b1, DCT_FIX_NEG_2_613125930) + b9) - c3; + s32 c6 = c2 - b0; + s32 c7 = DCT_FIXED_MUL(b3 - b4, DCT_FIX_1_414213562) - c5; + s32 c8 = (DCT_FIXED_MUL(b2, DCT_FIX_1_082392200) - b9) + c7; + + out[DCT_ROW0] = c4 + c3; + out[DCT_ROW7] = c4 - c3; + out[DCT_ROW1] = c1 + c5; + out[DCT_ROW6] = c1 - c5; + out[DCT_ROW2] = c0 + c7; + out[DCT_ROW5] = c0 - c7; + out[DCT_ROW4] = c6 + c8; + out[DCT_ROW3] = c6 - c8; + } + + ++in; + ++q; + ++out; + } + + out = temp; + d = dest; + /* Second pass writes reconstructed rows to the output block.
*/ + for (i = DCT_BLOCK_WIDTH; i != 0; --i) { + s32 a0 = out[DCT_COL2] + out[DCT_COL6]; + s32 a1 = out[DCT_COL0] + out[DCT_COL4]; + s32 a2 = a1 - a0; + s32 a3 = out[DCT_COL0] - out[DCT_COL4]; + s32 a4 = a1 + a0; + s32 a5 = DCT_FIXED_MUL(out[DCT_COL2] - out[DCT_COL6], DCT_FIX_1_414213562) - a0; + s32 a6 = a3 - a5; + s32 a7 = a3 + a5; + s32 a8 = out[DCT_COL5] - out[DCT_COL3]; + s32 a9 = out[DCT_COL5] + out[DCT_COL3]; + s32 b0 = out[DCT_COL1] - out[DCT_COL7]; + s32 b1 = out[DCT_COL1] + out[DCT_COL7]; + s32 b2 = b1 + a9; + s32 b3 = DCT_FIXED_MUL(a8 + b0, DCT_FIX_1_847759065); + s32 b4 = (DCT_FIXED_MUL(a8, DCT_FIX_NEG_2_613125930) + b3) - b2; + s32 b5 = DCT_FIXED_MUL(b1 - a9, DCT_FIX_1_414213562) - b4; + s32 b6 = (DCT_FIXED_MUL(b0, DCT_FIX_1_082392200) - b3) + b5; + + d[DCT_COL0] = DCT_BYTE_SAMPLE(a4 + b2); + d[DCT_COL7] = DCT_BYTE_SAMPLE(a4 - b2); + d[DCT_COL1] = DCT_BYTE_SAMPLE(a7 + b4); + d[DCT_COL6] = DCT_BYTE_SAMPLE(a7 - b4); + d[DCT_COL2] = DCT_BYTE_SAMPLE(a6 + b5); + d[DCT_COL5] = DCT_BYTE_SAMPLE(a6 - b5); + d[DCT_COL4] = DCT_BYTE_SAMPLE(a2 + b6); + d[DCT_COL3] = DCT_BYTE_SAMPLE(a2 - b6); + + out += DCT_BLOCK_WIDTH; + d += pitch; + } +} + /* Same first pass as fastidct8x8, but the second pass packs each reconstructed row into four u32 values (p0..p3, each then ORed with itself shifted left by 8) and stores them to two destinations: d0 starts at dest, d1 at DCT_ADVANCE_U32_BYTES(dest, pitch), and both advance by pitch * 2 per iteration. */ +void fastidct8x8d(u32 PTR4* dest, s32 pitch, s16 PTR4* in, const s32 PTR4* q) +{ + s32 temp[DCT_BLOCK_COEFFS]; + s32 PTR4* out; + u32 PTR4* d0; + u32 PTR4* d1; + s32 i; + + out = temp; + /* First pass dequantizes columns into a transposed workspace.
*/ + for (i = DCT_BLOCK_WIDTH; i != 0; --i) { + if (DCT_AC_MASK(in) == 0) { + s32 dc = DCT_DEQUANT(in, q, DCT_ROW0); + + out[DCT_ROW0] = dc; + out[DCT_ROW7] = dc; + out[DCT_ROW6] = dc; + out[DCT_ROW5] = dc; + out[DCT_ROW4] = dc; + out[DCT_ROW3] = dc; + out[DCT_ROW2] = dc; + out[DCT_ROW1] = dc; + } else { + s32 a0 = DCT_DEQUANT(in, q, DCT_ROW2); + s32 a1 = DCT_DEQUANT(in, q, DCT_ROW6); + s32 a2 = DCT_DEQUANT(in, q, DCT_ROW1); + s32 a3 = DCT_DEQUANT(in, q, DCT_ROW3); + s32 a4 = DCT_DEQUANT(in, q, DCT_ROW5); + s32 a5 = DCT_DEQUANT(in, q, DCT_ROW7); + s32 b0 = a0 + a1; + s32 b1 = a4 - a3; + s32 b2 = a2 - a5; + s32 b3 = a2 + a5; + s32 b4 = a4 + a3; + s32 b5 = DCT_DEQUANT(in, q, DCT_ROW4); + s32 b6 = DCT_DEQUANT(in, q, DCT_ROW0); + s32 b7 = b6 - b5; + s32 b8 = DCT_FIXED_MUL(a0 - a1, DCT_FIX_1_414213562) - b0; + s32 b9 = DCT_FIXED_MUL(b1 + b2, DCT_FIX_1_847759065); + s32 c0 = b7 - b8; + s32 c1 = b7 + b8; + s32 c2 = b6 + b5; + s32 c3 = b3 + b4; + s32 c4 = c2 + b0; + s32 c5 = (DCT_FIXED_MUL(b1, DCT_FIX_NEG_2_613125930) + b9) - c3; + s32 c6 = c2 - b0; + s32 c7 = DCT_FIXED_MUL(b3 - b4, DCT_FIX_1_414213562) - c5; + s32 c8 = (DCT_FIXED_MUL(b2, DCT_FIX_1_082392200) - b9) + c7; + + out[DCT_ROW0] = c4 + c3; + out[DCT_ROW7] = c4 - c3; + out[DCT_ROW1] = c1 + c5; + out[DCT_ROW6] = c1 - c5; + out[DCT_ROW2] = c0 + c7; + out[DCT_ROW5] = c0 - c7; + out[DCT_ROW4] = c6 + c8; + out[DCT_ROW3] = c6 - c8; + } + + ++in; + ++q; + ++out; + } + + out = temp; + d0 = dest; + d1 = DCT_ADVANCE_U32_BYTES(dest, pitch); + /* The doubled variant expands each row into two adjacent output rows.
*/ + for (i = DCT_BLOCK_WIDTH; i != 0; --i) { + s32 a0 = out[DCT_COL2] + out[DCT_COL6]; + s32 a1 = out[DCT_COL0] + out[DCT_COL4]; + s32 a2 = a1 - a0; + s32 a3 = out[DCT_COL0] - out[DCT_COL4]; + s32 a4 = a1 + a0; + s32 a5 = DCT_FIXED_MUL(out[DCT_COL2] - out[DCT_COL6], DCT_FIX_1_414213562) - a0; + s32 a6 = a3 - a5; + s32 a7 = a3 + a5; + s32 a8 = out[DCT_COL5] - out[DCT_COL3]; + s32 a9 = out[DCT_COL5] + out[DCT_COL3]; + s32 b0 = out[DCT_COL1] - out[DCT_COL7]; + s32 b1 = out[DCT_COL1] + out[DCT_COL7]; + s32 b2 = b1 + a9; + s32 b3 = DCT_FIXED_MUL(a8 + b0, DCT_FIX_1_847759065); + s32 b4 = (DCT_FIXED_MUL(a8, DCT_FIX_NEG_2_613125930) + b3) - b2; + s32 b5 = DCT_FIXED_MUL(b1 - a9, DCT_FIX_1_414213562) - b4; + s32 b6 = (DCT_FIXED_MUL(b0, DCT_FIX_1_082392200) - b3) + b5; + u32 p0 = (((u32)(a4 + b2 + DCT_BYTE_ROUND) << 8) & DCT_BYTE_PAIR_MASK) | (((u32)(a7 + b4 + DCT_BYTE_ROUND) >> DCT_OUTPUT_SHIFT) & DCT_BYTE_MASK); + u32 p1 = (((u32)(a6 + b5 + DCT_BYTE_ROUND) << 8) & DCT_BYTE_PAIR_MASK) | (((u32)(a2 - b6 + DCT_BYTE_ROUND) >> DCT_OUTPUT_SHIFT) & DCT_BYTE_MASK); + u32 p2 = (((u32)(a6 - b5 + DCT_BYTE_ROUND) >> DCT_OUTPUT_SHIFT) & DCT_BYTE_MASK) | (((u32)(a2 + b6 + DCT_BYTE_ROUND) << 8) & DCT_BYTE_PAIR_MASK); + u32 p3 = (((u32)(a4 - b2 + DCT_BYTE_ROUND) >> DCT_OUTPUT_SHIFT) & DCT_BYTE_MASK) | (((u32)(a7 - b4 + DCT_BYTE_ROUND) << 8) & DCT_BYTE_PAIR_MASK); + + p0 |= p0 << 8; + p1 |= p1 << 8; + p2 |= p2 << 8; + p3 |= p3 << 8; + d0[DCT_COL0] = p0; + d0[DCT_COL1] = p1; + d0[DCT_COL2] = p2; + d0[DCT_COL3] = p3; + d1[DCT_COL0] = p0; + d1[DCT_COL1] = p1; + d1[DCT_COL2] = p2; + d1[DCT_COL3] = p3; + + out += DCT_BLOCK_WIDTH; + d0 = DCT_ADVANCE_U32_BYTES(d0, pitch * 2); + d1 = DCT_ADVANCE_U32_BYTES(d1, pitch * 2); + } +} + /* Public wrapper: fastidct8x8 with the ifimquantlevels8[quant] dequantization table. */ +void FastmIDCT8x8(u8 PTR4* dest, s32 pitch, s16 PTR4* data, u32 quant) +{ + fastidct8x8(dest, pitch, data, ifimquantlevels8[quant]); +} + /* Public wrapper: fastidct8x8 with the ifiquantlevels8[quant] dequantization table. */ +void FastIDCT8x8(u8 PTR4* dest, s32 pitch, s16 PTR4* data, u32 quant) +{ + fastidct8x8(dest, pitch, data, ifiquantlevels8[quant]);
+} + /* Public wrapper: fastidct8x8d with dest reinterpreted as u32 PTR4* and the ifiquantlevels8[quant] dequantization table. */ +void FastIDCT8x8d(u8 PTR4* dest, s32 pitch, s16 PTR4* data, u32 quant) +{ + fastidct8x8d((u32 PTR4*)dest, pitch, data, ifiquantlevels8[quant]); +} + /* Inverse 8x8 DCT using ifimquantlevels8[quant] whose DCT_BYTE_SAMPLE results are added to the corresponding motion bytes before being stored to dest. Per output row, motion advances by DCT_BLOCK_WIDTH and dest by pitch. */ +void FastmIDCT8x8WithMotion(u8 PTR4* dest, s32 pitch, s16 PTR4* in, u32 quant, u8 PTR4* motion) +{ + s32 temp[DCT_BLOCK_COEFFS]; + s32 PTR4* out; + const s32 PTR4* q; + s32 i; + + out = temp; + q = ifimquantlevels8[quant]; + /* First pass dequantizes motion-compensated columns into a workspace. */ + for (i = DCT_BLOCK_WIDTH; i != 0; --i) { + if (DCT_AC_MASK(in) == 0) { + s32 dc = DCT_DEQUANT(in, q, DCT_ROW0); + + out[DCT_ROW0] = dc; + out[DCT_ROW7] = dc; + out[DCT_ROW6] = dc; + out[DCT_ROW5] = dc; + out[DCT_ROW4] = dc; + out[DCT_ROW3] = dc; + out[DCT_ROW2] = dc; + out[DCT_ROW1] = dc; + } else { + s32 a0 = DCT_DEQUANT(in, q, DCT_ROW2); + s32 a1 = DCT_DEQUANT(in, q, DCT_ROW6); + s32 a2 = DCT_DEQUANT(in, q, DCT_ROW1); + s32 a3 = DCT_DEQUANT(in, q, DCT_ROW3); + s32 a4 = DCT_DEQUANT(in, q, DCT_ROW5); + s32 a5 = DCT_DEQUANT(in, q, DCT_ROW7); + s32 b0 = a0 + a1; + s32 b1 = a4 - a3; + s32 b2 = a2 - a5; + s32 b3 = a2 + a5; + s32 b4 = a4 + a3; + s32 b5 = DCT_DEQUANT(in, q, DCT_ROW4); + s32 b6 = DCT_DEQUANT(in, q, DCT_ROW0); + s32 b7 = b6 - b5; + s32 b8 = DCT_FIXED_MUL(a0 - a1, DCT_FIX_1_414213562) - b0; + s32 b9 = DCT_FIXED_MUL(b1 + b2, DCT_FIX_1_847759065); + s32 c0 = b7 - b8; + s32 c1 = b7 + b8; + s32 c2 = b6 + b5; + s32 c3 = b3 + b4; + s32 c4 = c2 + b0; + s32 c5 = (DCT_FIXED_MUL(b1, DCT_FIX_NEG_2_613125930) + b9) - c3; + s32 c6 = c2 - b0; + s32 c7 = DCT_FIXED_MUL(b3 - b4, DCT_FIX_1_414213562) - c5; + s32 c8 = (DCT_FIXED_MUL(b2, DCT_FIX_1_082392200) - b9) + c7; + + out[DCT_ROW0] = c4 + c3; + out[DCT_ROW7] = c4 - c3; + out[DCT_ROW1] = c1 + c5; + out[DCT_ROW6] = c1 - c5; + out[DCT_ROW2] = c0 + c7; + out[DCT_ROW5] = c0 - c7; + out[DCT_ROW4] = c6 + c8; + out[DCT_ROW3] = c6 - c8; + } + + ++in; + ++q; + ++out; + } + + out = temp; + /* Final pass adds the residual IDCT result to the prediction block.
*/ + for (i = DCT_BLOCK_WIDTH; i != 0; --i) { + s32 a0 = out[DCT_COL2] + out[DCT_COL6]; + s32 a1 = out[DCT_COL0] + out[DCT_COL4]; + s32 a2 = a1 - a0; + s32 a3 = out[DCT_COL0] - out[DCT_COL4]; + s32 a4 = a1 + a0; + s32 a5 = DCT_FIXED_MUL(out[DCT_COL2] - out[DCT_COL6], DCT_FIX_1_414213562) - a0; + s32 a6 = a3 - a5; + s32 a7 = a3 + a5; + s32 a8 = out[DCT_COL5] - out[DCT_COL3]; + s32 a9 = out[DCT_COL5] + out[DCT_COL3]; + s32 b0 = out[DCT_COL1] - out[DCT_COL7]; + s32 b1 = out[DCT_COL1] + out[DCT_COL7]; + s32 b2 = b1 + a9; + s32 b3 = DCT_FIXED_MUL(a8 + b0, DCT_FIX_1_847759065); + s32 b4 = (DCT_FIXED_MUL(a8, DCT_FIX_NEG_2_613125930) + b3) - b2; + s32 b5 = DCT_FIXED_MUL(b1 - a9, DCT_FIX_1_414213562) - b4; + s32 b6 = (DCT_FIXED_MUL(b0, DCT_FIX_1_082392200) - b3) + b5; + + dest[DCT_COL0] = motion[DCT_COL0] + DCT_BYTE_SAMPLE(a4 + b2); + dest[DCT_COL7] = motion[DCT_COL7] + DCT_BYTE_SAMPLE(a4 - b2); + dest[DCT_COL1] = motion[DCT_COL1] + DCT_BYTE_SAMPLE(a7 + b4); + dest[DCT_COL6] = motion[DCT_COL6] + DCT_BYTE_SAMPLE(a7 - b4); + dest[DCT_COL2] = motion[DCT_COL2] + DCT_BYTE_SAMPLE(a6 + b5); + dest[DCT_COL5] = motion[DCT_COL5] + DCT_BYTE_SAMPLE(a6 - b5); + dest[DCT_COL4] = motion[DCT_COL4] + DCT_BYTE_SAMPLE(a2 + b6); + dest[DCT_COL3] = motion[DCT_COL3] + DCT_BYTE_SAMPLE(a2 - b6); + + out += DCT_BLOCK_WIDTH; + motion += DCT_BLOCK_WIDTH; + dest += pitch; + } +} + /* Two-pass forward 8x8 DCT of u8 samples. The first loop turns each group of DCT_BLOCK_WIDTH input bytes (multiplied by DCT_INPUT_SCALE) into DCT_BLOCK_WIDTH values of out; the second loop transforms out in place along the DCT_ROWn indices, advancing out by one element per iteration. */ +void FastFDCT8x8(s32 PTR4* out, u8 PTR4* in) +{ + s32 PTR4* row; + s32 i; + + row = out; + /* Forward DCT scales unsigned pixels into fixed-point workspace rows.
*/ + for (i = DCT_BLOCK_WIDTH; i != 0; --i) { + s32 d0 = (in[DCT_COL0] - in[DCT_COL7]) * DCT_INPUT_SCALE; + s32 d1 = (in[DCT_COL1] - in[DCT_COL6]) * DCT_INPUT_SCALE; + s32 d2 = (in[DCT_COL2] - in[DCT_COL5]) * DCT_INPUT_SCALE; + s32 d3 = (in[DCT_COL3] - in[DCT_COL4]) * DCT_INPUT_SCALE; + s32 a0 = (in[DCT_COL0] + in[DCT_COL7]) * DCT_INPUT_SCALE; + s32 a1 = (in[DCT_COL1] + in[DCT_COL6]) * DCT_INPUT_SCALE; + s32 a2 = (in[DCT_COL2] + in[DCT_COL5]) * DCT_INPUT_SCALE; + s32 a3 = (in[DCT_COL3] + in[DCT_COL4]) * DCT_INPUT_SCALE; + s32 b6 = d3 + d2; + s32 b7 = d1 + d0; + s32 b1 = a0 - a3; + s32 b3 = a0 + a3; + s32 b4 = a1 + a2; + s32 c0 = DCT_FIXED_MUL(b6 - b7, DCT_FIX_0_382683433); + s32 c1 = DCT_FIXED_MUL(b6, DCT_FIX_0_541196100) + c0; + s32 c2 = DCT_FIXED_MUL(b7, DCT_FIX_1_306562965) + c0; + s32 c3 = DCT_FIXED_MUL(d2 + d1, DCT_FIX_0_707106781); + s32 c4 = d0 - c3; + s32 c5 = d0 + c3; + s32 c6 = DCT_FIXED_MUL(a1 - a2 + b1, DCT_FIX_0_707106781); + + row[DCT_COL0] = b3 + b4; + row[DCT_COL4] = b3 - b4; + row[DCT_COL2] = b1 + c6; + row[DCT_COL6] = b1 - c6; + row[DCT_COL5] = c4 + c1; + row[DCT_COL3] = c4 - c1; + row[DCT_COL1] = c5 + c2; + row[DCT_COL7] = c5 - c2; + + in += DCT_BLOCK_WIDTH; + row += DCT_BLOCK_WIDTH; + } + + /* Column pass completes the transform coefficients.
*/ + for (i = DCT_BLOCK_WIDTH; i != 0; --i) { + s32 a0 = out[DCT_ROW0] - out[DCT_ROW7]; + s32 a1 = out[DCT_ROW0] + out[DCT_ROW7]; + s32 a2 = out[DCT_ROW3] + out[DCT_ROW4]; + s32 a3 = a1 - a2; + s32 a4 = out[DCT_ROW2] + out[DCT_ROW5]; + s32 a5 = out[DCT_ROW2] - out[DCT_ROW5]; + s32 a6 = out[DCT_ROW1] + out[DCT_ROW6]; + s32 a7 = a1 + a2; + s32 a8 = out[DCT_ROW1] - out[DCT_ROW6]; + s32 b0 = a6 + a4; + s32 b1 = out[DCT_ROW3] - out[DCT_ROW4] + a5; + s32 b2 = a8 + a0; + s32 c0 = DCT_FIXED_MUL(b1 - b2, DCT_FIX_0_382683433); + s32 c1 = DCT_FIXED_MUL(b1, DCT_FIX_0_541196100) + c0; + s32 c2 = DCT_FIXED_MUL(b2, DCT_FIX_1_306562965) + c0; + s32 c3 = DCT_FIXED_MUL(a5 + a8, DCT_FIX_0_707106781); + s32 c4 = a0 - c3; + s32 c5 = a0 + c3; + s32 c6 = DCT_FIXED_MUL(a6 - a4 + a3, DCT_FIX_0_707106781); + + out[DCT_ROW0] = a7 + b0; + out[DCT_ROW4] = a7 - b0; + out[DCT_ROW2] = a3 + c6; + out[DCT_ROW6] = a3 - c6; + out[DCT_ROW5] = c4 + c1; + out[DCT_ROW3] = c4 - c1; + out[DCT_ROW1] = c5 + c2; + out[DCT_ROW7] = c5 - c2; + + ++out; + } +} + /* Two-pass forward 8x8 DCT of s8 samples: a row loop that reads signed input bytes scaled by DCT_INPUT_SCALE into out, followed by an in-place loop over the DCT_ROWn indices of out. */ +void FastFDCTs8x8(s32 PTR4* out, s8 PTR4* in) +{ + s32 PTR4* row; + s32 i; + + row = out; + /* Signed forward DCT uses already-centered residual samples.
*/ + for (i = DCT_BLOCK_WIDTH; i != 0; --i) { + s32 a0 = (in[DCT_COL2] + in[DCT_COL5]) * DCT_INPUT_SCALE; + s32 a1 = (in[DCT_COL3] + in[DCT_COL4]) * DCT_INPUT_SCALE; + s32 a2 = (in[DCT_COL0] + in[DCT_COL7]) * DCT_INPUT_SCALE; + s32 a3 = (in[DCT_COL1] + in[DCT_COL6]) * DCT_INPUT_SCALE; + s32 b3 = (in[DCT_COL0] - in[DCT_COL7]) * DCT_INPUT_SCALE; + s32 b4 = (in[DCT_COL1] - in[DCT_COL6]) * DCT_INPUT_SCALE; + s32 b5 = (in[DCT_COL2] - in[DCT_COL5]) * DCT_INPUT_SCALE; + s32 b0 = a2 - a1; + s32 b1 = a2 + a1; + s32 b2 = a3 + a0; + s32 b6 = b4 + b3; + s32 b7 = (in[DCT_COL3] - in[DCT_COL4]) * DCT_INPUT_SCALE + b5; + s32 c0; + s32 c1; + s32 c2; + s32 c3; + s32 c4; + s32 c5; + s32 c6; + + row[DCT_COL0] = b1 + b2; + row[DCT_COL4] = b1 - b2; + in += DCT_BLOCK_WIDTH; + c0 = DCT_FIXED_MUL(b7 - b6, DCT_FIX_0_382683433); + c1 = DCT_FIXED_MUL(b6, DCT_FIX_1_306562965) + c0; + c2 = DCT_FIXED_MUL(b7, DCT_FIX_0_541196100) + c0; + c3 = DCT_FIXED_MUL(b5 + b4, DCT_FIX_0_707106781); + c4 = b3 - c3; + c5 = b3 + c3; + c6 = DCT_FIXED_MUL(a3 - a0 + b0, DCT_FIX_0_707106781); + row[DCT_COL2] = b0 + c6; + row[DCT_COL6] = b0 - c6; + row[DCT_COL5] = c4 + c2; + row[DCT_COL3] = c4 - c2; + row[DCT_COL1] = c5 + c1; + row[DCT_COL7] = c5 - c1; + + row += DCT_BLOCK_WIDTH; + } + + /* Column pass completes the transform coefficients.
*/ + for (i = DCT_BLOCK_WIDTH; i != 0; --i) { + s32 a0 = out[DCT_ROW0] - out[DCT_ROW7]; + s32 a1 = out[DCT_ROW0] + out[DCT_ROW7]; + s32 a2 = out[DCT_ROW1] + out[DCT_ROW6]; + s32 a3 = out[DCT_ROW1] - out[DCT_ROW6]; + s32 a4 = out[DCT_ROW2] + out[DCT_ROW5]; + s32 a5 = out[DCT_ROW2] - out[DCT_ROW5]; + s32 a6 = a2 + a4; + s32 a7 = out[DCT_ROW3] + out[DCT_ROW4]; + s32 a8 = a1 - a7; + s32 a9 = a1 + a7; + s32 b0 = out[DCT_ROW3] - out[DCT_ROW4] + a5; + s32 b1 = a3 + a0; + s32 c0; + s32 c1; + s32 c2; + s32 c3; + s32 c4; + s32 c5; + s32 c6; + + out[DCT_ROW0] = a9 + a6; + out[DCT_ROW4] = a9 - a6; + c0 = DCT_FIXED_MUL(b0 - b1, DCT_FIX_0_382683433); + c1 = DCT_FIXED_MUL(b0, DCT_FIX_0_541196100) + c0; + c2 = DCT_FIXED_MUL(b1, DCT_FIX_1_306562965) + c0; + c3 = DCT_FIXED_MUL(a5 + a3, DCT_FIX_0_707106781); + c4 = a0 - c3; + c5 = a0 + c3; + c6 = DCT_FIXED_MUL(a2 - a4 + a8, DCT_FIX_0_707106781); + out[DCT_ROW2] = a8 + c6; + out[DCT_ROW6] = a8 - c6; + out[DCT_ROW5] = c4 + c1; + out[DCT_ROW3] = c4 - c1; + out[DCT_ROW1] = c5 + c2; + out[DCT_ROW7] = c5 - c2; + + ++out; + } +} diff --git a/src/bink/src/sdk/dct.h b/src/bink/src/sdk/dct.h new file mode 100644 index 000000000..8d2e5c5e5 --- /dev/null +++ b/src/bink/src/sdk/dct.h @@ -0,0 +1,16 @@ +#ifndef BINK_SDK_DCT_H +#define BINK_SDK_DCT_H + +#include "bink.h" + /* Byte tables of 8x8 block scan orders. */ +extern const u8 patterns[]; +extern const u8 zigzag[]; + /* Inverse transforms take a quant index that selects the dequantization table; forward transforms write s32 coefficients to out. */ +void FastIDCT8x8(u8 PTR4* dest, s32 pitch, s16 PTR4* data, u32 quant); +void FastIDCT8x8d(u8 PTR4* dest, s32 pitch, s16 PTR4* data, u32 quant); +void FastmIDCT8x8(u8 PTR4* dest, s32 pitch, s16 PTR4* data, u32 quant); +void FastmIDCT8x8WithMotion(u8 PTR4* dest, s32 pitch, s16 PTR4* in, u32 quant, u8 PTR4* motion); +void FastFDCT8x8(s32 PTR4* out, u8 PTR4* in); +void FastFDCTs8x8(s32 PTR4* out, s8 PTR4* in); + +#endif diff --git a/src/bink/src/sdk/decode/binkacd.c b/src/bink/src/sdk/decode/binkacd.c index e69de29bb..aeb85dc60 100644 --- a/src/bink/src/sdk/decode/binkacd.c +++ b/src/bink/src/sdk/decode/binkacd.c @@ -0,0
+1,483 @@ +#include "bink.h" +#include "binkacd.h" +#include "popmal.h" +#include "../fft.h" +#include "../varbits.h" + +#define MAX_TRANSFORM 4096 +#define WINDOWRATIO 16 +#define BINKAC_RATE_44K 44100 +#define BINKAC_RATE_22K 22050 +#define BINKAC_TRANSFORM_44K 2048 +#define BINKAC_TRANSFORM_22K 1024 +#define BINKAC_TRANSFORM_11K 512 +#define TOTBANDS 25 +#define RLEBITS 4 +#define MAXRLE (1 << RLEBITS) +#define FXPBITS 29 +#define FXP_SIGN_MASK 0x10000000 +#define FXP_VALUE_MASK ((1 << FXPBITS) - 1) +#define FXP_BIN_MASK 31 +#define FXP_VALUE_SHIFT 5 +#define BINKAC_INVERT_BINS 24 +#define BINKACNEWFORMAT_SKIP_BITS 2 +#define VQLENGTH 8 +#define BINKAC_S16_MAX 0x7fff +#define BINKAC_S16_MIN (-0x8000) +#define BINKAC_FIRST_COEFF 2 +#define BINKAC_BAND_LIMIT_SCALE 2 +#define BINKAC_THRESHOLD_BITS 8 +#define BINKAC_NYQUIST_DIVISOR 2 +#define BINKAC_TRANSFORM_HALF_DIVISOR 2 +#define BINKAC_QUANT_POWER_BASE 10.0 +#define BINKAC_QUANT_INDEX_SCALE 0.664f +#define BINKAC_QUANT_POWER_SCALE 0.10f +#define BINKAC_NYQUIST_ROUNDING 1 +#define BINKAC_FFT_WORK_EXTRA 2 +#define BINKAC_DCT_COEFF_BYTES_PER_SAMPLE 5 +#define BINKAC_RDFT_COEFF_TAIL_ADJUST 1 +#define BINKAC_TRANSFORM_ROOT_SCALE 2.0f +#define BINKAC_START_FRAME 1 +#define BINKAC_MONO_CHANNELS 1 +#define BINKAC_RSQRT_NEWTON_HALF 0.5 +#define BINKAC_RSQRT_NEWTON_THREE 3.0 +#define BINKAC_LOAD32(ptr) (*(const u32 PTR4*)(ptr)) +#define BINKAC_BAND_SAMPLE_LIMIT(band_limits, band) ((band_limits)[band] * BINKAC_BAND_LIMIT_SCALE) +#define BINKAC_RLE_SAMPLE_RUN(index) (bink_rlelens_snd[(index)] * VQLENGTH) +#define BINKAC_WINDOW_BYTES(buffer_size) ((buffer_size) / WINDOWRATIO) +#define BINKAC_OUTPUT_BYTES(buffer_size, window_size) ((buffer_size) - (window_size)) +#define BINKAC_SAMPLE_BYTES(samples) ((samples) * sizeof(s16)) +#define BINKAC_FFT_WORK_BYTES(transform_size_half, work) \ + (((u32)radfsqrt((f32)(transform_size_half)) + BINKAC_FFT_WORK_EXTRA) * sizeof(*(work))) +#define BINKAC_DCT_COEFF_BYTES(transform_size) \ + 
((transform_size) * BINKAC_DCT_COEFF_BYTES_PER_SAMPLE) +#define BINKAC_RDFT_COEFF_BYTES(transform_size_half, coeffs) \ + ((transform_size_half) * sizeof(*(coeffs)) - BINKAC_RDFT_COEFF_TAIL_ADJUST) +#define BINKAC_OVERLAP_BYTES(buffer_size) ((buffer_size) / BINKAC_TRANSFORM_HALF_DIVISOR) + +/* RLE code lengths, in VQLENGTH sample groups, for sparse audio coefficients. */ +static u8 bink_rlelens_snd[MAXRLE] = { + 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 32, 64 +}; + +/* Upper frequency for each Bink audio critical band. */ +static u32 bink_bandtopfreq[TOTBANDS] = { + 0, 100, 200, 300, 400, 510, 630, 770, 920, 1080, 1270, 1480, 1720, + 2000, 2320, 2700, 3150, 3700, 4400, 5300, 6400, 7700, 9500, 12000, 15500 +}; + +/* Reciprocals used by fxptof for the 29-bit packed fixed-point coefficients. */ +static f64 bink_invertbins[BINKAC_INVERT_BINS] = { + 1.0 / (1 << 23), 1.0 / (1 << 22), 1.0 / (1 << 21), 1.0 / (1 << 20), + 1.0 / (1 << 19), 1.0 / (1 << 18), 1.0 / (1 << 17), 1.0 / (1 << 16), + 1.0 / (1 << 15), 1.0 / (1 << 14), 1.0 / (1 << 13), 1.0 / (1 << 12), + 1.0 / (1 << 11), 1.0 / (1 << 10), 1.0 / (1 << 9), 1.0 / (1 << 8), + 1.0 / (1 << 7), 1.0 / (1 << 6), 1.0 / (1 << 5), 1.0 / (1 << 4), + 1.0 / (1 << 3), 1.0 / (1 << 2), 1.0 / (1 << 1), 1.0 / (1 << 0) +}; + /* Convert a packed fixed-point value to f32. The bits selected by FXP_BIN_MASK index bink_invertbins; the remaining bits, with FXP_SIGN_MASK cleared and shifted right by FXP_VALUE_SHIFT, are the magnitude that is multiplied by that table entry; a set FXP_SIGN_MASK bit negates the result. NOTE(review): FXP_BIN_MASK is 31 but bink_invertbins has BINKAC_INVERT_BINS (24) entries, so an index of 24..31 would read past the table - confirm the stream never produces one. */ +static f32 fxptof(u32 val) +{ + f32 f; + + f = (f32)((f64)((val & ~FXP_SIGN_MASK) >> FXP_VALUE_SHIFT) * + bink_invertbins[val & FXP_BIN_MASK]); + return (val & FXP_SIGN_MASK) ?
-f : f; +} + /* Clamp value to the s16 range: anything at or above BINKAC_S16_MAX returns BINKAC_S16_MAX, anything at or below BINKAC_S16_MIN returns BINKAC_S16_MIN. */ +static inline s16 clamp_to_s16(s32 value) +{ + s32 clamped; + + if (value < BINKAC_S16_MAX) { + if (value > BINKAC_S16_MIN) { + clamped = value; + } else { + clamped = BINKAC_S16_MIN; + } + } else { + clamped = BINKAC_S16_MAX; + } + + return clamped; +} + /* Convert count floats from src to consecutive s16 values in dest: each is multiplied by scale, converted to s32 and clamped. Does nothing when count is 0. */ +static void quanttos16s(s16 PTR4* dest, const f32 PTR4* src, f32 scale, u32 count) +{ + if (count != 0) { + do { + s16 PTR4* out = dest; + s32 value = (s32)(*src * scale); + + dest = out + 1; + *out = clamp_to_s16(value); + ++src; + --count; + } while (count != 0); + } +} + /* Interleave two planar channels of count floats each: for every n in [0, count) writes clamp(src[n] * scale) followed by clamp(src[n + count] * scale) to dest. Does nothing when count is 0. */ +static void quanttos16chans2(s16 PTR4* dest, const f32 PTR4* src, f32 scale, u32 count) +{ + u32 remaining; + u32 stride; + + remaining = count - 1; + if (count != 0) { + stride = count; + do { + s16 PTR4* out = dest++; + s32 value = (s32)(src[0] * scale); + + *out = clamp_to_s16(value); + out = dest++; + value = (s32)(src[stride] * scale); + *out = clamp_to_s16(value); + ++src; + } while (remaining-- != 0); + } +} + /* Return the next count bits from vb, taken from the low end of vb->bits. When fewer than count bits are buffered, a word is loaded from vb->cur, merged above the leftover bits, and its unused high bits become the new buffer. */ +static inline u32 read_bits(VARBITS PTR4* vb, u32 count) +{ + u32 value; + u32 bits = vb->bitlen; + + if (bits >= count) { + value = vb->bits & GetBitsLen(count); + vb->bitlen = bits - count; + vb->bits >>= count; + } else { + u32 word = BINKAC_LOAD32(vb->cur); + u32 temp = vb->bits | (word << bits); + + VARBITS_ADVANCE_CUR(vb->cur); + value = temp & GetBitsLen(count); + vb->bitlen = bits + BITSTYPELEN - count; + vb->bits = word >> (count - bits); + } + + return value; +} + /* Return the next single bit from vb, loading a new word from vb->cur when the buffer is empty. */ +static inline u32 read_bit(VARBITS PTR4* vb) +{ + u32 bitcount = vb->bitlen; + u32 value; + + if (bitcount != 0) { + value = vb->bits & 1; + vb->bitlen = bitcount - 1; + vb->bits >>= 1; + } else { + u32 word = BINKAC_LOAD32(vb->cur); + + VARBITS_ADVANCE_CUR(vb->cur); + value = word & 1; + vb->bitlen = BITSTYPELEN - 1; + vb->bits = word >> 1; + } + + return value; +} + /* Decode samples[BINKAC_FIRST_COEFF .. transform_size) from vb. Nonzero values are multiplied by the threshold entry of the current band; the band index advances as the sample index reaches BINKAC_BAND_SAMPLE_LIMIT(band_limits, band). */ +static void read_rle_samples(f32 PTR4* samples, u32 transform_size, VARBITS PTR4* vb, + const f32 PTR4* threshold, const u32 PTR4* band_limits) +{ + u32 i; + u32 band = 0; + f32
scale = 0.0f; + f32 PTR4* out; + + while (BINKAC_BAND_SAMPLE_LIMIT(band_limits, band) < BINKAC_FIRST_COEFF) { + scale = threshold[band]; + ++band; + } + + i = BINKAC_FIRST_COEFF; + out = samples + BINKAC_FIRST_COEFF; + + while (i < transform_size) { + u32 end; + u32 bitlen; + + /* Each sparse coefficient packet is either 5 bits (literal VQ run) or + 9 bits (RLE flag, 4-bit run index, 4-bit coefficient bit length). */ + if (read_bit(vb) != 0) { + end = i + BINKAC_RLE_SAMPLE_RUN(read_bits(vb, RLEBITS)); + } else { + end = i + VQLENGTH; + } + + if (end > transform_size) { + end = transform_size; + } + + bitlen = read_bits(vb, RLEBITS); + if (bitlen == 0) { /* Zero-width run: clear the whole run, then skip past every band boundary it crossed. */ + memset(out, 0, (end - i) * sizeof(*out)); + out += end - i; + i = end; + + while (i > BINKAC_BAND_SAMPLE_LIMIT(band_limits, band)) { + scale = threshold[band]; + ++band; + } + } else { + while (i < end) { + if (i == BINKAC_BAND_SAMPLE_LIMIT(band_limits, band)) { + scale = threshold[band]; + ++band; + } + + { + s32 value = read_bits(vb, bitlen); + + if (value != 0) { + /* Bink audio 1 stores the sign bit after each nonzero coefficient. */ + s32 sign = -(s32)read_bit(vb); + value = (value ^ sign) - sign; + *out = value * scale; + } else { + *out = 0.0f; + } + } + + ++i; + ++out; + } + } + } +} + +f64 pow(f64 x, f64 y); + /* Return BINKAC_QUANT_POWER_BASE raised to d * BINKAC_QUANT_POWER_SCALE, i.e. 10^(d * 0.1). */ +static inline f32 Undecibel(f32 d) +{ + return (f32)pow(BINKAC_QUANT_POWER_BASE, d * BINKAC_QUANT_POWER_SCALE); +} + /* Decode one block from inptr into samples. For each channel: read two FXPBITS-wide values through fxptof, read num_bands BINKAC_THRESHOLD_BITS-wide indices and convert them with Undecibel, decode the remaining coefficients with read_rle_samples, then run ddct (BINKACNEWFORMAT set) or rdft. The decoded floats are scaled by transform_size_root and written as s16 (interleaved when chans is not BINKAC_MONO_CHANNELS). Returns the offset of vb.cur from inptr in bytes, masked with FXP_VALUE_MASK. */ +static u32 Unquant(u32 transform_size, u32 chans, u32 flags, s32 PTR4* fft_work, + f32 PTR4* fft_coeffs, s16 PTR4* samples, void PTR4* inptr, + u32 num_bands, const u32 PTR4* bands, + f32 transform_size_root) +{ + f32 threshold[TOTBANDS + 1]; + VARBITS vb; + f32 decoded[MAX_TRANSFORM]; + u32 ch; + f32 PTR4* channel; + + vb.init = inptr; + vb.cur = inptr; + vb.bits = 0; + vb.bitlen = 0; + + if ((flags & BINKACNEWFORMAT) != 0) { + /* New-format streams reserve two leading bits before the coefficient payload.
*/ + vb.bits = BINKAC_LOAD32(vb.cur) >> BINKACNEWFORMAT_SKIP_BITS; + VARBITS_ADVANCE_CUR(vb.cur); + vb.bitlen = BITSTYPELEN - BINKACNEWFORMAT_SKIP_BITS; + } + + channel = decoded; + for (ch = 0; ch < chans; ++ch) { + u32 i; + + channel[0] = fxptof(read_bits(&vb, FXPBITS)); + channel[1] = fxptof(read_bits(&vb, FXPBITS)); + + for (i = 0; i < num_bands; ++i) { + s32 q = read_bits(&vb, BINKAC_THRESHOLD_BITS); + + threshold[i] = Undecibel((f32)q * BINKAC_QUANT_INDEX_SCALE); + } + + read_rle_samples(channel, transform_size, &vb, threshold, bands); + if ((flags & BINKACNEWFORMAT) != 0) { + ddct(transform_size, 1, channel, fft_work, fft_coeffs); + } else { + rdft(transform_size, -1, channel, fft_work, fft_coeffs); + } + + channel += transform_size; + } + + if (chans == BINKAC_MONO_CHANNELS) { + quanttos16s(samples, decoded, transform_size_root, transform_size); + } else { + quanttos16chans2(samples, decoded, transform_size_root, transform_size); + } + + vb.bitlen = 0; + return ((u32)((u8 PTR4*)vb.cur - (u8 PTR4*)vb.init)) & FXP_VALUE_MASK; +} + /* Square root for positive inputs: takes the frsqrte reciprocal-square-root estimate, refines it with three Newton steps of the form 0.5 * g * (3 - g * g * value), and returns value * g. Inputs that are not greater than zero are returned unchanged. */ +static inline f32 radfsqrt(f32 value) +{ + if (value > 0.0f) { + f64 guess; + + __asm__ volatile("frsqrte %0,%1" : "=f"(guess) : "f"(value)); + + guess = BINKAC_RSQRT_NEWTON_HALF * guess * + (BINKAC_RSQRT_NEWTON_THREE - guess * guess * value); + guess = BINKAC_RSQRT_NEWTON_HALF * guess * + (BINKAC_RSQRT_NEWTON_THREE - guess * guess * value); + guess = BINKAC_RSQRT_NEWTON_HALF * guess * + (BINKAC_RSQRT_NEWTON_THREE - guess * guess * value); + return value * guess; + } + + return value; +} + +HBINKAUDIODECOMP BinkAudioDecompressOpen(u32 rate, u32 chans, u32 flags) +{ + u32 transform_size; + u32 buffer_size; + u32 transform_size_half; + s32 nyquist; + u32 num_bands; + u32 i; + f32 transform_size_root; + HBINKAUDIODECOMP ba; + u32 PTR4* bands; + s32 PTR4* fft_work; + f32 PTR4* fft_coeffs; + s16 PTR4* overlap; + s16 PTR4* samples; + + if (rate >= BINKAC_RATE_44K) { + transform_size = BINKAC_TRANSFORM_44K; + } else if (rate >=
BINKAC_RATE_22K) { + transform_size = BINKAC_TRANSFORM_22K; + } else { + transform_size = BINKAC_TRANSFORM_11K; + } + + buffer_size = BINKAC_SAMPLE_BYTES(transform_size * chans); + if ((flags & BINKACNEWFORMAT) == 0) { + /* Legacy RDFT streams interleave stereo by decoding one larger mono transform. */ + rate *= chans; + transform_size *= chans; + chans = BINKAC_MONO_CHANNELS; + } + + nyquist = (rate + BINKAC_NYQUIST_ROUNDING) / BINKAC_NYQUIST_DIVISOR; + transform_size_half = transform_size / BINKAC_TRANSFORM_HALF_DIVISOR; + /* Calculate the number of critical bands below Nyquist. */ + for (i = 0; i < TOTBANDS; ++i) { + if (bink_bandtopfreq[i] >= (u32)nyquist) { + break; + } + } + + num_bands = i; + pushmalloc((void PTR4* PTR4*)&bands, (num_bands + 1) * sizeof(*bands)); + pushmalloc((void PTR4* PTR4*)&fft_work, BINKAC_FFT_WORK_BYTES(transform_size_half, fft_work)); + if ((flags & BINKACNEWFORMAT) != 0) { + pushmalloc((void PTR4* PTR4*)&fft_coeffs, BINKAC_DCT_COEFF_BYTES(transform_size)); + } else { + pushmalloc((void PTR4* PTR4*)&fft_coeffs, + BINKAC_RDFT_COEFF_BYTES(transform_size_half, fft_coeffs)); + } + pushmalloc((void PTR4* PTR4*)&overlap, BINKAC_OVERLAP_BYTES(buffer_size)); + pushmalloc((void PTR4* PTR4*)&samples, buffer_size); + + ba = (HBINKAUDIODECOMP)popmalloc(sizeof(*ba)); + if (ba == 0) { + return 0; + } + + memset(ba, 0, sizeof(*ba)); + ba->bands = bands; + ba->fft_work = fft_work; + ba->fft_coeffs = fft_coeffs; + ba->overlap = overlap; + ba->samples = samples; + ba->flags = flags; + ba->chans = chans; + ba->num_bands = num_bands; + ba->transform_size = transform_size; + ba->buffer_size = buffer_size; + ba->window_size_in_bytes = BINKAC_WINDOW_BYTES(buffer_size); + transform_size_root = BINKAC_TRANSFORM_ROOT_SCALE / radfsqrt((f32)transform_size); + ba->transform_size_root = transform_size_root; + + for (i = 0; i < num_bands; ++i) { + ba->bands[i] = (bink_bandtopfreq[i] * transform_size_half) / nyquist; + if (ba->bands[i] == 0) { + ba->bands[i] = 1; + 
} + } + ba->bands[i] = transform_size_half; + ba->fft_work[0] = 0; + ba->start_frame = BINKAC_START_FRAME; + + return ba; +} + +void BinkAudioDecompress(HBINKAUDIODECOMP ba, void PTR4* PTR4* outptr, u32 PTR4* outbytes, + void PTR4* inptr, void PTR4* PTR4* inoutptr) +{ + u32 transform_size; + f32 transform_size_root; + u32 chans; + u32 flags; + s32 PTR4* fft_work; + f32 PTR4* fft_coeffs; + s16 PTR4* samples; + u32 num_bands; + const u32 PTR4* bands; + u32 used; + + transform_size = ba->transform_size; + transform_size_root = ba->transform_size_root; + chans = ba->chans; + flags = ba->flags; + fft_work = ba->fft_work; + fft_coeffs = ba->fft_coeffs; + samples = ba->samples; + num_bands = ba->num_bands; + bands = ba->bands; + used = Unquant(transform_size, chans, flags, fft_work, fft_coeffs, samples, inptr, + num_bands, bands, transform_size_root); + + /* Later frames overlap-add their leading window against the saved tail from the last frame. */ + if (ba->start_frame != 0) { + ba->start_frame = 0; + } else { + u32 i; + u32 count = ba->window_size_in_bytes / sizeof(*ba->samples); + + for (i = 0; i < count; ++i) { + ba->samples[i] = (ba->samples[i] * i + ba->overlap[i] * (count - i)) / count; + } + } + + /* Save the trailing window for the next frame's overlap blend. */ + memcpy(ba->overlap, (u8 PTR4*)ba->samples + (ba->buffer_size - ba->window_size_in_bytes), + ba->window_size_in_bytes); + + if (outbytes != 0) { + /* The public frame excludes the saved overlap tail. */ + *outbytes = BINKAC_OUTPUT_BYTES(ba->buffer_size, ba->window_size_in_bytes); + } + + if (outptr != 0) { + *outptr = ba->samples; + } + + if (inoutptr != 0) { + /* Return the compressed stream cursor after the bits consumed by Unquant. 
*/ + *inoutptr = (u8 PTR4*)inptr + used; + } +} + +void radfree(void PTR4* ptr); + +void BinkAudioDecompressClose(HBINKAUDIODECOMP handle) +{ + radfree(handle); +} diff --git a/src/bink/src/sdk/decode/binkacd.h b/src/bink/src/sdk/decode/binkacd.h new file mode 100644 index 000000000..ce7905d6a --- /dev/null +++ b/src/bink/src/sdk/decode/binkacd.h @@ -0,0 +1,38 @@ +#ifndef BINK_SDK_DECODE_BINKACD_H +#define BINK_SDK_DECODE_BINKACD_H + +#include "bink.h" + +#define BINKACNEWFORMAT 1 // use the newer DCT transform path instead of the legacy RDFT path +#define BINKACNODEINTERLACE 2 // keep stereo channels as separate output planes +#define BINKAC20 4 // newer coefficient packet coding + +// extra padding after inpend, that, given random data, Bink might read past (very unlikely, but possible) +#define BINKACD_EXTRA_INPUT_SPACE 72 + +typedef struct BINKAUDIODECOMP BINKAUDIODECOMP; +typedef BINKAUDIODECOMP PTR4* HBINKAUDIODECOMP; + +struct BINKAUDIODECOMP +{ + u32 transform_size; + f32 transform_size_root; // output scale applied after the inverse transform + u32 buffer_size; + u32 window_size_in_bytes; // overlap/crossfade tail length in bytes + u32 chans; + s16 PTR4* samples; // decoded PCM frame plus overlap tail + s32 start_frame; // suppress overlap blending for the first decoded frame + u32 num_bands; // number of critical bands below Nyquist + u32 PTR4* bands; // critical band sample ranges + s32 PTR4* fft_work; // FFT/DCT work area + f32 PTR4* fft_coeffs; // transform coefficient table/work buffer + s16 PTR4* overlap; // previous frame tail for the startup crossfade + u32 flags; // BINKACNEWFORMAT/BINKACNODEINTERLACE/BINKAC20 +}; + +HBINKAUDIODECOMP BinkAudioDecompressOpen(u32 rate, u32 chans, u32 flags); +void BinkAudioDecompress(HBINKAUDIODECOMP handle, void PTR4* PTR4* outptr, u32 PTR4* outbytes, + void PTR4* inptr, void PTR4* PTR4* inoutptr); +void BinkAudioDecompressClose(HBINKAUDIODECOMP handle); + +#endif diff --git a/src/bink/src/sdk/decode/binkread.c 
b/src/bink/src/sdk/decode/binkread.c index 5d79f1ce1..bdd03a9a6 100644 --- a/src/bink/src/sdk/decode/binkread.c +++ b/src/bink/src/sdk/decode/binkread.c @@ -1,708 +1,784 @@ -#include "binkread.h" +#include "bink.h" +#include "binkacd.h" +#include "expand.h" #include "radcb.h" #include "binkngc.h" +#include "ngc/ngcfile.h" +#include "ngc/ngcsnd.h" +#include "popmal.h" +#include "yuv.h" -static char binkerr[256]; - -static u32 TrackNums = 0; -u16 LogoData[0x1D00] = { - - 0x4249, 0x4B69, 0xF839, 0x0000, 0x0100, 0x0000, 0xCC39, 0x0000, 0x0100, 0x0000, 0xE000, 0x0000, - 0xFC00, 0x0000, 0x0A00, 0x0000, 0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x3500, 0x0000, - 0x003A, 0x0000, 0xEC2B, 0x0000, 0xBE63, 0x15EC, 0x38C6, 0xB0CE, 0x702C, 0x6C80, 0x1E7D, 0x1243, - 0xBE13, 0x4265, 0x9F14, 0x53A2, 0x5964, 0x31B3, 0x5176, 0xC4F4, 0x6187, 0xB9E5, 0xF478, 0x916E, - 0xBCF4, 0x198E, 0x7ACD, 0xB3BE, 0xA19C, 0xB3BE, 0xAD1C, 0xB3CE, 0xD1BA, 0xF9CE, 0x3DBA, 0x49BE, - 0xDEBC, 0x1A20, 0x9770, 0xE809, 0x1C43, 0x009A, 0x1F27, 0x03A1, 0x97EB, 0x42C4, 0x1188, 0x1010, - 0x228E, 0x1084, 0x4042, 0x8488, 0x3842, 0x4412, 0x228E, 0x8084, 0x1010, 0x4188, 0x237E, 0xC40F, - 0x026D, 0x1D71, 0xE929, 0xC4A1, 0xAF9E, 0xB645, 0x2171, 0xF4B4, 0xAD96, 0x2441, 0xD0D3, 0x16F4, - 0xF54A, 0x1C81, 0x3ADA, 0x56D1, 0x4242, 0x46DA, 0x163D, 0x4516, 0x32F4, 0xB455, 0x42B2, 0x90D1, - 0x5378, 0x95DA, 0x4216, 0xD0B6, 0x68B1, 0x856C, 0xA1A7, 0x2DB6, 0x856C, 0xD1B6, 0x506C, 0x0BD9, - 0x9668, 0x69C9, 0xB690, 0x6DA1, 0x6D49, 0xB22D, 0x32E0, 0x7BC1, 0x5038, 0xA609, 0x0026, 0xA0A7, - 0x7418, 0xFEFB, 0xC07A, 0x7906, 0x9840, 0x4A47, 0x9C13, 0x48E9, 0x8873, 0x0229, 0x1D71, 0x4E20, - 0xA57A, 0x6D0D, 0x3BEF, 0xE4E7, 0xD576, 0xE498, 0xB136, 0x1CFB, 0xCEFE, 0x7424, 0x6002, 0x8509, - 0x0026, 0x905F, 0xA543, 0x7FBD, 0xD876, 0x1F26, 0x90D2, 0x5DC4, 0x8909, 0x13F8, 0x3D9C, 0xC8F7, - 0x6162, 0x2B2F, 0x9830, 0x6122, 0xFFF9, 0xF8EC, 0x7BD0, 0xDEDF, 0x730B, 0xE3BD, 0x708F, 0xF1AF, - 0xE685, 0x7302, 0x403B, 0x1300, 0x4C00, 0x3801, 0x2762, 
0x431E, 0x4CA4, 0x74A8, 0x714E, 0x20A5, - 0x234E, 0x62F7, 0x98E7, 0xD4B7, 0x2313, 0x403A, 0xD261, 0xC044, 0x4A87, 0xB19B, 0x15F3, 0xFD26, - 0x90D2, 0x1127, 0xC065, 0xC6B7, 0x7032, 0x01E0, 0x720F, 0xD03E, 0xEF75, 0xC044, 0x4E95, 0x0EF5, - 0x0A9C, 0x1348, 0xE988, 0x7302, 0x29F5, 0xB0DD, 0xB311, 0x5757, 0x87FA, 0xAD72, 0x4E00, 0x3061, - 0xC284, 0x0A13, 0x0080, 0x8D92, 0x2449, 0xF6AA, 0x378B, 0x927C, 0x5B8F, 0x5C96, 0x6444, 0x2439, - 0xF25D, 0x23C8, 0xF36C, 0xAE1F, 0x696B, 0x9E56, 0xEEA1, 0xEF05, 0x00E7, 0x5892, 0x5C4F, 0x00D0, - 0x726A, 0xEC46, 0x3633, 0x9164, 0xBCE7, 0xAF8F, 0xCFB8, 0xCBE6, 0x8DF5, 0x4FD2, 0x908A, 0xAD35, - 0x4B4E, 0x803C, 0xF91B, 0x291C, 0xFDB3, 0xF200, 0x8013, 0x9664, 0x9CAE, 0x6F49, 0xD603, 0x0048, - 0xEF17, 0x49F2, 0x1400, 0x00FD, 0x60DF, 0x7C0B, 0x9DBB, 0x61FE, 0xD7A2, 0x036D, 0xC1E3, 0xEC92, - 0x9991, 0x2419, 0x2DEE, 0x64FC, 0x0AAD, 0xDB22, 0xF14C, 0xBCED, 0xF7E9, 0x1580, 0x14BC, 0xF75D, - 0x0000, 0xA830, 0x0100, 0xD828, 0x490E, 0xBE07, 0x0000, 0xA4CE, 0x03DA, 0x17F9, 0x5A11, 0xBB6E, - 0x9531, 0xE6D0, 0x5CE7, 0x1073, 0x4592, 0x2447, 0xD1FC, 0x0FEA, 0x8148, 0x9DF5, 0x350F, 0xEC00, - 0x38C7, 0x92E4, 0xBA02, 0x404A, 0xDF35, 0x2549, 0x9249, 0xDC91, 0x1392, 0x4FC7, 0x7705, 0xFA77, - 0x0609, 0xE0AC, 0xEC63, 0x9D57, 0xBE3B, 0xF75D, 0x2880, 0x1396, 0xE479, 0x4A00, 0x80F4, 0x7E91, - 0x248F, 0x00E5, 0xF768, 0xEDD7, 0x8133, 0xA03D, 0xE25E, 0x7BC8, 0x9C91, 0xBCB3, 0xCB47, 0x4B0E, - 0xF43E, 0xCF82, 0xF4B7, 0x41AE, 0x4FEF, 0xD6F7, 0x6335, 0xE807, 0x20D7, 0xF812, 0x0080, 0x0A1C, - 0xBEFF, 0xAAAA, 0xAA9A, 0x9999, 0xE1FF, 0x802C, 0x832C, 0x1391, 0x3C45, 0x9E39, 0xC93F, 0x7F33, - 0x4892, 0xAAAA, 0xAAC1, 0x711C, 0xC771, 0xDC01, 0x0000, 0x34B4, 0xA842, 0x5524, 0x570C, 0xFAA1, - 0x710F, 0x608A, 0x5941, 0x0725, 0xF479, 0x5428, 0x3D4A, 0xA1FA, 0xE8F4, 0x182E, 0xDA11, 0x2E94, - 0xF083, 0x0A26, 0x47E8, 0xF3C3, 0xABE2, 0x775E, 0x2BA3, 0x7FB4, 0xC761, 0x1209, 0x5B0D, 0xE51C, - 0x5399, 0xBCB5, 0x4A7A, 0xD2E4, 0x5874, 0xEAA8, 0xE60C, 0x491A, 0x5E20, 0x9937, 0x0624, 0xCF99, 
- 0x28CD, 0x1C81, 0x9296, 0x8C95, 0x80A9, 0xD152, 0xF255, 0x67D1, 0xC3A6, 0x86A2, 0x0A69, 0x7A80, - 0x01E0, 0xC369, 0xBED9, 0xD9A0, 0x3C3D, 0x6321, 0xA462, 0xA0DB, 0x266A, 0xE37A, 0xD05B, 0xD554, - 0x0604, 0x6E07, 0x6059, 0xA1D6, 0x98E0, 0x0568, 0x4560, 0xB13E, 0x0800, 0x2609, 0x9070, 0xC6EA, - 0x723C, 0x8624, 0x8424, 0x8424, 0x8458, 0x1546, 0x750D, 0xC21D, 0x2021, 0x63AF, 0xFA42, 0x3236, - 0xD54F, 0x73FA, 0x1965, 0x558F, 0x54DB, 0x76F7, 0xE8D2, 0x003B, 0x4002, 0x642C, 0xD778, 0xF274, - 0x8D84, 0x24B4, 0xEA29, 0xC858, 0xAEF1, 0xE469, 0x6B85, 0x2400, 0xAB6A, 0x5480, 0xD845, 0xE519, - 0xA509, 0x4908, 0x99A3, 0xED76, 0x5A01, 0x323E, 0x3FFF, 0x2440, 0x1200, 0xC812, 0x880B, 0xF881, - 0x5535, 0xACB2, 0x8019, 0x2001, 0x32BA, 0x1C3D, 0x8095, 0xAAAB, 0xF9E0, 0xEDAA, 0x4302, 0x77DB, - 0x0521, 0x0921, 0x0158, 0xF338, 0xCBC9, 0x1900, 0xCF58, 0x5596, 0xD5CC, 0x0620, 0x0990, 0x0448, - 0x00E0, 0x1FF3, 0xB2E1, 0x02FE, 0x1907, 0x1020, 0x0192, 0x2500, 0xF4D0, 0x6A9A, 0xD94F, 0x1509, - 0xD00D, 0x2109, 0x2161, 0xCDE9, 0xEE33, 0x6015, 0x64BC, 0x73BA, 0xFB0C, 0x5805, 0x2401, 0x9200, - 0x2E20, 0xB380, 0xAC79, 0x9C65, 0x9DAE, 0xB266, 0x8004, 0x4802, 0x003F, 0xF003, 0x9FAD, 0x1328, - 0x0221, 0x09A1, 0x46F6, 0x5D3F, 0xF4D6, 0xD16B, 0x01C9, 0xFDBB, 0xD5FD, 0xF7DF, 0x7FAF, 0xA1FE, - 0xFEDD, 0xDEBB, 0x7F57, 0x7755, 0xDFBF, 0x7F3F, 0xD4F7, 0x7E68, 0x3FDC, 0xF77E, 0xFFBD, 0x1F7A, - 0x7BFF, 0xBDD6, 0xFBE0, 0x0F1F, 0xFE7D, 0xF7DE, 0xF32B, 0xD4C3, 0x870F, 0xAE53, 0xB55D, 0xE1C3, - 0x3EFC, 0xF0E1, 0xEADB, 0x71D5, 0xB7E1, 0xC387, 0x0FBF, 0x1ED4, 0xEBA1, 0x0E1F, 0x3EFC, 0xFCCA, - 0x5AE1, 0xC30F, 0x3F7C, 0x491D, 0x5C49, 0x1DFC, 0x5D61, 0x0200, 0xB051, 0x92CC, 0xF56A, 0x712C, - 0xC97D, 0xEAD9, 0x3D94, 0x2419, 0xEB3D, 0x616D, 0x447C, 0x5675, 0xC81D, 0x6B77, 0xAC99, 0x8B82, - 0xDEEC, 0xE7D5, 0x28E3, 0x3F46, 0x6D30, 0xCE1C, 0x2F4E, 0xCEB1, 0x24B9, 0x9E00, 0x10FB, 0x4492, - 0x24C7, 0x5592, 0xB725, 0xC97C, 0xEF46, 0xFC86, 0xC8C7, 0x89D8, 0xA7BF, 0x7AAF, 0x5521, 0x49F2, - 0x2400, 0x407A, 0xFF48, 0x923A, 
0xF3BF, 0x8AA6, 0x0AA4, 0xBF0B, 0x8EFB, 0xDE3D, 0x22C9, 0x2CAF, - 0x8C36, 0x4EAC, 0x7E13, 0x06F0, 0x9411, 0xB1CA, 0x0D00, 0x0040, 0x8509, 0x00C0, 0x4649, 0x92EB, - 0xA2AF, 0x3EBE, 0x1F39, 0x9BEF, 0x41CE, 0xE67B, 0x90B3, 0xD1E7, 0xAF89, 0x2B54, 0x0097, 0x2D24, - 0x9F89, 0xCFAA, 0x4611, 0xE6B7, 0x6124, 0xBD45, 0x9224, 0xD752, 0xFF3D, 0xFA94, 0x7308, 0xE4CD, - 0x7272, 0xB795, 0x4F8B, 0x6900, 0x38C7, 0x92E4, 0x7A02, 0x404A, 0xDF48, 0x9264, 0x75E5, 0x3D27, - 0x7B52, 0x8306, 0x0D1A, 0x3468, 0x10EB, 0x6FD2, 0x31B4, 0xF7B1, 0xC985, 0x395F, 0xA124, 0x473B, - 0xA312, 0xB256, 0xA124, 0x4FEA, 0x0100, 0xA4F7, 0x8F24, 0x493E, 0x1FC9, 0x054D, 0x630D, 0xB5F7, - 0x3320, 0x57FB, 0xC6AD, 0x368D, 0x648D, 0x753F, 0x3B0B, 0xA838, 0x0168, 0x5F95, 0x61B7, 0x74BE, - 0x2D54, 0x748F, 0x739F, 0x7002, 0xB4AB, 0x8EB7, 0xE4D6, 0x0FA0, 0xA577, 0xBFF9, 0x03D8, 0xF342, - 0xAFEB, 0x9C00, 0x006C, 0x9424, 0xBF83, 0x9CCD, 0x4EAC, 0x546D, 0x3E09, 0x2B1B, 0x64E8, 0xF824, - 0x6F1E, 0xE543, 0x8F5F, 0xADB0, 0xE676, 0xAF54, 0x2449, 0xBE6D, 0x66CC, 0x71F6, 0xE93D, 0x18C8, - 0xCFAF, 0xAB00, 0x9C63, 0x4972, 0x3D01, 0x20A5, 0x4651, 0xE74E, 0x2F2D, 0x2F92, 0x2459, 0xBD1B, - 0x00C0, 0x5C01, 0x207B, 0xA958, 0x2B9F, 0x5D7B, 0x56E5, 0x58CA, 0x88B8, 0x56D6, 0xF773, 0x60D7, - 0xE35E, 0xBB4B, 0x324E, 0x6D8F, 0xE765, 0x1700, 0x00E9, 0xFD23, 0x4932, 0xC749, 0xF22D, 0x681F, - 0x2F72, 0xED37, 0xC0FC, 0x7DE5, 0x1E99, 0xD4C5, 0xB774, 0x2FE3, 0x653C, 0x7F56, 0x9CFD, 0xEF74, - 0xB8DE, 0x7183, 0xC723, 0x48C3, 0x8EF3, 0x6B1C, 0x91B9, 0xF595, 0x6A15, 0x6B68, 0xF1AF, 0xFA26, - 0xA97C, 0x265F, 0x1FA9, 0xF1E3, 0x7DEE, 0x03D9, 0xA7F9, 0xAE19, 0x63BF, 0x1949, 0x92A4, 0xA764, - 0x3DD5, 0xDECB, 0x6175, 0x8D77, 0x3081, 0x43D8, 0x7C8C, 0xD8A2, 0x672C, 0x8936, 0x8245, 0xA1F9, - 0x80DF, 0xCAF0, 0x3109, 0x924B, 0x009D, 0xC6A4, 0x516D, 0x711A, 0xEE2A, 0x9546, 0xA476, 0xA566, - 0x9CBD, 0x4E20, 0x8C9C, 0xDC8A, 0x77F2, 0xB3DC, 0x6B02, 0x2AE0, 0xC33F, 0xFFFA, 0xABAA, 0xC287, - 0x0FBF, 0xFEFE, 0x9FDA, 0xC387, 0x1F56, 0x6155, 0xF827, 0x1220, 0x0990, 
0xD5CF, 0x703C, 0xC3F1, - 0x18D7, 0x1E10, 0xC771, 0x1E68, 0xE0F7, 0xAA0E, 0xCC00, 0x0990, 0x0400, 0xB2FF, 0xBBF6, 0xBAEA, - 0xC0BF, 0x6E66, 0x4148, 0x4280, 0x1F4E, 0xCDEE, 0x5D3D, 0x8B69, 0xAC17, 0xD8B6, 0x40C6, 0x1470, - 0x5CC3, 0x0049, 0x0063, 0x8FC7, 0xC178, 0x8AB8, 0x96A4, 0x6703, 0x15CF, 0xAC12, 0x0224, 0x40EC, - 0x6E01, 0x2009, 0x9016, 0xDE5E, 0x85EC, 0x959E, 0xA939, 0x8710, 0x9210, 0xE073, 0xD2CE, 0x6B5D, - 0xEA19, 0xAB1C, 0xBD66, 0x8F1A, 0x9EEA, 0x3981, 0x05F4, 0xA95E, 0xDD35, 0xD4A9, 0x5D3D, 0x7E9D, - 0xCE39, 0x4F35, 0xA57E, 0x7C49, 0x803C, 0xB644, 0x47AF, 0x1248, 0x8004, 0x4820, 0x6377, 0x27E3, - 0x9DA9, 0x35FF, 0x025E, 0x9F5E, 0x8EB6, 0x8B00, 0xC91A, 0xC0D1, 0x54D9, 0x4EBB, 0xC8F6, 0xF373, - 0xEF91, 0x055C, 0xC05B, 0x7F1F, 0xA7ED, 0x31AE, 0x68B7, 0xDDAA, 0xCBA1, 0xA5F6, 0xD22E, 0x9735, - 0x5C0E, 0xBBDB, 0x33EB, 0xDC45, 0x0D05, 0x9656, 0x95C5, 0x9866, 0xCCE1, 0xB4DA, 0x2E6D, 0xBBA6, - 0x7589, 0xF025, 0xF0E1, 0xDFAA, 0xBA76, 0xA80E, 0x0FEA, 0xF0E1, 0xC387, 0xFFAA, 0x76E1, 0x87BF, - 0x9EAE, 0xA8AE, 0xF7BE, 0xF7BD, 0xC287, 0x2F6F, 0x7DDB, 0xA387, 0xEAF0, 0xE1C3, 0x3E7C, 0xF8DC, - 0x65AD, 0xD01E, 0xFE7A, 0x6675, 0xAE5E, 0x053D, 0xBF72, 0x5577, 0xEE3B, 0xE83D, 0x8509, 0x00B8, - 0x3C03, 0x364A, 0x9E0E, 0xE938, 0x1BCE, 0x86B3, 0xE16C, 0xB824, 0xF786, 0x4B72, 0x6FB8, 0x24F7, - 0x864B, 0x3289, 0x7BC7, 0xD7CE, 0x1A03, 0x9769, 0xCCF7, 0x06BA, 0x4EC0, 0x8723, 0x42D9, 0x7FF5, - 0xEE27, 0x9EE3, 0xBDF2, 0x0380, 0x73EC, 0x3E76, 0xAF7C, 0x7036, 0x9C6D, 0x4972, 0x6FF8, 0xBDFE, - 0x0020, 0xE7CE, 0xF259, 0x8ED7, 0xCC77, 0x78E1, 0x263B, 0x9C0D, 0x675B, 0xB23A, 0xAC0E, 0xABF3, - 0xE1DE, 0xBAA0, 0x0100, 0x00A0, 0x2BF7, 0x2C5A, 0xA5FD, 0xBD35, 0xE233, 0xAA41, 0xD69A, 0xF419, - 0x2598, 0xFDAF, 0x3FF4, 0x72EA, 0xC7D2, 0xE4E5, 0x5A2F, 0x3462, 0xAFD6, 0xDB53, 0xBA3F, 0x28A3, - 0xD7C8, 0x2DDD, 0x2061, 0xB4E5, 0x8BA3, 0x4985, 0x4C8C, 0x1527, 0x56CC, 0x4C12, 0xA747, 0x4657, - 0x46D4, 0x7F91, 0x2FB6, 0x8974, 0x64F4, 0x8B59, 0x62EE, 0x3353, 0xE247, 0xC58F, 0x5627, 0x5ABD, - 0x68F5, 
0xA359, 0x6439, 0x7F05, 0x5FF4, 0xBAF9, 0x9A19, 0x1A3D, 0x77D5, 0x639C, 0xBE53, 0x1834, - 0x57DE, 0x6925, 0x9947, 0x9AEB, 0xBCFA, 0x9CF7, 0x6F8E, 0x68C6, 0x6C31, 0x2246, 0x99CC, 0xB4CD, - 0xFC18, 0x6DEE, 0x3CE7, 0x1AB1, 0x383D, 0x05FD, 0x39A8, 0xF971, 0x39CF, 0x2A83, 0x9ADE, 0xBC7B, - 0x7E52, 0x5D9E, 0xBEBA, 0xB27A, 0xD77E, 0x3498, 0x5DB0, 0xE607, 0x4085, 0x0900, 0x0004, 0xD4AC, - 0x5C80, 0x9A5B, 0x2BE6, 0xA9EB, 0x1900, 0x31B3, 0xBE7A, 0x9C68, 0x1570, 0xE739, 0x8306, 0x10A6, - 0x7E8E, 0x6656, 0x4DDD, 0xA0AE, 0xF279, 0xBC1E, 0x6A4E, 0x7A9B, 0xC9FC, 0x0135, 0xA756, 0xCDDC, - 0xF9FA, 0x9616, 0x81E7, 0x1373, 0xB616, 0xFA07, 0x3AEA, 0x0B0D, 0xD5B8, 0x31EA, 0xACED, 0xADB6, - 0xA0BF, 0xF503, 0x39F5, 0x5503, 0xBF40, 0xF582, 0x5CEA, 0xFCDE, 0x9979, 0x8EDC, 0x6BA5, 0xA3F7, - 0x87DC, 0xCDD0, 0x8E5A, 0xF128, 0xB031, 0x4E73, 0xB77D, 0x09F9, 0xBB11, 0x3155, 0x94A3, 0x9E4E, - 0xFBD2, 0xFC4E, 0x6C8B, 0x89B3, 0xCE66, 0x3BFD, 0x43C2, 0xE0A7, 0x6AEA, 0xEF6C, 0xA3D2, 0xA81F, - 0x0CCF, 0xF489, 0xBA57, 0x0BD3, 0x5D3F, 0xB29E, 0xC807, 0x6D9E, 0xD618, 0xE9EB, 0xFF86, 0x0CC2, - 0xFBB9, 0xC923, 0x8BEA, 0xFB38, 0xBCFC, 0x1C19, 0x94E4, 0xCE59, 0xE205, 0x3D54, 0xEAC4, 0xAFE3, - 0xC82F, 0xAEA3, 0xA1A3, 0x9FAD, 0x9E8E, 0x128C, 0xBCAA, 0x455E, 0xB5B7, 0x1CCC, 0x73B5, 0x73E9, - 0x2BC3, 0xE4F8, 0x7B85, 0xC361, 0x114C, 0xEAEA, 0x7598, 0xEED3, 0x95BC, 0x51FD, 0xB353, 0x195E, - 0x46F6, 0xBBDE, 0x3C4B, 0x89CA, 0x30CE, 0x3A4F, 0x4F0F, 0x9915, 0xDF7D, 0x2B23, 0xACA5, 0xC88D, - 0xF77C, 0xF2E9, 0x3852, 0x736E, 0x7DAC, 0x1CEB, 0x9C53, 0x4472, 0xC69D, 0x67F6, 0x59E5, 0x76EE, - 0xD9CD, 0xC384, 0x9901, 0x6819, 0x0919, 0x1700, 0x0040, 0x057C, 0xFBD7, 0xF7EF, 0xDFBF, 0xDBAB, - 0xAA1E, 0xBFFF, 0x3EF4, 0x78F5, 0xDFB7, 0xF0FF, 0x5DD5, 0x426B, 0xDB8E, 0xEBAA, 0x7FF0, 0x3AA9, - 0x5E0C, 0x16B2, 0xB54E, 0x8241, 0x9926, 0x0026, 0x235A, 0x8422, 0x3FDD, 0xA6E6, 0xC111, 0x93E6, - 0x3539, 0x3ED5, 0x5575, 0x0381, 0xE181, 0xEAF9, 0x4AEF, 0x181E, 0x8274, 0xACD4, 0xC0A6, 0x313B, - 0xE2E6, 0xC167, 0x08C3, 0x6307, 0xC458, 0xC659, 
0x241F, 0x0208, 0x30FA, 0x8FFA, 0xC7FD, 0xCEEC, - 0x764E, 0x08AB, 0xF67D, 0x87AA, 0xEAED, 0x55C1, 0xF792, 0x00E1, 0xB289, 0x0BC8, 0x1254, 0x1F75, - 0x826A, 0x562F, 0x48D7, 0x6857, 0x13D5, 0x580E, 0xA392, 0xA176, 0xB907, 0x9D67, 0xD470, 0x8490, - 0x1AEA, 0x229D, 0xEEAE, 0xD1E9, 0xA471, 0xCD61, 0x18D5, 0x24C7, 0x59D3, 0xC558, 0xDDE5, 0x67A9, - 0xC5D2, 0x7876, 0x5A39, 0x93AE, 0xD2D3, 0x39DA, 0x3B95, 0x211E, 0x794B, 0xE003, 0x9200, 0x812A, - 0xAE35, 0x6AE8, 0x59BE, 0x6D5D, 0x593A, 0xBB99, 0x9D33, 0x787B, 0xD0E4, 0x2D32, 0xF61E, 0x671D, - 0xB239, 0xCB71, 0xA61B, 0xD6D2, 0xD6AD, 0xD1BE, 0xD6A9, 0x4E5B, 0xF574, 0x1CD3, 0x51A3, 0x4727, - 0x35AA, 0x8E0E, 0xA82A, 0xABAA, 0x6BD7, 0xAAFA, 0x8124, 0xE06D, 0xD96B, 0xAD4A, 0xF3EB, 0xAE65, - 0xD5A5, 0xAF1B, 0xC6CE, 0x98AB, 0x884B, 0xABD4, 0xEA9E, 0xFC83, 0xE3D4, 0x6E9D, 0x5D95, 0x4126, - 0xF7E0, 0x2773, 0xB7B5, 0x355E, 0x1FAC, 0x3362, 0x59D0, 0x40E2, 0xADDE, 0xD265, 0xF578, 0xF5D1, - 0x75EC, 0xD975, 0xEDA4, 0x6ABA, 0x56A5, 0x6B40, 0x3F73, 0x9F72, 0xF32B, 0x718D, 0xB3EC, 0xAA3A, - 0x400B, 0xEC29, 0xBA1C, 0x5475, 0xB531, 0xB5B5, 0xE6E8, 0xD1AE, 0x7614, 0x670F, 0xDAE9, 0xA8B9, - 0xCA51, 0x3AB4, 0x9CB3, 0x468D, 0xD5F7, 0x0080, 0x047E, 0xDDDD, 0x75BF, 0xBFD7, 0x6FFB, 0x7BDD, - 0xBFDD, 0xBDAB, 0xDFF5, 0xF7FB, 0xEFDD, 0xF7D7, 0x55FD, 0xF77E, 0xA18A, 0xE882, 0x20DE, 0x7183, - 0x0B0A, 0x73AC, 0x6BDF, 0x7626, 0x77A4, 0xB021, 0x2422, 0x2DA6, 0x5120, 0x28A8, 0xA338, 0x51F9, - 0xAC30, 0xC71C, 0x2CD1, 0x7966, 0x1270, 0x8A36, 0x74C8, 0x36AD, 0x5C4D, 0xB71D, 0x8030, 0xE3A3, - 0xFE3F, 0x6F66, 0xF31C, 0x6030, 0x8386, 0x46CA, 0xF861, 0x3324, 0x0570, 0x547F, 0xE683, 0x1ED7, - 0x9D61, 0x0C01, 0x34CE, 0x1658, 0x014C, 0xEBB1, 0x781C, 0xCDB7, 0x6627, 0x5234, 0xBC2F, 0x0342, - 0x6880, 0x3F0E, 0x2000, 0x7209, 0x5FDD, 0xAD4E, 0x5A79, 0xF878, 0xC204, 0x0040, 0x7483, 0x0AE4, - 0xE4BE, 0x8CB9, 0x0CF2, 0xE955, 0xE76D, 0xFACE, 0x9BEF, 0x1E1E, 0x475F, 0x2765, 0x4D47, 0xDF84, - 0x307E, 0xA455, 0x2848, 0xFADA, 0xDDED, 0xEE76, 0x0652, 0x6804, 0x8DC6, 0x2411, 0x6934, 
0x64D1, - 0x6834, 0xDF6B, 0x1698, 0x38E8, 0x68D0, 0x071B, 0x0B13, 0x1585, 0x9B2F, 0x9E76, 0x3DD7, 0x85BA, - 0xBA86, 0x1AF7, 0xD85B, 0x5D89, 0x281A, 0x924D, 0xC3B6, 0x2FDA, 0x1ACE, 0xD55C, 0xDDB4, 0xC0BD, - 0xEA96, 0x6557, 0x9A8A, 0x35AD, 0xD5F3, 0x5B8D, 0x2664, 0x5FBF, 0xE67B, 0x68EF, 0x16D2, 0xD77C, - 0x4FF5, 0x94AA, 0x764A, 0xEABE, 0xEE19, 0xA6A7, 0x49F9, 0x1BA9, 0x77CE, 0x7894, 0xF7D6, 0x9A21, - 0xCDB9, 0xB465, 0x6EF4, 0xBCBB, 0x2CA3, 0xBDE9, 0x81D4, 0xBBD7, 0x8BF6, 0x202A, 0xFAF6, 0xAC06, - 0x0000, 0x1526, 0xF0DD, 0xAFA3, 0xB966, 0x79CF, 0xE703, 0xCCDE, 0xC19D, 0x2C06, 0x99EB, 0x9F51, - 0x8EFF, 0x5A94, 0x4F87, 0xDC71, 0xD8AD, 0xC5E3, 0xB719, 0x8CEA, 0xDBD1, 0x62CC, 0xF62C, 0x7A06, - 0xEF42, 0xE958, 0xF88C, 0x5057, 0x1277, 0x413A, 0x3A28, 0x396A, 0xB796, 0x1FCB, 0x6FED, 0xFE5C, - 0xBDDA, 0xB6CC, 0x53CD, 0xB887, 0x75EF, 0x27D1, 0xBC4F, 0x0B71, 0xBC5C, 0x3051, 0x5C73, 0x69F6, - 0x298E, 0xE159, 0x9EF7, 0x289C, 0x6CF9, 0x729A, 0xB2B3, 0x29A5, 0x9452, 0x4AA1, 0x1B4C, 0xF774, - 0x83BA, 0x0C21, 0x856E, 0xCA8D, 0xF702, 0x3D99, 0x79FA, 0x523F, 0x7519, 0xED6D, 0x27BE, 0x7F3F, - 0x2527, 0x9D60, 0xFC86, 0xAB60, 0xDE77, 0xF500, 0x0000, 0x2A4C, 0x9830, 0x6102, 0x1948, 0x7FC0, - 0xDA6F, 0xC278, 0x778C, 0xAEE6, 0xD1CB, 0x3377, 0xCEE8, 0xDDD6, 0x6DC2, 0x5452, 0x9EAA, 0xF79A, - 0x0329, 0x78F7, 0x5B67, 0xD154, 0x55A8, 0x2AB4, 0xA2C7, 0x5797, 0xB1EA, 0xA37A, 0xE4D7, 0xB232, - 0x7AF2, 0xB2F4, 0xAEEE, 0x703E, 0x5495, 0x3A91, 0x959E, 0x6CBB, 0x7C8A, 0x86AC, 0x1954, 0xAC7D, - 0x66D6, 0x6C5E, 0xB134, 0xD7EE, 0xE37C, 0x424B, 0x5B29, 0xCB31, 0x4F59, 0x2D8C, 0xA191, 0x4571, - 0x1E5B, 0x9284, 0x655B, 0x702C, 0x58CE, 0xF3B8, 0xA4A3, 0x99A6, 0x69B2, 0xDF77, 0xA5DC, 0xA52D, - 0xA594, 0xCD55, 0x8242, 0xD699, 0xA273, 0x354B, 0xB59A, 0xB2A5, 0xDB54, 0x5B1B, 0x3911, 0x5AAE, - 0xA088, 0xCC63, 0x18F5, 0x3023, 0xAA54, 0x9946, 0x254C, 0xF17D, 0xC5A7, 0x4EDD, 0xD3C3, 0xF430, - 0xD3D3, 0x3D3D, 0x8CB9, 0xE25A, 0xB5B0, 0xB71B, 0xC6FB, 0xC4DB, 0x770C, 0xA34E, 0x9D95, 0x2773, - 0x5360, 0x0000, 0x0015, 
0xF0FF, 0xB2EA, 0x35F6, 0x7FDD, 0xF80F, 0x0A7F, 0x56A7, 0x4F9C, 0x1BD2, - 0x4690, 0xA64C, 0xCDC1, 0xDA6A, 0xD071, 0x8CFB, 0x58CC, 0xFA5E, 0x737F, 0xFA23, 0x6130, 0xA673, - 0x00FE, 0xFA66, 0x3409, 0x6D65, 0xA48B, 0x8C47, 0x7CDF, 0x69CD, 0x7E27, 0x0CF2, 0x9032, 0x6081, - 0xB356, 0x483F, 0x6751, 0xD767, 0xBED0, 0x6BE6, 0x2099, 0xD6B3, 0x5400, 0x8A09, 0x1181, 0x1F67, - 0x633E, 0x4786, 0xACEA, 0x1227, 0xA92E, 0x6E0F, 0x4198, 0xC033, 0xCE72, 0x9F26, 0xAF86, 0x1B37, - 0x304F, 0x0AF3, 0x2950, 0x1550, 0x57A7, 0xFCD4, 0xC91B, 0xB928, 0xDC21, 0x5118, 0x58AE, 0xD0F2, - 0xF4E7, 0xF263, 0x180D, 0xAAA9, 0x8714, 0xD29A, 0xE248, 0x018E, 0xF620, 0x8EC5, 0x756F, 0x7B6F, - 0xF688, 0x2100, 0xC4E1, 0x0D67, 0x7100, 0xDAFD, 0x2D8E, 0x0A67, 0xEC92, 0x4A41, 0xF9D9, 0x89C2, - 0x34D0, 0x0481, 0xE106, 0x5719, 0x939F, 0x1BB3, 0x2CB7, 0xCD20, 0xB00F, 0xDFDE, 0xAD08, 0x3500, - 0x7CD8, 0x46DF, 0x1848, 0x7772, 0x5039, 0x3C1C, 0x0914, 0x628A, 0x30A5, 0x605C, 0x3BD3, 0x6EAD, - 0x7F7A, 0x7996, 0x6E66, 0xB8ED, 0x5F6E, 0xAB6F, 0xDFA1, 0x5D85, 0x0F57, 0xAFFA, 0xACDA, 0xDB7E, - 0xFF7F, 0x8300, 0xE036, 0x1361, 0x4CA0, 0x3CBA, 0xE5E0, 0x87EF, 0x7DFA, 0x4EEE, 0x273D, 0x29BC, - 0x09C1, 0xA707, 0x1628, 0x5B11, 0xF92F, 0x4BDB, 0x0322, 0xAF1E, 0x305C, 0x0102, 0xFBED, 0x24D8, - 0xB94C, 0x38AE, 0xF739, 0xECC6, 0xAB3B, 0xC1CF, 0xA6D2, 0x4AE6, 0xF77A, 0x074C, 0xC5A9, 0xA6B9, - 0x8168, 0x770D, 0xBF01, 0xD34E, 0x3620, 0xABAE, 0x80EE, 0x7AC4, 0xC126, 0xA076, 0x7E17, 0xD383, - 0xA870, 0x9475, 0x4111, 0xE555, 0xC66B, 0xAD6B, 0x420E, 0xFA61, 0xCC80, 0x1638, 0xA641, 0x9BE5, - 0x2F97, 0x2DC5, 0xE6EE, 0xCE4B, 0x1DBF, 0x5E79, 0xEE67, 0xE060, 0x94D0, 0xD304, 0x8CE7, 0x7202, - 0x8223, 0xDC31, 0x30B3, 0x9B62, 0xF68D, 0x38DD, 0xCD40, 0xA10E, 0x8925, 0xCD7D, 0x3B0E, 0x7D72, - 0x8E79, 0x9E7E, 0x4D2F, 0x8C18, 0x2C20, 0x0754, 0x3B54, 0xE299, 0xB00F, 0xE0D1, 0xE3D6, 0x2571, - 0x8103, 0xC145, 0x1256, 0xBCBF, 0x4343, 0xFE9F, 0xEF32, 0x5C11, 0x9F3A, 0xCF9F, 0x21F5, 0x29FF, - 0xC707, 0x2B67, 0x4058, 0x0338, 0x342B, 0x76EE, 0x0C1C, 0x102E, 
0x4F57, 0x127D, 0xDEA9, 0xCB42, - 0x4552, 0xE28B, 0xCDE7, 0x0A5C, 0xEF17, 0x9DCA, 0xC9DD, 0xDE65, 0xCF6D, 0x4F9F, 0xD6E1, 0xF37F, - 0x237B, 0x65B0, 0x07DC, 0x6582, 0xE70A, 0x287F, 0x9C2B, 0x7906, 0xA90F, 0xFF49, 0x7023, 0x84B6, - 0x808B, 0x5206, 0x0F06, 0x3CE1, 0x4279, 0x21BC, 0xF975, 0x34E0, 0xC72E, 0xC991, 0x797E, 0x8CE7, - 0x19A0, 0xE32F, 0x24C0, 0xDC86, 0x8368, 0x403A, 0x70E2, 0x7A00, 0xC052, 0xD50E, 0x3DA6, 0x8012, - 0xF585, 0x117D, 0xF2BE, 0x5F39, 0xE8A6, 0xFA34, 0x01CA, 0x6792, 0x35E1, 0xA0E1, 0x1E4B, 0x4FF6, - 0xBC49, 0xADCD, 0x747A, 0xF5E2, 0x2095, 0xF2D6, 0x8461, 0xF85A, 0x8CCA, 0x0663, 0x9EF0, 0x6953, - 0x517E, 0x1F15, 0x7823, 0xF212, 0x3314, 0x7A84, 0x7062, 0xFD37, 0x332A, 0x6496, 0x27B9, 0xFF7F, - 0xADB6, 0x9E20, 0x4400, 0xC8C7, 0x0144, 0x4221, 0x244E, 0xCEA1, 0xD944, 0xE325, 0x4C91, 0x6FED, - 0xD459, 0xF0DB, 0x9FE5, 0x1E71, 0xBF15, 0xB577, 0x7DBB, 0xFFFC, 0xF811, 0x6526, 0xBCD2, 0xFDEF, - 0xBEBE, 0x7B8F, 0x6D57, 0x55D0, 0xE157, 0x0A13, 0x0000, 0xA040, 0x5983, 0xE8BE, 0x1A4F, 0x3C0B, - 0xBA66, 0x27B9, 0x954E, 0x50FD, 0x5C92, 0x9E1C, 0x1C7E, 0xA513, 0x542F, 0x33FF, 0x5AF7, 0xA765, - 0xBBC9, 0x4A35, 0xA8CF, 0x8A56, 0x59DF, 0x04DE, 0x3FE0, 0x8FE4, 0xF789, 0xAECD, 0xBA5A, 0xB5A0, - 0x06C9, 0xAF1D, 0x57CF, 0x0D9F, 0xEF8E, 0x44E7, 0x7347, 0xA0A2, 0xF069, 0xA9F1, 0xABA6, 0xA8EC, - 0xEBE2, 0xEB9A, 0x43DF, 0x032B, 0xD355, 0x1F29, 0x6762, 0xE1E3, 0x0274, 0xE88B, 0x44E1, 0x5267, - 0x4A1A, 0x2F4F, 0xE255, 0xEB21, 0xF72E, 0x7911, 0x6ED8, 0x6E60, 0x3F0E, 0x9FCD, 0xE7F2, 0x9FE2, - 0xC1E4, 0x5237, 0xD9C0, 0xE6B0, 0x5940, 0x028B, 0xCD02, 0x163B, 0x691A, 0xA6C1, 0xFEDA, 0x03FB, - 0x8C96, 0xBDFA, 0xD9D6, 0x8E8C, 0xA3D9, 0x9FED, 0x7B96, 0xEF88, 0xF15C, 0x3497, 0x3C79, 0xC7AD, - 0xAF7F, 0xEC28, 0x3162, 0xAB8A, 0x9E52, 0x34F7, 0xEECA, 0xAFCA, 0x9673, 0x7911, 0xDB58, 0x1F80, - 0x01D1, 0x1138, 0x2E18, 0x483C, 0x1C24, 0x2AF4, 0x46A2, 0x436E, 0x7724, 0x8E0B, 0x1A12, 0x679B, - 0x2D4C, 0xB5BF, 0x4DE9, 0x6EA9, 0xBE63, 0x5B42, 0x13B6, 0x670B, 0x4338, 0xD216, 0x99EA, 0x3BD7, - 
0x4B8D, 0x6667, 0x30FA, 0xB5F7, 0x7C05, 0x6AE7, 0x9BE7, 0x548D, 0x9AAF, 0x8729, 0x7E54, 0xC6E4, - 0x316B, 0x90DD, 0xAFEE, 0xB044, 0x8DCF, 0xDCF3, 0x75E4, 0xCF0B, 0x976F, 0xD4B9, 0x5607, 0x0000, - 0x0015, 0x2600, 0x0000, 0x203A, 0xD6BD, 0x5CEE, 0x75F3, 0x3D15, 0x097E, 0xF72B, 0xC84C, 0xCF71, - 0x6AB9, 0x4A15, 0x0AC5, 0x4A45, 0x4A3B, 0x3731, 0x8EE3, 0x7A27, 0xD2FC, 0xE338, 0xDCBC, 0xD0B8, - 0xD5E2, 0x60E2, 0x7970, 0xB0A9, 0xF401, 0x4E4F, 0x8B7D, 0x7A1D, 0xD4A8, 0x3D1B, 0xDE60, 0x9034, - 0x02F3, 0x5799, 0xF966, 0xB367, 0x25B3, 0xD907, 0xA7F1, 0x188F, 0xD1C7, 0x6F74, 0x3AD2, 0x701B, - 0xD7AD, 0x0E7A, 0xE25B, 0x335A, 0x3BF7, 0xDE95, 0x0151, 0xF63B, 0xC717, 0x0000, 0x0050, 0x6102, - 0x00A0, 0xC358, 0x7B41, 0xDF77, 0x1B15, 0x978C, 0x15DF, 0x587C, 0x4E76, 0x10E8, 0x824B, 0xDEE7, - 0x2C91, 0x9BFA, 0x4E96, 0xFD28, 0xD591, 0x16B5, 0x1869, 0xE45D, 0xA4BA, 0x893C, 0x4444, 0x77F4, - 0x7C3D, 0xD424, 0x33B5, 0x22E6, 0x2B69, 0x5866, 0xAB86, 0xA961, 0x6A98, 0xA878, 0x3BD7, 0xBC67, - 0xDB07, 0x8901, 0x7050, 0xD8EA, 0x6F59, 0x6BA6, 0x4657, 0xE8EC, 0xDC11, 0x9EED, 0xFAA7, 0xF34B, - 0xCF97, 0xA2BB, 0xB9DB, 0x13FC, 0x7D3C, 0x75FD, 0xB485, 0x23A5, 0x9452, 0x186A, 0xD4A2, 0xC9F3, - 0x26F7, 0xCB7B, 0xDA0C, 0x19E3, 0x5F6F, 0x0B6A, 0xDA3C, 0x2072, 0xD753, 0x60AD, 0xC755, 0x6E07, - 0x2296, 0x63FF, 0x0000, 0xA820, 0x9000, 0x01FA, 0x2947, 0x7B75, 0xF951, 0x7327, 0xDD35, 0x1C3A, - 0xE65C, 0xF6AC, 0xAEA5, 0x53F7, 0xA04F, 0x6A68, 0x15AF, 0x5553, 0x772B, 0xAC6B, 0xAD24, 0xA7E6, - 0x4E5A, 0xED51, 0x6D14, 0xE887, 0x3354, 0x9635, 0xB767, 0x8D63, 0xADAA, 0x6A95, 0x76D7, 0x965D, - 0xEF99, 0x7ADA, 0x3D56, 0x3B6A, 0xB456, 0xB9B5, 0x1DBD, 0x7A56, 0xAB73, 0x5896, 0xF312, 0x1BB8, - 0x8024, 0x4080, 0xA73C, 0x6577, 0xFA1A, 0x006F, 0x0135, 0xCBAA, 0xB261, 0x96E6, 0x38CB, 0x5E7B, - 0xEBF8, 0xF48C, 0x1EDD, 0x4EED, 0xB6A3, 0x021F, 0x9004, 0xC819, 0x5759, 0xCD9E, 0xCDF5, 0xB422, - 0x63B7, 0xD543, 0x6795, 0x4E9B, 0x6FB8, 0x5C56, 0x97D6, 0xB0AA, 0xCBD2, 0xF63A, 0x2A3D, 0xAACA, - 0xAE41, 0xA0CA, 0x9EF5, 0xCCBD, 0xBA87, 
0x72C6, 0x1448, 0x0264, 0x0EA8, 0x71DA, 0x51E5, 0x547D, - 0xD4A1, 0x3AAD, 0xC773, 0xAAE7, 0x3AE3, 0x4C67, 0x8D55, 0x6B16, 0x90EE, 0xE619, 0x0E31, 0x1D7A, - 0x00A7, 0xAC9A, 0x934B, 0x2DCB, 0xF68C, 0x3DEC, 0xEA7F, 0xA581, 0x043E, 0xBFD3, 0x6A84, 0xDCB4, - 0x39C0, 0xCE03, 0x2220, 0x06C0, 0x751C, 0x0501, 0x3893, 0x0457, 0xCF1B, 0xD4C3, 0x88FB, 0x6BBD, - 0xFFEB, 0xBDFB, 0x7EFB, 0xFA9F, 0x6F3F, 0xE100, 0xE260, 0xCF5F, 0xA813, 0x8251, 0x0614, 0x081B, - 0x1C60, 0x5F06, 0x08FC, 0xF811, 0x65A6, 0xB677, 0xEFDA, 0x77C5, 0xB41F, 0xDDAF, 0x920F, 0x00BE, - 0x2BB8, 0x571D, 0x4151, 0x5E59, 0x02BA, 0xBE4E, 0x663A, 0x22D5, 0x0E8F, 0x3819, 0x5F77, 0x61C8, - 0x8009, 0xC282, 0xCD18, 0x1743, 0xD9D1, 0x74F6, 0xBCA8, 0xCD3C, 0xD705, 0xFCEF, 0xADBF, 0xAADB, - 0x55AD, 0x1E3E, 0x7CFC, 0xAA55, 0xC188, 0xEF39, 0x8860, 0x001B, 0x1CA0, 0x5FCE, 0x1982, 0xF008, - 0x0700, 0x9897, 0xA10E, 0xD4BB, 0xB7B8, 0x2BC2, 0x5BC0, 0x0129, 0x93F5, 0x260E, 0xBEDF, 0xE001, - 0xD1BB, 0xEF9C, 0xFEF2, 0xA6D8, 0x6631, 0x22EE, 0x18C5, 0x0205, 0x5607, 0xC612, 0x84A8, 0xD43D, - 0xA3E4, 0xE11A, 0x30C8, 0xB580, 0x8B8E, 0x32DB, 0x2862, 0x33ED, 0x1B0A, 0x18A3, 0x8EBE, 0x5DEA, - 0x3DAB, 0x2822, 0xB917, 0x35B2, 0x3A0F, 0xC3E3, 0xA278, 0xDDD3, 0xEBDF, 0xE5B2, 0x151E, 0x2CC9, - 0xB57A, 0x680B, 0xB5EF, 0x0C7B, 0xFF9F, 0xFD7D, 0x831C, 0x96BE, 0x801E, 0x9844, 0xB51E, 0x29C1, - 0x01D3, 0x0E0D, 0xC73B, 0x42FD, 0xA21A, 0x7704, 0x06BF, 0x288F, 0x3EE6, 0x8021, 0x84C0, 0x4938, - 0x03AF, 0x6C77, 0x0C37, 0x6377, 0xF3BC, 0x2D54, 0xBE19, 0x7B4E, 0x0C05, 0x3050, 0x0B92, 0xDEE6, - 0x2B35, 0xA965, 0x3D7F, 0xA78E, 0x3E5D, 0x5F6A, 0xCEF2, 0x23D1, 0x5FA1, 0xAA6A, 0xE5E1, 0xC3D7, - 0x6AC5, 0xFA75, 0xFD23, 0x2880, 0xFBA3, 0x0700, 0x6E59, 0x50C0, 0x1CCE, 0xF814, 0x421A, 0xC74D, - 0xB0C7, 0xDBE6, 0x7DD9, 0x2685, 0x3B2D, 0xDCE7, 0xD2B8, 0x73A1, 0x9DBC, 0x61BF, 0x8F8B, 0x0C6A, - 0x94BB, 0x3DF9, 0xDD8B, 0x8369, 0x2E4F, 0x1F8B, 0x3703, 0x81C9, 0x3319, 0x7A62, 0xDE48, 0x9D42, - 0x23DA, 0xCFE1, 0x983F, 0xC200, 0x001C, 0xF492, 0x70D2, 0x4220, 0x323F, 0x5006, 
0xA26F, 0xCD9A, - 0xCB6B, 0x74FA, 0x6AC2, 0xFBDC, 0x7B88, 0x8355, 0x2872, 0x2424, 0x0E07, 0x1EE3, 0xAC61, 0x4220, - 0xEEAE, 0x67D8, 0x91DD, 0x1DC2, 0x204B, 0xF3DC, 0x8078, 0x8C5B, 0x0002, 0x1CE7, 0x9DC4, 0xA6DB, - 0x5C59, 0x4E0C, 0x4590, 0x924B, 0x9BDF, 0x66C4, 0xA09D, 0x8EAE, 0x60FF, 0x4DED, 0x409C, 0x40D8, - 0x2F22, 0x423F, 0x1A1F, 0x1F49, 0x608D, 0x4441, 0x8CEC, 0x6237, 0xDA22, 0xF451, 0x0BF8, 0xF6F5, - 0x3B87, 0x6150, 0x2A83, 0xCF2D, 0x6A8C, 0xE35C, 0xEB5F, 0xF75A, 0x4DF0, 0x6BFB, 0x8EB5, 0xC6D5, - 0x51B5, 0xE0DB, 0x0813, 0x0000, 0x2800, 0x08F5, 0x7BA2, 0x7E97, 0x8C15, 0xDF13, 0xF5BB, 0x5462, - 0xD832, 0x2532, 0x2532, 0x2556, 0x4AEC, 0x665A, 0x7E77, 0x854A, 0x67B0, 0xD83C, 0x7E2B, 0x3219, - 0x143F, 0x9B41, 0x1D79, 0xEF44, 0xBE0F, 0xE53B, 0xEEEE, 0x79E7, 0xAE9D, 0xDE16, 0x868B, 0x7C4F, - 0xA1D0, 0xD27D, 0x2D7A, 0x7C45, 0xCCBA, 0x7EBC, 0x9C2A, 0x7D8F, 0x7D1D, 0xEFFD, 0xB257, 0x66A9, - 0xA311, 0x54BA, 0xE6CA, 0x95DC, 0xBD74, 0x5DDB, 0x12E6, 0x65AC, 0x8858, 0x1995, 0x31EF, 0xC937, - 0xA6B1, 0x97D6, 0x5F35, 0x4D57, 0x1555, 0x65FF, 0xEA52, 0x4AD9, 0xBDD5, 0xC9A1, 0x3B59, 0xFC36, - 0x61B2, 0x99D9, 0x0D29, 0x45B3, 0x21BB, 0x5729, 0x99F6, 0x0A81, 0xD9A6, 0x62AF, 0xA8CE, 0xBB97, - 0xBAE2, 0xE97C, 0xC7F9, 0xEFCC, 0x82EC, 0xD4AC, 0x87C6, 0x4062, 0xDE79, 0x7402, 0x0050, 0x6102, - 0x0000, 0xE829, 0xC05E, 0x5038, 0x21E7, 0x1BB5, 0x2420, 0x3F53, 0x97C5, 0xBB5A, 0x4C69, 0xEECE, - 0xFF7C, 0x4D28, 0xA7BE, 0xA703, 0xDFC2, 0x2BF6, 0x69A6, 0xE663, 0xF338, 0x5CB8, 0xF3B5, 0xF7EC, - 0x7DAE, 0xBAF8, 0xA95C, 0x5A6E, 0x92CD, 0x06C8, 0x14A6, 0xA95E, 0x730E, 0xFFD5, 0x34AD, 0xE771, - 0xF2CB, 0x563B, 0x6BED, 0x88D8, 0x5A59, 0x849E, 0xB262, 0xA8F4, 0x59C6, 0xBD99, 0x7936, 0x2DAF, - 0xA577, 0xFF64, 0x5617, 0x6C24, 0x6E18, 0x3D76, 0xFD37, 0x5563, 0xC78C, 0x5A07, 0x2387, 0xBB0B, - 0x6ECC, 0x78AA, 0x4B17, 0x2D57, 0x2B4E, 0xBB3B, 0x5A7A, 0x1BBD, 0x566A, 0xEC16, 0x43E9, 0x15F1, - 0xF6CD, 0xE9E9, 0xABA5, 0x466B, 0x0FF7, 0x3DA7, 0x61C8, 0x9CF1, 0x464D, 0xC9DD, 0x9188, 0xA11A, - 0x2A45, 0x06DF, 
0x5993, 0x816F, 0xABE9, 0x12C1, 0x748E, 0xF758, 0x4D02, 0x02BD, 0x6FC7, 0xA101, - 0x0000, 0xA830, 0x0100, 0x009E, 0xFD23, 0xFC06, 0x7DEF, 0x7B30, 0x819F, 0x2D0D, 0x3BA7, 0x409D, - 0xBAAF, 0xF36E, 0xEF4E, 0x494F, 0x66F5, 0x912D, 0xBBE4, 0x4636, 0x3D35, 0x35E6, 0x8293, 0xA387, - 0xD0E2, 0x355C, 0x74A2, 0xA755, 0xF8DB, 0x098A, 0x6D47, 0x78FE, 0xF9D0, 0xB227, 0xD633, 0xB739, - 0x6C92, 0x4125, 0x29CB, 0xCEC6, 0x062D, 0xB743, 0xDC14, 0xCFA9, 0xA0C1, 0x842C, 0x51DF, 0x1673, - 0x865C, 0x2E5E, 0x31A2, 0xF9FC, 0xF505, 0x0000, 0x0000, 0x15F0, 0xB9AB, 0xAC56, 0x6FA7, 0xE619, - 0xAC67, 0x0900, 0x4C9B, 0x684E, 0xA144, 0x1F08, 0x8B8D, 0x2758, 0xF60E, 0x30A6, 0xAACD, 0x5BC9, - 0xDE09, 0x30CF, 0x702A, 0x1ED3, 0x6CA1, 0x0EA3, 0x0700, 0x3837, 0xCA97, 0x6B05, 0x860C, 0xD51E, - 0x40F1, 0xF348, 0x310F, 0xC014, 0x9EE6, 0x969D, 0xD103, 0x9BCA, 0x866C, 0xF453, 0xE6B0, 0x78ED, - 0xA264, 0x1E26, 0x081D, 0x751D, 0x4FE3, 0xD097, 0x47DE, 0xCC0D, 0xE891, 0x093C, 0x3654, 0x2C9E, - 0x120A, 0x09E0, 0x39F1, 0x60F1, 0xCFC8, 0xAAC7, 0xE1BB, 0x0575, 0xE253, 0x8310, 0x00D0, 0x7F90, - 0xC0B0, 0x36AE, 0x7188, 0x23DC, 0x657B, 0xBCB8, 0xA99C, 0x4183, 0xCDA6, 0x3C9A, 0xDCAB, 0xDFD6, - 0xB166, 0xFD0F, 0x0A70, 0x7B75, 0x38C2, 0x1B7A, 0x55C3, 0x0E20, 0x3CA0, 0x025F, 0xEDF1, 0x3195, - 0x8743, 0xCC3B, 0x5B6F, 0x4F7C, 0xF2B8, 0x3D71, 0xBCF7, 0xFD3D, 0x3832, 0xC7C2, 0x5415, 0xFC0B, - 0x0910, 0x90C7, 0x9A25, 0x502E, 0xEDB6, 0x8657, 0xB3A6, 0x63D9, 0xE933, 0xFB2E, 0xDD83, 0x06E0, - 0x74C1, 0xB2AA, 0x7A0E, 0x0B7B, 0xB9B5, 0x6A1A, 0xED76, 0x4DAB, 0x5477, 0xCD5D, 0xDBF2, 0xA7C7, - 0xA3C0, 0x0724, 0x0162, 0xF71C, 0x3CEB, 0xB387, 0xE3EC, 0x3663, 0x9E39, 0x4A6B, 0x68CF, 0x3539, - 0xD55E, 0xF956, 0xCDEE, 0x28F0, 0x0049, 0x800C, 0xC838, 0x76F5, 0x5CDD, 0xE598, 0xEE0C, 0xE8DD, - 0x3DBB, 0x0676, 0xAC82, 0xAB1F, 0xAFF5, 0xD869, 0x0778, 0xA6E5, 0xB0E6, 0xB09C, 0xC3A1, 0x9D35, - 0xD65B, 0xC078, 0x8024, 0x40BA, 0xC6AE, 0xB28A, 0xD355, 0x9C35, 0x781A, 0x6655, 0xB97A, 0x6D97, - 0xD0A9, 0xEE61, 0xCFE2, 0x1A67, 0x0249, 0x8040, 0xCD57, 
0xA776, 0xFAB6, 0x4497, 0x5AAB, 0xBAAC, - 0xC1A9, 0xB11C, 0x05ED, 0xBE05, 0x9200, 0x019F, 0x35BB, 0xEAD4, 0xAC53, 0x3DD3, 0xDDC0, 0x9C76, - 0x69C9, 0xEFD0, 0xE1DD, 0x6BF7, 0xE8D1, 0xA31C, 0xADD5, 0x2920, 0x0910, 0x61D7, 0xDC8E, 0x12CA, - 0xD7B4, 0xCBD6, 0xDEF1, 0xB457, 0xBB9D, 0xA7BF, 0x81C0, 0x0724, 0xF07F, 0x3BFF, 0xBBB5, 0x6F7D, - 0x553D, 0x2100, 0x4078, 0x0E98, 0x4131, 0x9E07, 0x9017, 0xB67E, 0x0712, 0xC750, 0x7A37, 0x8720, - 0x00AB, 0xEFDE, 0xEAAE, 0xD5D3, 0xF600, 0xB05F, 0x77F8, 0x8404, 0x9649, 0x75E4, 0x87C3, 0xC788, - 0x5C3B, 0xF147, 0x32D3, 0xE3B1, 0xF371, 0x286D, 0x742E, 0xF025, 0x0E11, 0x25C0, 0x301A, 0xDF71, - 0x95E7, 0x3260, 0xDBD3, 0x6FD6, 0xEBC3, 0x613F, 0xB6D9, 0x817C, 0xEC7E, 0xEDB7, 0xEB1E, 0x5CEB, - 0x55A4, 0x5679, 0xF8F0, 0x436D, 0xACAA, 0x1654, 0xF4A7, 0x2EB7, 0xDED3, 0xCE7D, 0x01BA, 0xBCE6, - 0xA687, 0x10EC, 0xE092, 0x2FE6, 0xA111, 0x3343, 0x28D0, 0x4120, 0x8096, 0x8B4F, 0x7BF0, 0x4580, - 0x4D38, 0xDC45, 0x8C28, 0x6846, 0x7163, 0x10AA, 0x02FE, 0x804B, 0x0310, 0x029B, 0x7413, 0x072A, - 0x2024, 0xBF46, 0x0C17, 0x6012, 0x0CD0, 0xDD4D, 0x20C4, 0x26E5, 0xD9A5, 0x0FB0, 0x8FC5, 0xCCC0, - 0x744C, 0x7642, 0x882B, 0x87CA, 0x39B6, 0xE751, 0x3F6A, 0x4533, 0x6C75, 0x9BBC, 0xABD6, 0xC110, - 0x05FF, 0xAA95, 0xE150, 0x0EFF, 0x5D8F, 0x78B4, 0xEF7C, 0x767D, 0xE0DF, 0xCFCC, 0x20DE, 0x001C, - 0x90DD, 0x7507, 0x474C, 0x1E7E, 0x6C0C, 0xAF0F, 0xCA63, 0x1434, 0xEAB9, 0x454D, 0x2322, 0x2446, - 0xE7EF, 0xC761, 0x44C0, 0xE1E6, 0x89DC, 0xA3FA, 0x80EC, 0xAE33, 0x2E02, 0x18D2, 0x09EF, 0x3708, - 0x00D7, 0x0640, 0x7B48, 0xEDDC, 0xB90B, 0xBF6F, 0xA956, 0xF4E1, 0x8309, 0x1300, 0x803D, 0x0076, - 0x6067, 0x165A, 0x62A6, 0x78F1, 0x9C59, 0xDE45, 0xA112, 0x4F5D, 0x1947, 0x5D46, 0xAF83, 0xF042, - 0xA174, 0x9C53, 0xFB3A, 0xF7E1, 0xE188, 0xA6AB, 0xCCA2, 0x515B, 0x3566, 0xF515, 0x9783, 0x4FE8, - 0x2A6B, 0x8F7B, 0xFE3A, 0x1171, 0xB4B2, 0x9FC3, 0x9999, 0x669A, 0xE66B, 0xDE9D, 0x234B, 0xAAAE, - 0xE99A, 0xAEA9, 0xDA5A, 0xB2DF, 0x87EB, 0x996E, 0xE9D5, 0xED36, 0xAFA6, 0x18F4, 0x743A, 0x31F3, 
- 0x6822, 0x723E, 0x115B, 0xDF2A, 0xEF3A, 0x7E1C, 0xA1B5, 0xE253, 0x9DBA, 0xC2F3, 0x7C9F, 0x4AD1, - 0xBF8C, 0x0A00, 0x7400, 0x800A, 0x1300, 0x0000, 0x78D5, 0x754E, 0xCB80, 0x1CB9, 0xB1FA, 0xBDA0, - 0x6F67, 0x8E6B, 0xDD73, 0xF76E, 0xB47A, 0x8D9C, 0x7B66, 0xC554, 0x388E, 0x7D68, 0x7065, 0xE074, - 0xAC69, 0x0AED, 0xD9B3, 0x5574, 0x177B, 0xE788, 0x172D, 0xB65E, 0x3EA1, 0x5E57, 0xA112, 0xB12A, - 0xCDA4, 0x652B, 0xDD42, 0x56B9, 0x4256, 0xA68F, 0x9A60, 0xAF2D, 0x52A3, 0xBA55, 0x31FD, 0x916B, - 0x0BA5, 0x3D66, 0x7F18, 0xB3EB, 0xB563, 0x553F, 0x5959, 0xAE5D, 0x3B75, 0xC0E8, 0xCB74, 0x6798, - 0x1E35, 0xC137, 0x77FF, 0xDC4C, 0x30D1, 0x0ED0, 0xD013, 0xF9B4, 0xD9E5, 0x0400, 0x00A8, 0x3001, - 0x0000, 0xE419, 0xC4AF, 0x0B15, 0xCAF7, 0xC973, 0x4E99, 0x1995, 0x8895, 0xD06A, 0xA84C, 0xA7E2, - 0x45D5, 0x6CD7, 0x3199, 0xA17B, 0xD195, 0x0D6E, 0x1BC9, 0xE5AD, 0xC26F, 0xACBA, 0x6BEF, 0xB1A3, - 0x9CBC, 0x9383, 0xD7E1, 0xACAA, 0xA917, 0xB3BE, 0x9067, 0xD543, 0x0DE3, 0x44BF, 0x97BD, 0xEECE, - 0x7CF9, 0x7AF3, 0x63E8, 0xEF4C, 0xD5F4, 0xDDD4, 0x6B8C, 0xA3B0, 0xE72E, 0x33FC, 0xAAF4, 0x9F8B, - 0x4778, 0xF77C, 0xC778, 0xBCB7, 0x857E, 0x72FE, 0x2D7F, 0xA503, 0xE7D7, 0xEF7A, 0x1709, 0x99BE, - 0x987D, 0xAD71, 0x4340, 0xEF03, 0x0000, 0x5498, 0x0000, 0x0000, 0x801D, 0xC89B, 0xEDFC, 0x8A2F, - 0xB341, 0xE377, 0xE28E, 0xEFB9, 0xCE63, 0x3DCC, 0xF437, 0xD7B9, 0x57EF, 0xEA3A, 0xA346, 0x3B7E, - 0x711D, 0xD771, 0x1DD7, 0x344C, 0xE390, 0x13E9, 0x6362, 0xD6F3, 0x2734, 0x6924, 0xE368, 0x7269, - 0xFBF2, 0x34AD, 0xB747, 0x5BCF, 0xE96D, 0xACB7, 0x4BAE, 0xD29D, 0xEA67, 0x78DD, 0xCF56, 0x5D43, - 0xCA9A, 0x97F1, 0x3BAD, 0xC07D, 0x60AD, 0x8C96, 0xCF16, 0xF4DF, 0xA131, 0x62FC, 0x5763, 0xA0FD, - 0x2C30, 0x0100, 0x0050, 0x017F, 0xDC3A, 0xA827, 0xE420, 0x07E1, 0x634E, 0x477A, 0xABA2, 0x2ADD, - 0x258C, 0x9671, 0xF275, 0x47BD, 0x7BCF, 0xBCAE, 0xE1CE, 0x11C2, 0x6AEE, 0x1C68, 0x4159, 0x5446, - 0x2055, 0xC54B, 0x758F, 0x4AEA, 0x73A0, 0xE4B9, 0x7198, 0xF444, 0x05D4, 0xFC49, 0xFAA5, 0x7BFE, - 0xE67F, 0xEEA8, 0x382B, 0x94C1, 
0x6979, 0x00F8, 0x3130, 0x7DF5, 0x0C77, 0x02A2, 0x61D5, 0xB033, - 0x3C40, 0xBAA6, 0x3802, 0x6106, 0x290F, 0x00F0, 0x6652, 0xB8AF, 0x3410, 0x5CAB, 0x4CC7, 0x10DA, - 0xC839, 0x7D9E, 0xCF83, 0x4BB9, 0x79EE, 0xB8DF, 0x1ABD, 0xDEFA, 0xF33F, 0xC277, 0x6A44, 0xF4AF, - 0x6855, 0xD52E, 0x7CF8, 0xA16A, 0x0D8E, 0x35C8, 0x4BA1, 0x1888, 0x1C51, 0x75BB, 0xB70E, 0xA201, - 0x6B5C, 0xB418, 0xF32F, 0x039C, 0x4785, 0x74F8, 0x1815, 0xB554, 0x473E, 0x60AF, 0x4F23, 0x937F, - 0x12B6, 0xD3FB, 0xBBB1, 0x1F1C, 0x5514, 0x22CF, 0xF995, 0x0F0F, 0xCCCD, 0x5CC3, 0x21E9, 0x4511, - 0xCED5, 0xA807, 0x3938, 0xE4D5, 0x337B, 0x9CA2, 0x512D, 0xF8F0, 0xFFDE, 0xA9D2, 0x8375, 0x4042, - 0xFAE5, 0x6E98, 0x496C, 0x54A8, 0x0AC5, 0x33ED, 0x6662, 0xD0C7, 0x33E6, 0x4586, 0x002B, 0x3538, - 0x9DC6, 0x89E9, 0x3CC5, 0x3C11, 0x9492, 0xF590, 0xF840, 0x8E11, 0xD208, 0x3302, 0x9F9A, 0xFBA6, - 0xBD0B, 0x83DA, 0x3CF0, 0x008A, 0x2201, 0xEC17, 0x24AB, 0xD5A1, 0x5BE9, 0x9620, 0x8E10, 0xDCB8, - 0x5F63, 0x98C7, 0x48E4, 0x5B8D, 0x201F, 0xCF91, 0x8303, 0x0C68, 0x8070, 0x8890, 0x07E0, 0x610A, - 0xEBE2, 0x2DDD, 0x8C9F, 0x9901, 0x9BE1, 0xED70, 0xBA5E, 0xA618, 0x6176, 0xFF3E, 0x7DDC, 0x6E1F, - 0x994B, 0x13BF, 0xED42, 0x59F0, 0x7D22, 0xDB16, 0x8A6C, 0x5BC8, 0xB645, 0x21DB, 0x1610, 0x3F81, - 0x8038, 0x1047, 0xFC20, 0x8E20, 0xE288, 0x0802, 0x11C4, 0x4144, 0x9020, 0xE288, 0x1F5E, 0x4924, - 0x0E89, 0x9FBC, 0x725D, 0x595C, 0x3F58, 0x5C97, 0xFC12, 0x7225, 0x81FF, 0xAB78, 0xFD07, 0x07E8, - 0xC7A1, 0x4CDC, 0x2BD4, 0xAD53, 0xB516, 0x7C5E, 0x6102, 0x0090, 0x0A60, 0x1332, 0x9192, 0xBF9B, - 0xF7D8, 0x4797, 0x7ABC, 0x7FA6, 0x1AC1, 0xCF8C, 0x70F7, 0x8AA5, 0xA716, 0x51DB, 0x0785, 0xC93B, - 0xA72B, 0x7E84, 0xAA56, 0x56A5, 0x96A6, 0x6A1D, 0xA56A, 0xFC18, 0x31E3, 0xC4D7, 0x5497, 0x31C6, - 0x94BB, 0xE7F0, 0xF18F, 0xBC95, 0x3C73, 0x24C4, 0x736A, 0xA25F, 0x4F33, 0x8754, 0xCBA2, 0x1BE6, - 0xD6CD, 0xD47D, 0xB186, 0x30B7, 0xCE98, 0x25F7, 0x0CCE, 0x1AD6, 0xF384, 0x2EEC, 0x9E26, 0x7E05, - 0x70AC, 0xA999, 0xCA55, 0xCE26, 0xD2BF, 0x3E5F, 0xBB71, 0x1EDD, 0x95DD, 
0x7E9E, 0xD344, 0x76CA, - 0x236B, 0xB5A8, 0xCC2F, 0xA890, 0x6DB3, 0xBF02, 0x34D9, 0xA003, 0x0000, 0x4085, 0x0900, 0x0070, - 0xBEDE, 0x3BF4, 0xDEEF, 0x0670, 0xBE9D, 0x003A, 0xB614, 0xE573, 0x5F05, 0x4E42, 0x02C0, 0xF9B9, - 0xE6D5, 0x0970, 0xBEBD, 0xCBDD, 0xCA09, 0x709E, 0x1603, 0x5950, 0xDE05, 0x1F5E, 0x0180, 0xF3ED, - 0x64A5, 0xEC15, 0x0F00, 0x009C, 0xAF49, 0x435A, 0x8F38, 0xC6B7, 0x02C0, 0xF976, 0x02EC, 0xAAC6, - 0xB713, 0x7EC7, 0xCABB, 0x2FD0, 0xBF5E, 0xE7EA, 0xFB31, 0x70EE, 0x98CF, 0xA706, 0x2533, 0x481B, - 0x6BEF, 0x9CCB, 0x174E, 0xF57B, 0xF6D1, 0x9CAA, 0x30D5, 0x336E, 0x19F2, 0x578B, 0x6ED7, 0x1AC1, - 0x6774, 0x5167, 0x00BD, 0x20B2, 0x16B9, 0x56F9, 0x01E0, 0xFC77, 0xB36C, 0x2F00, 0xCEB7, 0x13E0, - 0x7C3B, 0x01BA, 0xF3DB, 0x35E9, 0x0400, 0xB401, 0x80FE, 0x9E33, 0xEEC8, 0x111B, 0x6AFB, 0xF350, - 0x566D, 0x10F2, 0xA2D6, 0xBFB7, 0x35DB, 0xDFD5, 0x09A0, 0xC7D9, 0x5F64, 0x87F9, 0xEE9E, 0xFD3A, - 0x7E7D, 0x874E, 0x0044, 0x0050, 0x6102, 0x0000, 0x75B5, 0xAC92, 0x9F89, 0xDE3B, 0x6744, 0x663E, - 0xA9C4, 0x39AB, 0x9424, 0x79D8, 0x0FAB, 0xE7A6, 0x42DB, 0x3A05, 0x56CB, 0x4D83, 0x0600, 0x4885, - 0xD175, 0xFFAE, 0x1AEB, 0xFE57, 0x1AF2, 0xE738, 0x153E, 0x22C9, 0xC77B, 0x47CE, 0xA339, 0x9EA3, - 0x26BA, 0x6F91, 0x55F2, 0x33A0, 0x01CA, 0x1B24, 0x7B03, 0x80D4, 0x3D94, 0x24F9, 0xDE7B, 0x0268, - 0xB524, 0xAB1E, 0xF5E1, 0x9274, 0x9E57, 0x05B2, 0xBF94, 0x93A5, 0xEB5D, 0x3632, 0x46E4, 0xBDAF, - 0x7C0F, 0xEF59, 0xCDC6, 0x8FD1, 0x5F17, 0xD065, 0x4992, 0xE4DA, 0x78CE, 0x1AD0, 0x6EC9, 0xB5C5, - 0xE54A, 0xBA92, 0xAEDE, 0xDEF0, 0x1200, 0xB001, 0xC9BB, 0x7F64, 0x43CB, 0xFD06, 0x13EE, 0x36FA, - 0x8FCE, 0xD320, 0xF95E, 0xE6BB, 0xA724, 0xE3DE, 0xBB47, 0xCFDD, 0x9264, 0x9C9E, 0xB8A7, 0x5E7E, - 0x45F5, 0xD7C4, 0x2579, 0x6779, 0x7317, 0x9FD3, 0x515E, 0xBE87, 0xF7AF, 0x6547, 0xD338, 0xEF2A, - 0x6023, 0xC738, 0xD7C0, 0x3891, 0x1320, 0x1747, 0x0054, 0xC09F, 0x5725, 0x7C09, 0x00D4, 0xC3FE, - 0x6420, 0x7010, 0x1CB7, 0xB943, 0xE62D, 0x7D70, 0x4C24, 0xD37E, 0x4D51, 0x88A7, 0x13A9, 0x7D87, - 0x4952, 
0x1C9A, 0x546B, 0xA76A, 0x79F8, 0x0008, 0x4A80, 0x005D, 0x4E21, 0x01DE, 0xD67B, 0xAC01, - 0x4FB9, 0x21D9, 0x6E88, 0x9665, 0xDB90, 0x3087, 0x0E60, 0x42DA, 0x360A, 0x0910, 0x17BA, 0xDD56, - 0x5DED, 0xB73C, 0xFECC, 0x7126, 0x64FB, 0x4102, 0x2440, 0xC6DE, 0x6EE0, 0x13F6, 0xAABE, 0x61CC, - 0x6E81, 0x7BD6, 0x5EC0, 0x0F24, 0x0102, 0xDFDA, 0x652F, 0x60E1, 0x555D, 0xD633, 0x02FC, 0xDDB6, - 0xF50C, 0x6077, 0x6A69, 0x3D90, 0x9000, 0x2400, 0xCC72, 0x4C88, 0x0BF8, 0xD7BD, 0xF9E3, 0xF00D, - 0x10A0, 0xAD05, 0x09DB, 0xCF4F, 0x27C0, 0x3DD4, 0xD373, 0x4217, 0x702D, 0xB846, 0x6955, 0x80F1, - 0xD48A, 0xF5F5, 0x1900, 0xE303, 0x9200, 0xF9EB, 0xAA2E, 0xEB19, 0x01C3, 0xAAB2, 0x1E9F, 0xB2BA, - 0x2E81, 0xB5FA, 0xAA6A, 0xAE01, 0x2424, 0x0009, 0x0009, 0xE38D, 0x4D77, 0x2FC0, 0x0001, 0x4AB7, - 0x2450, 0xFDE8, 0x99A3, 0x4709, 0x749B, 0x6FA6, 0xEB2A, 0x7EC6, 0x3B53, 0xAB33, 0xEE76, 0x090B, - 0x4802, 0x04F0, 0x2EE4, 0xF4AC, 0xD3C0, 0x93F1, 0x94D5, 0x75FA, 0x8784, 0x0420, 0x0120, 0x6157, - 0xE66C, 0x80BF, 0xA60B, 0x0810, 0x78CA, 0xE522, 0x8135, 0x1C4F, 0x2F80, 0xD97D, 0x4C65, 0xF67A, - 0x0761, 0xD76D, 0x55F5, 0xB37B, 0xAE08, 0x4002, 0x3FBE, 0xEEDB, 0x35F4, 0xDEF7, 0xDEF5, 0x3EB4, - 0xBF57, 0xEFDF, 0x7D55, 0x756C, 0xDD7B, 0xFFFE, 0xDDD7, 0xBBEB, 0xADDE, 0xF51E, 0x3E7C, 0xB842, - 0x0F55, 0xAD87, 0xAA61, 0x55F8, 0xF0E1, 0xC3EB, 0xDD76, 0xC71A, 0x7E7C, 0xEFDF, 0xF77A, 0x91BA, - 0x7877, 0xEB5D, 0x54F5, 0x2EFC, 0x6D61, 0x0200, 0xA0BC, 0x4F15, 0xE8BF, 0x394B, 0x1CED, 0xB9B5, - 0xA0AD, 0xAF5D, 0x5642, 0x9260, 0xD53C, 0x34EC, 0x9C0A, 0x39D1, 0xC67E, 0xF8DC, 0x9354, 0x6850, - 0x96A7, 0xC227, 0xF67F, 0x0789, 0x31D7, 0x2AC7, 0x7B0E, 0x2579, 0xCFEB, 0x4258, 0x779D, 0x38D9, - 0xA47D, 0x068C, 0xF288, 0xEA4C, 0xA00F, 0x0028, 0x6F90, 0xEC0D, 0x0052, 0xF750, 0x92DC, 0x0090, - 0x87B2, 0x3FE6, 0x7AD0, 0xDFA9, 0xE2FB, 0x90F9, 0xF229, 0x79B8, 0xFB44, 0x13B7, 0x3154, 0x75F2, - 0xC505, 0x802E, 0x4B92, 0xE419, 0x2F7A, 0xC21A, 0xB403, 0xD64A, 0x4ED6, 0x6055, 0x41C1, 0x65FE, - 0x6FBD, 0x76CC, 0xEFC4, 0x0990, 0x97AB, 0x5194, 
0x31CF, 0xD8A3, 0xAB13, 0xBFB7, 0x9F92, 0x51CE, - 0x5CA5, 0x5B6E, 0x6EE6, 0x4ECC, 0xFB02, 0xD13A, 0xFB1B, 0x0090, 0x097E, 0x8B7C, 0xDB03, 0x39B3, - 0xAD5B, 0xDE7B, 0x9F7B, 0x354B, 0x8ED0, 0x88F3, 0x9E57, 0x0DF9, 0xE9D9, 0xEE66, 0xE578, 0x1500, - 0x0015, 0x2600, 0x0000, 0x1268, 0xBEB1, 0x8877, 0xBF2D, 0x99E5, 0x3EEE, 0xA021, 0x7395, 0xFDB5, - 0xDBA9, 0x79F7, 0x4292, 0x7CC4, 0xC16B, 0xBC8B, 0xF4E5, 0xBC31, 0xFAFD, 0xEF02, 0x6377, 0xEF27, - 0x0028, 0x6F90, 0xEC0D, 0x0052, 0xF750, 0x92DC, 0x0000, 0xF37B, 0xDE80, 0x9A95, 0xF962, 0x6FBD, - 0x7BDE, 0x9C78, 0x9224, 0x9318, 0x65F5, 0xFC7B, 0xE403, 0x233D, 0xA55A, 0x4408, 0x4097, 0x2549, - 0xF2AD, 0xF96A, 0x403B, 0xB45B, 0xBE35, 0xC065, 0xFE6F, 0xED1F, 0x23BE, 0xA5F2, 0x4BD4, 0x00C0, - 0x329B, 0x762C, 0x03E9, 0x639D, 0xBBB7, 0x8EFE, 0x0CCA, 0xAAB4, 0x47AC, 0x96C5, 0xA59E, 0x066D, - 0x6395, 0xF29E, 0xCF81, 0x4E00, 0x80BC, 0xD534, 0x90BF, 0x3A7D, 0xCEB9, 0x142C, 0xE4EB, 0x7722, - 0x99FB, 0x73DF, 0x38B5, 0x7C9A, 0xDFE4, 0x167D, 0x6FA8, 0x0000, 0x1526, 0x0000, 0xD0CE, 0x07FC, - 0xDA6A, 0xB1C6, 0x9DB0, 0x4243, 0xD759, 0xF09E, 0x6F9E, 0x668F, 0xDC77, 0xCD32, 0xEF73, 0x1788, - 0x1F73, 0x45D6, 0xCC41, 0x433B, 0x0100, 0x80F2, 0x5E48, 0xF606, 0x00A9, 0x7B28, 0x49B2, 0xF659, - 0x71E0, 0x0038, 0x794F, 0x4E24, 0x27AD, 0x94CF, 0x0828, 0x992D, 0x92EF, 0xA937, 0xBBEE, 0x05FA, - 0xF7C9, 0x5AEF, 0x23E7, 0x1B72, 0x9457, 0xA3A2, 0xFFAE, 0xA0CB, 0x9224, 0xC9D1, 0x6A39, 0xDE77, - 0x2377, 0x660E, 0x48EE, 0xAAE4, 0x24D1, 0xFF60, 0xE858, 0x3A96, 0xCECC, 0x0090, 0x0AED, 0x3053, - 0x9F8E, 0xF55F, 0xA07F, 0x17AC, 0xF63A, 0x7257, 0x538C, 0x5D91, 0x64AE, 0xE7AD, 0x29BF, 0x676F, - 0xE5E4, 0x547D, 0xF7CA, 0xACEA, 0x65EC, 0x95AC, 0x48CE, 0xB677, 0x0AE4, 0x7390, 0x24EF, 0x3ED7, - 0xFBC4, 0x84B3, 0xEAD0, 0xF8F1, 0xFCC5, 0x47FA, 0xF767, 0x8FAB, 0xD604, 0x0000, 0x2AE0, 0x4355, - 0xE586, 0xBA0A, 0x5EAB, 0x850F, 0x1F3E, 0x7CA8, 0x063D, 0x7CBE, 0xBFBF, 0x6F15, 0xAE50, 0x0D55, - 0x413D, 0x7CF8, 0x7A59, 0xEB78, 0xA82A, 0x7CF8, 0xF0E1, 0xC36A, 0x3DD4, 0xE1C7, 0xFBFD, 
0xAB0E, - 0xABA0, 0x1A56, 0x857A, 0xF8B7, 0x1320, 0x4BE2, 0x6DA6, 0xEE01, 0x5ED5, 0x655F, 0x0290, 0x9000, - 0x2400, 0x5C8B, 0x2564, 0x829C, 0x5DCF, 0xD0E3, 0xB4EB, 0x3240, 0x0087, 0x0EAB, 0x9240, 0x92ED, - 0xE7E7, 0x7641, 0xC653, 0x4FD7, 0xDC93, 0x0716, 0x57AA, 0xBA06, 0x5CC5, 0x3D80, 0xBB3F, 0x2B4B, - 0xE003, 0x925A, 0x136E, 0x672B, 0x1B08, 0xC471, 0x0D01, 0x8D6D, 0x4308, 0x2463, 0xCF82, 0xCC32, - 0xF08D, 0xAF16, 0x8440, 0x0864, 0xBBF7, 0x8038, 0x8017, 0xF81C, 0x77D7, 0x8685, 0xDD73, 0xC05A, - 0x3DAF, 0x0103, 0xF881, 0x1F38, 0xC31E, 0x0EE0, 0x14CF, 0x70F0, 0x0E79, 0x9A77, 0x085C, 0xC3E2, - 0x1F3C, 0x0AA4, 0x268F, 0xFC45, 0xE9A2, 0x6B8D, 0x099C, 0x6117, 0x7054, 0xE06A, 0xE01A, 0x96C0, - 0x3D04, 0x7BF4, 0xC838, 0x0EE0, 0x6EE0, 0x2D4A, 0xFE5A, 0x05BF, 0x7BA8, 0xC02F, 0x700F, 0x81A7, - 0x810C, 0x4701, 0x194E, 0xE0B3, 0xD69C, 0xD339, 0xA6CE, 0x1ABA, 0xDA1E, 0xE5AC, 0xBB4E, 0x395A, - 0x4795, 0x3DAA, 0x966D, 0x770D, 0x47DB, 0xAE1E, 0x55D3, 0xF21E, 0xD6AC, 0x39AC, 0xD9A3, 0x75D4, - 0xEC51, 0xCEBA, 0x470D, 0x87C3, 0x5135, 0x1C0E, 0xC733, 0xD61A, 0x8E6D, 0x8DED, 0x7074, 0x2A8E, - 0x1E76, 0x8F39, 0x346B, 0x38A6, 0xEDD1, 0x3147, 0x8F35, 0xDC5A, 0x0E1D, 0x7B38, 0xD61A, 0x67D9, - 0x35A7, 0xE5F0, 0x3E63, 0xEFE3, 0x1CDE, 0xB3D4, 0xEAD1, 0xA566, 0xB467, 0x0DC7, 0x331C, 0x3D1C, - 0x7BB4, 0x6B38, 0x66ED, 0xD163, 0x0FC7, 0x1A73, 0x1CE7, 0xB0D4, 0xB287, 0xDACE, 0x51A3, 0xC699, - 0xB719, 0x35F6, 0x70EC, 0x6159, 0x6A6D, 0xE7A8, 0xB1ED, 0xB2E6, 0x74D4, 0x70F4, 0x98C3, 0xF10C, - 0xAB47, 0x8DBD, 0xACDA, 0x8E35, 0x7ACC, 0xE1B8, 0xC65A, 0xC3B1, 0x4A5B, 0xADD9, 0xBA56, 0xAB23, - 0xB586, 0x434B, 0xC71C, 0xCEB6, 0x9DC7, 0xB9D4, 0xD153, 0x2DC7, 0x1CA5, 0xE358, 0xD535, 0xDAD6, - 0xDDD6, 0xB8A6, 0x6BD4, 0xE872, 0xDC6B, 0x389E, 0xE168, 0xDDA3, 0xC71E, 0x8E59, 0x7BF4, 0xD8C3, - 0xB1C6, 0x1CC0, 0x812F, 0xAD82, 0x5655, 0x870F, 0x1FFE, 0xBFDB, 0xD586, 0xAA0A, 0x3EBF, 0xEDFE, - 0xBB0F, 0x5F58, 0x1DFB, 0xAABF, 0x4DAB, 0xE0E1, 0xBF12, 0x9800, 0x7E59, 0xEBDE, 0xE5DB, 0xDE9E, - 0x9FB1, 0x0460, 0x67AF, 
0x0770, 0x79FF, 0xF00A, 0x4CEC, 0xC21A, 0xAC03, 0x7079, 0x3A00, 0x97A7, - 0x435E, 0x1600, 0x48DD, 0x43E3, 0x7F2B, 0xB4A1, 0x0EC0, 0xE5E9, 0x005C, 0x46C4, 0xD395, 0x7AB9, - 0x01B8, 0xBCEF, 0xA10F, 0xE4A6, 0xBD9F, 0x9B7D, 0xFC6F, 0xAD0F, 0x00E8, 0xB2BD, 0x280B, 0x7500, - 0x2E4F, 0x07E0, 0xF274, 0x002E, 0xC74A, 0xC4D3, 0xB500, 0xC8D0, 0x0100, 0x0072, 0x46AC, 0x578E, - 0xB86E, 0x4E1F, 0xA4CF, 0x7823, 0xAD78, 0x8F0E, 0xE032, 0x7F4B, 0x8CB3, 0xFB8D, 0x2C0D, 0x7DC0, - 0x658C, 0x478D, 0xD9F5, 0xF2FC, 0xF2FE, 0xBBDD, 0x9123, 0x2E08, 0x7400, 0x0040, 0x0500, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x5801, 0x0035, 0xF30D, 0x8CC8, 0x7C99, 0x6F00, 0xC088, 0x5401, - 0x648C, 0x4C01, 0x1923, 0x017D, 0x5522, 0x0520, 0x6324, 0x00E8, 0xCB7C, 0x0315, 0x9001, 0x9C4C, - 0xCF6F, 0x007C, 0x7EFC, 0xDAC8, 0xF856, 0x80DF, 0xAF01, 0x64AF, 0x91BF, 0xC177, 0xDDB4, 0x1334, - 0xA00F, 0x0020, 0x6364, 0xFA46, 0xFE18, 0xF9E3, 0xD706, 0x808C, 0xF5F3, 0x9F7C, 0x0300, 0x3E3F, - 0x4647, 0x2600, 0xA8E7, 0x271E, 0xDFC2, 0x6A00, 0xC0CA, 0xF90D, 0x1A99, 0xF016, 0x0F30, 0xE0BB, - 0xD203, 0x0018, 0x77F1, 0x7B05, 0x00C6, 0x5DFC, 0x0DBE, 0xAB01, 0x34EC, 0x04C0, 0x49F8, 0x2EFE, - 0x181D, 0x7012, 0x0034, 0xCECF, 0x0200, 0x56CE, 0x6F00, 0x8CBB, 0xF8BD, 0x0200, 0x8DCC, 0x3730, - 0x0040, 0x3D3F, 0xF1F8, 0x167F, 0x0C01, 0x008D, 0x37E8, 0x583F, 0x56EE, 0xAA78, 0x833E, 0xAE5F, - 0x039F, 0x023B, 0x62A0, 0x018D, 0x18A8, 0x6FD4, 0x38AD, 0x02D7, 0xA0A7, 0xCCA5, 0x8FD8, 0x0F56, - 0xEE92, 0x1D50, 0xDFA0, 0x5512, 0x6F64, 0xACD1, 0x0119, 0xEB5F, 0x712C, 0xADF2, 0x061A, 0x768D, - 0x0E58, 0xB94B, 0x7640, 0x7D63, 0x2F3D, 0xC503, 0x196F, 0x818F, 0x8FBE, 0x3768, 0x0734, 0xECBB, - 0xBA94, 0x6BB4, 0x903D, 0x3905, 0x8EA5, 0x6B74, 0xC4C0, 0xDBF5, 0x6995, 0x3790, 0x8106, 0x3420, - 0x23BE, 0x41D7, 0xA063, 0xE929, 0xE5BB, 0xBA64, 0xC05B, 0xACF2, 0x061A, 0x768D, 0x7EB0, 0x7297, - 0xEC80, 0xFA06, 0xAD92, 0x7823, 0x03D9, 0x89D8, 0x68D8, 0x0E40, 0xC376, 0x40C6, 0xC837, 0x909D, - 0xB01D, 0xD0B0, 0x633D, 0x23F2, 0x0D54, 0xFBF8, 0x788B, 0x6BA8, 
0xFC7E, 0xFE93, 0x6FA0, 0x0119, - 0x23EF, 0xDA4B, 0xD76F, 0xEF92, 0x1D70, 0xD2EE, 0x1473, 0x5D05, 0x7640, 0xC6C8, 0x3436, 0x2E3B, - 0xA061, 0xC7FA, 0xD190, 0x3190, 0x3D89, 0xCC07, 0xBF41, 0x1FF1, 0x0027, 0x6D95, 0xABE2, 0x0DCA, - 0x6F2B, 0xD51E, 0x2063, 0xAF57, 0x9E02, 0x3BA0, 0x613B, 0x6223, 0x7B12, 0xA9E2, 0x818C, 0xD39D, - 0xF9E6, 0xD2B1, 0xF4B1, 0xB463, 0x7DC7, 0xE921, 0xE37D, 0xFA6D, 0xE553, 0xE82B, 0x9F0F, 0xD7CF, - 0x0FD7, 0x883C, 0x053E, 0x6207, 0xE02D, 0x8EA5, 0x6BA8, 0xFC7E, 0xFE93, 0x6F20, 0x63E4, 0x5D7B, - 0xE9FA, 0xED5D, 0xB223, 0xD3EE, 0x1473, 0x6915, 0xD801, 0x1923, 0xD3D8, 0x0940, 0x43C6, 0x4023, - 0x361A, 0xB603, 0x3246, 0x6EB1, 0x85BE, 0xF3F7, 0x5276, 0x40C3, 0x8EA5, 0x3B21, 0x3BF1, 0x06ED, - 0x8086, 0xEDC8, 0x7C03, 0xF80D, 0x5A1F, 0xAEAF, 0xF21B, 0xB423, 0xD356, 0xD157, 0x813B, 0x6BD0, - 0x9DF9, 0x6CFD, 0x1B77, 0xF19F, 0xB4EB, 0x8BBE, 0xCC7C, 0x8046, 0x6646, 0x6C7C, 0x7E02, 0x27CF, - 0xA778, 0x8093, 0x7627, 0x54BB, 0x1332, 0x66F1, 0x4623, 0x131A, 0xF01B, 0x7427, 0x649C, 0x1E20, - 0x6316, 0xC898, 0x25D2, 0x56B9, 0x710A, 0xEC90, 0x94AA, 0xA07C, 0x977C, 0x975C, 0x831E, 0x2063, - 0x6406, 0x648C, 0xCC80, 0xDFA0, 0x07C8, 0x387D, 0x5785, 0x3C05, 0x9E02, 0x276D, 0x07E5, 0x92EF, - 0xAA90, 0xA7C0, 0x5320, 0xE374, 0x0D7A, 0x808C, 0x9119, 0x909D, 0xB01D, 0xD0B0, 0x3BA1, 0x611F, - 0xB143, 0x2FD3, 0xEE84, 0x9390, 0xB197, 0x3EAE, 0xF18A, 0x074E, 0xC278, 0x23FF, 0x7D31, 0x4E3B, - 0xE0E4, 0x1BC8, 0x9825, 0x52C0, 0xE5B7, 0x1390, 0x8DEC, 0x35E8, 0x58F7, 0xAA07, 0x8DD8, 0xC846, - 0x031A, 0x9081, 0x9376, 0x9281, 0x0634, 0x009F, 0x0FD7, 0xC80F, 0xDF15, 0x7C97, 0x6C33, 0x6D07, - 0x6427, 0x62A3, 0xDA9D, 0xF96C, 0x951B, 0xE7C3, 0x35E8, 0x4EE8, 0xAB12, 0x990F, 0x2EDF, 0x15E5, - 0xBBE4, 0x1A74, 0xD280, 0x0C9C, 0xB41D, 0x92D2, 0x90A7, 0xC00E, 0x5B05, 0x1E3C, 0x3020, 0x032B, - 0x912A, 0x70F9, 0x0D8C, 0x720D, 0xD1C8, 0x7C03, 0x8DD8, 0x68D8, 0x0EC8, 0x18B9, 0x45B6, 0xD077, - 0xFE5E, 0xDA01, 0x0D3B, 0x96EE, 0x84EC, 0xC41B, 0xB403, 0x1AB6, 0x23F3, 0x0D00, 0x00EA, 0x5AFB, - 
0x3ED4, 0x5FF5, 0xF7AF, 0xFDEA, 0xA7BF, 0x7FFF, 0xDAA1, 0xFF77, 0xEA57, 0x7DD5, 0xE001, 0x00A0, - 0xFE5E, 0xA157, 0x6B6B, 0x7BAF, 0xDADD, 0x579B, 0xFCF7, 0x5EE1, 0x0DEF, 0x5EE1, 0xF555, 0x8557, - 0x4FF6, 0xEFEB, 0x7D78, 0xEBDD, 0xEBBD, 0xE061, 0x5555, 0xA0AA, 0x2A50, 0x5555, 0xA3AA, 0x2A50, - 0x5555, 0x85AA, 0x5675, 0xAAAA, 0x0A55, 0x5505, 0x6AB5, 0x0A55, 0xFD54, 0x550D, 0x5A55, 0x0555, - 0x553D, 0x55AB, 0x5550, 0x5515, 0x7655, 0x5550, 0x5555, 0x5055, 0xEB59, 0xB5AA, 0xAAAD, 0xDE53, - 0x5555, 0xADBA, 0xAA50, 0xB52A, 0x5055, 0x55D0, 0x9F00, 0x0000, 0x0000, 0x00C6, 0x29F0, 0x5DF5, - 0x0000, 0x0000, 0x0000, 0x00C0, 0x0200, 0xFF28, 0x810B, 0xFEF4, 0x9F31, 0x658C, 0x630C, 0x0000, - 0xC075, 0x685C, 0x87E6, 0x8997, 0xBA38, 0x897D, 0xBA98, 0x5CCB, 0xD8C7, 0xB87D, 0xD5EC, 0x110E, - 0x36F0, 0x1800, 0xA013, 0xEF09, 0x7EF4, 0x5BA5, 0x4E4D, 0x8266, 0x896A, 0x1B4D, 0xA26A, 0x8B26, - 0x31B1, 0xA026, 0x492C, 0x6092, 0x08D0, 0x57F2, 0x14A0, 0x2600, 0xB4CA, 0xEF37, 0x0881, 0x387D, - 0x2F68, 0x0802, 0x0000, 0x0000, 0x0000, 0x004A, 0x0000, 0x0305, 0x209C, 0x7A60, 0x908C, 0x1600, - 0xCAA5, 0x718A, 0xF09E, 0xF686, 0x50B6, 0xC80B, 0xC0CA, 0xF648, 0x9C25, 0x1523, 0x00EC, 0xD8F3, - 0xF4FE, 0x9556, 0x0840, 0x8C35, 0x356E, 0x40BB, 0xC200, 0x002C, 0x01AE, 0xDC2A, 0xBA00, 0x87EB, - 0x955E, 0x3500, 0xF0BD, 0xE0FB, 0x9936, 0xA27F, 0x5B59, 0x7E53, 0x38AE, 0x9FDF, 0x27BE, 0x672B, - 0x4EB6, 0xC4D1, 0xF916, 0x71EE, 0x2E0D, 0xDBF5, 0xEC82, 0x2B0A, 0x2994, 0x409A, 0xA039, 0x88A6, - 0xB3F0, 0x24D1, 0xF3F5, 0x5C48, 0x122E, 0x9F43, 0x2609, 0x8A50, 0x89E4, 0xB010, 0x901C, 0x0100, - 0xC1C0, 0x946D, 0x296E, 0xD9C6, 0xB22D, 0xC31B, 0xF7E8, 0xCECC, 0xEA27, 0xF9A7, 0xD0CA, 0xB21D, - 0xCB75, 0x6CD7, 0xF6BF, 0x8374, 0xE759, 0xFB12, 0x9FD9, 0xC4B7, 0xA5ED, 0x938A, 0xE75B, 0xDC68, - 0x25D0, 0xB765, 0xBE96, 0x2511, 0x46B7, 0xD25C, 0xDA97, 0x4B25, 0x4942, 0xF16D, 0xE4E2, 0x5822, - 0xCA92, 0xB6CC, 0x4D12, 0x8DBE, 0x852D, 0x121C, 0x00C0, 0x41CC, 0x77D3, 0x0A27, 0xF57E, 0x7FAF, - 0xA080, 0xDFB7, 0xA54D, 0x7C4B, 0x5AC4, 
0x927D, 0xFCBA, 0xA2FB, 0x0703, 0x4685, 0xEA37, 0xCA4D, - 0x1CE3, 0x912A, 0xBAD2, 0xF6AA, 0xB467, 0x2948, 0x2539, 0x7225, 0x8E31, 0xC295, 0x441A, 0x4B80, - 0x2F39, 0x0925, 0xB184, 0x2BF1, 0x4400, 0x3009, 0x4030, 0x6368, 0xFFED, 0xFE38, 0x0C8C, 0xE354, - 0xDBA7, 0xF8AE, 0xA921, 0xDCD5, 0xD43C, 0x7A08, 0xC6D1, 0x2407, 0x83A0, 0xE000, 0x0074, 0xBF47, - 0xF3FC, 0xA9D0, 0x21A8, 0xB34D, 0xDAC7, 0xCA91, 0xC513, 0xA121, 0x8CA0, 0xDAAA, 0xC75B, 0xA804, - 0x4B0C, 0x64C8, 0x84CF, 0x1680, 0x2B64, 0x3889, 0x0002, 0x3800, 0xD0F5, 0xBF7F, 0xBF24, 0x0000, - 0x0008, 0x295B, 0x0FCD, 0x8713, 0x4AD7, 0x2180, 0xC101, 0x1009, 0x7D22, 0xA879, 0x53C0, 0x14D2, - 0x610C, 0x90CD, 0x1594, 0x8A9E, 0x2129, 0x95C0, 0x3516, 0xF50D, 0x83A9, 0x400A, 0xB726, 0xEA8C, - 0x0094, 0x85FA, 0xF456, 0x976F, 0xF9B4, 0xAA5A, 0x7481, 0xEEDD, 0x00A0, 0x6755, 0x8D76, 0x532B, - 0xDEBA, 0x5975, 0x70D7, 0x667C, 0xDDEA, 0x5B8B, 0xCD98, 0xE330, 0x0655, 0x7D66, 0x2FF0, 0x9BAB, - 0xF6F1, 0xEF3D, 0x86A6, 0x3228, 0x5277, 0xF410, 0x08A1, 0xD61A, 0x9919, 0xAC82, 0x54AA, 0xE953, - 0x3DEE, 0x82D0, 0xA95E, 0x9C91, 0xBDF7, 0x5E7C, 0x39A3, 0x9F90, 0xA993, 0x5AA3, 0x87B8, 0x9D55, - 0xCBE3, 0x7572, 0x7A2C, 0xDF19, 0x722A, 0xE419, 0x4551, 0xA4C3, 0xD875, 0x46C0, 0xD945, 0xB8A1, - 0x72B2, 0x3273, 0x826B, 0xB466, 0xC63D, 0xFBCD, 0x9EC2, 0x4C31, 0x8A50, 0xA38B, 0x919B, 0x511D, - 0xEF02, 0x20FD, 0xAD05, 0x4D16, 0x1CBD, 0xE852, 0x7FF5, 0xA686, 0x275A, 0x3F7E, 0x6D77, 0x77EF, - 0xFE7F, 0xFFF8, 0x7DD7, 0xF79F, 0xD4C7, 0x1440, 0x9377, 0x445D, 0xC813, 0xCA9B, 0x3B36, 0x04A2, - 0x6D4D, 0x9B18, 0x9345, 0x0573, 0x1B7D, 0xA7AA, 0xA20E, 0x8F04, 0x0084, 0xA003, 0x43A4, 0x1B8D, - 0xF387, 0x04AC, 0x1CB0, 0x9273, 0x5843, 0x51AA, 0x9B51, 0xA1A9, 0xA4D0, 0x0407, 0x21E4, 0x9E05, - 0x4A7B, 0x07D5, 0x1813, 0xEA1B, 0xA09E, 0x58BB, 0x5811, 0xAD3A, 0x15CB, 0x6EC4, 0xE005, 0xD520, - 0xAA21, 0x1F6E, 0xDCE2, 0x79C4, 0x90C5, 0x5235, 0xB33C, 0x46BF, 0xA511, 0xA439, 0x4212, 0x8131, - 0xF6CD, 0x9C37, 0x3C9E, 0xD6CF, 0x735B, 0x0CF6, 0xF5A1, 0xFB14, 0x5E9D, 0x38E7, 
0x2408, 0x41CC, - 0x9743, 0x8ADA, 0xFE07, 0x54A6, 0x33ED, 0x684D, 0xF668, 0x2864, 0x1D0F, 0xC00F, 0xD076, 0x6DD9, - 0xF9DB, 0xDD06, 0x331F, 0xF33A, 0xF972, 0x164E, 0x1FF6, 0x1EEE, 0xF5FD, 0xEFD7, 0xB97D, 0xC4B3, - 0x009C, 0xB9B3, 0xEE91, 0xA36B, 0x7DAD, 0xA77F, 0xF541, 0x38E0, 0x4ED0, 0x9E2D, 0x0D80, 0x4EF2, - 0x15C8, 0x3C8D, 0x1DF9, 0x4EB7, 0xEDEE, 0xCAE5, 0xA012, 0x103D, 0x8CC0, 0xE67C, 0x2C01, 0x446B, - 0x75EB, 0xDCE4, 0x8858, 0xBAE6, 0x7AB9, 0x14CC, 0xF465, 0xDEB5, 0xF13E, 0x809D, 0xC10A, 0x6586, - 0x9792, 0x20E0, 0xF770, 0xAB77, 0x3DFE, 0xFA53, 0xF5D5, 0xAAF7, 0xE08A, 0xDC03, 0xD029, 0xA25D, - 0x1CE4, 0x0E6C, 0x9505, 0xCA5C, 0x8032, 0x1B22, 0x0755, 0x30E0, 0x74E8, 0xD786, 0xECE0, 0xCB30, - 0xC6C2, 0xC764, 0x753E, 0x4EF9, 0x37E4, 0xDD06, 0x6110, 0x0054, 0x27A5, 0xC67A, 0x88BE, 0xFAEA, - 0x6395, 0xE090, 0x035A, 0x31A5, 0xD70A, 0x1278, 0xF45F, 0x97F0, 0x247A, 0xE628, 0x3F66, 0x8B97, - 0xEBD5, 0x6583, 0xD329, 0x88C1, 0x0D16, 0xA636, 0x70FB, 0xD000, 0x02EE, 0xF323, 0x4096, 0x459D, - 0x2407, 0xA7AA, 0x6AFF, 0xA7EB, 0x7DDF, 0x1B84, 0x0169, 0x1082, 0x6934, 0x0F5D, 0x1E3D, 0x6C2D, - 0xA39D, 0x2483, 0x20C1, 0x03B0, 0xAECE, 0x7AB0, 0x1C5F, 0x02A8, 0xE7ED, 0xF47D, 0x52DE, 0x4B4B, - 0x6102, 0xC8C7, 0xD653, 0x7CBC, 0x7D64, 0x1079, 0xCE63, 0x933D, 0x9B73, 0x2904, 0x08CE, 0x0F9B, - 0x9C86, 0x8644, 0xB89A, 0x2CFE, 0x75FF, 0x0F79, 0xD784, 0xF43B, 0xA220, 0x273C, 0xCC14, 0xB6E3, - 0x063B, 0xAE2F, 0x3E61, 0xF342, 0x073B, 0x79EB, 0x3E57, 0xBF26, 0xED18, 0x8037, 0x0304, 0x701E, - 0x6CBF, 0x610C, 0xCBE1, 0x9397, 0xD1AE, 0x6246, 0xB2BF, 0x6731, 0x4507, 0x1406, 0x4E0A, 0x0268, - 0xED30, 0x6417, 0x0304, 0x58D6, 0x3BEB, 0xE259, 0x9CF2, 0xD787, 0x261B, 0xEEDB, 0x0C84, 0x45BE, - 0x72FE, 0x0FB1, 0x2364, 0x7FF4, 0x958E, 0x3EDB, 0x48E1, 0x4170, 0x060C, 0x435A, 0xEBF8, 0x9D91, - 0x4D1C, 0x71CD, 0x7D46, 0x38BE, 0x7C82, 0x74C1, 0x01E6, 0x9F16, 0x9CF5, 0xB3E9, 0x74EF, 0x139C, - 0xF573, 0xE860, 0x2774, 0xD6CF, 0xA103, 0xEE47, 0x593F, 0x870E, 0x467B, 0x6F9E, 0x6789, 0xC214, - 0x0317, 0x820B, 
0xAD8D, 0xEDD8, 0x00E3, 0x52D9, 0x3E24, 0x0000, 0xC04E, 0x0100, 0x0090, 0xFDBE, - 0xF6FA, 0x8D80, 0x0200, 0x0000, 0x0000, 0x80C1, 0x8A9B, 0x2902, 0x118C, 0x97B0, 0xA58B, 0x18BF, - 0xBA6F, 0x3F02, 0x52BB, 0x7A74, 0xDE22, 0x270A, 0x0340, 0xFA47, 0x11A8, 0x90C6, 0x18C7, 0x35B4, - 0x8747, 0x3567, 0x0600, 0xF883, 0x0CA0, 0x7A1A, 0x7767, 0x147B, 0x2E5F, 0xF40A, 0x405D, 0x1EE5, - 0x61B7, 0xCEAF, 0x2175, 0x42B9, 0x9CFB, 0x0CF5, 0x3C8E, 0x76D2, 0x4B79, 0x0F86, 0x19A8, 0x888E, - 0x2674, 0x910D, 0xA7E2, 0xDBD0, 0xCE41, 0x3E1F, 0x403C, 0x72C6, 0x313E, 0xDD27, 0xFF2D, 0x60BC, - 0x0700, 0xFE7F, 0x1B00, 0x0000, 0x0000, 0x0000, 0x9F31, 0x568C, 0x630C, 0x1DB1, 0x111B, 0xD447, - 0x65D3, 0x5746, 0xE769, 0x7598, 0x7C67, 0x5878, 0x7869, 0x0031, 0xCB9F, 0xECB8, 0x89E3, 0x1DE3, - 0x0C66, 0x2709, 0x3100, 0xFF11, 0x00A0, 0xF321, 0x0A7E, 0x2409, 0x49A2, 0xCF13, 0x15B5, 0x6DA2, - 0x5DAB, 0x1ABB, 0x5A6D, 0x825D, 0x35A0, 0x6D5B, 0x0DC4, 0xB6AD, 0x4092, 0xB606, 0x2056, 0x0390, - 0x68C2, 0xEF37, 0x3E20, 0x56DF, 0x8B89, 0x2000, 0x0000, 0x0000, 0xB469, 0x92C0, 0x0200, 0x5004, - 0x7227, 0xE9B4, 0x32CA, 0xCFCB, 0x6239, 0xF500, 0x3067, 0xE441, 0x0054, 0x7420, 0x756B, 0x6B00, - 0x0DC9, 0x3106, 0x90E0, 0xBC00, 0x400A, 0xE3B9, 0xE71B, 0xE0F8, 0x95FE, 0x4E32, 0xE2F6, 0xDC44, - 0xF56F, 0x7078, 0xB311, 0x99FE, 0x8C69, 0x6EDE, 0x7199, 0x2EFD, 0x26F5, 0x0579, 0x0000, 0xE8F8, - 0xD3B1, 0x7426, 0x4BC5, 0x463E, 0x6A37, 0x2A3E, 0xC2A7, 0xE3BC, 0x9429, 0x131C, 0x6414, 0xF71C, - 0x9732, 0x6152, 0xE3CD, 0xFEE7, 0x7D57, 0xACA2, 0xB5B7, 0x6F55, 0xC6BC, 0xE70B, 0x55CB, 0xAFB3, - 0x9235, 0xB932, 0xDD63, 0xAADA, 0x3A7B, 0xA934, 0x7DB4, 0xA45E, 0xDF95, 0x31FB, 0x72EA, 0x0B4B, - 0xE950, 0x6878, 0xBE17, 0x4AFF, 0x5E41, 0x9836, 0xA6B7, 0x1157, 0x91F1, 0x7899, 0xA632, 0x425B, - 0x57FB, 0x1B31, 0x8E1B, 0x3BE9, 0xE23A, 0x7801, 0x00C0, 0x1C2A, 0x2B31, 0x62DD, 0xF275, 0x8092, - 0x8C63, 0x8C13, 0xB12A, 0x5AE9, 0xF9A5, 0x5CAA, 0x9932, 0xDE3D, 0xBDA9, 0xD011, 0x23BD, 0x2696, - 0xFDFB, 0xCC4F, 0x8993, 0xCAA5, 0xBE97, 0x91FE, 0xD293, 
0x8E9C, 0xB36B, 0xA736, 0xC278, 0xD1DB, - 0x7853, 0x70F5, 0x6A96, 0x5D1C, 0x2D19, 0x97DA, 0xF4DA, 0x920F, 0x00C0, 0xF451, 0x8CDA, 0xE80F, - 0x2326, 0x8F4B, 0xCB58, 0x9251, 0xA723, 0x750E, 0x65D2, 0x9A7E, 0xD679, 0xCEF5, 0x68B4, 0x657D, - 0x75CF, 0x2E6D, 0x8479, 0x8FF7, 0x9E37, 0x07E5, 0xD5F6, 0x4766, 0x6EF7, 0x7C7E, 0x8A11, 0x75E3, - 0xFA3E, 0xF102, 0x7DCB, 0xDC46, 0xC8AB, 0xA602, 0xA01E, 0x2BD2, 0x4B1B, 0xE9E4, 0x4083, 0x635C, - 0xB7C5, 0xBD27, 0x8449, 0x153C, 0xEEBB, 0x1D25, 0x9907, 0xC284, 0xB273, 0xE512, 0xE7D8, 0x3DAE, - 0x6477, 0x6734, 0xD151, 0x3DA4, 0xF318, 0xA729, 0x341E, 0x3523, 0x973D, 0x87B7, 0x660C, 0x35BC, - 0x73F7, 0xA945, 0x9DFA, 0xE898, 0xDD8B, 0x815B, 0x5935, 0x3AFC, 0x6FE8, 0xAE6A, 0x0911, 0x393B, - 0x0700, 0x0880, 0xFCDF, 0x7F5F, 0x0178, 0x91A3, 0x478F, 0x0E00, 0x004C, 0x3094, 0x7A81, 0x1917, - 0x2EE3, 0x0D38, 0x30CB, 0x2DE8, 0x5C64, 0xF881, 0x7D1E, 0x6187, 0xFE3A, 0x8FA6, 0xB9D1, 0x1BB1, - 0x4551, 0x8338, 0x9008, 0x6A37, 0x5D21, 0xB0BF, 0x32F6, 0x0ED5, 0x0489, 0xCEC8, 0xB603, 0xF562, - 0x0400, 0xA008, 0xE0A4, 0x8A3F, 0xDCC3, 0x2930, 0xF8E4, 0x6960, 0x9413, 0x1847, 0xE054, 0xC3AE, - 0xD9B3, 0xD935, 0x7B76, 0xB4DA, 0x023E, 0x335D, 0x391D, 0x7367, 0x5865, 0xCD3E, 0x7EBC, 0xDAF5, - 0x3AE1, 0xAD85, 0xEF6C, 0xEF23, 0x6CF8, 0xA34F, 0xD60A, 0x7BEF, 0x0D83, 0x9B26, 0xC586, 0x93E3, - 0x7139, 0xA5AF, 0xD097, 0xA595, 0x93E9, 0xCC56, 0x68BB, 0x822C, 0xF9B2, 0x292C, 0xAABB, 0xEB1D, - 0x0270, 0xD97A, 0x4E20, 0x4E56, 0x9F33, 0x6F81, 0x2B1D, 0x715A, 0x54BB, 0x0A78, 0x014F, 0x59CD, - 0x0ADC, 0xABB6, 0xE803, 0x50DB, 0xFDEF, 0xAFEF, 0xAE6F, 0xED3E, 0xB58E, 0x9381, 0x2780, 0x8351, - 0x2789, 0x03C4, 0x67F0, 0x058D, 0xFE76, 0x79E2, 0xB611, 0x9A4E, 0x631C, 0xFBF2, 0x74B5, 0xC31F, - 0x752F, 0x2E86, 0x8344, 0xF8B2, 0xE5C4, 0x0945, 0x52C1, 0x9D9B, 0x50A1, 0x8234, 0x09B6, 0xF06C, - 0x1E05, 0x1C7E, 0xAD1D, 0xEFC1, 0xF03E, 0x7336, 0xB92D, 0xCE5A, 0xFD2C, 0x94B6, 0xE5CD, 0x1ACB, - 0x6D7A, 0x3D38, 0xC16A, 0xA5C1, 0xF365, 0xDF2C, 0x8E9A, 0xBD01, 0xFC73, 0xABBE, 0x040E, 0x0068, 
- 0xD653, 0x0F08, 0x5C79, 0x65AB, 0x3753, 0x5001, 0x428E, 0x4EEC, 0xA0E0, 0x523D, 0x29E8, 0x2002, - 0xC0C3, 0x5FDF, 0xC544, 0xBD04, 0x644C, 0xE5C3, 0xCEDF, 0xCBB9, 0x0BE6, 0x600E, 0x46D4, 0x7A4F, - 0x0EB6, 0x0307, 0x6DA7, 0xB97D, 0x0D37, 0x7DE1, 0x7E9C, 0x1D16, 0x710D, 0x61C8, 0x8460, 0xD180, - 0x64A1, 0x4038, 0xAD54, 0x5AE5, 0xE122, 0xC0C6, 0x9152, 0xE980, 0x9938, 0x5522, 0x6639, 0x3A29, - 0x330E, 0xB82D, 0x07FD, 0x0C82, 0xC793, 0xA874, 0x2F2F, 0xCADA, 0x8B1F, 0x9E4B, 0x3855, 0x9D11, - 0xFA14, 0x5285, 0x65B3, 0xD943, 0x4282, 0x3091, 0x46C7, 0x1928, 0x07F3, 0x0CD1, 0x6275, 0x0B2D, - 0x5F78, 0x491B, 0x047A, 0x46DC, 0x5631, 0x28F3, 0x9121, 0x8E22, 0xA1BA, 0xD02E, 0x4E7D, 0xAA4F, - 0x75EA, 0x7835, 0x75A7, 0x0100, 0x04F1, 0x21F2, 0x8F15, 0x7511, 0x10CE, 0x1CF4, 0xE564, 0x7EA5, - 0x6B93, 0xB038, 0x350E, 0x801F, 0x3073, 0xA053, 0xA5C3, 0x8071, 0x980F, 0x1523, 0x3045, 0x99C2, - 0xC6FC, 0xB91F, 0x7747, 0xE9E6, 0x76EA, 0x27BC, 0x8309, 0x39DA, 0x001D, 0x5CA9, 0x0282, 0xA956, - 0xE385, 0xDA19, 0x0F2E, 0x0F47, 0x7D21, 0x8D40, 0xF809, 0xB821, 0x28A0, 0x3FEA, 0xB2D4, 0xD600, - 0x8274, 0xDBA8, 0xBBA8, 0xE7FC, 0x55F3, 0xD99A, 0x1EE2, 0x209F, 0xD8D3, 0x3460, 0xE220, 0x061E, - 0x1091, 0x70E4, 0xEF87, 0x9BBF, 0x0AC3, 0xD098, 0xD6F7, 0x3B18, 0x849B, 0x948E, 0x9975, 0xD87A, - 0x6F34, 0x2739, 0x85C0, 0x810D, 0x4822, 0xBC22, 0xABB8, 0x90B9, 0x65F9, 0xEB84, 0x3356, 0xD58F, - 0x83FB, 0x5F62, 0x522C, 0x00FB, 0x2767, 0x81B8, 0x9A0D, 0xFFCC, 0xBD00, 0xCAED, 0x43F0, 0x9006, - 0x29A0, 0x1D9C, 0x2ED4, 0x7971, 0xCDFB, 0x6FEA, 0xC8C9, 0x11E7, 0xBB7D, 0x6FA2, 0xEC70, 0x2728, - 0x070B, 0x15E2, 0x3493, 0x4AB2, 0x40A2, 0x0561, 0x1010, 0xDC94, 0x4A6C, 0xE4E0, 0xB1EA, 0x88E0, - 0xF58F, 0xBA3E, 0xC303, 0x57FB, 0x5536, 0x0330, 0x7705, 0xA9E6, 0x9AE3, 0xDA62, 0x8793, 0x80E7, - 0x3531, 0xE8C8, 0xD7CE, 0x6618, 0x8311, 0xD163, 0x85C1, 0x814C, 0x2E9F, 0x4611, 0x18A0, 0x5DBB, - 0xC1CE, 0xE622, 0x8150, 0x0950, 0x7A64, 0xADBC, 0xD185, 0xBF14, 0x6817, 0xA47F, 0x1CDF, 0x6200, - 0x2433, 0xF7BA, 0x38FE, 0x537F, 
0x96B3, 0xE400, 0x211C, 0xA004, 0x381F, 0x6168, 0x7A02, 0x44D4, - 0xE2C5, 0xC35B, 0xAD04, 0x0EB5, 0x98A6, 0xE5F3, 0x3CB5, 0x6FD5, 0x789F, 0x542C, 0x5028, 0xC71F, - 0xB0C1, 0x6903, 0x6F27, 0x948C, 0x1C4E, 0x4D5B, 0xCA77, 0xBB93, 0x010D, 0x034A, 0x84F0, 0x9F4B, - 0x0601, 0x8218, 0x0335, 0x733C, 0x9F1F, 0x4302, 0x0040, 0x42A2, 0xD470, 0xD292, 0xC815, 0x3601, - 0x00CE, 0x433F, 0x1CE9, 0x5247, 0xCB5C, 0x6610, 0xBC5B, 0x0283, 0x0B1C, 0x5301, 0x0E91, 0xEF88, - 0x4790, 0x547D, 0xDCEC, 0xF132, 0x9CD2, 0xABAA, 0x1AAF, 0x8F8A, 0xB014, 0x7055, 0x4434, 0x03F0, - 0x9CE0, 0x2457, 0x061F, 0x4E5C, 0x2760, 0x919C, 0x6EE8, 0xE81D, 0x3435, 0x1C9C, 0x03C2, 0x4940, - 0xE829, 0x1204, 0x760E, 0x2E09, 0x886D, 0x3A55, 0xDF6B, 0x030C, 0x4355, 0xFBB1, 0xE23E, 0xCFF4, - 0x2209, 0x4240, 0x721E, 0x6461, 0x0F47, 0x1A56, 0xF80B, 0x360B, 0x45F2, 0x85BB, 0xAE77, 0x30DD, - 0xFC1C, 0xF49F, 0x344F, 0x4CE6, 0x9093, 0x7448, 0xE8C6, 0xF0A5, 0x1CC0, 0x9274, 0xAF7D, 0x75F0, - 0xE19B, 0x00C4, 0x5296, 0x0740, 0xB795, 0x441C, 0x5A4F, 0xE29E, 0x871A, 0x437F, 0x576E, 0xCFD1, - 0x00E1, 0x1BA5, 0x0F29, 0xDC39, 0x4538, 0x8FA5, 0x65B8, 0x1122, 0xD18D, 0xD2FA, 0x31ED, 0x3CEE, - 0xAA00, 0x69D7, 0x9375, 0x4014, 0x1435, 0x5361, 0xA58F, 0xA07C, 0xEF5D, 0x0000, 0x20B3, 0xC737, - 0x118E, 0x635E, 0xAE1F, 0x7EC7, 0xDCA7, 0x0BA1, 0x00AA, 0x20B8, 0xD71D, 0x5F15, 0x0078, 0x25FF, - 0xFCFB, 0x6F79, 0xAD0A, 0xE156, 0xC843, 0xE2DC, 0x1993, 0x8CB0, 0x1A13, 0x258C, 0x165E, 0x7AFB, - 0x9E0B, 0x72FD, 0xF648, 0xDEAE, 0x07FE, 0x9301, 0x0000, 0xCC3F, 0x8660, 0xDE8B, 0xDFDC, 0x6368, - 0x3D67, 0x5D4D, 0x7AD4, 0x1E2F, 0x6398, 0xFE35, 0xCD74, 0xC027, 0x99AE, 0x2814, 0x5AB9, 0x3053, - 0x5C4B, 0x309E, 0x6AC5, 0x50BC, 0xB80C, 0x4F16, 0x0000, 0x0000, 0x6083, 0xF02B, 0x5778, 0x2EDD, - 0xF828, 0x300C, 0x0100, 0x0000, 0x0000, 0x00E0, 0x3587, 0x2650, 0x4263, 0xE64E, 0x844B, 0x043B, - 0xC926, 0x7F1C, 0xCBA0, 0x06E5, 0x37F1, 0x59AF, 0xCE21, 0x4EBF, 0x8231, 0x45D6, 0x8003, 0x6E00, - 0x603D, 0x1BDA, 0x205B, 0xEC12, 0x9EC8, 0x3DF1, 0x78B1, 0x81E7, 0x3968, 
0x3AEC, 0x7D2E, 0x9B3D, - 0xA066, 0x0E00, 0x12E0, 0x05C9, 0x63E5, 0x0726, 0x87D2, 0x16E9, 0x01EA, 0xDD8D, 0xC9E0, 0x4B0E, - 0x33CC, 0x4C1F, 0x2739, 0x960B, 0xDCE3, 0x6091, 0x1032, 0x6016, 0x9DAC, 0x53AA, 0xBA2F, 0x5040, - 0xB9A7, 0x3638, 0x7364, 0x5D22, 0xA90C, 0x6887, 0x8B19, 0x29BC, 0x348E, 0x19BA, 0x58D5, 0xFA48, - 0xEA56, 0xCFB4, 0x660C, 0x0901, 0x92E5, 0x01B8, 0x6F66, 0xE1B6, 0x4164, 0x134F, 0x99EE, 0xD4BC, - 0xE409, 0x3A20, 0xF3DC, 0x9B15, 0x7BDB, 0x6257, 0xF81A, 0x88DE, 0xD389, 0x8FF8, 0x0DC8, 0xB1DF, - 0xDC40, 0x6A1D, 0x9411, 0x78EC, 0x39EC, 0x1403, 0x31CC, 0x870C, 0xCD59, 0x6240, 0xF31C, 0x473E, - 0x3106, 0x79F9, 0xE4B4, 0x5FFA, 0x5785, 0x8A93, 0x3A20, 0xBCE1, 0xA861, 0x1266, 0xCB01, 0x0400, - 0x91E4, 0x50E5, 0x55B3, 0xFA18, 0x60F4, 0x76B4, 0xF828, 0x2E2F, 0xC1FB, 0x4E33, 0x1428, 0x8F83, - 0x43C0, 0xC011, 0xC20F, 0x855B, 0x7DF5, 0x0207, 0x0A12, 0x2AAB, 0xE638, 0x330D, 0x2C68, 0xFBB9, - 0x3433, 0x41DF, 0xD993, 0x733D, 0x194B, 0x8E0A, 0xDE73, 0xC0B8, 0x3100, 0x949D, 0xBD27, 0x0EE0, - 0x729C, 0x7E0F, 0xA607, 0x3ED7, 0x50BF, 0x8E80, 0x59B3, 0x3AEE, 0x9FAF, 0x7379, 0x925B, 0x15F7, - 0x92EF, 0x1272, 0x0A1E, 0x00BD, 0x7E1B, 0x0000, 0x0000, 0x0000 +void PTR4* memmove(void PTR4* dest, const void PTR4* src, u32 len); +extern const char BINK_ERROR_OPENING_FILE[]; +extern const char BINK_ERROR_NOT_BINK[]; +extern const char BINK_ERROR_NO_COMPRESSED_FRAMES[]; +extern const char BINK_ERROR_OUT_OF_MEMORY[]; + +#define BINKTRACKNEWFORMAT 0x10000000 +#define BINKTRACKFREQMASK 0xffff +#define BINKTRACKBITDEPTHSHIFT 27 +#define BINKTRACKCHANNELSSHIFT 29 +#define BINKTRACKBITDEPTHFLAG 8 +#define BINKTRACKBASEBITS 8 +#define BINKTRACKSTEREOFLAG 1 +#define BINKTRACKBASECHANNELS 1 +/* Track types pack frequency in bits 0..15, 8/16-bit audio in bit 30 (flag 8 after the >> 27 shift), stereo in bit 29.
*/ +#define BINKTRACKFREQ(tracktype) ((tracktype) & BINKTRACKFREQMASK) +#define BINKTRACKBITS(tracktype) \ + ((((tracktype) >> BINKTRACKBITDEPTHSHIFT) & BINKTRACKBITDEPTHFLAG) + BINKTRACKBASEBITS) /* 8 or 16 */ +#define BINKTRACKCHANNELS(tracktype) \ + ((((tracktype) >> BINKTRACKCHANNELSSHIFT) & BINKTRACKSTEREOFLAG) + BINKTRACKBASECHANNELS) /* 1 or 2 */ +#define BINKTRACKCHANNELSHIFT(tracktype) (((tracktype) >> BINKTRACKCHANNELSSHIFT) & BINKTRACKSTEREOFLAG) +#define BINKTRACKDECOMPFLAGS(tracktype) \ + (((s32)(tracktype) >= 0 && ((tracktype) & BINKTRACKNEWFORMAT) != 0) ? BINKACNEWFORMAT : 0) +#define BINKTRACKISOPENABLE(tracktype) ((s32)(tracktype) < 0 || ((tracktype) & BINKTRACKNEWFORMAT) != 0) +#define BINKFRAMEOFFSETMASK 0xfffffffe +#define BINKFRAMEKEYFLAG 1 +/* Frame-offset table entries use bit 0 as the key-frame marker. */ +#define BINK_FRAME_OFFSET(frameoffset) ((frameoffset) & BINKFRAMEOFFSETMASK) +#define BINK_FRAME_KEY(frameoffset) ((frameoffset) & BINKFRAMEKEYFLAG) +#define BINK_MEMBER_OFFSET(type, member) ((u32)&((type*)0)->member) /* byte offset of member within type (offsetof equivalent) */ +#define BINK_SND_CALLBACK_OFFSET BINK_MEMBER_OFFSET(BINK, snd_callback_buffer) +#define BINK_FROM_SOUND_CALLBACK(callback) ((HBINK)((u8 PTR4*)(callback) - BINK_SND_CALLBACK_OFFSET)) +#define BINK_IO_CALLBACK(io) ((RADCB_CALLBACK PTR4*)&(io)->callback_control.callback) +#define BINK_SOUND_CALLBACK(bink) (&(bink)->snd_callback_buffer.callback) +#define BINK_NEXT_TRACK_FRAME(frame) \ + ((BINKTRACKFRAME PTR4*)((u8 PTR4*)(frame) + sizeof((frame)->size) + (frame)->size)) /* size counts the bytes after the size field */ +#define BINK_SOUND_CALLBACK_PRIORITY 20 +#define BINK_ERROR_BUFFER_SIZE 256 +#define BINK_MAX_TRACKS 8 +#define BINK_LOGO_DWORDS 0xE80 /* 3712 */ +#define BINK_MILLISECONDS_PER_SECOND 1000 +#define BINK_TWO_FRAME_MILLISECONDS 2000 +#define BINK_FIXED_SHIFT 16 +#define BINK_FIXED_1 0x10000 /* 1 << BINK_FIXED_SHIFT */ +#define BINK_RECT_SORT_KEY_SCALE 0x10000 +#define BINK_RECT_SORT_KEY_SENTINEL 0x7fff0000 +#define BINK_RECT_SORT_KEY(top, left) ((top) * BINK_RECT_SORT_KEY_SCALE + (left)) +#define BINK_CHROMA_SHIFT 1 +#define
BINK_CHROMA_SCALE (1 << BINK_CHROMA_SHIFT) +#define BINK_CHROMA_PLANE_COUNT 2 +#define BINK_CHROMA_ROUND_MASK (BINK_CHROMA_SCALE - 1) +#define BINK_CHROMA_ALIGN 8 +#define BINK_CHROMA_ALIGN_MASK (BINK_CHROMA_ALIGN - 1) +#define BINK_COPY_SCALE 2 +#define BINK_MASK_BLOCK_SHIFT 4 +#define BINK_MASK_BLOCK_SIZE (1 << BINK_MASK_BLOCK_SHIFT) /* 16 */ +#define BINK_MASK_BLOCK_ROUND_MASK (BINK_MASK_BLOCK_SIZE - 1) +#define BINK_MASK_BLOCKS(value) ((value) / BINK_MASK_BLOCK_SIZE) +#define BINK_DIRTY_SPLIT_MIN_SIZE (BINK_MASK_BLOCK_SIZE * 2) +#define BINK_RECT_SORT_TOP_SENTINEL 0x7fffffff +#define BINK_OPEN_OVERRIDE_UNSET 0xffffffffU +#define BINK_SOUND_OFF 0 +#define BINK_SOUND_ON 1 +#define BINK_VIDEO_OFF 0 +#define BINK_VIDEO_ON 1 +#define BINK_MASK_PLANE_GUARD_BYTES BINK_MASK_BLOCK_SIZE +#define BINK_MEMORY_HEADER_WORDS (sizeof(BINKHDR) / sizeof(u32)) +#define BINK_SOUND_BUFFER_ALIGNMENT 0x100 /* 256 */ +#define BINK_SOUND_SAMPLE_ALIGNMENT 4 +#define BINK_SOUND_BUFFER_ALIGN_MASK (BINK_SOUND_BUFFER_ALIGNMENT - 1) +#define BINK_SOUND_SAMPLE_ALIGN_MASK (BINK_SOUND_SAMPLE_ALIGNMENT - 1) +#define BINK_SOUND_BITS_8 8 +#define BINK_SOUND_BITS_16 16 +#define BINK_SOUND_BYTES_PER_16_BIT_SAMPLE sizeof(s16) +#define BINK_SOUND_PRIME_MILLISECONDS 0x2ee /* 750 */ +#define BINK_SOUND_END_PREROLL_NUMERATOR 3 +#define BINK_SOUND_END_PREROLL_DENOM_SHIFT 2 +#define BINK_16_TO_8_SAMPLE_SHIFT 8 +#define BINK_UNSIGNED_8_SAMPLE_BIAS 0x80 +#define BINK_PERCENT_SCALE 100 +#define BINK_IO_BUFFER_LOW_PERCENT 75 +#define BINK_IO_BUFFER_RESUME_PERCENT 89 +#define BINK_IO_BUFFER_USED_PERCENT(bink) \ + ((((bink)->bio.CurBufUsed + 1) * BINK_PERCENT_SCALE) / ((bink)->bio.CurBufSize + 1)) /* +1 on each term: the divisor stays non-zero when CurBufSize is 0 */ +#define BINK_PRELOAD_THRESHOLD_NUMERATOR 9 +#define BINK_PRELOAD_THRESHOLD_DENOMINATOR 10 +#define BINK_FILE_HEADER_BYTES 8 +#define BINKGETKEYDIRECTIONMASK (BINKGETKEYNOTEQUAL - 1) +#define BINKGETKEY_DIRECTION(flags) ((flags) & BINKGETKEYDIRECTIONMASK) +#define BINK_FRAME_BEFORE_FIRST ((u32)-1) +#define BINK_FIRST_FRAME 1 +#define
BINK_RECTS_UNCALCULATED -1 +#define BINK_MAX_CONSECUTIVE_SKIPS 4 +#define BINK_ARRAY_BYTES(count, ptr) ((count) * sizeof(*(ptr))) +#define BINK_FRAME_OFFSETS_BYTES(frames, ptr) (((frames) + 1) * sizeof(*(ptr))) +#define BINK_SOUND_BUFFER_BYTES(bytes) (((bytes) + BINK_SOUND_BUFFER_ALIGN_MASK) & ~BINK_SOUND_BUFFER_ALIGN_MASK) +#define BINK_TRACK_BUFFER_BYTES(bytes) (((bytes) + BINK_SOUND_SAMPLE_ALIGN_MASK) & ~BINK_SOUND_SAMPLE_ALIGN_MASK) +#define BINK_SOUND_PRIME_BYTES(freq, tracktype, dropped) \ + (mult64anddiv(((freq) << BINKTRACKCHANNELSHIFT(tracktype)) * BINK_SOUND_BYTES_PER_16_BIT_SAMPLE, \ + BINK_SOUND_PRIME_MILLISECONDS - (dropped), BINK_MILLISECONDS_PER_SECOND) & \ + ~BINK_SOUND_SAMPLE_ALIGN_MASK) +#define BINK_SOUND_END_PREROLL_FRAMES(rate, div) \ + (((rate) * BINK_SOUND_END_PREROLL_NUMERATOR) / ((div) << BINK_SOUND_END_PREROLL_DENOM_SHIFT)) +typedef struct BINKTRACKFRAME +{ + u32 size; + u32 decoded_size; + u8 data[1]; +} BINKTRACKFRAME; + +static char binkerr[BINK_ERROR_BUFFER_SIZE]; + +static u32 TrackNums[BINK_MAX_TRACKS] = { 0 }; +u32 LogoData[BINK_LOGO_DWORDS] = { + 0x42494B69, 0xF8390000, 0x01000000, 0xCC390000, 0x01000000, 0xE0000000, 0xFC000000, 0x0A000000, + 0x01000000, 0x00000000, 0x00000000, 0x35000000, 0x003A0000, 0xEC2B0000, 0xBE6315EC, 0x38C6B0CE, + 0x702C6C80, 0x1E7D1243, 0xBE134265, 0x9F1453A2, 0x596431B3, 0x5176C4F4, 0x6187B9E5, 0xF478916E, + 0xBCF4198E, 0x7ACDB3BE, 0xA19CB3BE, 0xAD1CB3CE, 0xD1BAF9CE, 0x3DBA49BE, 0xDEBC1A20, 0x9770E809, + 0x1C43009A, 0x1F2703A1, 0x97EB42C4, 0x11881010, 0x228E1084, 0x40428488, 0x38424412, 0x228E8084, + 0x10104188, 0x237EC40F, 0x026D1D71, 0xE929C4A1, 0xAF9EB645, 0x2171F4B4, 0xAD962441, 0xD0D316F4, + 0xF54A1C81, 0x3ADA56D1, 0x424246DA, 0x163D4516, 0x32F4B455, 0x42B290D1, 0x537895DA, 0x4216D0B6, + 0x68B1856C, 0xA1A72DB6, 0x856CD1B6, 0x506C0BD9, 0x966869C9, 0xB6906DA1, 0x6D49B22D, 0x32E07BC1, + 0x5038A609, 0x0026A0A7, 0x7418FEFB, 0xC07A7906, 0x98404A47, 0x9C1348E9, 0x88730229, 0x1D714E20, + 0xA57A6D0D, 
0x3BEFE4E7, 0xD576E498, 0xB1361CFB, 0xCEFE7424, 0x60028509, 0x0026905F, 0xA5437FBD, + 0xD8761F26, 0x90D25DC4, 0x890913F8, 0x3D9CC8F7, 0x61622B2F, 0x98306122, 0xFFF9F8EC, 0x7BD0DEDF, + 0x730BE3BD, 0x708FF1AF, 0xE6857302, 0x403B1300, 0x4C003801, 0x2762431E, 0x4CA474A8, 0x714E20A5, + 0x234E62F7, 0x98E7D4B7, 0x2313403A, 0xD261C044, 0x4A87B19B, 0x15F3FD26, 0x90D21127, 0xC065C6B7, + 0x703201E0, 0x720FD03E, 0xEF75C044, 0x4E950EF5, 0x0A9C1348, 0xE9887302, 0x29F5B0DD, 0xB3115757, + 0x87FAAD72, 0x4E003061, 0xC2840A13, 0x00808D92, 0x2449F6AA, 0x378B927C, 0x5B8F5C96, 0x64442439, + 0xF25D23C8, 0xF36CAE1F, 0x696B9E56, 0xEEA1EF05, 0x00E75892, 0x5C4F00D0, 0x726AEC46, 0x36339164, + 0xBCE7AF8F, 0xCFB8CBE6, 0x8DF54FD2, 0x908AAD35, 0x4B4E803C, 0xF91B291C, 0xFDB3F200, 0x80139664, + 0x9CAE6F49, 0xD6030048, 0xEF1749F2, 0x140000FD, 0x60DF7C0B, 0x9DBB61FE, 0xD7A2036D, 0xC1E3EC92, + 0x99912419, 0x2DEE64FC, 0x0AADDB22, 0xF14CBCED, 0xF7E91580, 0x14BCF75D, 0x0000A830, 0x0100D828, + 0x490EBE07, 0x0000A4CE, 0x03DA17F9, 0x5A11BB6E, 0x9531E6D0, 0x5CE71073, 0x45922447, 0xD1FC0FEA, + 0x81489DF5, 0x350FEC00, 0x38C792E4, 0xBA02404A, 0xDF352549, 0x9249DC91, 0x13924FC7, 0x7705FA77, + 0x0609E0AC, 0xEC639D57, 0xBE3BF75D, 0x28801396, 0xE4794A00, 0x80F47E91, 0x248F00E5, 0xF768EDD7, + 0x8133A03D, 0xE25E7BC8, 0x9C91BCB3, 0xCB474B0E, 0xF43ECF82, 0xF4B741AE, 0x4FEFD6F7, 0x6335E807, + 0x20D7F812, 0x00800A1C, 0xBEFFAAAA, 0xAA9A9999, 0xE1FF802C, 0x832C1391, 0x3C459E39, 0xC93F7F33, + 0x4892AAAA, 0xAAC1711C, 0xC771DC01, 0x000034B4, 0xA8425524, 0x570CFAA1, 0x710F608A, 0x59410725, + 0xF4795428, 0x3D4AA1FA, 0xE8F4182E, 0xDA112E94, 0xF0830A26, 0x47E8F3C3, 0xABE2775E, 0x2BA37FB4, + 0xC7611209, 0x5B0DE51C, 0x5399BCB5, 0x4A7AD2E4, 0x5874EAA8, 0xE60C491A, 0x5E209937, 0x0624CF99, + 0x28CD1C81, 0x92968C95, 0x80A9D152, 0xF25567D1, 0xC3A686A2, 0x0A697A80, 0x01E0C369, 0xBED9D9A0, + 0x3C3D6321, 0xA462A0DB, 0x266AE37A, 0xD05BD554, 0x06046E07, 0x6059A1D6, 0x98E00568, 0x4560B13E, + 0x08002609, 0x9070C6EA, 0x723C8624, 0x84248424, 
0x84581546, 0x750DC21D, 0x202163AF, 0xFA423236, + 0xD54F73FA, 0x1965558F, 0x54DB76F7, 0xE8D2003B, 0x4002642C, 0xD778F274, 0x8D8424B4, 0xEA29C858, + 0xAEF1E469, 0x6B852400, 0xAB6A5480, 0xD845E519, 0xA5094908, 0x99A3ED76, 0x5A01323E, 0x3FFF2440, + 0x1200C812, 0x880BF881, 0x5535ACB2, 0x80192001, 0x32BA1C3D, 0x8095AAAB, 0xF9E0EDAA, 0x430277DB, + 0x05210921, 0x0158F338, 0xCBC91900, 0xCF585596, 0xD5CC0620, 0x09900448, 0x00E01FF3, 0xB2E102FE, + 0x19071020, 0x01922500, 0xF4D06A9A, 0xD94F1509, 0xD00D2109, 0x2161CDE9, 0xEE336015, 0x64BC73BA, + 0xFB0C5805, 0x24019200, 0x2E20B380, 0xAC799C65, 0x9DAEB266, 0x80044802, 0x003FF003, 0x9FAD1328, + 0x022109A1, 0x46F65D3F, 0xF4D6D16B, 0x01C9FDBB, 0xD5FDF7DF, 0x7FAFA1FE, 0xFEDDDEBB, 0x7F577755, + 0xDFBF7F3F, 0xD4F77E68, 0x3FDCF77E, 0xFFBD1F7A, 0x7BFFBDD6, 0xFBE00F1F, 0xFE7DF7DE, 0xF32BD4C3, + 0x870FAE53, 0xB55DE1C3, 0x3EFCF0E1, 0xEADB71D5, 0xB7E1C387, 0x0FBF1ED4, 0xEBA10E1F, 0x3EFCFCCA, + 0x5AE1C30F, 0x3F7C491D, 0x5C491DFC, 0x5D610200, 0xB05192CC, 0xF56A712C, 0xC97DEAD9, 0x3D942419, + 0xEB3D616D, 0x447C5675, 0xC81D6B77, 0xAC998B82, 0xDEECE7D5, 0x28E33F46, 0x6D30CE1C, 0x2F4ECEB1, + 0x24B99E00, 0x10FB4492, 0x24C75592, 0xB725C97C, 0xEF46FC86, 0xC8C789D8, 0xA7BF7AAF, 0x552149F2, + 0x2400407A, 0xFF48923A, 0xF3BF8AA6, 0x0AA4BF0B, 0x8EFBDE3D, 0x22C92CAF, 0x8C364EAC, 0x7E1306F0, + 0x9411B1CA, 0x0D000040, 0x850900C0, 0x464992EB, 0xA2AF3EBE, 0x1F399BEF, 0x41CEE67B, 0x90B3D1E7, + 0xAF892B54, 0x00972D24, 0x9F89CFAA, 0x4611E6B7, 0x6124BD45, 0x9224D752, 0xFF3DFA94, 0x7308E4CD, + 0x7272B795, 0x4F8B6900, 0x38C792E4, 0x7A02404A, 0xDF489264, 0x75E53D27, 0x7B528306, 0x0D1A3468, + 0x10EB6FD2, 0x31B4F7B1, 0xC985395F, 0xA124473B, 0xA312B256, 0xA1244FEA, 0x0100A4F7, 0x8F24493E, + 0x1FC9054D, 0x630DB5F7, 0x332057FB, 0xC6AD368D, 0x648D753F, 0x3B0BA838, 0x01685F95, 0x61B774BE, + 0x2D54748F, 0x739F7002, 0xB4AB8EB7, 0xE4D60FA0, 0xA577BFF9, 0x03D8F342, 0xAFEB9C00, 0x006C9424, + 0xBF839CCD, 0x4EAC546D, 0x3E092B1B, 0x64E8F824, 0x6F1EE543, 0x8F5FADB0, 0xE676AF54, 
0x2449BE6D, + 0x66CC71F6, 0xE93D18C8, 0xCFAFAB00, 0x9C634972, 0x3D0120A5, 0x4651E74E, 0x2F2D2F92, 0x2459BD1B, + 0x00C05C01, 0x207BA958, 0x2B9F5D7B, 0x56E558CA, 0x88B856D6, 0xF77360D7, 0xE35EBB4B, 0x324E6D8F, + 0xE7651700, 0x00E9FD23, 0x4932C749, 0xF22D681F, 0x2F72ED37, 0xC0FC7DE5, 0x1E99D4C5, 0xB7742FE3, + 0x653C7F56, 0x9CFDEF74, 0xB8DE7183, 0xC72348C3, 0x8EF36B1C, 0x91B9F595, 0x6A156B68, 0xF1AFFA26, + 0xA97C265F, 0x1FA9F1E3, 0x7DEE03D9, 0xA7F9AE19, 0x63BF1949, 0x92A4A764, 0x3DD5DECB, 0x61758D77, + 0x308143D8, 0x7C8CD8A2, 0x672C8936, 0x8245A1F9, 0x80DFCAF0, 0x3109924B, 0x009DC6A4, 0x516D711A, + 0xEE2A9546, 0xA476A566, 0x9CBD4E20, 0x8C9CDC8A, 0x77F2B3DC, 0x6B022AE0, 0xC33FFFFA, 0xABAAC287, + 0x0FBFFEFE, 0x9FDAC387, 0x1F566155, 0xF8271220, 0x0990D5CF, 0x703CC3F1, 0x18D71E10, 0xC7711E68, + 0xE0F7AA0E, 0xCC000990, 0x0400B2FF, 0xBBF6BAEA, 0xC0BF6E66, 0x41484280, 0x1F4ECDEE, 0x5D3D8B69, + 0xAC17D8B6, 0x40C61470, 0x5CC30049, 0x00638FC7, 0xC1788AB8, 0x96A46703, 0x15CFAC12, 0x022440EC, + 0x6E012009, 0x9016DE5E, 0x85EC959E, 0xA9398710, 0x9210E073, 0xD2CE6B5D, 0xEA19AB1C, 0xBD668F1A, + 0x9EEA3981, 0x05F4A95E, 0xDD35D4A9, 0x5D3D7E9D, 0xCE394F35, 0xA57E7C49, 0x803CB644, 0x47AF1248, + 0x80044820, 0x637727E3, 0x9DA935FF, 0x025E9F5E, 0x8EB68B00, 0xC91AC0D1, 0x54D94EBB, 0xC8F6F373, + 0xEF91055C, 0xC05B7F1F, 0xA7ED31AE, 0x68B7DDAA, 0xCBA1A5F6, 0xD22E9735, 0x5C0EBBDB, 0x33EBDC45, + 0x0D059656, 0x95C59866, 0xCCE1B4DA, 0x2E6DBBA6, 0x7589F025, 0xF0E1DFAA, 0xBA76A80E, 0x0FEAF0E1, + 0xC387FFAA, 0x76E187BF, 0x9EAEA8AE, 0xF7BEF7BD, 0xC2872F6F, 0x7DDBA387, 0xEAF0E1C3, 0x3E7CF8DC, + 0x65ADD01E, 0xFE7A6675, 0xAE5E053D, 0xBF725577, 0xEE3BE83D, 0x850900B8, 0x3C03364A, 0x9E0EE938, + 0x1BCE86B3, 0xE16CB824, 0xF7864B72, 0x6FB824F7, 0x864B3289, 0x7BC7D7CE, 0x1A039769, 0xCCF706BA, + 0x4EC08723, 0x42D97FF5, 0xEE279EE3, 0xBDF20380, 0x73EC3E76, 0xAF7C7036, 0x9C6D4972, 0x6FF8BDFE, + 0x0020E7CE, 0xF2598ED7, 0xCC7778E1, 0x263B9C0D, 0x675BB23A, 0xAC0EABF3, 0xE1DEBAA0, 0x010000A0, + 0x2BF72C5A, 0xA5FDBD35, 
0xE233AA41, 0xD69AF419, 0x2598FDAF, 0x3FF472EA, 0xC7D2E4E5, 0x5A2F3462, + 0xAFD6DB53, 0xBA3F28A3, 0xD7C82DDD, 0x2061B4E5, 0x8BA34985, 0x4C8C1527, 0x56CC4C12, 0xA7474657, + 0x46D47F91, 0x2FB68974, 0x64F48B59, 0x62EE3353, 0xE247C58F, 0x56275ABD, 0x68F5A359, 0x64397F05, + 0x5FF4BAF9, 0x9A191A3D, 0x77D5639C, 0xBE531834, 0x57DE6925, 0x99479AEB, 0xBCFA9CF7, 0x6F8E68C6, + 0x6C312246, 0x99CCB4CD, 0xFC186DEE, 0x3CE71AB1, 0x383D05FD, 0x39A8F971, 0x39CF2A83, 0x9ADEBC7B, + 0x7E525D9E, 0xBEBAB27A, 0xD77E3498, 0x5DB0E607, 0x40850900, 0x0004D4AC, 0x5C809A5B, 0x2BE6A9EB, + 0x190031B3, 0xBE7A9C68, 0x1570E739, 0x830610A6, 0x7E8E6656, 0x4DDDA0AE, 0xF279BC1E, 0x6A4E7A9B, + 0xC9FC0135, 0xA756CDDC, 0xF9FA9616, 0x81E71373, 0xB616FA07, 0x3AEA0B0D, 0xD5B831EA, 0xACEDADB6, + 0xA0BFF503, 0x39F55503, 0xBF40F582, 0x5CEAFCDE, 0x99798EDC, 0x6BA5A3F7, 0x87DCCDD0, 0x8E5AF128, + 0xB0314E73, 0xB77D09F9, 0xBB113155, 0x94A39E4E, 0xFBD2FC4E, 0x6C8B89B3, 0xCE663BFD, 0x43C2E0A7, + 0x6AEAEF6C, 0xA3D2A81F, 0x0CCFF489, 0xBA570BD3, 0x5D3FB29E, 0xC8076D9E, 0xD618E9EB, 0xFF860CC2, + 0xFBB9C923, 0x8BEAFB38, 0xBCFC1C19, 0x94E4CE59, 0xE2053D54, 0xEAC4AFE3, 0xC82FAEA3, 0xA1A39FAD, + 0x9E8E128C, 0xBCAA455E, 0xB5B71CCC, 0x73B573E9, 0x2BC3E4F8, 0x7B85C361, 0x114CEAEA, 0x7598EED3, + 0x95BC51FD, 0xB353195E, 0x46F6BBDE, 0x3C4B89CA, 0x30CE3A4F, 0x4F0F9915, 0xDF7D2B23, 0xACA5C88D, + 0xF77CF2E9, 0x3852736E, 0x7DAC1CEB, 0x9C534472, 0xC69D67F6, 0x59E576EE, 0xD9CDC384, 0x99016819, + 0x09191700, 0x0040057C, 0xFBD7F7EF, 0xDFBFDBAB, 0xAA1EBFFF, 0x3EF478F5, 0xDFB7F0FF, 0x5DD5426B, + 0xDB8EEBAA, 0x7FF03AA9, 0x5E0C16B2, 0xB54E8241, 0x99260026, 0x235A8422, 0x3FDDA6E6, 0xC11193E6, + 0x35393ED5, 0x55750381, 0xE181EAF9, 0x4AEF181E, 0x8274ACD4, 0xC0A6313B, 0xE2E6C167, 0x08C36307, + 0xC458C659, 0x241F0208, 0x30FA8FFA, 0xC7FDCEEC, 0x764E08AB, 0xF67D87AA, 0xEAED55C1, 0xF79200E1, + 0xB2890BC8, 0x12541F75, 0x826A562F, 0x48D76857, 0x13D5580E, 0xA392A176, 0xB9079D67, 0xD4708490, + 0x1AEA229D, 0xEEAED1E9, 0xA471CD61, 0x18D524C7, 0x59D3C558, 
0xDDE567A9, 0xC5D27876, 0x5A3993AE, + 0xD2D339DA, 0x3B95211E, 0x794BE003, 0x9200812A, 0xAE356AE8, 0x59BE6D5D, 0x593ABB99, 0x9D33787B, + 0xD0E42D32, 0xF61E671D, 0xB239CB71, 0xA61BD6D2, 0xD6ADD1BE, 0xD6A94E5B, 0xF5741CD3, 0x51A34727, + 0x35AA8E0E, 0xA82AABAA, 0x6BD7AAFA, 0x8124E06D, 0xD96BAD4A, 0xF3EBAE65, 0xD5A5AF1B, 0xC6CE98AB, + 0x884BABD4, 0xEA9EFC83, 0xE3D46E9D, 0x5D954126, 0xF7E02773, 0xB7B5355E, 0x1FAC3362, 0x59D040E2, + 0xADDED265, 0xF578F5D1, 0x75ECD975, 0xEDA46ABA, 0x56A56B40, 0x3F739F72, 0xF32B718D, 0xB3ECAA3A, + 0x400BEC29, 0xBA1C5475, 0xB531B5B5, 0xE6E8D1AE, 0x7614670F, 0xDAE9A8B9, 0xCA513AB4, 0x9CB3468D, + 0xD5F70080, 0x047EDDDD, 0x75BFBFD7, 0x6FFB7BDD, 0xBFDDBDAB, 0xDFF5F7FB, 0xEFDDF7D7, 0x55FDF77E, + 0xA18AE882, 0x20DE7183, 0x0B0A73AC, 0x6BDF7626, 0x77A4B021, 0x24222DA6, 0x512028A8, 0xA33851F9, + 0xAC30C71C, 0x2CD17966, 0x12708A36, 0x74C836AD, 0x5C4DB71D, 0x8030E3A3, 0xFE3F6F66, 0xF31C6030, + 0x838646CA, 0xF8613324, 0x0570547F, 0xE6831ED7, 0x9D610C01, 0x34CE1658, 0x014CEBB1, 0x781CCDB7, + 0x66275234, 0xBC2F0342, 0x68803F0E, 0x20007209, 0x5FDDAD4E, 0x5A79F878, 0xC2040040, 0x74830AE4, + 0xE4BE8CB9, 0x0CF2E955, 0xE76DFACE, 0x9BEF1E1E, 0x475F2765, 0x4D47DF84, 0x307EA455, 0x2848FADA, + 0xDDEDEE76, 0x06526804, 0x8DC62411, 0x693464D1, 0x6834DF6B, 0x169838E8, 0x68D0071B, 0x0B131585, + 0x9B2F9E76, 0x3DD785BA, 0xBA861AF7, 0xD85B5D89, 0x281A924D, 0xC3B62FDA, 0x1ACED55C, 0xDDB4C0BD, + 0xEA966557, 0x9A8A35AD, 0xD5F35B8D, 0x26645FBF, 0xE67B68EF, 0x16D2D77C, 0x4FF594AA, 0x764AEABE, + 0xEE19A6A7, 0x49F91BA9, 0x77CE7894, 0xF7D69A21, 0xCDB9B465, 0x6EF4BCBB, 0x2CA3BDE9, 0x81D4BBD7, + 0x8BF6202A, 0xFAF6AC06, 0x00001526, 0xF0DDAFA3, 0xB96679CF, 0xE703CCDE, 0xC19D2C06, 0x99EB9F51, + 0x8EFF5A94, 0x4F87DC71, 0xD8ADC5E3, 0xB7198CEA, 0xDBD162CC, 0xF62C7A06, 0xEF42E958, 0xF88C5057, + 0x1277413A, 0x3A28396A, 0xB7961FCB, 0x6FEDFE5C, 0xBDDAB6CC, 0x53CDB887, 0x75EF27D1, 0xBC4F0B71, + 0xBC5C3051, 0x5C7369F6, 0x298EE159, 0x9EF7289C, 0x6CF9729A, 0xB2B329A5, 0x94524AA1, 0x1B4CF774, + 
0x83BA0C21, 0x856ECA8D, 0xF7023D99, 0x79FA523F, 0x7519ED6D, 0x27BE7F3F, 0x25279D60, 0xFC86AB60, + 0xDE77F500, 0x00002A4C, 0x98306102, 0x19487FC0, 0xDA6FC278, 0x778CAEE6, 0xD1CB3377, 0xCEE8DDD6, + 0x6DC25452, 0x9EAAF79A, 0x032978F7, 0x5B67D154, 0x55A82AB4, 0xA2C75797, 0xB1EAA37A, 0xE4D7B232, + 0x7AF2B2F4, 0xAEEE703E, 0x54953A91, 0x959E6CBB, 0x7C8A86AC, 0x1954AC7D, 0x66D66C5E, 0xB134D7EE, + 0xE37C424B, 0x5B29CB31, 0x4F592D8C, 0xA1914571, 0x1E5B9284, 0x655B702C, 0x58CEF3B8, 0xA4A399A6, + 0x69B2DF77, 0xA5DCA52D, 0xA594CD55, 0x8242D699, 0xA273354B, 0xB59AB2A5, 0xDB545B1B, 0x39115AAE, + 0xA088CC63, 0x18F53023, 0xAA549946, 0x254CF17D, 0xC5A74EDD, 0xD3C3F430, 0xD3D33D3D, 0x8CB9E25A, + 0xB5B0B71B, 0xC6FBC4DB, 0x770CA34E, 0x9D952773, 0x53600000, 0x0015F0FF, 0xB2EA35F6, 0x7FDDF80F, + 0x0A7F56A7, 0x4F9C1BD2, 0x4690A64C, 0xCDC1DA6A, 0xD0718CFB, 0x58CCFA5E, 0x737FFA23, 0x6130A673, + 0x00FEFA66, 0x34096D65, 0xA48B8C47, 0x7CDF69CD, 0x7E270CF2, 0x90326081, 0xB356483F, 0x6751D767, + 0xBED06BE6, 0x2099D6B3, 0x54008A09, 0x11811F67, 0x633E4786, 0xACEA1227, 0xA92E6E0F, 0x4198C033, + 0xCE729F26, 0xAF861B37, 0x304F0AF3, 0x29501550, 0x57A7FCD4, 0xC91BB928, 0xDC215118, 0x58AED0F2, + 0xF4E7F263, 0x180DAAA9, 0x8714D29A, 0xE248018E, 0xF6208EC5, 0x756F7B6F, 0xF6882100, 0xC4E10D67, + 0x7100DAFD, 0x2D8E0A67, 0xEC924A41, 0xF9D989C2, 0x34D00481, 0xE1065719, 0x939F1BB3, 0x2CB7CD20, + 0xB00FDFDE, 0xAD083500, 0x7CD846DF, 0x18487772, 0x50393C1C, 0x0914628A, 0x30A5605C, 0x3BD36EAD, + 0x7F7A7996, 0x6E66B8ED, 0x5F6EAB6F, 0xDFA15D85, 0x0F57AFFA, 0xACDADB7E, 0xFF7F8300, 0xE0361361, + 0x4CA03CBA, 0xE5E087EF, 0x7DFA4EEE, 0x273D29BC, 0x09C1A707, 0x16285B11, 0xF92F4BDB, 0x0322AF1E, + 0x305C0102, 0xFBED24D8, 0xB94C38AE, 0xF739ECC6, 0xAB3BC1CF, 0xA6D24AE6, 0xF77A074C, 0xC5A9A6B9, + 0x8168770D, 0xBF01D34E, 0x3620ABAE, 0x80EE7AC4, 0xC126A076, 0x7E17D383, 0xA8709475, 0x4111E555, + 0xC66BAD6B, 0x420EFA61, 0xCC801638, 0xA6419BE5, 0x2F972DC5, 0xE6EECE4B, 0x1DBF5E79, 0xEE67E060, + 0x94D0D304, 0x8CE77202, 0x8223DC31, 
0x30B39B62, 0xF68D38DD, 0xCD40A10E, 0x8925CD7D, 0x3B0E7D72, + 0x8E799E7E, 0x4D2F8C18, 0x2C200754, 0x3B54E299, 0xB00FE0D1, 0xE3D62571, 0x8103C145, 0x1256BCBF, + 0x4343FE9F, 0xEF325C11, 0x9F3ACF9F, 0x21F529FF, 0xC7072B67, 0x40580338, 0x342B76EE, 0x0C1C102E, + 0x4F57127D, 0xDEA9CB42, 0x4552E28B, 0xCDE70A5C, 0xEF179DCA, 0xC9DDDE65, 0xCF6D4F9F, 0xD6E1F37F, + 0x237B65B0, 0x07DC6582, 0xE70A287F, 0x9C2B7906, 0xA90FFF49, 0x702384B6, 0x808B5206, 0x0F063CE1, + 0x427921BC, 0xF97534E0, 0xC72EC991, 0x797E8CE7, 0x19A0E32F, 0x24C0DC86, 0x8368403A, 0x70E27A00, + 0xC052D50E, 0x3DA68012, 0xF585117D, 0xF2BE5F39, 0xE8A6FA34, 0x01CA6792, 0x35E1A0E1, 0x1E4B4FF6, + 0xBC49ADCD, 0x747AF5E2, 0x2095F2D6, 0x8461F85A, 0x8CCA0663, 0x9EF06953, 0x517E1F15, 0x7823F212, + 0x33147A84, 0x7062FD37, 0x332A6496, 0x27B9FF7F, 0xADB69E20, 0x4400C8C7, 0x01444221, 0x244ECEA1, + 0xD944E325, 0x4C916FED, 0xD459F0DB, 0x9FE51E71, 0xBF15B577, 0x7DBBFFFC, 0xF8116526, 0xBCD2FDEF, + 0xBEBE7B8F, 0x6D5755D0, 0xE1570A13, 0x0000A040, 0x5983E8BE, 0x1A4F3C0B, 0xBA6627B9, 0x954E50FD, + 0x5C929E1C, 0x1C7EA513, 0x542F33FF, 0x5AF7A765, 0xBBC94A35, 0xA8CF8A56, 0x59DF04DE, 0x3FE08FE4, + 0xF789AECD, 0xBA5AB5A0, 0x06C9AF1D, 0x57CF0D9F, 0xEF8E44E7, 0x7347A0A2, 0xF069A9F1, 0xABA6A8EC, + 0xEBE2EB9A, 0x43DF032B, 0xD3551F29, 0x6762E1E3, 0x0274E88B, 0x44E15267, 0x4A1A2F4F, 0xE255EB21, + 0xF72E7911, 0x6ED86E60, 0x3F0E9FCD, 0xE7F29FE2, 0xC1E45237, 0xD9C0E6B0, 0x5940028B, 0xCD02163B, + 0x691AA6C1, 0xFEDA03FB, 0x8C96BDFA, 0xD9D68E8C, 0xA3D99FED, 0x7B96EF88, 0xF15C3497, 0x3C79C7AD, + 0xAF7FEC28, 0x3162AB8A, 0x9E5234F7, 0xEECAAFCA, 0x96737911, 0xDB581F80, 0x01D11138, 0x2E18483C, + 0x1C242AF4, 0x46A2436E, 0x77248E0B, 0x1A12679B, 0x2D4CB5BF, 0x4DE96EA9, 0xBE635B42, 0x13B6670B, + 0x4338D216, 0x99EA3BD7, 0x4B8D6667, 0x30FAB5F7, 0x7C056AE7, 0x9BE7548D, 0x9AAF8729, 0x7E54C6E4, + 0x316B90DD, 0xAFEEB044, 0x8DCFDCF3, 0x75E4CF0B, 0x976FD4B9, 0x56070000, 0x00152600, 0x0000203A, + 0xD6BD5CEE, 0x75F33D15, 0x097EF72B, 0xC84CCF71, 0x6AB94A15, 0x0AC54A45, 
0x4A3B3731, 0x8EE37A27, + 0xD2FCE338, 0xDCBCD0B8, 0xD5E260E2, 0x7970B0A9, 0xF4014E4F, 0x8B7D7A1D, 0xD4A83D1B, 0xDE609034, + 0x02F35799, 0xF966B367, 0x25B3D907, 0xA7F1188F, 0xD1C76F74, 0x3AD2701B, 0xD7AD0E7A, 0xE25B335A, + 0x3BF7DE95, 0x0151F63B, 0xC7170000, 0x00506102, 0x00A0C358, 0x7B41DF77, 0x1B15978C, 0x15DF587C, + 0x4E7610E8, 0x824BDEE7, 0x2C919BFA, 0x4E96FD28, 0xD59116B5, 0x1869E45D, 0xA4BA893C, 0x444477F4, + 0x7C3DD424, 0x33B522E6, 0x2B695866, 0xAB86A961, 0x6A98A878, 0x3BD7BC67, 0xDB078901, 0x7050D8EA, + 0x6F596BA6, 0x4657E8EC, 0xDC119EED, 0xFAA7F34B, 0xCF97A2BB, 0xB9DB13FC, 0x7D3C75FD, 0xB48523A5, + 0x9452186A, 0xD4A2C9F3, 0x26F7CB7B, 0xDA0C19E3, 0x5F6F0B6A, 0xDA3C2072, 0xD75360AD, 0xC7556E07, + 0x229663FF, 0x0000A820, 0x900001FA, 0x29477B75, 0xF9517327, 0xDD351C3A, 0xE65CF6AC, 0xAEA553F7, + 0xA04F6A68, 0x15AF5553, 0x772BAC6B, 0xAD24A7E6, 0x4E5AED51, 0x6D14E887, 0x33549635, 0xB7678D63, + 0xADAA6A95, 0x76D7965D, 0xEF997ADA, 0x3D563B6A, 0xB456B9B5, 0x1DBD7A56, 0xAB735896, 0xF3121BB8, + 0x80244080, 0xA73C6577, 0xFA1A006F, 0x0135CBAA, 0xB26196E6, 0x38CB5E7B, 0xEBF8F48C, 0x1EDD4EED, + 0xB6A3021F, 0x9004C819, 0x5759CD9E, 0xCDF5B422, 0x63B7D543, 0x67954E9B, 0x6FB85C56, 0x97D6B0AA, + 0xCBD2F63A, 0x2A3DAACA, 0xAE41A0CA, 0x9EF5CCBD, 0xBA8772C6, 0x14480264, 0x0EA871DA, 0x51E5547D, + 0xD4A13AAD, 0xC773AAE7, 0x3AE34C67, 0x8D556B16, 0x90EEE619, 0x0E311D7A, 0x00A7AC9A, 0x934B2DCB, + 0xF68C3DEC, 0xEA7FA581, 0x043EBFD3, 0x6A84DCB4, 0x39C0CE03, 0x222006C0, 0x751C0501, 0x38930457, + 0xCF1BD4C3, 0x88FB6BBD, 0xFFEBBDFB, 0x7EFBFA9F, 0x6F3FE100, 0xE260CF5F, 0xA8138251, 0x0614081B, + 0x1C605F06, 0x08FCF811, 0x65A6B677, 0xEFDA77C5, 0xB41FDDAF, 0x920F00BE, 0x2BB8571D, 0x41515E59, + 0x02BABE4E, 0x663A22D5, 0x0E8F3819, 0x5F7761C8, 0x8009C282, 0xCD181743, 0xD9D174F6, 0xBCA8CD3C, + 0xD705FCEF, 0xADBFAADB, 0x55AD1E3E, 0x7CFCAA55, 0xC188EF39, 0x8860001B, 0x1CA05FCE, 0x1982F008, + 0x07009897, 0xA10ED4BB, 0xB7B82BC2, 0x5BC00129, 0x93F5260E, 0xBEDFE001, 0xD1BBEF9C, 0xFEF2A6D8, + 0x663122EE, 
0x18C50205, 0x5607C612, 0x84A8D43D, 0xA3E4E11A, 0x30C8B580, 0x8B8E32DB, 0x286233ED, + 0x1B0A18A3, 0x8EBE5DEA, 0x3DAB2822, 0xB91735B2, 0x3A0FC3E3, 0xA278DDD3, 0xEBDFE5B2, 0x151E2CC9, + 0xB57A680B, 0xB5EF0C7B, 0xFF9FFD7D, 0x831C96BE, 0x801E9844, 0xB51E29C1, 0x01D30E0D, 0xC73B42FD, + 0xA21A7704, 0x06BF288F, 0x3EE68021, 0x84C04938, 0x03AF6C77, 0x0C376377, 0xF3BC2D54, 0xBE197B4E, + 0x0C053050, 0x0B92DEE6, 0x2B35A965, 0x3D7FA78E, 0x3E5D5F6A, 0xCEF223D1, 0x5FA1AA6A, 0xE5E1C3D7, + 0x6AC5FA75, 0xFD232880, 0xFBA30700, 0x6E5950C0, 0x1CCEF814, 0x421AC74D, 0xB0C7DBE6, 0x7DD92685, + 0x3B2DDCE7, 0xD2B873A1, 0x9DBC61BF, 0x8F8B0C6A, 0x94BB3DF9, 0xDD8B8369, 0x2E4F1F8B, 0x370381C9, + 0x33197A62, 0xDE489D42, 0x23DACFE1, 0x983FC200, 0x001CF492, 0x70D24220, 0x323F5006, 0xA26FCD9A, + 0xCB6B74FA, 0x6AC2FBDC, 0x7B888355, 0x28722424, 0x0E071EE3, 0xAC614220, 0xEEAE67D8, 0x91DD1DC2, + 0x204BF3DC, 0x80788C5B, 0x00021CE7, 0x9DC4A6DB, 0x5C594E0C, 0x4590924B, 0x9BDF66C4, 0xA09D8EAE, + 0x60FF4DED, 0x409C40D8, 0x2F22423F, 0x1A1F1F49, 0x608D4441, 0x8CEC6237, 0xDA22F451, 0x0BF8F6F5, + 0x3B876150, 0x2A83CF2D, 0x6A8CE35C, 0xEB5FF75A, 0x4DF06BFB, 0x8EB5C6D5, 0x51B5E0DB, 0x08130000, + 0x280008F5, 0x7BA27E97, 0x8C15DF13, 0xF5BB5462, 0xD8322532, 0x25322556, 0x4AEC665A, 0x7E77854A, + 0x67B0D83C, 0x7E2B3219, 0x143F9B41, 0x1D79EF44, 0xBE0FE53B, 0xEEEE79E7, 0xAE9DDE16, 0x868B7C4F, + 0xA1D0D27D, 0x2D7A7C45, 0xCCBA7EBC, 0x9C2A7D8F, 0x7D1DEFFD, 0xB25766A9, 0xA31154BA, 0xE6CA95DC, + 0xBD745DDB, 0x12E665AC, 0x88581995, 0x31EFC937, 0xA6B197D6, 0x5F354D57, 0x155565FF, 0xEA524AD9, + 0xBDD5C9A1, 0x3B59FC36, 0x61B299D9, 0x0D2945B3, 0x21BB5729, 0x99F60A81, 0xD9A662AF, 0xA8CEBB97, + 0xBAE2E97C, 0xC7F9EFCC, 0x82ECD4AC, 0x87C64062, 0xDE797402, 0x00506102, 0x0000E829, 0xC05E5038, + 0x21E71BB5, 0x24203F53, 0x97C5BB5A, 0x4C69EECE, 0xFF7C4D28, 0xA7BEA703, 0xDFC22BF6, 0x69A6E663, + 0xF3385CB8, 0xF3B5F7EC, 0x7DAEBAF8, 0xA95C5A6E, 0x92CD06C8, 0x14A6A95E, 0x730EFFD5, 0x34ADE771, + 0xF2CB563B, 0x6BED88D8, 0x5A59849E, 0xB262A8F4, 
0x59C6BD99, 0x79362DAF, 0xA577FF64, 0x56176C24, + 0x6E183D76, 0xFD375563, 0xC78C5A07, 0x2387BB0B, 0x6ECC78AA, 0x4B172D57, 0x2B4EBB3B, 0x5A7A1BBD, + 0x566AEC16, 0x43E915F1, 0xF6CDE9E9, 0xABA5466B, 0x0FF73DA7, 0x61C89CF1, 0x464DC9DD, 0x9188A11A, + 0x2A4506DF, 0x5993816F, 0xABE912C1, 0x748EF758, 0x4D0202BD, 0x6FC7A101, 0x0000A830, 0x0100009E, + 0xFD23FC06, 0x7DEF7B30, 0x819F2D0D, 0x3BA7409D, 0xBAAFF36E, 0xEF4E494F, 0x66F5912D, 0xBBE44636, + 0x3D3535E6, 0x8293A387, 0xD0E2355C, 0x74A2A755, 0xF8DB098A, 0x6D4778FE, 0xF9D0B227, 0xD633B739, + 0x6C924125, 0x29CBCEC6, 0x062DB743, 0xDC14CFA9, 0xA0C1842C, 0x51DF1673, 0x865C2E5E, 0x31A2F9FC, + 0xF5050000, 0x000015F0, 0xB9ABAC56, 0x6FA7E619, 0xAC670900, 0x4C9B684E, 0xA1441F08, 0x8B8D2758, + 0xF60E30A6, 0xAACD5BC9, 0xDE0930CF, 0x702A1ED3, 0x6CA10EA3, 0x07003837, 0xCA976B05, 0x860CD51E, + 0x40F1F348, 0x310FC014, 0x9EE6969D, 0xD1039BCA, 0x866CF453, 0xE6B078ED, 0xA2641E26, 0x081D751D, + 0x4FE3D097, 0x47DECC0D, 0xE891093C, 0x36542C9E, 0x120A09E0, 0x39F160F1, 0xCFC8AAC7, 0xE1BB0575, + 0xE2538310, 0x00D07F90, 0xC0B036AE, 0x718823DC, 0x657BBCB8, 0xA99C4183, 0xCDA63C9A, 0xDCABDFD6, + 0xB166FD0F, 0x0A707B75, 0x38C21B7A, 0x55C30E20, 0x3CA0025F, 0xEDF13195, 0x8743CC3B, 0x5B6F4F7C, + 0xF2B83D71, 0xBCF7FD3D, 0x3832C7C2, 0x5415FC0B, 0x091090C7, 0x9A25502E, 0xEDB68657, 0xB3A663D9, + 0xE933FB2E, 0xDD8306E0, 0x74C1B2AA, 0x7A0E0B7B, 0xB9B56A1A, 0xED764DAB, 0x5477CD5D, 0xDBF2A7C7, + 0xA3C00724, 0x0162F71C, 0x3CEBB387, 0xE3EC3663, 0x9E394A6B, 0x68CF3539, 0xD55EF956, 0xCDEE28F0, + 0x0049800C, 0xC83876F5, 0x5CDDE598, 0xEE0CE8DD, 0x3DBB0676, 0xAC82AB1F, 0xAFF5D869, 0x0778A6E5, + 0xB0E6B09C, 0xC3A19D35, 0xD65BC078, 0x802440BA, 0xC6AEB28A, 0xD3559C35, 0x781A6655, 0xB97A6D97, + 0xD0A9EE61, 0xCFE21A67, 0x02498040, 0xCD57A776, 0xFAB64497, 0x5AABBAAC, 0xC1A9B11C, 0x05EDBE05, + 0x9200019F, 0x35BBEAD4, 0xAC533DD3, 0xDDC09C76, 0x69C9EFD0, 0xE1DD6BF7, 0xE8D1A31C, 0xADD52920, + 0x091061D7, 0xDC8E12CA, 0xD7B4CBD6, 0xDEF1B457, 0xBB9DA7BF, 0x81C00724, 0xF07F3BFF, 
0xBBB56F7D, + 0x553D2100, 0x40780E98, 0x41319E07, 0x9017B67E, 0x0712C750, 0x7A378720, 0x00ABEFDE, 0xEAAED5D3, + 0xF600B05F, 0x77F88404, 0x964975E4, 0x87C3C788, 0x5C3BF147, 0x32D3E3B1, 0xF371286D, 0x742EF025, + 0x0E1125C0, 0x301ADF71, 0x95E73260, 0xDBD36FD6, 0xEBC3613F, 0xB6D9817C, 0xEC7EEDB7, 0xEB1E5CEB, + 0x55A45679, 0xF8F0436D, 0xACAA1654, 0xF4A72EB7, 0xDED3CE7D, 0x01BABCE6, 0xA68710EC, 0xE0922FE6, + 0xA1113343, 0x28D04120, 0x80968B4F, 0x7BF04580, 0x4D38DC45, 0x8C286846, 0x716310AA, 0x02FE804B, + 0x0310029B, 0x7413072A, 0x2024BF46, 0x0C176012, 0x0CD0DD4D, 0x20C426E5, 0xD9A50FB0, 0x8FC5CCC0, + 0x744C7642, 0x882B87CA, 0x39B6E751, 0x3F6A4533, 0x6C759BBC, 0xABD6C110, 0x05FFAA95, 0xE1500EFF, + 0x5D8F78B4, 0xEF7C767D, 0xE0DFCFCC, 0x20DE001C, 0x90DD7507, 0x474C1E7E, 0x6C0CAF0F, 0xCA631434, + 0xEAB9454D, 0x23222446, 0xE7EFC761, 0x44C0E1E6, 0x89DCA3FA, 0x80ECAE33, 0x2E0218D2, 0x09EF3708, + 0x00D70640, 0x7B48EDDC, 0xB90BBF6F, 0xA956F4E1, 0x83091300, 0x803D0076, 0x6067165A, 0x62A678F1, + 0x9C59DE45, 0xA1124F5D, 0x19475D46, 0xAF83F042, 0xA1749C53, 0xFB3AF7E1, 0xE188A6AB, 0xCCA2515B, + 0x3566F515, 0x97834FE8, 0x2A6B8F7B, 0xFE3A1171, 0xB4B29FC3, 0x9999669A, 0xE66BDE9D, 0x234BAAAE, + 0xE99AAEA9, 0xDA5AB2DF, 0x87EB996E, 0xE9D5ED36, 0xAFA618F4, 0x743A31F3, 0x6822723E, 0x115BDF2A, + 0xEF3A7E1C, 0xA1B5E253, 0x9DBAC2F3, 0x7C9F4AD1, 0xBF8C0A00, 0x7400800A, 0x13000000, 0x78D5754E, + 0xCB801CB9, 0xB1FABDA0, 0x6F678E6B, 0xDD73F76E, 0xB47A8D9C, 0x7B66C554, 0x388E7D68, 0x7065E074, + 0xAC690AED, 0xD9B35574, 0x177BE788, 0x172DB65E, 0x3EA15E57, 0xA112B12A, 0xCDA4652B, 0xDD4256B9, + 0x4256A68F, 0x9A60AF2D, 0x52A3BA55, 0x31FD916B, 0x0BA53D66, 0x7F18B3EB, 0xB563553F, 0x5959AE5D, + 0x3B75C0E8, 0xCB746798, 0x1E35C137, 0x77FFDC4C, 0x30D10ED0, 0xD013F9B4, 0xD9E50400, 0x00A83001, + 0x0000E419, 0xC4AF0B15, 0xCAF7C973, 0x4E991995, 0x8895D06A, 0xA84CA7E2, 0x45D56CD7, 0x3199A17B, + 0xD1950D6E, 0x1BC9E5AD, 0xC26FACBA, 0x6BEFB1A3, 0x9CBC9383, 0xD7E1ACAA, 0xA917B3BE, 0x9067D543, + 0x0DE344BF, 0x97BDEECE, 
0x7CF97AF3, 0x63E8EF4C, 0xD5F4DDD4, 0x6B8CA3B0, 0xE72E33FC, 0xAAF49F8B, + 0x4778F77C, 0xC778BCB7, 0x857E72FE, 0x2D7FA503, 0xE7D7EF7A, 0x170999BE, 0x987DAD71, 0x4340EF03, + 0x00005498, 0x00000000, 0x801DC89B, 0xEDFC8A2F, 0xB341E377, 0xE28EEFB9, 0xCE633DCC, 0xF437D7B9, + 0x57EFEA3A, 0xA3463B7E, 0x711DD771, 0x1DD7344C, 0xE39013E9, 0x6362D6F3, 0x27346924, 0xE3687269, + 0xFBF234AD, 0xB7475BCF, 0xE96DACB7, 0x4BAED29D, 0xEA6778DD, 0xCF565D43, 0xCA9A97F1, 0x3BADC07D, + 0x60AD8C96, 0xCF16F4DF, 0xA13162FC, 0x5763A0FD, 0x2C300100, 0x0050017F, 0xDC3AA827, 0xE42007E1, + 0x634E477A, 0xABA22ADD, 0x258C9671, 0xF27547BD, 0x7BCFBCAE, 0xE1CE11C2, 0x6AEE1C68, 0x41595446, + 0x2055C54B, 0x758F4AEA, 0x73A0E4B9, 0x7198F444, 0x05D4FC49, 0xFAA57BFE, 0xE67FEEA8, 0x382B94C1, + 0x697900F8, 0x31307DF5, 0x0C7702A2, 0x61D5B033, 0x3C40BAA6, 0x38026106, 0x290F00F0, 0x6652B8AF, + 0x34105CAB, 0x4CC710DA, 0xC8397D9E, 0xCF834BB9, 0x79EEB8DF, 0x1ABDDEFA, 0xF33FC277, 0x6A44F4AF, + 0x6855D52E, 0x7CF8A16A, 0x0D8E35C8, 0x4BA11888, 0x1C5175BB, 0xB70EA201, 0x6B5CB418, 0xF32F039C, + 0x478574F8, 0x1815B554, 0x473E60AF, 0x4F23937F, 0x12B6D3FB, 0xBBB11F1C, 0x551422CF, 0xF9950F0F, + 0xCCCD5CC3, 0x21E94511, 0xCED5A807, 0x3938E4D5, 0x337B9CA2, 0x512DF8F0, 0xFFDEA9D2, 0x83754042, + 0xFAE56E98, 0x496C54A8, 0x0AC533ED, 0x6662D0C7, 0x33E64586, 0x002B3538, 0x9DC689E9, 0x3CC53C11, + 0x9492F590, 0xF8408E11, 0xD2083302, 0x9F9AFBA6, 0xBD0B83DA, 0x3CF0008A, 0x2201EC17, 0x24ABD5A1, + 0x5BE99620, 0x8E10DCB8, 0x5F6398C7, 0x48E45B8D, 0x201FCF91, 0x83030C68, 0x80708890, 0x07E0610A, + 0xEBE22DDD, 0x8C9F9901, 0x9BE1ED70, 0xBA5EA618, 0x6176FF3E, 0x7DDC6E1F, 0x994B13BF, 0xED4259F0, + 0x7D22DB16, 0x8A6C5BC8, 0xB64521DB, 0x16103F81, 0x80381047, 0xFC208E20, 0xE2880802, 0x11C44144, + 0x9020E288, 0x1F5E4924, 0x0E899FBC, 0x725D595C, 0x3F585C97, 0xFC127225, 0x81FFAB78, 0xFD0707E8, + 0xC7A14CDC, 0x2BD4AD53, 0xB5167C5E, 0x61020090, 0x0A601332, 0x9192BF9B, 0xF7D84797, 0x7ABC7FA6, + 0x1AC1CF8C, 0x70F78AA5, 0xA71651DB, 0x0785C93B, 0xA72B7E84, 
0xAA5656A5, 0x96A66A1D, 0xA56AFC18, + 0x31E3C4D7, 0x549731C6, 0x94BBE7F0, 0xF18FBC95, 0x3C7324C4, 0x736AA25F, 0x4F338754, 0xCBA21BE6, + 0xD6CDD47D, 0xB18630B7, 0xCE9825F7, 0x0CCE1AD6, 0xF3842EEC, 0x9E267E05, 0x70ACA999, 0xCA55CE26, + 0xD2BF3E5F, 0xBB711EDD, 0x95DD7E9E, 0xD34476CA, 0x236BB5A8, 0xCC2FA890, 0x6DB3BF02, 0x34D9A003, + 0x00004085, 0x09000070, 0xBEDE3BF4, 0xDEEF0670, 0xBE9D003A, 0xB614E573, 0x5F054E42, 0x02C0F9B9, + 0xE6D50970, 0xBEBDCBDD, 0xCA09709E, 0x16035950, 0xDE051F5E, 0x0180F3ED, 0x64A5EC15, 0x0F00009C, + 0xAF49435A, 0x8F38C6B7, 0x02C0F976, 0x02ECAAC6, 0xB7137EC7, 0xCABB2FD0, 0xBF5EE7EA, 0xFB3170EE, + 0x98CFA706, 0x2533481B, 0x6BEF9CCB, 0x174EF57B, 0xF6D19CAA, 0x30D5336E, 0x19F2578B, 0x6ED71AC1, + 0x67745167, 0x00BD20B2, 0x16B956F9, 0x01E0FC77, 0xB36C2F00, 0xCEB713E0, 0x7C3B01BA, 0xF3DB35E9, + 0x0400B401, 0x80FE9E33, 0xEEC8111B, 0x6AFBF350, 0x566D10F2, 0xA2D6BFB7, 0x35DBDFD5, 0x09A0C7D9, + 0x5F6487F9, 0xEE9EFD3A, 0x7E7D874E, 0x00440050, 0x61020000, 0x75B5AC92, 0x9F89DE3B, 0x6744663E, + 0xA9C439AB, 0x942479D8, 0x0FABE7A6, 0x42DB3A05, 0x56CB4D83, 0x06004885, 0xD175FFAE, 0x1AEBFE57, + 0x1AF2E738, 0x153E22C9, 0xC77B47CE, 0xA3399EA3, 0x26BA6F91, 0x55F233A0, 0x01CA1B24, 0x7B0380D4, + 0x3D9424F9, 0xDE7B0268, 0xB524AB1E, 0xF5E19274, 0x9E5705B2, 0xBF9493A5, 0xEB5D3632, 0x46E4BDAF, + 0x7C0FEF59, 0xCDC68FD1, 0x5F17D065, 0x4992E4DA, 0x78CE1AD0, 0x6EC9B5C5, 0xE54ABA92, 0xAEDEDEF0, + 0x1200B001, 0xC9BB7F64, 0x43CBFD06, 0x13EE36FA, 0x8FCED320, 0xF95EE6BB, 0xA724E3DE, 0xBB47CFDD, + 0x92649C9E, 0xB8A75E7E, 0x45F5D7C4, 0x25796779, 0x73179FD3, 0x515EBE87, 0xF7AF6547, 0xD338EF2A, + 0x6023C738, 0xD7C03891, 0x13201747, 0x0054C09F, 0x57257C09, 0x00D4C3FE, 0x64207010, 0x1CB7B943, + 0xE62D7D70, 0x4C24D37E, 0x4D5188A7, 0x13A97D87, 0x49521C9A, 0x546BA76A, 0x79F80008, 0x4A80005D, + 0x4E2101DE, 0xD67BAC01, 0x4FB921D9, 0x6E889665, 0xDB903087, 0x0E6042DA, 0x360A0910, 0x17BADD56, + 0x5DEDB73C, 0xFECC7126, 0x64FB4102, 0x2440C6DE, 0x6EE013F6, 0xAABE61CC, 0x6E817BD6, 0x5EC00F24, + 
0x0102DFDA, 0x652F60E1, 0x555DD633, 0x02FCDDB6, 0xF50C6077, 0x6A693D90, 0x90002400, 0xCC724C88, + 0x0BF8D7BD, 0xF9E3F00D, 0x10A0AD05, 0x09DBCF4F, 0x27C03DD4, 0xD3734217, 0x702DB846, 0x695580F1, + 0xD48AF5F5, 0x1900E303, 0x9200F9EB, 0xAA2EEB19, 0x01C3AAB2, 0x1E9FB2BA, 0x2E81B5FA, 0xAA6AAE01, + 0x24240009, 0x0009E38D, 0x4D772FC0, 0x00014AB7, 0x2450FDE8, 0x99A34709, 0x749B6FA6, 0xEB2A7EC6, + 0x3B53AB33, 0xEE76090B, 0x480204F0, 0x2EE4F4AC, 0xD3C093F1, 0x94D575FA, 0x87840420, 0x01206157, + 0xE66C80BF, 0xA60B0810, 0x78CAE522, 0x81351C4F, 0x2F80D97D, 0x4C65F67A, 0x0761D76D, 0x55F5B37B, + 0xAE084002, 0x3FBEEEDB, 0x35F4DEF7, 0xDEF53EB4, 0xBF57EFDF, 0x7D55756C, 0xDD7BFFFE, 0xDDD7BBEB, + 0xADDEF51E, 0x3E7CB842, 0x0F55AD87, 0xAA6155F8, 0xF0E1C3EB, 0xDD76C71A, 0x7E7CEFDF, 0xF77A91BA, + 0x7877EB5D, 0x54F52EFC, 0x6D610200, 0xA0BC4F15, 0xE8BF394B, 0x1CEDB9B5, 0xA0ADAF5D, 0x56429260, + 0xD53C34EC, 0x9C0A39D1, 0xC67EF8DC, 0x93546850, 0x96A7C227, 0xF67F0789, 0x31D72AC7, 0x7B0E2579, + 0xCFEB4258, 0x779D38D9, 0xA47D068C, 0xF288EA4C, 0xA00F0028, 0x6F90EC0D, 0x0052F750, 0x92DC0090, + 0x87B23FE6, 0x7AD0DFA9, 0xE2FB90F9, 0xF22979B8, 0xFB4413B7, 0x315475F2, 0xC505802E, 0x4B92E419, + 0x2F7AC21A, 0xB403D64A, 0x4ED66055, 0x41C165FE, 0x6FBD76CC, 0xEFC40990, 0x97AB5194, 0x31CFD8A3, + 0xAB13BFB7, 0x9F9251CE, 0x5CA55B6E, 0x6EE64ECC, 0xFB02D13A, 0xFB1B0090, 0x097E8B7C, 0xDB0339B3, + 0xAD5BDE7B, 0x9F7B354B, 0x8ED088F3, 0x9E570DF9, 0xE9D9EE66, 0xE5781500, 0x00152600, 0x00001268, + 0xBEB18877, 0xBF2D99E5, 0x3EEEA021, 0x7395FDB5, 0xDBA979F7, 0x42927CC4, 0xC16BBC8B, 0xF4E5BC31, + 0xFAFDEF02, 0x6377EF27, 0x00286F90, 0xEC0D0052, 0xF75092DC, 0x0000F37B, 0xDE809A95, 0xF9626FBD, + 0x7BDE9C78, 0x92249318, 0x65F5FC7B, 0xE403233D, 0xA55A4408, 0x40972549, 0xF2ADF96A, 0x403BB45B, + 0xBE35C065, 0xFE6FED1F, 0x23BEA5F2, 0x4BD400C0, 0x329B762C, 0x03E9639D, 0xBBB78EFE, 0x0CCAAAB4, + 0x47AC96C5, 0xA59E066D, 0x6395F29E, 0xCF814E00, 0x80BCD534, 0x90BF3A7D, 0xCEB9142C, 0xE4EB7722, + 0x99FB73DF, 0x38B57C9A, 0xDFE4167D, 
0x6FA80000, 0x15260000, 0xD0CE07FC, 0xDA6AB1C6, 0x9DB04243, + 0xD759F09E, 0x6F9E668F, 0xDC77CD32, 0xEF731788, 0x1F7345D6, 0xCC41433B, 0x010080F2, 0x5E48F606, + 0x00A97B28, 0x49B2F659, 0x71E00038, 0x794F4E24, 0x27AD94CF, 0x0828992D, 0x92EFA937, 0xBBEE05FA, + 0xF7C95AEF, 0x23E71B72, 0x9457A3A2, 0xFFAEA0CB, 0x9224C9D1, 0x6A39DE77, 0x2377660E, 0x48EEAAE4, + 0x24D1FF60, 0xE8583A96, 0xCECC0090, 0x0AED3053, 0x9F8EF55F, 0xA07F17AC, 0xF63A7257, 0x538C5D91, + 0x64AEE7AD, 0x29BF676F, 0xE5E4547D, 0xF7CAACEA, 0x65EC95AC, 0x48CEB677, 0x0AE47390, 0x24EF3ED7, + 0xFBC484B3, 0xEAD0F8F1, 0xFCC547FA, 0xF7678FAB, 0xD6040000, 0x2AE04355, 0xE586BA0A, 0x5EAB850F, + 0x1F3E7CA8, 0x063D7CBE, 0xBFBF6F15, 0xAE500D55, 0x413D7CF8, 0x7A59EB78, 0xA82A7CF8, 0xF0E1C36A, + 0x3DD4E1C7, 0xFBFDAB0E, 0xABA01A56, 0x857AF8B7, 0x13204BE2, 0x6DA6EE01, 0x5ED5655F, 0x02909000, + 0x24005C8B, 0x2564829C, 0x5DCFD0E3, 0xB4EB3240, 0x00870EAB, 0x924092ED, 0xE7E77641, 0xC6534FD7, + 0xDC930716, 0x57AABA06, 0x5CC53D80, 0xBB3F2B4B, 0xE003925A, 0x136E672B, 0x1B08C471, 0x0D018D6D, + 0x43082463, 0xCF82CC32, 0xF08DAF16, 0x84400864, 0xBBF78038, 0x8017F81C, 0x77D78685, 0xDD73C05A, + 0x3DAF0103, 0xF8811F38, 0xC31E0EE0, 0x14CF70F0, 0x0E799A77, 0x085CC3E2, 0x1F3C0AA4, 0x268FFC45, + 0xE9A26B8D, 0x099C6117, 0x7054E06A, 0xE01A96C0, 0x3D047BF4, 0xC8380EE0, 0x6EE02D4A, 0xFE5A05BF, + 0x7BA8C02F, 0x700F81A7, 0x810C4701, 0x194EE0B3, 0xD69CD339, 0xA6CE1ABA, 0xDA1EE5AC, 0xBB4E395A, + 0x47953DAA, 0x966D770D, 0x47DBAE1E, 0x55D3F21E, 0xD6AC39AC, 0xD9A375D4, 0xEC51CEBA, 0x470D87C3, + 0x51351C0E, 0xC733D61A, 0x8E6D8DED, 0x70742A8E, 0x1E768F39, 0x346B38A6, 0xEDD13147, 0x8F35DC5A, + 0x0E1D7B38, 0xD61A67D9, 0x35A7E5F0, 0x3E63EFE3, 0x1CDEB3D4, 0xEAD1A566, 0xB4670DC7, 0x331C3D1C, + 0x7BB46B38, 0x66EDD163, 0x0FC71A73, 0x1CE7B0D4, 0xB287DACE, 0x51A3C699, 0xB71935F6, 0x70EC6159, + 0x6A6DE7A8, 0xB1EDB2E6, 0x74D470F4, 0x98C3F10C, 0xAB478DBD, 0xACDA8E35, 0x7ACCE1B8, 0xC65AC3B1, + 0x4A5BADD9, 0xBA56AB23, 0xB586434B, 0xC71CCEB6, 0x9DC7B9D4, 0xD1532DC7, 
0x1CA5E358, 0xD535DAD6, + 0xDDD6B8A6, 0x6BD4E872, 0xDC6B389E, 0xE168DDA3, 0xC71E8E59, 0x7BF4D8C3, 0xB1C61CC0, 0x812FAD82, + 0x5655870F, 0x1FFEBFDB, 0xD586AA0A, 0x3EBFEDFE, 0xBB0F5F58, 0x1DFBAABF, 0x4DABE0E1, 0xBF129800, + 0x7E59EBDE, 0xE5DBDE9E, 0x9FB10460, 0x67AF0770, 0x79FFF00A, 0x4CECC21A, 0xAC037079, 0x3A0097A7, + 0x435E1600, 0x48DD43E3, 0x7F2BB4A1, 0x0EC0E5E9, 0x005C46C4, 0xD3957AB9, 0x01B8BCEF, 0xA10FE4A6, + 0xBD9F9B7D, 0xFC6FAD0F, 0x00E8B2BD, 0x280B7500, 0x2E4F07E0, 0xF274002E, 0xC74AC4D3, 0xB500C8D0, + 0x01000072, 0x46AC578E, 0xB86E4E1F, 0xA4CF7823, 0xAD788F0E, 0xE0327F4B, 0x8CB3FB8D, 0x2C0D7DC0, + 0x658C478D, 0xD9F5F2FC, 0xF2FEBBDD, 0x91232E08, 0x74000040, 0x05000000, 0x00000000, 0x00000000, + 0x58010035, 0xF30D8CC8, 0x7C996F00, 0xC0885401, 0x648C4C01, 0x1923017D, 0x55220520, 0x632400E8, + 0xCB7C0315, 0x90019C4C, 0xCF6F007C, 0x7EFCDAC8, 0xF85680DF, 0xAF0164AF, 0x91BFC177, 0xDDB41334, + 0xA00F0020, 0x6364FA46, 0xFE18F9E3, 0xD706808C, 0xF5F39F7C, 0x03003E3F, 0x46472600, 0xA8E7271E, + 0xDFC26A00, 0xC0CAF90D, 0x1A99F016, 0x0F30E0BB, 0xD2030018, 0x77F17B05, 0x00C65DFC, 0x0DBEAB01, + 0x34EC04C0, 0x49F82EFE, 0x181D7012, 0x0034CECF, 0x020056CE, 0x6F008CBB, 0xF8BD0200, 0x8DCC3730, + 0x00403D3F, 0xF1F8167F, 0x0C01008D, 0x37E8583F, 0x56EEAA78, 0x833EAE5F, 0x039F023B, 0x62A0018D, + 0x18A86FD4, 0x38AD02D7, 0xA0A7CCA5, 0x8FD80F56, 0xEE921D50, 0xDFA05512, 0x6F64ACD1, 0x0119EB5F, + 0x712CADF2, 0x061A768D, 0x0E58B94B, 0x76407D63, 0x2F3DC503, 0x196F818F, 0x8FBE3768, 0x0734ECBB, + 0xBA946BB4, 0x903D3905, 0x8EA56B74, 0xC4C0DBF5, 0x69953790, 0x81063420, 0x23BE41D7, 0xA063E929, + 0xE5BBBA64, 0xC05BACF2, 0x061A768D, 0x7EB07297, 0xEC80FA06, 0xAD927823, 0x03D989D8, 0x68D80E40, + 0xC37640C6, 0xC837909D, 0xB01DD0B0, 0x633D23F2, 0x0D54FBF8, 0x788B6BA8, 0xFC7EFE93, 0x6FA00119, + 0x23EFDA4B, 0xD76FEF92, 0x1D70D2EE, 0x14735D05, 0x7640C6C8, 0x34362E3B, 0xA061C7FA, 0xD1903190, + 0x3D89CC07, 0xBF411FF1, 0x00276D95, 0xABE20DCA, 0x6F2BD51E, 0x2063AF57, 0x9E023BA0, 0x613B6223, + 0x7B12A9E2, 
0x818CD39D, 0xF9E6D2B1, 0xF4B1B463, 0x7DC7E921, 0xE37DFA6D, 0xE553E82B, 0x9F0FD7CF, + 0x0FD7883C, 0x053E6207, 0xE02D8EA5, 0x6BA8FC7E, 0xFE936F20, 0x63E45D7B, 0xE9FAED5D, 0xB223D3EE, + 0x14736915, 0xD8011923, 0xD3D80940, 0x43C64023, 0x361AB603, 0x32466EB1, 0x85BEF3F7, 0x527640C3, + 0x8EA53B21, 0x3BF106ED, 0x8086EDC8, 0x7C03F80D, 0x5A1FAEAF, 0xF21BB423, 0xD356D157, 0x813B6BD0, + 0x9DF96CFD, 0x1B77F19F, 0xB4EB8BBE, 0xCC7C8046, 0x66466C7C, 0x7E0227CF, 0xA7788093, 0x762754BB, + 0x133266F1, 0x4623131A, 0xF01B7427, 0x649C1E20, 0x6316C898, 0x25D256B9, 0x710AEC90, 0x94AAA07C, + 0x977C975C, 0x831E2063, 0x6406648C, 0xCC80DFA0, 0x07C8387D, 0x57853C05, 0x9E02276D, 0x07E592EF, + 0xAA90A7C0, 0x5320E374, 0x0D7A808C, 0x9119909D, 0xB01DD0B0, 0x3BA1611F, 0xB1432FD3, 0xEE849390, + 0xB1973EAE, 0xF18A074E, 0xC27823FF, 0x7D314E3B, 0xE0E41BC8, 0x982552C0, 0xE5B71390, 0x8DEC35E8, + 0x58F7AA07, 0x8DD8C846, 0x031A9081, 0x93769281, 0x0634009F, 0x0FD7C80F, 0xDF157C97, 0x6C336D07, + 0x642762A3, 0xDA9DF96C, 0x951BE7C3, 0x35E84EE8, 0xAB12990F, 0x2EDF15E5, 0xBBE41A74, 0xD2800C9C, + 0xB41D92D2, 0x90A7C00E, 0x5B051E3C, 0x3020032B, 0x912A70F9, 0x0D8C720D, 0xD1C87C03, 0x8DD868D8, + 0x0EC818B9, 0x45B6D077, 0xFE5EDA01, 0x0D3B96EE, 0x84ECC41B, 0xB4031AB6, 0x23F30D00, 0x00EA5AFB, + 0x3ED45FF5, 0xF7AFFDEA, 0xA7BF7FFF, 0xDAA1FF77, 0xEA577DD5, 0xE00100A0, 0xFE5EA157, 0x6B6B7BAF, + 0xDADD579B, 0xFCF75EE1, 0x0DEF5EE1, 0xF5558557, 0x4FF6EFEB, 0x7D78EBDD, 0xEBBDE061, 0x5555A0AA, + 0x2A505555, 0xA3AA2A50, 0x555585AA, 0x5675AAAA, 0x0A555505, 0x6AB50A55, 0xFD54550D, 0x5A550555, + 0x553D55AB, 0x55505515, 0x76555550, 0x55555055, 0xEB59B5AA, 0xAAADDE53, 0x5555ADBA, 0xAA50B52A, + 0x505555D0, 0x9F000000, 0x000000C6, 0x29F05DF5, 0x00000000, 0x000000C0, 0x0200FF28, 0x810BFEF4, + 0x9F31658C, 0x630C0000, 0xC075685C, 0x87E68997, 0xBA38897D, 0xBA985CCB, 0xD8C7B87D, 0xD5EC110E, + 0x36F01800, 0xA013EF09, 0x7EF45BA5, 0x4E4D8266, 0x896A1B4D, 0xA26A8B26, 0x31B1A026, 0x492C6092, + 0x08D057F2, 0x14A02600, 0xB4CAEF37, 0x0881387D, 
0x2F680802, 0x00000000, 0x0000004A, 0x00000305, + 0x209C7A60, 0x908C1600, 0xCAA5718A, 0xF09EF686, 0x50B6C80B, 0xC0CAF648, 0x9C251523, 0x00ECD8F3, + 0xF4FE9556, 0x08408C35, 0x356E40BB, 0xC200002C, 0x01AEDC2A, 0xBA0087EB, 0x955E3500, 0xF0BDE0FB, + 0x9936A27F, 0x5B597E53, 0x38AE9FDF, 0x27BE672B, 0x4EB6C4D1, 0xF91671EE, 0x2E0DDBF5, 0xEC822B0A, + 0x2994409A, 0xA03988A6, 0xB3F024D1, 0xF3F55C48, 0x122E9F43, 0x26098A50, 0x89E4B010, 0x901C0100, + 0xC1C0946D, 0x296ED9C6, 0xB22DC31B, 0xF7E8CECC, 0xEA27F9A7, 0xD0CAB21D, 0xCB756CD7, 0xF6BF8374, + 0xE759FB12, 0x9FD9C4B7, 0xA5ED938A, 0xE75BDC68, 0x25D0B765, 0xBE962511, 0x46B7D25C, 0xDA974B25, + 0x4942F16D, 0xE4E25822, 0xCA92B6CC, 0x4D128DBE, 0x852D121C, 0x00C041CC, 0x77D30A27, 0xF57E7FAF, + 0xA080DFB7, 0xA54D7C4B, 0x5AC4927D, 0xFCBAA2FB, 0x07034685, 0xEA37CA4D, 0x1CE3912A, 0xBAD2F6AA, + 0xB4672948, 0x25397225, 0x8E31C295, 0x441A4B80, 0x2F390925, 0xB1842BF1, 0x44003009, 0x40306368, + 0xFFEDFE38, 0x0C8CE354, 0xDBA7F8AE, 0xA921DCD5, 0xD43C7A08, 0xC6D12407, 0x83A0E000, 0x0074BF47, + 0xF3FCA9D0, 0x21A8B34D, 0xDAC7CA91, 0xC513A121, 0x8CA0DAAA, 0xC75BA804, 0x4B0C64C8, 0x84CF1680, + 0x2B643889, 0x00023800, 0xD0F5BF7F, 0xBF240000, 0x0008295B, 0x0FCD8713, 0x4AD72180, 0xC1011009, + 0x7D22A879, 0x53C014D2, 0x610C90CD, 0x15948A9E, 0x212995C0, 0x3516F50D, 0x83A9400A, 0xB726EA8C, + 0x009485FA, 0xF456976F, 0xF9B4AA5A, 0x7481EEDD, 0x00A06755, 0x8D76532B, 0xDEBA5975, 0x70D7667C, + 0xDDEA5B8B, 0xCD98E330, 0x06557D66, 0x2FF09BAB, 0xF6F1EF3D, 0x86A63228, 0x5277F410, 0x08A1D61A, + 0x9919AC82, 0x54AAE953, 0x3DEE82D0, 0xA95E9C91, 0xBDF75E7C, 0x39A39F90, 0xA9935AA3, 0x87B89D55, + 0xCBE37572, 0x7A2CDF19, 0x722AE419, 0x4551A4C3, 0xD87546C0, 0xD945B8A1, 0x72B23273, 0x826BB466, + 0xC63DFBCD, 0x9EC24C31, 0x8A50A38B, 0x919B511D, 0xEF0220FD, 0xAD054D16, 0x1CBDE852, 0x7FF5A686, + 0x275A3F7E, 0x6D7777EF, 0xFE7FFFF8, 0x7DD7F79F, 0xD4C71440, 0x9377445D, 0xC813CA9B, 0x3B3604A2, + 0x6D4D9B18, 0x93450573, 0x1B7DA7AA, 0xA20E8F04, 0x0084A003, 0x43A41B8D, 0xF38704AC, 
0x1CB09273, + 0x584351AA, 0x9B51A1A9, 0xA4D00407, 0x21E49E05, 0x4A7B07D5, 0x1813EA1B, 0xA09E58BB, 0x5811AD3A, + 0x15CB6EC4, 0xE005D520, 0xAA211F6E, 0xDCE279C4, 0x90C55235, 0xB33C46BF, 0xA511A439, 0x42128131, + 0xF6CD9C37, 0x3C9ED6CF, 0x735B0CF6, 0xF5A1FB14, 0x5E9D38E7, 0x240841CC, 0x97438ADA, 0xFE0754A6, + 0x33ED684D, 0xF6682864, 0x1D0FC00F, 0xD0766DD9, 0xF9DBDD06, 0x331FF33A, 0xF972164E, 0x1FF61EEE, + 0xF5FDEFD7, 0xB97DC4B3, 0x009CB9B3, 0xEE91A36B, 0x7DADA77F, 0xF54138E0, 0x4ED09E2D, 0x0D804EF2, + 0x15C83C8D, 0x1DF94EB7, 0xEDEECAE5, 0xA012103D, 0x8CC0E67C, 0x2C01446B, 0x75EBDCE4, 0x8858BAE6, + 0x7AB914CC, 0xF465DEB5, 0xF13E809D, 0xC10A6586, 0x979220E0, 0xF770AB77, 0x3DFEFA53, 0xF5D5AAF7, + 0xE08ADC03, 0xD029A25D, 0x1CE40E6C, 0x9505CA5C, 0x80321B22, 0x075530E0, 0x74E8D786, 0xECE0CB30, + 0xC6C2C764, 0x753E4EF9, 0x37E4DD06, 0x61100054, 0x27A5C67A, 0x88BEFAEA, 0x6395E090, 0x035A31A5, + 0xD70A1278, 0xF45F97F0, 0x247AE628, 0x3F668B97, 0xEBD56583, 0xD32988C1, 0x0D16A636, 0x70FBD000, + 0x02EEF323, 0x4096459D, 0x2407A7AA, 0x6AFFA7EB, 0x7DDF1B84, 0x01691082, 0x69340F5D, 0x1E3D6C2D, + 0xA39D2483, 0x20C103B0, 0xAECE7AB0, 0x1C5F02A8, 0xE7EDF47D, 0x52DE4B4B, 0x6102C8C7, 0xD6537CBC, + 0x7D641079, 0xCE63933D, 0x9B732904, 0x08CE0F9B, 0x9C868644, 0xB89A2CFE, 0x75FF0F79, 0xD784F43B, + 0xA220273C, 0xCC14B6E3, 0x063BAE2F, 0x3E61F342, 0x073B79EB, 0x3E57BF26, 0xED188037, 0x0304701E, + 0x6CBF610C, 0xCBE19397, 0xD1AE6246, 0xB2BF6731, 0x45071406, 0x4E0A0268, 0xED306417, 0x030458D6, + 0x3BEBE259, 0x9CF2D787, 0x261BEEDB, 0x0C8445BE, 0x72FE0FB1, 0x23647FF4, 0x958E3EDB, 0x48E14170, + 0x060C435A, 0xEBF89D91, 0x4D1C71CD, 0x7D4638BE, 0x7C8274C1, 0x01E69F16, 0x9CF5B3E9, 0x74EF139C, + 0xF573E860, 0x2774D6CF, 0xA103EE47, 0x593F870E, 0x467B6F9E, 0x6789C214, 0x0317820B, 0xAD8DEDD8, + 0x00E352D9, 0x3E240000, 0xC04E0100, 0x0090FDBE, 0xF6FA8D80, 0x02000000, 0x000080C1, 0x8A9B2902, + 0x118C97B0, 0xA58B18BF, 0xBA6F3F02, 0x52BB7A74, 0xDE22270A, 0x0340FA47, 0x11A890C6, 0x18C735B4, + 0x87473567, 0x0600F883, 
0x0CA07A1A, 0x7767147B, 0x2E5FF40A, 0x405D1EE5, 0x61B7CEAF, 0x217542B9, + 0x9CFB0CF5, 0x3C8E76D2, 0x4B790F86, 0x19A8888E, 0x2674910D, 0xA7E2DBD0, 0xCE413E1F, 0x403C72C6, + 0x313EDD27, 0xFF2D60BC, 0x0700FE7F, 0x1B000000, 0x00000000, 0x9F31568C, 0x630C1DB1, 0x111BD447, + 0x65D35746, 0xE7697598, 0x7C675878, 0x78690031, 0xCB9FECB8, 0x89E31DE3, 0x0C662709, 0x3100FF11, + 0x00A0F321, 0x0A7E2409, 0x49A2CF13, 0x15B56DA2, 0x5DAB1ABB, 0x5A6D825D, 0x35A06D5B, 0x0DC4B6AD, + 0x4092B606, 0x20560390, 0x68C2EF37, 0x3E2056DF, 0x8B892000, 0x00000000, 0xB46992C0, 0x02005004, + 0x7227E9B4, 0x32CACFCB, 0x6239F500, 0x3067E441, 0x00547420, 0x756B6B00, 0x0DC93106, 0x90E0BC00, + 0x400AE3B9, 0xE71BE0F8, 0x95FE4E32, 0xE2F6DC44, 0xF56F7078, 0xB31199FE, 0x8C696EDE, 0x71992EFD, + 0x26F50579, 0x0000E8F8, 0xD3B17426, 0x4BC5463E, 0x6A372A3E, 0xC2A7E3BC, 0x9429131C, 0x6414F71C, + 0x97326152, 0xE3CDFEE7, 0x7D57ACA2, 0xB5B76F55, 0xC6BCE70B, 0x55CBAFB3, 0x9235B932, 0xDD63AADA, + 0x3A7BA934, 0x7DB4A45E, 0xDF9531FB, 0x72EA0B4B, 0xE9506878, 0xBE174AFF, 0x5E419836, 0xA6B71157, + 0x91F17899, 0xA632425B, 0x57FB1B31, 0x8E1B3BE9, 0xE23A7801, 0x00C01C2A, 0x2B3162DD, 0xF2758092, + 0x8C638C13, 0xB12A5AE9, 0xF9A55CAA, 0x9932DE3D, 0xBDA9D011, 0x23BD2696, 0xFDFBCC4F, 0x8993CAA5, + 0xBE9791FE, 0xD2938E9C, 0xB36BA736, 0xC278D1DB, 0x785370F5, 0x6A965D1C, 0x2D1997DA, 0xF4DA920F, + 0x00C0F451, 0x8CDAE80F, 0x23268F4B, 0xCB589251, 0xA723750E, 0x65D29A7E, 0xD679CEF5, 0x68B4657D, + 0x75CF2E6D, 0x84798FF7, 0x9E3707E5, 0xD5F64766, 0x6EF77C7E, 0x8A1175E3, 0xFA3EF102, 0x7DCBDC46, + 0xC8ABA602, 0xA01E2BD2, 0x4B1BE9E4, 0x4083635C, 0xB7C5BD27, 0x8449153C, 0xEEBB1D25, 0x9907C284, + 0xB273E512, 0xE7D83DAE, 0x64776734, 0xD1513DA4, 0xF318A729, 0x341E3523, 0x973D87B7, 0x660C35BC, + 0x73F7A945, 0x9DFAE898, 0xDD8B815B, 0x59353AFC, 0x6FE8AE6A, 0x0911393B, 0x07000880, 0xFCDF7F5F, + 0x017891A3, 0x478F0E00, 0x004C3094, 0x7A811917, 0x2EE30D38, 0x30CB2DE8, 0x5C64F881, 0x7D1E6187, + 0xFE3A8FA6, 0xB9D11BB1, 0x45518338, 0x90086A37, 0x5D21B0BF, 
0x32F60ED5, 0x0489CEC8, 0xB603F562, + 0x0400A008, 0xE0A48A3F, 0xDCC32930, 0xF8E46960, 0x94131847, 0xE054C3AE, 0xD9B3D935, 0x7B76B4DA, + 0x023E335D, 0x391D7367, 0x5865CD3E, 0x7EBCDAF5, 0x3AE1AD85, 0xEF6CEF23, 0x6CF8A34F, 0xD60A7BEF, + 0x0D839B26, 0xC58693E3, 0x7139A5AF, 0xD097A595, 0x93E9CC56, 0x68BB822C, 0xF9B2292C, 0xAABBEB1D, + 0x0270D97A, 0x4E204E56, 0x9F336F81, 0x2B1D715A, 0x54BB0A78, 0x014F59CD, 0x0ADCABB6, 0xE80350DB, + 0xFDEFAFEF, 0xAE6FED3E, 0xB58E9381, 0x27808351, 0x278903C4, 0x67F0058D, 0xFE7679E2, 0xB6119A4E, + 0x631CFBF2, 0x74B5C31F, 0x752F2E86, 0x8344F8B2, 0xE5C40945, 0x52C19D9B, 0x50A18234, 0x09B6F06C, + 0x1E051C7E, 0xAD1DEFC1, 0xF03E7336, 0xB92DCE5A, 0xFD2C94B6, 0xE5CD1ACB, 0x6D7A3D38, 0xC16AA5C1, + 0xF365DF2C, 0x8E9ABD01, 0xFC73ABBE, 0x040E0068, 0xD6530F08, 0x5C7965AB, 0x37535001, 0x428E4EEC, + 0xA0E0523D, 0x29E82002, 0xC0C35FDF, 0xC544BD04, 0x644CE5C3, 0xCEDFCBB9, 0x0BE6600E, 0x46D47A4F, + 0x0EB60307, 0x6DA7B97D, 0x0D377DE1, 0x7E9C1D16, 0x710D61C8, 0x8460D180, 0x64A14038, 0xAD545AE5, + 0xE122C0C6, 0x9152E980, 0x99385522, 0x66393A29, 0x330EB82D, 0x07FD0C82, 0xC793A874, 0x2F2FCADA, + 0x8B1F9E4B, 0x38559D11, 0xFA145285, 0x65B3D943, 0x42823091, 0x46C71928, 0x07F30CD1, 0x62750B2D, + 0x5F78491B, 0x047A46DC, 0x563128F3, 0x91218E22, 0xA1BAD02E, 0x4E7DAA4F, 0x75EA7835, 0x75A70100, + 0x04F121F2, 0x8F157511, 0x10CE1CF4, 0xE5647EA5, 0x6B93B038, 0x350E801F, 0x3073A053, 0xA5C38071, + 0x980F1523, 0x304599C2, 0xC6FCB91F, 0x7747E9E6, 0x76EA27BC, 0x830939DA, 0x001D5CA9, 0x0282A956, + 0xE385DA19, 0x0F2E0F47, 0x7D218D40, 0xF809B821, 0x28A03FEA, 0xB2D4D600, 0x8274DBA8, 0xBBA8E7FC, + 0x55F3D99A, 0x1EE2209F, 0xD8D33460, 0xE220061E, 0x109170E4, 0xEF879BBF, 0x0AC3D098, 0xD6F73B18, + 0x849B948E, 0x9975D87A, 0x6F342739, 0x85C0810D, 0x4822BC22, 0xABB890B9, 0x65F9EB84, 0x3356D58F, + 0x83FB5F62, 0x522C00FB, 0x276781B8, 0x9A0DFFCC, 0xBD00CAED, 0x43F09006, 0x29A01D9C, 0x2ED47971, + 0xCDFB6FEA, 0xC8C911E7, 0xBB7D6FA2, 0xEC702728, 0x070B15E2, 0x34934AB2, 0x40A20561, 0x1010DC94, + 
0x4A6CE4E0, 0xB1EA88E0, 0xF58FBA3E, 0xC30357FB, 0x55360330, 0x7705A9E6, 0x9AE3DA62, 0x879380E7, + 0x3531E8C8, 0xD7CE6618, 0x8311D163, 0x85C1814C, 0x2E9F4611, 0x18A05DBB, 0xC1CEE622, 0x81500950, + 0x7A64ADBC, 0xD185BF14, 0x6817A47F, 0x1CDF6200, 0x2433F7BA, 0x38FE537F, 0x96B3E400, 0x211CA004, + 0x381F6168, 0x7A0244D4, 0xE2C5C35B, 0xAD040EB5, 0x98A6E5F3, 0x3CB56FD5, 0x789F542C, 0x5028C71F, + 0xB0C16903, 0x6F27948C, 0x1C4E4D5B, 0xCA77BB93, 0x010D034A, 0x84F09F4B, 0x06018218, 0x0335733C, + 0x9F1F4302, 0x004042A2, 0xD470D292, 0xC8153601, 0x00CE433F, 0x1CE95247, 0xCB5C6610, 0xBC5B0283, + 0x0B1C5301, 0x0E91EF88, 0x4790547D, 0xDCECF132, 0x9CD2ABAA, 0x1AAF8F8A, 0xB0147055, 0x443403F0, + 0x9CE02457, 0x061F4E5C, 0x2760919C, 0x6EE8E81D, 0x34351C9C, 0x03C24940, 0xE8291204, 0x760E2E09, + 0x886D3A55, 0xDF6B030C, 0x4355FBB1, 0xE23ECFF4, 0x22094240, 0x721E6461, 0x0F471A56, 0xF80B360B, + 0x45F285BB, 0xAE7730DD, 0xFC1CF49F, 0x344F4CE6, 0x90937448, 0xE8C6F0A5, 0x1CC09274, 0xAF7D75F0, + 0xE19B00C4, 0x52960740, 0xB795441C, 0x5A4FE29E, 0x871A437F, 0x576ECFD1, 0x00E11BA5, 0x0F29DC39, + 0x45388FA5, 0x65B81122, 0xD18DD2FA, 0x31ED3CEE, 0xAA0069D7, 0x93754014, 0x14355361, 0xA58FA07C, + 0xEF5D0000, 0x20B3C737, 0x118E635E, 0xAE1F7EC7, 0xDCA70BA1, 0x00AA20B8, 0xD71D5F15, 0x007825FF, + 0xFCFB6F79, 0xAD0AE156, 0xC843E2DC, 0x19938CB0, 0x1A13258C, 0x165E7AFB, 0x9E0B72FD, 0xF648DEAE, + 0x07FE9301, 0x0000CC3F, 0x8660DE8B, 0xDFDC6368, 0x3D675D4D, 0x7AD41E2F, 0x6398FE35, 0xCD74C027, + 0x99AE2814, 0x5AB93053, 0x5C4B309E, 0x6AC550BC, 0xB80C4F16, 0x00000000, 0x6083F02B, 0x57782EDD, + 0xF828300C, 0x01000000, 0x000000E0, 0x35872650, 0x4263E64E, 0x844B043B, 0xC9267F1C, 0xCBA006E5, + 0x37F159AF, 0xCE214EBF, 0x823145D6, 0x80036E00, 0x603D1BDA, 0x205BEC12, 0x9EC83DF1, 0x78B181E7, + 0x39683AEC, 0x7D2E9B3D, 0xA0660E00, 0x12E005C9, 0x63E50726, 0x87D216E9, 0x01EADD8D, 0xC9E04B0E, + 0x33CC4C1F, 0x2739960B, 0xDCE36091, 0x10326016, 0x9DAC53AA, 0xBA2F5040, 0xB9A73638, 0x73645D22, + 0xA90C6887, 0x8B1929BC, 0x348E19BA, 
0x58D5FA48, 0xEA56CFB4, 0x660C0901, 0x92E501B8, 0x6F66E1B6, + 0x4164134F, 0x99EED4BC, 0xE4093A20, 0xF3DC9B15, 0x7BDB6257, 0xF81A88DE, 0xD3898FF8, 0x0DC8B1DF, + 0xDC406A1D, 0x941178EC, 0x39EC1403, 0x31CC870C, 0xCD596240, 0xF31C473E, 0x310679F9, 0xE4B45FFA, + 0x57858A93, 0x3A20BCE1, 0xA8611266, 0xCB010400, 0x91E450E5, 0x55B3FA18, 0x60F476B4, 0xF8282E2F, + 0xC1FB4E33, 0x14288F83, 0x43C0C011, 0xC20F855B, 0x7DF50207, 0x0A122AAB, 0xE638330D, 0x2C68FBB9, + 0x343341DF, 0xD993733D, 0x194B8E0A, 0xDE73C0B8, 0x3100949D, 0xBD270EE0, 0x729C7E0F, 0xA6073ED7, + 0x50BF8E80, 0x59B33AEE, 0x9FAF7379, 0x925B15F7, 0x92EF1272, 0x0A1E00BD, 0x7E1B0000, 0x00000000, }; static u32 ForceRateDiv; -static u32 sysopen = 0; -static u32 sndopen = 0; +static BINKSNDSYSOPEN sysopen = 0; +static BINKSNDOPEN sndopen = 0; static u32 numopensounds = 0; -static u32 cb_bink_IO = 0; -static u32 cb_bink_sound = 0; -static u32 ForceRate = 0xFFFFFFFF; -static u32 IOBufferSize = 0xFFFFFFFF; -static u32 Simulate = 0xFFFFFFFF; +static RADCB_HANDLER PTR4* cb_bink_IO = 0; +static RADCB_HANDLER PTR4* cb_bink_sound = 0; +static u32 ForceRate = BINK_OPEN_OVERRIDE_UNSET; +static u32 IOBufferSize = BINK_OPEN_OVERRIDE_UNSET; +static u32 Simulate = BINK_OPEN_OVERRIDE_UNSET; static u32 TotTracks = 1; -static u32 UserOpen = 0; +static BINKIOOPEN UserOpen = 0; -void bpopmalloc(HBINK bnk, u32 tmpArg) +static void PTR4* bpopmalloc(HBINK bnk, u32 bytes) { - u32 tmpReg; + u32 total; - tmpReg = popmalloctotal(); - bnk->totalmem = bnk->totalmem + tmpReg + tmpArg; - popmalloc(); + total = popmalloctotal(); + total += bytes; + bnk->totalmem += total; + return popmalloc(bytes); } -void BinkSetError(const char* err) +void BinkSetError(const char PTR4* err) { strcpy(binkerr, err); } -char* BinkGetError(void) +char PTR4* BinkGetError(void) { return binkerr; } -// TODO: Fix Function -// I hate nested if's -s32 BinkSetSoundSystem(BINKSNDSYSOPEN open, u32 param) +s32 BinkSetSoundSystem(BINKSNDSYSOPEN open, UINTa param) { - BINKSNDSYSOPEN* 
pcVar1; - s32 iVar2; + BINKSNDOPEN newopen; - if (open != NULL) - { - if (sysopen == NULL) - { - sysopen = open; - } - pcVar1 = sysopen; - if ((sysopen == open) || (pcVar1 = open, numopensounds != 0)) - { - sysopen = pcVar1; - iVar2 = (param); - if (iVar2 != 0) - { - sndopen = iVar2; - } - if (sndopen != 0) - { - return 1; - } - return 0; + if (open == 0) { + goto fail; + } + if (sysopen == 0) { + sysopen = open; + } + if (sysopen != open) { + if (numopensounds == 0) { + goto set_open; } + goto fail; } + goto open_sound; + +fail: return 0; + +set_open: + sysopen = open; + +open_sound: + newopen = sysopen(param); + if (newopen != 0) { + sndopen = newopen; + } + return sndopen != 0; } -// (undefined *param_1,undefined2 *param_2,uint param_3) -void conv16to8() +static u8 PTR4* conv16to8(u8 PTR4* dest, const s16 PTR4* src, u32 bytes) { + u32 count = bytes >> 1; + + while (count != 0) { + *dest++ = *src++ >> BINK_16_TO_8_SAMPLE_SHIFT; + --count; + } + return dest; } -void checksound(u32 tmp) +static u32 dosilence(BINKSND PTR4* snd) { + u32 amt = snd->sndamt; + u32 prime = snd->sndprime; + + if (amt < prime) { + u32 bytes = prime - amt; + u32 silence_ms = + bytes * BINK_MILLISECONDS_PER_SECOND / (((snd->freq * snd->bits) * + snd->chans) >> + 3); + u8 PTR4* pos = snd->sndreadpos - bytes; + u8 PTR4* start = snd->sndbuf; + + snd->sndreadpos = pos; + if (pos < start) { + u32 wrap = start - pos; + u32 first = bytes - wrap; + + snd->sndreadpos = pos + snd->sndbufsize; + memset(start, 0, first); + memset(snd->sndreadpos, 0, wrap); + } else { + memset(pos, 0, bytes); + } + + snd->sndamt += bytes; + return silence_ms; + } + + return 0; } -void inittimer(HBINK bnk) +static void checksound(HBINK bnk) +{ + if (bnk->bsnd[bnk->playingtracks - 1].Ready != 0) { + RADCB_CALLBACK PTR4* callback = BINK_SOUND_CALLBACK(bnk); + + if (RADCB_try_to_suspend_callback(cb_bink_sound, callback) != 0) { + u32 i; + + i = 0; + if (i < bnk->playingtracks) { + u32 next; + + do { + next = i + 1; + while 
(bnk->bsnd[i].sndamt > bnk->bsnd[i].BestSizeIn16 || + ((s32)bnk->FrameNum > (s32)bnk->bsnd[i].sndendframe && + bnk->bsnd[i].sndamt != 0)) { + if (bnk->bsnd[i].Ready(&bnk->bsnd[i]) != 0) { + u8 PTR4* addr; + u32 len; + + if (bnk->bsnd[i].Lock(&bnk->bsnd[i], &addr, &len) != 0) { + u32 tail; + + if (bnk->bsnd[i].sndconvert8 != 0) { + len += len; + } + if (len > bnk->bsnd[i].sndamt) { + len = bnk->bsnd[i].sndamt; + } + if ((s32)bnk->FrameNum <= (s32)bnk->bsnd[i].sndendframe) { + len &= bnk->bsnd[i].BestSizeMask << bnk->bsnd[i].sndconvert8; + } + + bnk->bsnd[i].sndamt -= len; + tail = bnk->bsnd[i].sndend - bnk->bsnd[i].sndreadpos; + if (tail < len) { + if (tail != 0) { + if (bnk->bsnd[i].sndconvert8 != 0) { + addr = conv16to8(addr, (s16 PTR4*)bnk->bsnd[i].sndreadpos, tail); + } else { + memcpy(addr, bnk->bsnd[i].sndreadpos, tail); + addr += tail; + } + } + + if (bnk->bsnd[i].sndconvert8 != 0) { + conv16to8(addr, (s16 PTR4*)bnk->bsnd[i].sndbuf, len - tail); + } else { + memcpy(addr, bnk->bsnd[i].sndbuf, len - tail); + } + bnk->bsnd[i].sndreadpos = bnk->bsnd[i].sndbuf + (len - tail); + } else { + if (bnk->bsnd[i].sndconvert8 != 0) { + conv16to8(addr, (s16 PTR4*)bnk->bsnd[i].sndreadpos, len); + } else { + memcpy(addr, bnk->bsnd[i].sndreadpos, len); + } + bnk->bsnd[i].sndreadpos += len; + } + + if (bnk->bsnd[i].sndconvert8 != 0) { + len >>= 1; + } + bnk->bsnd[i].Unlock(&bnk->bsnd[i], len); + continue; + } + } + break; + } + + i = next; + } while (next < bnk->playingtracks); + } + + RADCB_resume_callback(cb_bink_sound, callback); + } + } +} + +static void inittimer(HBINK bnk) { if (bnk->startsynctime == 0) { @@ -711,46 +787,47 @@ void inittimer(HBINK bnk) } } -void GotoFrame(HBINK bnk, u32 tmp) +static void GotoFrame(HBINK bnk, u32 frame) { - u32 uVar1; + u32 frame_offset; - if (tmp != 0) + if (frame != 0) { - tmp = tmp - 1; + frame = frame - 1; } - bnk->bsnd = 0; - if (bnk->lastresynctime != 0) + bnk->skipped_this_frame = 0; + if (bnk->playingtracks != 0) { checksound(bnk); 
} bnk->LastFrameNum = bnk->FrameNum; - if (tmp == 0) + if (frame == 0) { if (bnk->soundon != 0) { - BinkSetSoundOnOff(bnk, 0); - BinkSetSoundOnOff(bnk, 1); + BinkSetSoundOnOff(bnk, BINK_SOUND_OFF); + BinkSetSoundOnOff(bnk, BINK_SOUND_ON); } bnk->startsynctime = 0; - bnk->bunp.patptr = 0; // had to change this, I assume the struct is correct + bnk->big_sound_skip_adj = 0; } if (bnk->preloadptr != 0) { - bnk->compframe = bnk->preloadptr + ((bnk->frameoffsets[tmp] & 0xfffffffe) - - ((*bnk->frameoffsets) & 0xfffffffe)); + bnk->compframe = bnk->preloadptr + (BINK_FRAME_OFFSET(bnk->frameoffsets[frame]) - + BINK_FRAME_OFFSET(*bnk->frameoffsets)); } else { - uVar1 = (bnk->frameoffsets[tmp] & 0xfffffffe) & 0xfffffffe; - (bnk->bio.ReadFrame)(&bnk->bio, tmp, uVar1, bnk->compframe, - (*((bnk->frameoffsets + tmp) + 1) & 0xfffffffe) - uVar1); + frame_offset = BINK_FRAME_OFFSET(bnk->frameoffsets[frame]); + (bnk->bio.ReadFrame)(&bnk->bio, frame, frame_offset, bnk->compframe, + BINK_FRAME_OFFSET(bnk->frameoffsets[frame + 1]) - + frame_offset); } - if (bnk->lastresynctime != 0) + if (bnk->playingtracks != 0) { checksound(bnk); } - bnk->FrameNum = tmp + 1; + bnk->FrameNum = frame + 1; } void BinkSetFrameRate(u32 FRate, u32 FRateDiv) @@ -766,7 +843,7 @@ void BinkSetIOSize(u32 iosize) void BinkSetIO(BINKIOOPEN io) { - UserOpen = (u32)io; + UserOpen = io; } void BinkSetSimulate(u32 sim) @@ -774,182 +851,1879 @@ void BinkSetSimulate(u32 sim) Simulate = sim; } -void BinkSetSoundTrack(u32 total_tracks, u32* tracks) +void BinkSetSoundTrack(u32 total_tracks, u32 PTR4* tracks) { u32 i; - s32 iVar2; - if (8 < total_tracks) - { - total_tracks = 8; + if (total_tracks > BINK_MAX_TRACKS) { + total_tracks = BINK_MAX_TRACKS; } - i = 0; TotTracks = total_tracks; - if (TotTracks == 0) - { - return; + for (i = 0; i < total_tracks; ++i) { + TrackNums[i] = tracks[i]; } - for (i = 0; i < total_tracks; i++) - { - *(u32*)((s32)&TrackNums + iVar2) = *(u32*)(iVar2 + total_tracks); - iVar2 = iVar2 + 4; +} + 
+static u32 high1secrate(s32 frames, const u32 PTR4* frameoffsets, s32 span, + u32 PTR4* highframe, u32 PTR4* allkey) +{ + u32 rate; + u32 best; + u32 key; + s32 i; + + rate = 0; + best = 0; + key = 1; + i = 0; + if (i < frames - span) { + const u32 PTR4* start = frameoffsets; + const u32 PTR4* end = frameoffsets + span; + + do { + u32 diff = *end++ - *start++; + + if (diff > rate) { + rate = diff; + best = i; + } + ++i; + } while (i < frames - span); + } + + if (rate == 0) { + rate = mult64anddiv(frameoffsets[frames] - frameoffsets[0], span, frames); + } + + for (i = 0; i < frames; ++i) { + if (BINK_FRAME_KEY(frameoffsets[i]) == 0) { + key = 0; + break; + } } + + *allkey = key; + *highframe = best; + return rate; } -void bink_suspend_io(HBINK bnk) +static void bink_suspend_io(BINKIO PTR4* io) { - RADCB_suspend_callback(cb_bink_IO, &bnk->Paused); + RADCB_suspend_callback(cb_bink_IO, BINK_IO_CALLBACK(io)); } -void bink_try_suspend_io(HBINK bnk) +static s32 bink_try_suspend_io(BINKIO PTR4* io) { - RADCB_try_to_suspend_callback(cb_bink_IO, &bnk->Paused); + return RADCB_try_to_suspend_callback(cb_bink_IO, BINK_IO_CALLBACK(io)); } -void bink_resume_io(HBINK bnk) +static void bink_resume_io(BINKIO PTR4* io) { - RADCB_resume_callback(cb_bink_IO, &bnk->Paused); + RADCB_resume_callback(cb_bink_IO, BINK_IO_CALLBACK(io)); } -void bink_idle_on_io(HBINK bnk) +static void bink_idle_on_io(BINKIO PTR4* io) { RADCB_idle_on_callbacks(); } -u32 bink_get_priority_sound(s32 tmp, u32 tmp2) +static u32 bink_get_priority_sound(RADCB_CALLBACK PTR4* callback, u32 count) { - if (*(u32*)(tmp + -4) != tmp2) + HBINK bnk = BINK_FROM_SOUND_CALLBACK(callback); + + if (bnk->last_time_almost_empty != count) { - return ~*(u32*)(tmp + -4); + return ~bnk->last_time_almost_empty; } return 0; } -void bink_sound_callback(HBINK bnk, u32 tmp) +static void bink_sound_callback(RADCB_CALLBACK PTR4* callback, u32 callback_count) { - if (bnk->lastresynctime != NULL) - { - checksound(&bnk->last_read_count); + 
HBINK bnk = BINK_FROM_SOUND_CALLBACK(callback); + + if (bnk->playingtracks != 0) { + checksound(bnk); } - bnk->last_time_almost_empty = tmp; + bnk->last_time_almost_empty = callback_count; } HBINK BinkOpen(const char PTR4* name, u32 flags) { + BINK bnk; + BINKHDR hdr; + BINKIOOPEN open; + u32 open_flags; + u32 scale_flags; + u32 bundle_sizes[BINK_BUNDLE_COUNT]; + u32 all_key; + u32 plane_size; + u32 simulate; + HBINK out; + + open = (BINKIOOPEN)BinkFileOpen; + if ((flags & BINKSNDTRACK) == 0) { + TrackNums[0] = 0; + TotTracks = 1; + } + + memset(&bnk, 0, sizeof(bnk)); + bnk.timeopen = RADTimerRead(); + binkerr[0] = 0; + + if ((flags & BINKFROMMEMORY) != 0) { + hdr = *(const BINKHDR*)name; + } else { + if ((flags & BINKIOPROCESSOR) != 0 && UserOpen != 0) { + open = UserOpen; + } + UserOpen = 0; + + if (open(&bnk.bio, name, flags) == 0) { + if (binkerr[0] == 0) { + BinkSetError(BINK_ERROR_OPENING_FILE); + } + return 0; + } + bnk.bio.ReadHeader(&bnk.bio, 0, &hdr, sizeof(hdr)); + } + + if (hdr.Marker != BINKMARKER1 && hdr.Marker != BINKMARKER2 && + hdr.Marker != BINKMARKER3 && hdr.Marker != BINKMARKER4) { + BinkSetError(BINK_ERROR_NOT_BINK); + goto close_and_fail; + } + + if (hdr.Frames == 0) { + BinkSetError(BINK_ERROR_NO_COMPRESSED_FRAMES); + goto close_and_fail; + } + + open_flags = flags & ~BINKCOPYNOSCALING; + bnk.UVWidth = (((hdr.Width + BINK_CHROMA_ROUND_MASK) >> BINK_CHROMA_SHIFT) + + BINK_CHROMA_ALIGN_MASK) & + ~BINK_CHROMA_ALIGN_MASK; + bnk.UVHeight = (((hdr.Height + BINK_CHROMA_ROUND_MASK) >> BINK_CHROMA_SHIFT) + + BINK_CHROMA_ALIGN_MASK) & + ~BINK_CHROMA_ALIGN_MASK; + bnk.YWidth = bnk.UVWidth * BINK_CHROMA_SCALE; + bnk.YHeight = bnk.UVHeight * BINK_CHROMA_SCALE; + bnk.MaskPitch = bnk.YWidth >> BINK_MASK_BLOCK_SHIFT; + + bnk.Width = hdr.Width; + bnk.Height = hdr.Height; + bnk.decompwidth = hdr.Width; + bnk.decompheight = hdr.Height; + bnk.BinkType = hdr.Flags; + if ((hdr.Flags & BINKALPHA) == 0) { + open_flags = flags & ~(BINKCOPYNOSCALING | BINKALPHA); + } 
+ open_flags |= hdr.Flags & BINKGRAYSCALE; + + scale_flags = flags & BINKCOPYNOSCALING; + if (scale_flags != BINKCOPYNOSCALING) { + if (scale_flags == 0) { + scale_flags = hdr.Flags & BINKCOPYNOSCALING; + } + open_flags |= scale_flags; + if (scale_flags == BINKCOPY2XW) { + bnk.Width = hdr.Width * BINK_COPY_SCALE; + } else if (scale_flags == BINKCOPY2XH || scale_flags == BINKCOPY2XHI) { + bnk.Height = hdr.Height * BINK_COPY_SCALE; + } else if (scale_flags == BINKCOPY2XWH || scale_flags == BINKCOPY2XWHI) { + bnk.Width = hdr.Width * BINK_COPY_SCALE; + bnk.Height = hdr.Height * BINK_COPY_SCALE; + } + } + + if (hdr.Marker == BINKMARKER1 || hdr.Marker == BINKMARKER2) { + open_flags |= BINKRBINVERT; + open_flags |= BINKOLDFRAMEFORMAT; + } else if (hdr.Marker == BINKMARKER3) { + open_flags |= BINKOLDFRAMEFORMAT; + } + + bnk.OpenFlags = open_flags; + bnk.Frames = hdr.Frames; + bnk.InternalFrames = hdr.InternalFrames; + if ((flags & BINKFRAMERATE) != 0 && ForceRate != BINK_OPEN_OVERRIDE_UNSET) { + bnk.FrameRate = ForceRate; + bnk.FrameRateDiv = ForceRateDiv; + ForceRate = BINK_OPEN_OVERRIDE_UNSET; + } else { + bnk.FrameRate = hdr.FrameRate; + bnk.FrameRateDiv = hdr.FrameRateDiv; + } + bnk.runtimeframes = (hdr.FrameRate + (hdr.FrameRateDiv >> 1)) / hdr.FrameRateDiv; + bnk.Size = hdr.Size; + bnk.NumTracks = hdr.NumTracks; + bnk.LargestFrameSize = hdr.LargestFrameSize; + bnk.fileframerate = hdr.FrameRate; + bnk.fileframeratediv = hdr.FrameRateDiv; + bnk.MaskLength = ((bnk.YWidth >> BINK_MASK_BLOCK_SHIFT) * bnk.YHeight) >> + BINK_MASK_BLOCK_SHIFT; + if (bnk.runtimeframes == 0) { + bnk.runtimeframes = 1; + } + bnk.runtimemoveamt = BINK_ARRAY_BYTES(bnk.runtimeframes - 1, bnk.rtframetimes); + + pushmalloc(&bnk.MaskPlane, bnk.MaskLength + BINK_MASK_PLANE_GUARD_BYTES); + pushmalloc((void PTR4* PTR4*)&bnk.rtframetimes, + BINK_ARRAY_BYTES(bnk.runtimeframes, bnk.rtframetimes)); + pushmalloc((void PTR4* PTR4*)&bnk.rtadecomptimes, + BINK_ARRAY_BYTES(bnk.runtimeframes, 
bnk.rtadecomptimes)); + pushmalloc((void PTR4* PTR4*)&bnk.rtvdecomptimes, + BINK_ARRAY_BYTES(bnk.runtimeframes, bnk.rtvdecomptimes)); + pushmalloc((void PTR4* PTR4*)&bnk.rtblittimes, + BINK_ARRAY_BYTES(bnk.runtimeframes, bnk.rtblittimes)); + pushmalloc((void PTR4* PTR4*)&bnk.rtreadtimes, + BINK_ARRAY_BYTES(bnk.runtimeframes, bnk.rtreadtimes)); + pushmalloc((void PTR4* PTR4*)&bnk.rtidlereadtimes, + BINK_ARRAY_BYTES(bnk.runtimeframes, bnk.rtidlereadtimes)); + pushmalloc((void PTR4* PTR4*)&bnk.rtthreadreadtimes, + BINK_ARRAY_BYTES(bnk.runtimeframes, bnk.rtthreadreadtimes)); + pushmalloc((void PTR4* PTR4*)&bnk.bsnd, BINK_ARRAY_BYTES(TotTracks, bnk.bsnd)); + pushmalloc((void PTR4* PTR4*)&bnk.trackindexes, + BINK_ARRAY_BYTES(TotTracks, bnk.trackindexes)); + + ExpandBundleSizes(bundle_sizes, bnk.YWidth); + pushmalloc(&bnk.bunp.typeptr, bundle_sizes[BINK_BUNDLE_BLOCK_TYPES]); + pushmalloc(&bnk.bunp.type16ptr, bundle_sizes[BINK_BUNDLE_SUBBLOCK_TYPES]); + pushmalloc(&bnk.bunp.colorptr, bundle_sizes[BINK_BUNDLE_COLORS]); + pushmalloc(&bnk.bunp.bits2ptr, bundle_sizes[BINK_BUNDLE_PATTERNS]); + pushmalloc(&bnk.bunp.motionXptr, bundle_sizes[BINK_BUNDLE_X_OFFSETS]); + pushmalloc(&bnk.bunp.motionYptr, bundle_sizes[BINK_BUNDLE_Y_OFFSETS]); + pushmalloc(&bnk.bunp.dctptr, bundle_sizes[BINK_BUNDLE_INTRA_DC]); + pushmalloc(&bnk.bunp.mdctptr, bundle_sizes[BINK_BUNDLE_INTER_DC]); + pushmalloc(&bnk.bunp.patptr, bundle_sizes[BINK_BUNDLE_RUNS]); + + if ((flags & BINKFROMMEMORY) == 0) { + pushmalloc((void PTR4* PTR4*)&bnk.frameoffsets, + BINK_FRAME_OFFSETS_BYTES(bnk.InternalFrames, bnk.frameoffsets)); + pushmalloc((void PTR4* PTR4*)&bnk.tracksizes, + BINK_ARRAY_BYTES(bnk.NumTracks, bnk.tracksizes)); + pushmalloc((void PTR4* PTR4*)&bnk.tracktypes, + BINK_ARRAY_BYTES(bnk.NumTracks, bnk.tracktypes)); + pushmalloc((void PTR4* PTR4*)&bnk.trackIDs, + BINK_ARRAY_BYTES(bnk.NumTracks, bnk.trackIDs)); + } + + out = bpopmalloc(&bnk, sizeof(*out)); + if (out != 0) { + memcpy(out, &bnk, sizeof(*out)); + 
out->bio.bink = out; + out->rtadecomptimes[0] = 0; + out->rtvdecomptimes[0] = 0; + out->rtblittimes[0] = 0; + out->rtreadtimes[0] = 0; + out->rtidlereadtimes[0] = 0; + out->rtthreadreadtimes[0] = 0; + + if ((flags & BINKFROMMEMORY) == 0) { + out->bio.ReadHeader(&out->bio, -1, out->tracksizes, + BINK_ARRAY_BYTES(out->NumTracks, out->tracksizes)); + out->bio.ReadHeader(&out->bio, -1, out->tracktypes, + BINK_ARRAY_BYTES(out->NumTracks, out->tracktypes)); + out->bio.ReadHeader(&out->bio, -1, out->trackIDs, + BINK_ARRAY_BYTES(out->NumTracks, out->trackIDs)); + out->bio.ReadHeader(&out->bio, -1, out->frameoffsets, + BINK_FRAME_OFFSETS_BYTES(out->InternalFrames, out->frameoffsets)); + } else { + out->tracksizes = (u32 PTR4*)name + BINK_MEMORY_HEADER_WORDS; + out->tracktypes = out->tracksizes + out->NumTracks; + out->trackIDs = out->tracktypes + out->NumTracks; + out->frameoffsets = out->trackIDs + out->NumTracks; + } + + out->Highest1SecRate = + high1secrate(out->Frames, out->frameoffsets, out->runtimeframes, + &out->Highest1SecFrame, &all_key); + + plane_size = out->YWidth * out->YHeight + out->UVWidth * out->UVHeight * BINK_CHROMA_PLANE_COUNT; + if ((out->BinkType & BINKALPHA) == 0) { + if (all_key == 0) { + pushmalloc(&out->YPlane[1], plane_size); + } + } else { + pushmalloc(&out->APlane[0], out->YWidth * out->YHeight); + if (all_key == 0) { + pushmalloc(&out->APlane[1], out->YWidth * out->YHeight); + pushmalloc(&out->YPlane[1], plane_size); + } + } + + out->YPlane[0] = bpopmalloc(out, plane_size); + if (out->YPlane[0] == 0) { + radfree(out); + goto open_failed; + } else { + if (all_key != 0) { + out->YPlane[1] = out->YPlane[0]; + out->APlane[1] = out->APlane[0]; + } + + if ((flags & BINKIOSIZE) != 0 && IOBufferSize != BINK_OPEN_OVERRIDE_UNSET) { + out->iosize = IOBufferSize; + IOBufferSize = BINK_OPEN_OVERRIDE_UNSET; + } else { + out->iosize = out->Highest1SecRate; + } + + if ((flags & BINKSIMULATE) != 0 && Simulate != BINK_OPEN_OVERRIDE_UNSET) { + simulate = 
Simulate; + Simulate = BINK_OPEN_OVERRIDE_UNSET; + } else { + simulate = 0; + } + + if ((flags & BINKFROMMEMORY) != 0) { + out->preloadptr = (u8 PTR4*)name + BINK_FRAME_OFFSET(out->frameoffsets[0]); + } else { + out->iosize = out->bio.GetBufferSize(&out->bio, out->iosize); + if (out->iosize >= + (out->Size * BINK_PRELOAD_THRESHOLD_NUMERATOR) / + BINK_PRELOAD_THRESHOLD_DENOMINATOR) { + flags |= BINKPRELOADALL; + out->OpenFlags |= BINKPRELOADALL; + } + + if ((flags & BINKPRELOADALL) != 0) { + u32 preload_size = + out->Size + BINK_FILE_HEADER_BYTES - + BINK_FRAME_OFFSET(out->frameoffsets[0]); + + out->preloadptr = bpopmalloc(out, preload_size); + if (out->preloadptr == 0) { + radfree(bnk.YPlane[0]); + goto open_failed; + } + out->bio.SetInfo(&out->bio, 0, 0, out->Size + BINK_FILE_HEADER_BYTES, simulate); + out->bio.ReadFrame(&out->bio, 0, BINK_FRAME_OFFSET(out->frameoffsets[0]), + out->preloadptr, preload_size); + out->bio.Close(&out->bio); + out->BackgroundThread = 0; + } else { + pushmalloc(&out->compframe, out->LargestFrameSize); + out->ioptr = bpopmalloc(out, out->iosize); + if (out->ioptr == 0) { + out->iosize = 0; + } + out->bio.SetInfo(&out->bio, out->ioptr, out->iosize, + out->Size + BINK_FILE_HEADER_BYTES, simulate); + } + } + + ((u8 PTR4*)out->MaskPlane)[out->MaskLength] = 0; + out->FrameNum = BINK_FRAME_BEFORE_FIRST; + if (out->FrameRate != 0) { + out->twoframestime = mult64anddiv(BINK_TWO_FRAME_MILLISECONDS, out->FrameRateDiv, out->FrameRate); + } else { + out->twoframestime = BINK_TWO_FRAME_MILLISECONDS; + } + out->videoon = BINK_VIDEO_ON; + GotoFrame(out, BINK_FIRST_FRAME); + out->timeopen = RADTimerRead() - out->timeopen; + out->bsnd[0].Latency = BINK_FIXED_1; + out->playingtracks = 0; + if (out->NumTracks != 0 && TotTracks != 0) { + u32 wanted; + + for (wanted = 0; wanted < TotTracks; ++wanted) { + s32 track; + + for (track = 0; track < out->NumTracks; ++track) { + if (out->trackIDs[track] == TrackNums[wanted]) { + out->trackindexes[out->playingtracks] 
= track; + ++out->playingtracks; + break; + } + } + } + } + TrackNums[0] = 0; + TotTracks = 1; + if (out->playingtracks != 0) { + u32 playing = 0; + + while (playing < out->playingtracks) { + u32 track = out->trackindexes[playing]; + u32 tracktype = out->tracktypes[track]; + BINKSND PTR4* snd = &out->bsnd[playing]; + + if (BINKTRACKISOPENABLE(tracktype)) { + if (sndopen == 0) { + BinkSetSoundSystem(BinkOpenNGCSound, 0); + } + snd->sndbuf = 0; + if (sndopen != 0) { + u32 freq = BINKTRACKFREQ(tracktype); + s32 bits = BINKTRACKBITS(tracktype); + s32 chans = BINKTRACKCHANNELS(tracktype); + + if (out->FrameRate != 0 && out->FrameRateDiv != 0) { + freq = ((f64)freq * (f64)out->FrameRate * + (f64)out->fileframeratediv) / + ((f64)out->FrameRateDiv * (f64)out->fileframerate); + } + + if (sndopen(snd, freq, bits, chans, out->OpenFlags, out) != 0) { + if (snd->BestSizeMask == 0) { + snd->BestSizeMask = -1; + } + snd->sndbufsize = BINK_SOUND_BUFFER_BYTES(out->tracksizes[track]); + snd->sndbuf = bpopmalloc(out, snd->sndbufsize); + if (snd->sndbuf == 0) { + snd->Close(snd); + } else { + ++numopensounds; + snd->sndconvert8 = + BINKTRACKBITS(tracktype) == BINK_SOUND_BITS_8; + snd->sndend = snd->sndbuf + snd->sndbufsize; + snd->sndwritepos = snd->sndbuf; + snd->sndreadpos = snd->sndbuf; + snd->sndprime = + BINK_SOUND_PRIME_BYTES(freq, tracktype, snd->SoundDroppedOut); + if (snd->sndbufsize < snd->sndprime) { + snd->sndprime = snd->sndbufsize; + } + snd->sndcomp = (UINTa)BinkAudioDecompressOpen( + freq, chans, BINKTRACKDECOMPFLAGS(tracktype)); + snd->sndendframe = + out->Frames - + BINK_SOUND_END_PREROLL_FRAMES(out->fileframerate, out->fileframeratediv); + snd->sndamt = 0; + } + } + } + } + + if (snd->Latency == 0) { + snd->Latency = BINK_FIXED_1; + } + if (snd->sndbuf == 0) { + --out->playingtracks; + memcpy(&out->trackindexes[playing], &out->trackindexes[playing + 1], + (out->playingtracks - playing) * sizeof(out->trackindexes[0])); + } else { + ++playing; + } + } + } + if 
(out->playingtracks != 0) { + out->soundon = BINK_SOUND_ON; + if (cb_bink_sound == 0) { + cb_bink_sound = RADCB_allocate_handler(BINK_SOUND_CALLBACK_PRIORITY); + RADCB_resume_handler(cb_bink_sound); + } + RADCB_register_callback(cb_bink_sound, BINK_SOUND_CALLBACK(out), + bink_get_priority_sound, bink_sound_callback); + } + out->bio.resume_callback = bink_resume_io; + out->bio.suspend_callback = bink_suspend_io; + out->bio.try_suspend_callback = bink_try_suspend_io; + out->bio.idle_on_callback = bink_idle_on_io; + if (out->preloadptr == 0 && (flags & BINKNOFILLIOBUF) == 0) { + while (out->bio.Idle(&out->bio) != 0) { + } + } + return out; + } + } + +open_failed: + BinkSetError(BINK_ERROR_OUT_OF_MEMORY); +close_and_fail: + if ((flags & BINKFROMMEMORY) == 0) { + bnk.bio.Close(&bnk.bio); + } + return 0; } -s32 BinkCopyToBuffer(HBINK bnk, void* dest, s32 destpitch, u32 destheight, u32 destx, u32 desty, +s32 BinkCopyToBuffer(HBINK bnk, void PTR4* dest, s32 destpitch, u32 destheight, u32 destx, u32 desty, u32 flags) { - BinkCopyToBufferRect(bnk, dest, destpitch, destheight, destx, desty, 0, 0, bnk->Width, - bnk->Height, flags); + /* BinkCopyToBuffer: whole-frame copy. Forwards to BinkCopyToBufferRect with the source rectangle (0, 0, bnk->Width, bnk->Height) and returns that call's result. */ return BinkCopyToBufferRect(bnk, dest, destpitch, destheight, destx, desty, 0, 0, bnk->Width, + bnk->Height, flags); } -s32 BinkCopyToBufferRect(HBINK bnk, void* dest, s32 destpitch, u32 destheight, u32 destx, u32 desty, +s32 BinkCopyToBufferRect(HBINK bnk, void PTR4* dest, s32 destpitch, u32 destheight, u32 destx, u32 desty, u32 srcx, u32 srcy, u32 srcw, u32 srch, u32 flags) { + /* BinkCopyToBufferRect: blits a source rectangle of the current decoded frame into dest. Returns 0 if bnk or dest is null, bnk->skipped_this_frame if the clipped rectangle is empty or the frame is already marked skipped, 1 if this blit is dropped for being late, otherwise the local skipped flag. */ u32 blitflags; + s32 skipped; + + skipped = 0; + if (bnk == 0 || dest == 0) { + return 0; + } + + /* Rescale the rectangle from Width/Height units to decompwidth/decompheight units (mult64anddiv is defined elsewhere; presumably a * b / c). */ srcx = mult64anddiv(srcx, bnk->decompwidth, bnk->Width); + srcw = mult64anddiv(srcw, bnk->decompwidth, bnk->Width); + srcy = mult64anddiv(srcy, bnk->decompheight, bnk->Height); + srch = mult64anddiv(srch, bnk->decompheight, bnk->Height); + + /* Clamp the origin, then the extent, to the decoded frame bounds. */ if (srcx >= bnk->decompwidth) { + srcx = bnk->decompwidth; + } + if (srcy >= bnk->decompheight) { + srcy = bnk->decompheight; + } + if
(srcx + srcw > bnk->decompwidth) { + srcw = bnk->decompwidth - srcx; + } + if (srcy + srch > bnk->decompheight) { + srch = bnk->decompheight - srcy; + } + + /* Nothing left to draw, or this frame is already marked skipped. */ if (srcw == 0 || srch == 0 || bnk->skipped_this_frame != 0) { + return bnk->skipped_this_frame; + } + + bnk->bio.Working = 1; + /* Merge the caller's flags with the scaling, grayscale and RB-invert bits recorded in OpenFlags. */ blitflags = flags | (bnk->OpenFlags & (BINKCOPYNOSCALING | BINKGRAYSCALE | BINKRBINVERT)); + YUV_init(blitflags & BINKSURFACEMASK); + + /* Negative pitch: fold desty into destheight and advance dest by -destpitch * (destheight - 1) bytes. */ if (destpitch < 0) { + destheight -= desty; + desty = 0; + dest = (u8 PTR4*)dest + -destpitch * (destheight - 1); + } + + /* With tracks playing, a frame rate set and playback not paused, compare elapsed time with the expected time for this frame to decide whether the blit is late. */ if (bnk->playingtracks != 0 && bnk->FrameRate != 0 && bnk->Paused == 0) { + u32 now; + u32 wait; + u32 adjusted; + u32 elapsed; + + inittimer(bnk); + now = RADTimerRead(); + wait = mult64anddiv((bnk->playedframes - bnk->startsyncframe) * BINK_MILLISECONDS_PER_SECOND, + bnk->FrameRateDiv, bnk->FrameRate); + adjusted = mult64andshift(wait, bnk->bsnd->Latency, BINK_FIXED_SHIFT); + elapsed = now - (bnk->startsynctime + bnk->big_sound_skip_adj); + if (elapsed >= adjusted) { + if (elapsed - adjusted > bnk->twoframestime) { + u32 count; + + /* Late by more than twoframestime. Once BINK_MAX_CONSECUTIVE_SKIPS is reached the counter resets and the blit goes ahead. */ count = bnk->skipped_in_a_row; + bnk->skipped_in_a_row = count + 1; + if (count + 1 >= BINK_MAX_CONSECUTIVE_SKIPS) { + bnk->skipped_in_a_row = 0; + } else { + /* Skipping is allowed unless BINKNOSKIP is set in either flags or OpenFlags; in that case the blit proceeds and skipped is returned as 1. */ if ((flags & BINKNOSKIP) == 0 && (bnk->OpenFlags & BINKNOSKIP) == 0) { + bnk->bio.Working = 0; + bnk->skippedlastblit = 1; + bnk->skippedblits++; + return 1; + } + skipped = 1; + } + goto do_blit; + } + bnk->skipped_in_a_row = 0; + } + bnk->skippedlastblit = 0; + } + +do_blit: + /* Not preloaded, no read in flight and buffer use below the low mark: run the I/O Idle handler directly if its callback could be suspended, otherwise idle on callbacks unless BINKNOTHREADEDIO is set. */ if (bnk->preloadptr == 0 && bnk->bio.DoingARead == 0 && + BINK_IO_BUFFER_USED_PERCENT(bnk) < BINK_IO_BUFFER_LOW_PERCENT) { + if (RADCB_try_to_suspend_callback(cb_bink_IO, BINK_IO_CALLBACK(&bnk->bio)) != 0) { + bnk->bio.Idle(&bnk->bio); + RADCB_resume_callback(cb_bink_IO, BINK_IO_CALLBACK(&bnk->bio)); + } else { + if ((bnk->OpenFlags & BINKNOTHREADEDIO) == 0) { + RADCB_idle_on_callbacks(); + } + } + } + + bnk->lastblitflags = blitflags; + if ((s32)blitflags >= 0 &&
bnk->MaskLength <= strlen((char PTR4*)bnk->MaskPlane)) { + /* Unmasked path: the sign bit of blitflags is clear and strlen over MaskPlane reaches MaskLength, i.e. none of the first MaskLength mask bytes is zero. Dispatch on surface type; the alpha surface types fall through to the non-alpha blitter when APlane[0] is null. */ flags = blitflags & BINKSURFACEMASK; + if (flags == BINKSURFACE4444) { + if (bnk->APlane[0] != 0) { + YUV_blit_16a4bpp(dest, destx, desty, destpitch, bnk->YPlane[bnk->PlaneNum], srcx, + srcy, srcw, srch, bnk->YWidth, bnk->YHeight, + bnk->APlane[bnk->PlaneNum], blitflags); + goto blit_done; + } + } else if (flags < BINKSURFACE5551) { + if (flags != BINKSURFACE32) { + if (flags != BINKSURFACE32A) { + goto blit_done; + } + if (bnk->APlane[0] != 0) { + YUV_blit_32abpp(dest, destx, desty, destpitch, bnk->YPlane[bnk->PlaneNum], srcx, + srcy, srcw, srch, bnk->YWidth, bnk->YHeight, + bnk->APlane[bnk->PlaneNum], blitflags); + goto blit_done; + } + } + YUV_blit_32bpp(dest, destx, desty, destpitch, bnk->YPlane[bnk->PlaneNum], srcx, srcy, + srcw, srch, bnk->YWidth, bnk->YHeight, blitflags); + goto blit_done; + } else if (flags >= BINKSURFACE555) { + if (flags > BINKSURFACE664) { + if (flags != BINKSURFACEYUY2) { + goto blit_done; + } + YUV_blit_YUY2(dest, destx, desty, destpitch, bnk->YPlane[bnk->PlaneNum], srcx, srcy, + srcw, srch, bnk->YWidth, bnk->YHeight, blitflags); + goto blit_done; + } + } + YUV_blit_16bpp(dest, destx, desty, destpitch, bnk->YPlane[bnk->PlaneNum], srcx, srcy, srcw, + srch, bnk->YWidth, bnk->YHeight, blitflags); + } else { + /* Masked path: the same surface-type dispatch, passing MaskPlane and MaskPitch to the *_mask blitters. */ flags = blitflags & BINKSURFACEMASK; + if (flags == BINKSURFACE4444) { + if (bnk->APlane[0] != 0) { + YUV_blit_16a4bpp_mask(dest, destx, desty, destpitch, bnk->MaskPlane, bnk->MaskPitch, + bnk->YPlane[bnk->PlaneNum], srcx, srcy, srcw, srch, bnk->YWidth, bnk->YHeight, + bnk->APlane[bnk->PlaneNum], blitflags); + goto blit_done; + } + } else if (flags < BINKSURFACE5551) { + if (flags != BINKSURFACE32) { + if (flags != BINKSURFACE32A) { + goto blit_done; + } + if (bnk->APlane[0] != 0) { + YUV_blit_32abpp_mask(dest, destx, desty, destpitch, bnk->MaskPlane, bnk->MaskPitch, + bnk->YPlane[bnk->PlaneNum], srcx, srcy, srcw, srch, bnk->YWidth, bnk->YHeight, + bnk->APlane[bnk->PlaneNum],
blitflags); + goto blit_done; + } + } + YUV_blit_32bpp_mask(dest, destx, desty, destpitch, bnk->MaskPlane, bnk->MaskPitch, + bnk->YPlane[bnk->PlaneNum], srcx, srcy, srcw, srch, bnk->YWidth, bnk->YHeight, blitflags); + goto blit_done; + } else if (flags >= BINKSURFACE555) { + if (flags > BINKSURFACE664) { + if (flags != BINKSURFACEYUY2) { + goto blit_done; + } + YUV_blit_YUY2_mask(dest, destx, desty, destpitch, bnk->MaskPlane, bnk->MaskPitch, + bnk->YPlane[bnk->PlaneNum], srcx, srcy, srcw, srch, bnk->YWidth, bnk->YHeight, blitflags); + goto blit_done; + } + } + YUV_blit_16bpp_mask(dest, destx, desty, destpitch, bnk->MaskPlane, bnk->MaskPitch, + bnk->YPlane[bnk->PlaneNum], srcx, srcy, srcw, srch, bnk->YWidth, bnk->YHeight, blitflags); + } + +blit_done: + bnk->bio.Working = 0; + return skipped; } s32 BinkDoFrame(HBINK bnk) { + /* BinkDoFrame: processes the current frame. Records timing history, copies decompressed audio for playing tracks into their sound buffers, and calls ExpandBink when videoon is set. Returns 0 normally (also for a null bnk or when lastdecompframe already equals FrameNum) and 1 when a read error is recorded. */ s32 suspended; + + suspended = 0; + if (bnk == 0 || bnk->lastdecompframe == bnk->FrameNum) { + return 0; + } + + bnk->skipped_this_frame = 0; + bnk->bio.Working = 1; + bnk->startframetime = RADTimerRead(); + + /* Shift every timing history array down one slot; slot 0 is refilled just below. */ memmove(bnk->rtframetimes + 1, bnk->rtframetimes, bnk->runtimemoveamt); + memmove(bnk->rtvdecomptimes + 1, bnk->rtvdecomptimes, bnk->runtimemoveamt); + memmove(bnk->rtadecomptimes + 1, bnk->rtadecomptimes, bnk->runtimemoveamt); + memmove(bnk->rtblittimes + 1, bnk->rtblittimes, bnk->runtimemoveamt); + memmove(bnk->rtreadtimes + 1, bnk->rtreadtimes, bnk->runtimemoveamt); + memmove(bnk->rtidlereadtimes + 1, bnk->rtidlereadtimes, bnk->runtimemoveamt); + memmove(bnk->rtthreadreadtimes + 1, bnk->rtthreadreadtimes, bnk->runtimemoveamt); + + bnk->rtframetimes[0] = bnk->startframetime; + bnk->rtvdecomptimes[0] = bnk->timevdecomp; + bnk->rtadecomptimes[0] = bnk->timeadecomp; + bnk->rtblittimes[0] = bnk->timeblit; + bnk->rtreadtimes[0] = bnk->bio.ForegroundTime; + bnk->rtidlereadtimes[0] = bnk->bio.IdleTime; + bnk->rtthreadreadtimes[0] = bnk->bio.ThreadTime; + + /* First call (firstframetime still 0): back-date history slot 1 by frame_time and reset the I/O idle and thread timers. */ if (bnk->firstframetime == 0) { + u32 frame_time; + + frame_time =
mult64anddiv(BINK_MILLISECONDS_PER_SECOND, bnk->fileframeratediv, + bnk->fileframerate); + bnk->rtframetimes[1] = bnk->startframetime - frame_time; + bnk->bio.ThreadTime = 0; + bnk->bio.IdleTime = 0; + bnk->firstframetime = bnk->startframetime; + } else if (bnk->playingtracks != 0) { + checksound(bnk); + } + + bnk->entireframetime = bnk->startframetime; + + { + BINKTRACKFRAME PTR4* frame_data; + + /* Per-track audio chunks are walked from the start of compframe; the pointer left after the last track is what ExpandBink receives further down. */ frame_data = (BINKTRACKFRAME PTR4*)bnk->compframe; + if (bnk->bio.ReadError != 0) { + bnk->ReadError = 1; + } + + /* NOTE(review): this early return leaves bnk->bio.Working at 1 (set on entry) - confirm that is intended. */ if (bnk->ReadError != 0) { + return 1; + } + + { + s32 track; + + track = 0; + if ((s32)bnk->NumTracks > 0) { + do { + s32 playing_index; + BINKTRACKFRAME PTR4* next_frame_data; + + /* Look up this track's slot in trackindexes; -1 means the track is not being played. */ playing_index = 0; + if ((s32)bnk->playingtracks > 0) { + s32 PTR4* indexes; + + indexes = bnk->trackindexes; + do { + if (*indexes++ == track) { + break; + } + ++playing_index; + } while (playing_index < (s32)bnk->playingtracks); + } + if (playing_index >= (s32)bnk->playingtracks) { + playing_index = -1; + } + + next_frame_data = BINK_NEXT_TRACK_FRAME(frame_data); + if (playing_index != -1 && frame_data->size != 0) { + void PTR4* in; + u32 in_bytes; + + in = frame_data->data; + in_bytes = frame_data->decoded_size; + /* Suspend the sound callback once, before the first write into a sound buffer; it is resumed after the track loop. */ if (suspended == 0) { + suspended = 1; + RADCB_suspend_callback(cb_bink_sound, BINK_SOUND_CALLBACK(bnk)); + } + + if (in_bytes != 0) { + do { + BINKSND PTR4* snd; + void PTR4* out; + u32 out_bytes; + u32 free_bytes; + + snd = &bnk->bsnd[playing_index]; + free_bytes = snd->sndbufsize - snd->sndamt; + BinkAudioDecompress((HBINKAUDIODECOMP)snd->sndcomp, &out, + &out_bytes, in, &in); + /* Never take more than the decoded byte count still outstanding for this track. */ if (in_bytes < out_bytes) { + out_bytes = in_bytes; + } + in_bytes -= out_bytes; + + /* Buffer too full: advance the read position by the overflow so the oldest queued bytes are dropped, wrapping past sndend. */ if (free_bytes < out_bytes) { + u32 over; + + over = out_bytes - free_bytes; + snd->sndreadpos += over; + snd->sndamt -= over; + if (snd->sndend < snd->sndreadpos) { + snd->sndreadpos -= snd->sndbufsize; + } + } + + snd->sndamt += out_bytes; + { + u8 PTR4* write; + u32 tail; + + write = snd->sndwritepos; +
tail = (u32)snd->sndend - (u32)write; + /* Copy into the ring buffer, splitting the copy and wrapping to sndbuf when the data would run past sndend. */ if (tail < out_bytes) { + if (tail != 0) { + memcpy(write, out, tail); + out_bytes -= tail; + out = (u8 PTR4*)out + tail; + } + memcpy(snd->sndbuf, out, out_bytes); + write = snd->sndbuf; + } else { + memcpy(write, out, out_bytes); + } + snd->sndwritepos = write + out_bytes; + } + } while (in_bytes != 0); + } + } + + ++track; + frame_data = next_frame_data; + } while (track < (s32)bnk->NumTracks); + } + } + + if (suspended != 0) { + RADCB_resume_callback(cb_bink_sound, BINK_SOUND_CALLBACK(bnk)); + } + + { + u32 video_start; + + video_start = RADTimerRead(); + if (bnk->videoon != 0) { + /* The mask is cleared only when the previous blit was not skipped. */ if (bnk->skippedlastblit == 0) { + memset(bnk->MaskPlane, 0, + (bnk->YWidth >> BINK_MASK_BLOCK_SHIFT) * + (bnk->YHeight >> BINK_MASK_BLOCK_SHIFT)); + } + + /* Same low-buffer I/O servicing as in BinkCopyToBufferRect, with the branches written in the opposite order. */ if (bnk->preloadptr == 0 && bnk->bio.DoingARead == 0 && + BINK_IO_BUFFER_USED_PERCENT(bnk) < BINK_IO_BUFFER_LOW_PERCENT) { + if (RADCB_try_to_suspend_callback(cb_bink_IO, + BINK_IO_CALLBACK(&bnk->bio)) == 0) { + if ((bnk->OpenFlags & BINKNOTHREADEDIO) == 0) { + RADCB_idle_on_callbacks(); + } + } else { + bnk->bio.Idle(&bnk->bio); + RADCB_resume_callback(cb_bink_IO, BINK_IO_CALLBACK(&bnk->bio)); + } + } + + { + u32 cur; + u32 next; + + /* ExpandBink is given the other plane set first and the current one second; PlaneNum is flipped afterwards. */ cur = bnk->PlaneNum; + next = cur ^ 1; + ExpandBink(bnk->YPlane[next], bnk->YPlane[cur], + bnk->APlane[next], bnk->APlane[cur], + bnk->MaskPlane, bnk->decompwidth, + bnk->decompheight, bnk->YWidth, + bnk->YHeight, (u32 PTR4*)frame_data, + BINK_FRAME_KEY(bnk->frameoffsets[bnk->FrameNum - 1]), + &bnk->bunp, bnk->OpenFlags, + bnk->BinkType); + bnk->PlaneNum ^= 1; + } + } + + if (bnk->playingtracks != 0) { + checksound(bnk); + } + + bnk->NumRects = BINK_RECTS_UNCALCULATED; + bnk->playedframes += 1; + inittimer(bnk); + { + u32 end; + + end = RADTimerRead(); + bnk->bio.Working = 0; + /* Time from startframetime to video_start is added to timeadecomp; time from video_start to end is added to timevdecomp. */ bnk->timeadecomp += video_start - bnk->startframetime; + bnk->timevdecomp += end - video_start; + bnk->LastFrameNum = bnk->FrameNum; + bnk->startblittime = end; + bnk->lastdecompframe =
bnk->FrameNum; + } + } + } + + return 0; } -void timeframe(HBINK bnk, u32 tmp) +static void timeframe(HBINK bnk, u32 now) { - u32 tmpReg; + /* timeframe: adds (now - startblittime) to timeblit and clears startblittime; does nothing when startblittime is 0. */ u32 start; - tmpReg = (bnk->startblittime); - if (tmpReg == 0) + start = (bnk->startblittime); + if (start == 0) { return; } bnk->startblittime = 0; - bnk->timeblit = (bnk->timeblit) + (tmp - tmpReg); + bnk->timeblit = (bnk->timeblit) + (now - start); } void BinkNextFrame(HBINK bnk) { + /* BinkNextFrame: moves to FrameNum + 1, or to BINK_FIRST_FRAME once FrameNum has reached Frames, via GotoFrame. Before that it clears any SoundDroppedOut flags and, if one was raised mid-stream, restarts sound and sync timing. A null bnk is ignored. */ u32 skipped; + + skipped = 0; + if (bnk != 0) { + u32 i; + + bnk->bio.Working = 1; + i = 0; + /* NOTE(review): Working was set to 1 two statements earlier and is overwritten here with skipped, which is still 0 - confirm this store order mirrors the original. */ bnk->bio.Working = skipped; + if (i < bnk->playingtracks) { + u32 zero; + + zero = 0; + do { + BINKSND PTR4* snd; + + snd = bnk->bsnd + i; + /* A dropout counts only after the first frame and up to the track's sndendframe. */ if (snd->SoundDroppedOut != 0) { + snd->SoundDroppedOut = zero; + if (bnk->FrameNum > BINK_FIRST_FRAME && + (s32)bnk->FrameNum <= (s32)bnk->bsnd[i].sndendframe) { + skipped = 1; + } + } + ++i; + } while (i < bnk->playingtracks); + } + + if (skipped != 0) { + RADCB_CALLBACK PTR4* sound_callback; + RADCB_CALLBACK PTR4* io_callback; + + /* With both callbacks suspended and sound off: call the I/O Idle handler until buffer use exceeds the resume mark or Idle returns 0, then zero the sync fields and turn sound back on. */ bnk->soundskips++; + sound_callback = BINK_SOUND_CALLBACK(bnk); + io_callback = BINK_IO_CALLBACK(&bnk->bio); + RADCB_suspend_2_callbacks(cb_bink_sound, sound_callback, cb_bink_IO, io_callback); + BinkSetSoundOnOff(bnk, BINK_SOUND_OFF); + + while (BINK_IO_BUFFER_USED_PERCENT(bnk) <= BINK_IO_BUFFER_RESUME_PERCENT) { + if (bnk->bio.Idle(&bnk->bio) == 0) { + break; + } + } + + bnk->startsynctime = 0; + bnk->big_sound_skip_adj = 0; + BinkSetSoundOnOff(bnk, BINK_SOUND_ON); + RADCB_resume_callback(cb_bink_sound, sound_callback); + RADCB_resume_callback(cb_bink_IO, io_callback); + } + + bnk->bio.Working = 1; + if (bnk->playingtracks != 0) { + checksound(bnk); + } + + { + u32 now; + u32 next; + + now = RADTimerRead(); + timeframe(bnk, now); + next = BINK_FIRST_FRAME; + if (bnk->FrameNum < bnk->Frames) { + next = bnk->FrameNum + 1; + } + GotoFrame(bnk, next); + bnk->bio.Working = 0; + } + } } u32 BinkGetKeyFrame(HBINK bnk, u32 frame, s32 flags) { + /* BinkGetKeyFrame: returns the number of a key frame chosen relative to frame according to flags (previous, next or closest). Frame numbers are one greater than their frameoffsets index. Returns 0 for a null bnk or when the next/closest search runs out of frames. */ u32 cur; + s32 prev; + + if (bnk == 0) { +
return 0; + } + + if ((flags & BINKGETKEYNOTEQUAL) == 0) { + /* frame itself is accepted when it is a key frame, unless BINKGETKEYNOTEQUAL was passed. NOTE(review): frame - 1 is used as an index with no check for frame == 0. */ if (BINK_FRAME_KEY(bnk->frameoffsets[frame - 1]) != 0) { + return frame; + } + } + + flags = BINKGETKEY_DIRECTION(flags); + switch (flags) { + case BINKGETKEYPREVIOUS: + /* prev is the frameoffsets index of the frame before the requested one; the scan walks backwards and stops once prev is no longer positive. */ prev = frame - 2; + if (prev > 0) { + const u32 PTR4* ptr; + + ptr = bnk->frameoffsets; + if (BINK_FRAME_KEY(ptr[prev]) == 0) { + ptr += prev; + do { + --prev; + if (prev <= 0) { + break; + } + --ptr; + } while (BINK_FRAME_KEY(*ptr) == 0); + } + } +found_previous: + return prev + 1; + case BINKGETKEYNEXT: + /* cur starts as the index of the frame after the requested one; the returned value is that index plus one. */ cur = frame; + if (cur >= bnk->Frames) { + break; + } else { + const u32 PTR4* ptr; + u32 limit; + + limit = bnk->Frames; + ptr = bnk->frameoffsets + cur; + do { + u32 value; + + value = *ptr++; + ++cur; + if (BINK_FRAME_KEY(value) != 0) { + return cur; + } + } while (cur < limit); + } + break; + case BINKGETKEYCLOSEST: { + prev = frame - 2; + cur = frame; + /* Search outward one step at a time in both directions, testing the earlier candidate first; once prev goes negative only the forward side is searched. */ do { + if (prev >= 0) { + if (BINK_FRAME_KEY(bnk->frameoffsets[prev]) != 0) { + goto found_previous; + } + + if (cur < bnk->Frames) { + u32 value = bnk->frameoffsets[cur]; + + ++cur; + if (BINK_FRAME_KEY(value) != 0) { + return cur; + } + } + } else { + if (cur >= bnk->Frames) { + return 0; + } + + { + u32 value = bnk->frameoffsets[cur]; + + ++cur; + if (BINK_FRAME_KEY(value) != 0) { + return cur; + } + } + } + + --prev; + } while (1); + } + } + + return 0; } void BinkGoto(HBINK bnk, u32 frame, s32 flags) { + /* BinkGoto: seeks to frame (0 becomes BINK_FIRST_FRAME, values above Frames are clamped to Frames). With both BINKGOTOQUICK and BINKGOTOQUICKSOUND set it calls GotoFrame directly; otherwise it jumps to an earlier start frame and steps forward with BinkDoFrame / BinkNextFrame until frame is reached. A null bnk is ignored. */ u32 start; + + if (bnk == 0) { + return; + } + + if (frame == 0) { + frame = BINK_FIRST_FRAME; + } + bnk->bio.Working = 1; + if (frame > bnk->Frames) { + frame = bnk->Frames; + } + + if ((flags & BINKGOTOQUICKSOUND) != 0 || bnk->playingtracks == 0) { + start = frame; + } else { + u32 frames; + + /* start = frame minus ceil(FrameRate / FrameRateDiv) frames, or BINK_FIRST_FRAME when that would not stay positive. */ frames = bnk->FrameRate - 1; + start = (frames + bnk->FrameRateDiv) / bnk->FrameRateDiv; + if (start >= frame) { + start = BINK_FIRST_FRAME; + } else { + start = frame - start; + } + } + + if (bnk->FrameNum != frame) { + if ((flags & (BINKGOTOQUICK | BINKGOTOQUICKSOUND)) != +
(BINKGOTOQUICK | BINKGOTOQUICKSOUND)) { + u32 keyframe; + u32 video_on; + + if ((flags & BINKGOTOQUICK) == 0) { + keyframe = BinkGetKeyFrame(bnk, frame, 0); + } else { + keyframe = frame; + } + + /* Key frame after start: remember the videoon state and step with video off until the key frame is reached. Otherwise start from the key frame itself. */ if (keyframe > start) { + video_on = bnk->videoon; + BinkSetVideoOnOff(bnk, BINK_VIDEO_OFF); + } else { + start = keyframe; + video_on = 0; + } + + /* Jump only when the target is behind the current frame or start is ahead of it; otherwise keep stepping from where playback already is. */ if (frame < bnk->FrameNum || start > bnk->FrameNum) { + GotoFrame(bnk, start); + if (start == frame) { + bnk->bio.Working = 0; + return; + } + } + + { + u32 sound_on; + + /* Sound callback suspended and sound switched off while frames are stepped; both are restored after the loop. */ RADCB_suspend_callback(cb_bink_sound, BINK_SOUND_CALLBACK(bnk)); + sound_on = bnk->soundon; + if (sound_on != 0) { + BinkSetSoundOnOff(bnk, BINK_SOUND_OFF); + } + + if (bnk->FrameNum != bnk->lastdecompframe) { + BinkDoFrame(bnk); + bnk->skippedlastblit = 1; + } + + BinkNextFrame(bnk); + if (bnk->FrameNum != frame) { + u32 one; + + one = 1; + do { + /* Video is switched back on when the key frame is reached. */ if (video_on != 0 && bnk->FrameNum == keyframe) { + BinkSetVideoOnOff(bnk, BINK_VIDEO_ON); + video_on = 0; + } + + BinkDoFrame(bnk); + bnk->skippedlastblit = one; + BinkNextFrame(bnk); + } while (bnk->FrameNum != frame); + } + + bnk->startsynctime = 0; + if (video_on != 0) { + BinkSetVideoOnOff(bnk, BINK_VIDEO_ON); + } + RADCB_resume_callback(cb_bink_sound, BINK_SOUND_CALLBACK(bnk)); + if (sound_on != 0) { + BinkSetSoundOnOff(bnk, BINK_SOUND_ON); + } + } + } else { + GotoFrame(bnk, frame); + } + } + + bnk->bio.Working = 0; } void BinkClose(HBINK bnk) { + /* BinkClose: pauses playback, unregisters the I/O and sound callbacks, closes every playing track and frees its sndbuf, releases preloadptr or ioptr and YPlane[0], then zeroes and frees bnk. A null bnk is ignored. */ if (bnk != 0) { + RADCB_HANDLER PTR4* io_handler; + RADCB_HANDLER PTR4* sound_handler; + s32 result; + u32 i; + + BinkPause(bnk, 1); + io_handler = 0; + if ((bnk->OpenFlags & BINKNOTHREADEDIO) == 0 && bnk->preloadptr == 0) { + io_handler = cb_bink_IO; + } + + sound_handler = 0; + if (bnk->playingtracks != 0) { + sound_handler = cb_bink_sound; + } + + /* The result code selects which global handler pointers are cleared: 1 the I/O handler, 2 the sound handler, 3 both. */ result = RADCB_unregister_2_callbacks(io_handler, BINK_IO_CALLBACK(&bnk->bio), + sound_handler, BINK_SOUND_CALLBACK(bnk), 1); + switch (result) { + case 1: + cb_bink_IO = 0; + break; + case 2: + cb_bink_sound = 0; +
break; + case 3: + cb_bink_IO = 0; + cb_bink_sound = 0; + break; + } + + i = 0; + if (i < bnk->playingtracks) { + do { + /* Per playing track: call its Close handler, close its audio decompressor, free its sound buffer if one exists. */ bnk->bsnd[i].Close(&bnk->bsnd[i]); + BinkAudioDecompressClose((HBINKAUDIODECOMP)bnk->bsnd[i].sndcomp); + if (bnk->bsnd[i].sndbuf != 0) { + radfree(bnk->bsnd[i].sndbuf); + } + ++i; + } while (i < bnk->playingtracks); + } + + /* preloadptr is freed only when BINKFROMMEMORY is clear; without a preload buffer the I/O object is closed and ioptr is freed instead. */ if (bnk->preloadptr != 0) { + if ((bnk->OpenFlags & BINKFROMMEMORY) == 0) { + radfree(bnk->preloadptr); + } + } else { + bnk->bio.Close(&bnk->bio); + radfree(bnk->ioptr); + } + + if (bnk->YPlane[0] != 0) { + radfree(bnk->YPlane[0]); + } + + memset(bnk, 0, sizeof(*bnk)); + radfree(bnk); + } } -// TODO: -// Double check that the second are is supposed to be a u32 -void endframe(HBINK bnk, u32 tmp) +static void endframe(HBINK bnk, u32 now) { + /* endframe: if entireframetime is set, turns now into the elapsed time since it, clears it, and updates the slowest / second-slowest frame time and frame number. */ u32 start = bnk->entireframetime; + + if (start != 0) { + now -= start; + bnk->entireframetime = 0; + + if (now > bnk->slowestframetime) { + bnk->slowest2frametime = bnk->slowestframetime; + bnk->slowest2frame = bnk->slowestframe; + bnk->slowestframetime = now; + bnk->slowestframe = bnk->FrameNum; + } else if (now > bnk->slowest2frametime) { + bnk->slowest2frametime = now; + bnk->slowest2frame = bnk->FrameNum; + } + } } s32 BinkWait(HBINK bnk) { + /* BinkWait: returns 0 once the elapsed time has reached the expected time for the next frame (also for a null bnk, a read error, a zero FrameRate, or no frames played yet while unpaused). Returns 1 while still early, while paused, or while tracks are playing with soundon clear; on that path it services file I/O if the file is not preloaded. */ u32 sync_time; + u32 now; + + if (bnk == 0) { + return 0; + } + + if (bnk->playedframes == 0 && bnk->Paused == 0) { + return 0; + } + + if (bnk->ReadError != 0) { + return 0; + } + + /* startsynctime of 0 means the timer has not been started; inittimer is called and the value re-read. */ sync_time = bnk->startsynctime; + if (sync_time == 0) { + inittimer(bnk); + sync_time = bnk->startsynctime; + } + + if (bnk->playingtracks != 0) { + checksound(bnk); + } + + now = RADTimerRead(); + timeframe(bnk, now); + endframe(bnk, now); + + if (bnk->Paused == 0) { + if (bnk->playingtracks == 0 || bnk->soundon != 0) { + u32 frame_rate; + u32 wait; + + frame_rate = bnk->FrameRate; + if (frame_rate == 0) { + return 0; + } + + wait = mult64anddiv((bnk->playedframes - bnk->startsyncframe) * BINK_MILLISECONDS_PER_SECOND, + bnk->FrameRateDiv, frame_rate); + wait =
mult64andshift(wait, bnk->bsnd->Latency, BINK_FIXED_SHIFT); + + { + u32 elapsed; + + elapsed = now - (sync_time + bnk->big_sound_skip_adj); + /* Not yet time for the next frame. */ if ((s32)elapsed < (s32)wait) { + goto wait_on_io; + } + + elapsed -= wait; + /* Late by more than twoframestime: with no tracks playing, resynchronise the clock to now; with tracks playing, record the lateness as a skip adjustment together with the amount it is reduced by on each later pass. */ if ((s32)elapsed > (s32)bnk->twoframestime) { + if (bnk->playingtracks == 0) { + bnk->startsynctime = now; + bnk->startsyncframe = bnk->playedframes - 1; + } else { + bnk->big_sound_skip_adj = elapsed; + bnk->big_sound_skip_reduce = + mult64anddiv(elapsed, bnk->FrameRateDiv, bnk->FrameRate); + } + } + + /* Decay the adjustment, clamping at 0. */ if (bnk->big_sound_skip_adj < bnk->big_sound_skip_reduce) { + bnk->big_sound_skip_adj = 0; + } else { + bnk->big_sound_skip_adj -= bnk->big_sound_skip_reduce; + } + } + + return 0; + } + } + +wait_on_io: + /* While waiting, run the I/O Idle handler if the file is not preloaded and its callback could be suspended. */ if (bnk->preloadptr == 0) { + RADCB_CALLBACK PTR4* io_callback; + + io_callback = BINK_IO_CALLBACK(&bnk->bio); + if (RADCB_try_to_suspend_callback(cb_bink_IO, io_callback) != 0) { + bnk->bio.Idle(&bnk->bio); + RADCB_resume_callback(cb_bink_IO, io_callback); + } + } + + return 1; } s32 BinkPause(HBINK bnk, s32 pause) { + /* BinkPause: stores pause in bnk->Paused, forwards it to each playing track's Pause handler and returns the stored value (0 for a null bnk). */ u32 now; + u32 i; + + if (bnk == 0) { + return 0; + } + + now = RADTimerRead(); + timeframe(bnk, now); + endframe(bnk, now); + + if (pause == 0 && bnk->Paused != 0) { + /* Resuming: pause is 0 in this branch, so both sync fields are reset to 0. */ bnk->startsynctime = pause; + bnk->big_sound_skip_adj = pause; + } + + i = 0; + bnk->Paused = pause; + if (i < bnk->playingtracks) { + do { + BINKSND PTR4* snd; + + snd = &bnk->bsnd[i]; + snd->Pause(snd, pause); + ++i; + } while (i < bnk->playingtracks); + } + + if (bnk->playedframes != 0 && bnk->playingtracks != 0) { + checksound(bnk); + } + + return bnk->Paused; } -void BinkGetSummary(HBINK bnk, BINKSUMMARY* sum) +void BinkGetSummary(HBINK bnk, BINKSUMMARY PTR4* sum) { + /* BinkGetSummary: zeroes *sum and fills it with the playback statistics accumulated in bnk so far. Does nothing if either pointer is null. */ u32 now; + u32 data_size; + u32 bytes_read; + u32 read_time; + + if (bnk == 0 || sum == 0) { + return; + } + + now = RADTimerRead(); + timeframe(bnk, now); + endframe(bnk, now); + memset(sum, 0, sizeof(*sum)); + + sum->FrameRate = bnk->FrameRate; + sum->FrameRateDiv = bnk->FrameRateDiv; +
sum->SkippedBlits = bnk->skippedblits; + sum->SoundSkips = bnk->soundskips; + sum->FileFrameRate = bnk->fileframerate; + sum->FileFrameRateDiv = bnk->fileframeratediv; + sum->TotalFrames = bnk->Frames; + sum->TotalPlayedFrames = bnk->playedframes; + sum->TotalTime = RADTimerRead() - bnk->firstframetime; + sum->TotalOpenTime = bnk->timeopen; + sum->TotalAudioDecompTime = bnk->timeadecomp; + sum->TotalVideoDecompTime = bnk->timevdecomp; + sum->TotalBlitTime = bnk->timeblit; + sum->HighestMemAmount += bnk->totalmem; + sum->TotalIOMemory = bnk->iosize; + bytes_read = bnk->bio.BytesRead; + read_time = bnk->bio.TotalTime; + /* The + 1 keeps the last argument non-zero. */ sum->TotalReadSpeed = mult64anddiv(bytes_read, BINK_MILLISECONDS_PER_SECOND, read_time + 1); + sum->TotalReadTime = bnk->bio.ForegroundTime; + sum->TotalIdleReadTime = bnk->bio.IdleTime; + sum->TotalBackReadTime = bnk->bio.ThreadTime; + + /* data_size: file Size minus the offset of the first frame. */ data_size = bnk->Size - BINK_FRAME_OFFSET(*bnk->frameoffsets); + sum->AverageDataRate = mult64anddiv(data_size, bnk->fileframerate, bnk->fileframeratediv * bnk->Frames); + /* NOTE(review): bnk->Frames is used as a divisor here with no zero check. */ sum->AverageFrameSize = data_size / bnk->Frames; + sum->Highest1SecRate = bnk->Highest1SecRate; + sum->Highest1SecFrame = bnk->Highest1SecFrame + 1; + sum->Width = bnk->Width; + sum->Height = bnk->Height; + sum->SlowestFrameTime = bnk->slowestframetime; + sum->Slowest2FrameTime = bnk->slowest2frametime; + sum->SlowestFrameNum = bnk->slowestframe; + sum->Slowest2FrameNum = bnk->slowest2frame; + /* NOTE(review): TotalIOMemory was already stored from bnk->iosize above; this later store is the one that survives. */ sum->TotalIOMemory = bnk->bio.BufSize; + sum->HighestIOUsed = bnk->bio.BufHighUsed; } void BinkGetRealtime(HBINK bink, BINKREALTIME PTR4* run, u32 frames) { + /* BinkGetRealtime: fills *run with statistics over the most recent frames frames, computed as differences between slot 0 and slot frames of the rt* history arrays. NOTE(review): unlike the neighbouring entry points, bink and run are not null-checked. */ u32 now; + u32 diff; + + now = RADTimerRead(); + timeframe(bink, now); + endframe(bink, now); + + /* Limit the window to runtimeframes - 1, then to the frames available (never 0). */ if (frames == 0 || frames >= bink->runtimeframes) { + frames = bink->runtimeframes - 1; + } + + if (frames > bink->FrameNum) { + frames = bink->FrameNum - 1; + if (frames == 0) { + frames = 1; + } + } + + run->FrameNum = bink->LastFrameNum; + run->FrameRate = bink->FrameRate; + run->FrameRateDiv =
bink->FrameRate; /* NOTE(review): this is the value stored into run->FrameRateDiv - it reads bink->FrameRate, not bink->FrameRateDiv. Looks like a copy/paste slip; confirm against the target binary before changing. */ + run->ReadBufferSize = bink->bio.CurBufSize; + run->ReadBufferUsed = bink->bio.CurBufUsed; + /* Difference between the frameoffsets entry at FrameNum and the one frames earlier, scaled by fileframerate over (frames * fileframeratediv). NOTE(review): the raw entries are subtracted without BINK_FRAME_OFFSET, unlike BinkGetSummary - confirm. */ run->FramesDataRate = mult64anddiv(bink->frameoffsets[bink->FrameNum] - bink->frameoffsets[bink->FrameNum - frames], + bink->fileframerate, + frames * bink->fileframeratediv); + + run->Frames = frames; + + diff = bink->rtframetimes[0] - bink->rtframetimes[frames]; + run->FramesTime = diff; + /* A zero interval is reported as 1. */ if (diff == 0) { + run->FramesTime = 1; + } + + run->FramesVideoDecompTime = bink->rtvdecomptimes[0] - bink->rtvdecomptimes[frames]; + run->FramesAudioDecompTime = bink->rtadecomptimes[0] - bink->rtadecomptimes[frames]; + run->FramesBlitTime = bink->rtblittimes[0] - bink->rtblittimes[frames]; + run->FramesReadTime = bink->rtreadtimes[0] - bink->rtreadtimes[frames]; + run->FramesIdleReadTime = bink->rtidlereadtimes[0] - bink->rtidlereadtimes[frames]; + run->FramesThreadReadTime = bink->rtthreadreadtimes[0] - bink->rtthreadreadtimes[frames]; } -// need to add in the proper args. should be like 4 of em -// void smallestrect() -// { -// } +static s32 smallestrect(BINKRECT PTR4* out, const u8 PTR4* mask, s32 pitch, const BINKRECT PTR4* rect) +{ + /* smallestrect: shrinks rect to the smallest rectangle that still covers every non-zero mask byte inside it and writes the result to *out. Returns 0, leaving *out unwritten, when the region holds no non-zero byte; otherwise returns 1. */ s32 width_blocks; + s32 height_blocks; + const u8 PTR4* rect_mask; + const u8 PTR4* top_scan; + s32 top_blocks; + s32 left_blocks; + s32 row; + s32 col; + s32 value; -// need to add in the proper args.
should be like 5 of em -// void trysplit() -// { -// } + /* Point rect_mask at the rectangle's top-left mask byte and take its size through BINK_MASK_BLOCKS (presumably a pixel-to-mask-block conversion - defined elsewhere). */ rect_mask = mask + BINK_MASK_BLOCKS(rect->Left) + BINK_MASK_BLOCKS(rect->Top) * pitch; + height_blocks = BINK_MASK_BLOCKS(rect->Height); + width_blocks = BINK_MASK_BLOCKS(rect->Width); + + top_scan = rect_mask; + + /* Count all-zero rows from the top; if every row is zero the function returns 0. */ for (top_blocks = 0; top_blocks < height_blocks; ++top_blocks) { + for (col = 0; col < width_blocks; ++col) { + if (top_scan[col] != 0) { + goto found_top; + } + } + + top_scan += pitch; + } + + return 0; + +found_top: + rect_mask += top_blocks * pitch; + height_blocks -= top_blocks; + out->Top = rect->Top + top_blocks * BINK_MASK_BLOCK_SIZE; + value = rect->Height - top_blocks * BINK_MASK_BLOCK_SIZE; + out->Height = value; + + /* Trim all-zero rows from the bottom. */ if (value > 1) { + s32 bottom_blocks = 0; + const u8 PTR4* scan = rect_mask + (height_blocks - 1) * pitch; + + do { + for (col = 0; col < width_blocks; ++col) { + if (scan[col] != 0) { + goto found_bottom; + } + } + + ++bottom_blocks; + scan -= pitch; + } while (bottom_blocks < height_blocks); + +found_bottom: + height_blocks -= bottom_blocks; + out->Height = value - bottom_blocks * BINK_MASK_BLOCK_SIZE; + } + + /* Trim all-zero columns from the left, scanning only the rows that remain. */ for (left_blocks = 0; left_blocks < width_blocks; ++left_blocks) { + const u8 PTR4* scan = rect_mask + left_blocks; + + for (row = 0; row < height_blocks; ++row) { + if (*scan != 0) { + goto found_left; + } + + scan += pitch; + } + } + +found_left: + width_blocks -= left_blocks; + rect_mask += left_blocks; + out->Left = rect->Left + left_blocks * BINK_MASK_BLOCK_SIZE; + value = rect->Width - left_blocks * BINK_MASK_BLOCK_SIZE; + out->Width = value; + + /* Trim all-zero columns from the right. */ if (value > 1) { + s32 first_right = width_blocks - 1; + s32 right_blocks; + + for (right_blocks = first_right; right_blocks >= 0; --right_blocks) { + const u8 PTR4* scan = rect_mask + right_blocks; + + for (row = 0; row < height_blocks; ++row) { + if (*scan != 0) { + goto found_right; + } + + scan += pitch; + } + } + +found_right: + out->Width = value - (first_right - right_blocks) * BINK_MASK_BLOCK_SIZE; + } + +
return 1; +} + +static s32 trysplit(BINKRECT PTR4* outa, BINKRECT PTR4* outb, const BINKRECT PTR4* rect, + const u8 PTR4* mask, s32 pitch) +{ + BINKRECT first_half; + BINKRECT second_half; + BINKRECT split_rect; + s32 best_score; + + if (rect->Width >= BINK_DIRTY_SPLIT_MIN_SIZE) { + u32 split; + + split_rect = *rect; + split = (split_rect.Width / BINK_COPY_SCALE + BINK_MASK_BLOCK_ROUND_MASK) & + ~BINK_MASK_BLOCK_ROUND_MASK; + split_rect.Width = split; + smallestrect(outa, mask, pitch, &split_rect); + + split_rect = *rect; + split_rect.Width -= split; + split_rect.Left += split; + smallestrect(outb, mask, pitch, &split_rect); + best_score = (rect->Width * rect->Height - outa->Width * outa->Height) - + outb->Width * outb->Height; + } else { + best_score = 0; + } + + if (rect->Height >= BINK_DIRTY_SPLIT_MIN_SIZE) { + s32 split_score; + u32 split; + + split_rect = *rect; + split = (split_rect.Height / BINK_COPY_SCALE + BINK_MASK_BLOCK_ROUND_MASK) & + ~BINK_MASK_BLOCK_ROUND_MASK; + split_rect.Height = split; + smallestrect(&first_half, mask, pitch, &split_rect); + + split_rect = *rect; + split_rect.Height -= split; + split_rect.Top += split; + smallestrect(&second_half, mask, pitch, &split_rect); + split_score = (rect->Width * rect->Height - first_half.Width * first_half.Height) - + second_half.Width * second_half.Height; + if (best_score < split_score) { + *outa = first_half; + *outb = second_half; + return split_score; + } + } + + return best_score; +} s32 BinkGetRects(HBINK bnk, u32 flags) { + BINKRECT rects[BINKMAXDIRTYRECTS]; + BINKRECT split_a[BINKMAXDIRTYRECTS]; + BINKRECT split_b[BINKMAXDIRTYRECTS]; + s32 scores[BINKMAXDIRTYRECTS]; + + if ((flags & BINKSURFACEDIRECT) != 0 || + (((flags & BINKNOSKIP) == 0 && ((bnk->OpenFlags & BINKNOSKIP) == 0)) && + bnk->skippedlastblit != 0)) { + return 0; + } + + if (bnk->NumRects == BINK_RECTS_UNCALCULATED) { + rects[0].Top = 0; + rects[0].Left = 0; + rects[0].Width = bnk->YWidth; + rects[0].Height = bnk->YHeight; + 
bnk->NumRects = smallestrect(bnk->FrameRects, bnk->MaskPlane, bnk->MaskPitch, rects); + if ((flags & BINKSURFACESLOW) == 0 && bnk->NumRects != 0) { + rects[0] = bnk->FrameRects[0]; + scores[0] = trysplit(split_a, split_b, rects, bnk->MaskPlane, bnk->MaskPitch); + while (1) { + s32 i; + s32 best_index; + s32 best_score; + + i = 0; + best_index = -1; + best_score = 0; + if (i < bnk->NumRects) { + s32 PTR4* score; + + score = scores; + do { + s32 score_value; + + score_value = *score; + if (score_value > best_score) { + best_score = score_value; + best_index = i; + } + ++score; + ++i; + } while (i < bnk->NumRects); + } + if (best_index == -1) { + break; + } + + { + u32 rect_count; + + rects[best_index] = split_a[best_index]; + rect_count = bnk->NumRects; + rects[rect_count] = split_b[best_index]; + bnk->NumRects = rect_count + 1; + } + if (bnk->NumRects == BINKMAXDIRTYRECTS) { + break; + } + + scores[best_index] = trysplit(&split_a[best_index], &split_b[best_index], + &rects[best_index], bnk->MaskPlane, bnk->MaskPitch); + best_index = bnk->NumRects - 1; + scores[best_index] = trysplit(&split_a[best_index], &split_b[best_index], + &rects[best_index], bnk->MaskPlane, bnk->MaskPitch); + } + } + + { + s32 i; + s32 count; + + i = 0; + count = bnk->NumRects; + if (i < count) { + do { + s32 best_index; + u32 best_key; + s32 j; + s32 next_i; + + best_index = 0; + best_key = BINK_RECT_SORT_KEY_SENTINEL; + next_i = i + 1; + j = 0; + if (j < bnk->NumRects) { + do { + u32 key; + + key = BINK_RECT_SORT_KEY(rects[j].Top, rects[j].Left); + if (key < best_key) { + best_key = key; + best_index = j; + } + ++j; + } while (j < bnk->NumRects); + } + + switch (bnk->lastblitflags & BINKCOPYNOSCALING) { + case BINKCOPY2XH: + case BINKCOPY2XHI: + rects[best_index].Top *= BINK_COPY_SCALE; + rects[best_index].Height *= BINK_COPY_SCALE; + break; + case BINKCOPY2XW: + rects[best_index].Left *= BINK_COPY_SCALE; + rects[best_index].Width *= BINK_COPY_SCALE; + break; + case BINKCOPY2XWH: + case 
BINKCOPY2XWHI: + rects[best_index].Left *= BINK_COPY_SCALE; + rects[best_index].Width *= BINK_COPY_SCALE; + rects[best_index].Top *= BINK_COPY_SCALE; + rects[best_index].Height *= BINK_COPY_SCALE; + break; + } + + if ((u32)(rects[best_index].Left + rects[best_index].Width) > bnk->Width) { + rects[best_index].Width -= + rects[best_index].Left + rects[best_index].Width - bnk->Width; + } + if ((u32)(rects[best_index].Top + rects[best_index].Height) > bnk->Height) { + rects[best_index].Height -= + rects[best_index].Top + rects[best_index].Height - bnk->Height; + } + + bnk->FrameRects[i] = rects[best_index]; + rects[best_index].Left = 0; + rects[best_index].Top = BINK_RECT_SORT_TOP_SENTINEL; + i = next_i; + count = bnk->NumRects; + } while (i < count); + } + } + } + + return bnk->NumRects; } void BinkService(HBINK bink) { - if ((bink->lastresynctime) != 0) + if ((bink->playingtracks) != 0) { checksound(bink); } } -// TODO: -// Double check that the second are is supposed to be a u32 -s32 idtoindex(HBINK bnk, u32 tmp) +static s32 idtoindex(HBINK bnk, u32 trackid) { + s32 i; + + for (i = 0; i < (s32)bnk->playingtracks; ++i) { + if (bnk->trackIDs[bnk->trackindexes[i]] == trackid) { + return i; + } + } + + return -1; } void BinkSetVolume(HBINK bnk, u32 trackid, s32 volume) { + s32 index; + BINKSND PTR4* snd; + BINKSNDVOLUME volume_callback; + + if (bnk != 0 && bnk->playingtracks != 0) { + index = idtoindex(bnk, trackid); + if (index != -1) { + snd = &bnk->bsnd[index]; + volume_callback = snd->Volume; + if (volume_callback != 0) { + volume_callback(snd, volume); + } + } + } } void BinkSetMixBins(HBINK bnk, u32 trackid, u32 PTR4* mix_bins, u32 total) { + s32 index; + BINKSND PTR4* snd; + BINKSNDMIXBINS mix_bins_callback; + + if (bnk != 0 && bnk->playingtracks != 0) { + index = idtoindex(bnk, trackid); + if (index != -1) { + snd = &bnk->bsnd[index]; + mix_bins_callback = snd->MixBins; + if (mix_bins_callback != 0) { + mix_bins_callback(snd, mix_bins, total); + } + } + } } 
-void BinkSetMixBinVolumes(HBINK bnk, u32 trackid, u32* vol_mix_bins, s32* volumes, u32 total) +void BinkSetMixBinVolumes(HBINK bnk, u32 trackid, u32 PTR4* vol_mix_bins, s32 PTR4* volumes, u32 total) { + s32 index; + BINKSND PTR4* snd; + BINKSNDMIXBINVOLS mix_bin_vols_callback; + + if (bnk != 0 && bnk->playingtracks != 0) { + index = idtoindex(bnk, trackid); + if (index != -1) { + snd = &bnk->bsnd[index]; + mix_bin_vols_callback = snd->MixBinVols; + if (mix_bin_vols_callback != 0) { + mix_bin_vols_callback(snd, vol_mix_bins, volumes, total); + } + } + } } void BinkSetPan(HBINK bnk, u32 trackid, s32 pan) { + s32 index; + BINKSND PTR4* snd; + BINKSNDPAN pan_callback; + + if (bnk != 0 && bnk->playingtracks != 0) { + index = idtoindex(bnk, trackid); + if (index != -1) { + snd = &bnk->bsnd[index]; + pan_callback = snd->Pan; + if (pan_callback != 0) { + pan_callback(snd, pan); + } + } + } } -void* BinkLogoAddress() +void PTR4* BinkLogoAddress(void) { return LogoData; } @@ -958,7 +2732,7 @@ u32 BinkGetTrackType(HBINK bnk, u32 trackindex) { if (bnk != 0) { - return *(u32*)(trackindex * 1 + (bnk->tracktypes)); + return bnk->tracktypes[trackindex]; } return 0; } @@ -967,7 +2741,7 @@ u32 BinkGetTrackMaxSize(HBINK bnk, u32 trackindex) { if (bnk != 0) { - return *(u32*)(trackindex * 1 + (bnk->tracksizes)); + return bnk->tracksizes[trackindex]; } return 0; } @@ -976,13 +2750,60 @@ u32 BinkGetTrackID(HBINK bnk, u32 trackindex) { if (bnk != 0) { - return *(u32*)(trackindex * 1 + (bnk->trackIDs)); + return bnk->trackIDs[trackindex]; } return 0; } HBINKTRACK BinkOpenTrack(HBINK bnk, u32 trackindex) { + u32 tracktype; + u32 open_tracktype; + HBINKAUDIODECOMP sndcomp; + HBINKTRACK track; + + if (bnk == 0) { + return 0; + } + if (trackindex >= bnk->NumTracks) { + return 0; + } + + tracktype = bnk->tracktypes[trackindex]; + if (!BINKTRACKISOPENABLE(tracktype)) { + return 0; + } + + open_tracktype = bnk->tracktypes[trackindex]; + sndcomp = 
BinkAudioDecompressOpen(BINKTRACKFREQ(open_tracktype), + BINKTRACKCHANNELS(open_tracktype), + BINKTRACKDECOMPFLAGS(open_tracktype)); + if (sndcomp == 0) { + return 0; + } + + track = bpopmalloc(bnk, sizeof(*track)); + if (track == 0) { + BinkAudioDecompressClose(sndcomp); + goto fail; + } + + memset(track, 0, sizeof(*track)); + track->sndcomp = (UINTa)sndcomp; + track->bink = bnk; + track->Frequency = BINKTRACKFREQ(bnk->tracktypes[trackindex]); + track->Bits = BINKTRACKBITS(bnk->tracktypes[trackindex]); + track->Channels = BINKTRACKCHANNELS(bnk->tracktypes[trackindex]); + track->MaxSize = BINK_TRACK_BUFFER_BYTES(bnk->tracksizes[trackindex]); + if (track->Bits == BINK_SOUND_BITS_8) { + track->MaxSize >>= 1; + } + track->trackindex = trackindex; + + return track; + +fail: + return 0; } void BinkCloseTrack(HBINKTRACK bnkt) @@ -991,15 +2812,71 @@ void BinkCloseTrack(HBINKTRACK bnkt) { if (bnkt->sndcomp) { - BinkAudioDecompressClose(bnkt->sndcomp); + BinkAudioDecompressClose((HBINKAUDIODECOMP)bnkt->sndcomp); bnkt->sndcomp = 0; } radfree(bnkt); } } -u32 BinkGetTrackData(HBINKTRACK bnkt, void* dest) +u32 BinkGetTrackData(HBINKTRACK bnkt, void PTR4* dest) { + if (bnkt != 0) { + s32 i = 0; + u8 PTR4* src = bnkt->bink->compframe; + + while (i < bnkt->bink->NumTracks) { + BINKTRACKFRAME PTR4* frame = (BINKTRACKFRAME PTR4*)src; + u32 size = frame->size; + src += sizeof(frame->size); + if ((s32)i == bnkt->trackindex && size != 0) { + u32 left = frame->decoded_size; + u32 wrote = 0; + src += sizeof(frame->decoded_size); + + while (left != 0) { + void PTR4* out; + u32 outbytes; + + BinkAudioDecompress((HBINKAUDIODECOMP)bnkt->sndcomp, &out, &outbytes, + src, (void PTR4* PTR4*)&src); + if (outbytes > left) { + outbytes = left; + } + + left -= outbytes; + if (bnkt->Bits == BINK_SOUND_BITS_16) { + memcpy(dest, out, outbytes); + } else { + u32 j; + + outbytes >>= 1; + { + char PTR4* dst = dest; + s16 PTR4* in = out; + + for (j = 0; j < outbytes; ++j) { + s32 sample = *in++; + + sample 
>>= BINK_16_TO_8_SAMPLE_SHIFT; + sample += BINK_UNSIGNED_8_SAMPLE_BIAS; + *dst++ = sample; + } + } + } + + dest = (u8 PTR4*)dest + outbytes; + wrote += outbytes; + } + return wrote; + } + + src += size; + ++i; + } + } + + return 0; } s32 BinkSetVideoOnOff(HBINK bnk, s32 onoff) @@ -1013,4 +2890,57 @@ s32 BinkSetVideoOnOff(HBINK bnk, s32 onoff) s32 BinkSetSoundOnOff(HBINK bnk, s32 onoff) { + u32 ret; + + ret = 0; + if (bnk != 0) { + u32 suspended; + u32 i; + + suspended = 0; + i = 0; + if (ret < bnk->playingtracks) { + u32 one; + + one = 1; + do { + if (bnk->bsnd[i].SetOnOff != 0) { + if (suspended == 0) { + suspended = 1; + RADCB_suspend_callback(cb_bink_sound, BINK_SOUND_CALLBACK(bnk)); + } + + { + s32 ok; + + ok = bnk->bsnd[i].SetOnOff(&bnk->bsnd[i], onoff != 0); + if (ok == 0) { + if (bnk->soundon != 0) { + bnk->bsnd[i].sndwritepos = bnk->bsnd[i].sndbuf; + bnk->bsnd[i].sndreadpos = bnk->bsnd[i].sndbuf; + bnk->bsnd[i].sndamt = ok; + dosilence(&bnk->bsnd[i]); + bnk->soundon = ok; + } + } else { + ret = 1; + if (bnk->soundon == 0) { + *(volatile u32 PTR4*)&bnk->big_sound_skip_adj = 0; + *(volatile u32 PTR4*)&bnk->soundon = one; + *(volatile u32 PTR4*)&bnk->startsynctime = 0; + } + } + } + } + + ++i; + } while (i < bnk->playingtracks); + } + + if (suspended != 0) { + RADCB_resume_callback(cb_bink_sound, BINK_SOUND_CALLBACK(bnk)); + } + } + + return ret; } diff --git a/src/bink/src/sdk/decode/expand.c b/src/bink/src/sdk/decode/expand.c index e56c5e5d0..260aa9f7e 100644 --- a/src/bink/src/sdk/decode/expand.c +++ b/src/bink/src/sdk/decode/expand.c @@ -1,35 +1,1243 @@ -#include "bink.h" - -static const char huff4decode00[16] = "@ABCDEFGHIJKLMNO"; -static const char huff4decode01[32] = - "\20A\20R\20S\20T\20U\20V\20W\20X\20A\20Y\20Z\20[\20\\\20]\20^\20_"; -static const char huff4decode02[32] = " B!X T!\\ C!Z V!^ B!Y U!] 
C![ W!_"; -static const char huff4decode03[32] = " C1X E2\\ D1Z V2^ C1Y E2] D1[ W2_"; -static const char huff4decode04[32] = "0D2X1F3\\0E2Z1G3^0D2Y1F3]0E2[1G3_"; -static const char huff4decode05[32] = "0FBJ1HD\\0GCK1IE^0FBJ1HD]0GCK1IE_"; -static const char huff4decode06[32] = " EAI GC\\ FBZ HD^ EAI GC] FB[ HD_"; -static const char huff4decode07[64] = - "1S2h1U2l1T2j1f2n1S2i1U2m1T2k1g2o"; -static const char huff4decode08[64] = - "\20!\20R\20!\20h\20!\20d\20!\20l\20!\20S\20!\20j\20!\20f\20!\20n\20!\20R\20!\20i\20!\20e\20!\20m\20!\20S\20!\20k\20!\20g\20!\20o"; -static const char huff4decode09[64] = - "1TBh1VCl1UBj1WCn1TBi1VCm1UBk1WCo"; -static const char huff4decode10[64] = - " 2!U C!Y 2!W D!l 2!V C!j 2!X D!n 2!U C!Y 2!W D!m 2!V C!k 2!X D!o"; -static const char huff4decode11[64] = - "\20A\20U\20C\20Y\20B\20W\20D\20l\20A\20V\20C\20j\20B\20X\20D\20n\20A\20U\20C\20Y\20B\20W\20D\20m\20A\20V\20C\20k\20B\20X\20D\20o"; -static const char huff4decode12[64] = - " \"!S \"!h \"!U \"!l \"!T \"!j \"!f \"!n \"!S \"!i \"!U \"!m \"!T \"!k \"!g \"!o"; -static const char huff4decode13[128] = - "132d132x132f132|132e132z132g132~132d132y132f132}132e132{132g132"; -static const char huff4decode14[128] = - "132T132x132e132|132T132z132v132~132T132y132e132}132T132{132w132"; -static const char huff4decode15[128] = - " 2!4 3!e 2!4 3!i 2!4 3!g 2!4 3!| 2!4 3!f 2!4 3!z 2!4 3!h 2!4 3!~ 2!4 3!e 2!4 3!i 2!4 3!g 2!4 3!} 2!4 3!f 2!4 3!{ 2!4 3!h 2!4 3!"; -static const char huff4reads[16] = "\4\5\5\5\5\5\5\6\6\6\6\6\6\7\7\7"; -static const char mask2[64] = - "\0\0\0\0\377\0\0\0\0\377\0\0\377\377\0\0\0\0\377\0\377\0\377\0\0\377\377\0\377\377\377\0\0\0\0\377\377\0\0\377\0\377\0\377\377\377\0\377\0\0\377\377\377\0\377\377\0\377\377\377\377\377\377\377"; -static const char mask1[64] = - "\377\377\377\377\0\377\377\377\377\0\377\377\0\0\377\377\377\377\0\377\0\377\0\377\377\0\0\377\0\0\0\377\377\377\377\0\0\377\377\0\377\0\377\0\0\0\377\0\377\377\0\0\0\377\0\0\377\0\0\0\0\0\0\0"; -static const char mask4[16] = 
"\0\0\0\0\377\377\0\0\0\0\377\377\377\377\377\377"; -static const char mask3[16] = "\377\377\377\377\0\0\377\377\377\377\0\0\0\0\0\0"; +#include "bink.h" +#include "expand.h" +#include "../bitplane.h" +#include "../dct.h" +#include "../varbits.h" +#include "binkngc.h" + +#define BINK_BLOCK_SHIFT 3 +#define BINK_BLOCK_SIDE 8 +#define BINK_BLOCK_PIXELS (BINK_BLOCK_SIDE * BINK_BLOCK_SIDE) +#define BINK_RUN_BLOCK_LAST_PIXEL (BINK_BLOCK_PIXELS - 1) +#define BINK_BLOCK_ROUND_MASK (BINK_BLOCK_SIDE - 1) +#define BINK_BLOCK_ROUND(value) (((value) + BINK_BLOCK_ROUND_MASK) & ~BINK_BLOCK_ROUND_MASK) +#define BINK_BLOCK_PATTERN_OFFSET(offset, pitch) \ + (((offset) >> BINK_BLOCK_SHIFT) * (pitch) + ((offset) & BINK_BLOCK_ROUND_MASK)) +#define BINK_BLOCK_ODD_ROW(row) (((row) & BINK_BLOCK_SIDE) != 0) +#define BINK_BUNDLE_WIDTH 0x200 +#define BINK_CHROMA_SHIFT 1 +#define BINK_CHROMA_ROUND(value) (((value) + 1) >> BINK_CHROMA_SHIFT) +#define BINK_LUMA_PLANE_SCALE 1 +#define BINK_CHROMA_PLANE_SCALE 2 +#define BINK_COLOR_BLOCK_BYTES BINK_BLOCK_PIXELS +#define BINK_PATTERN_BLOCK_BYTES BINK_BLOCK_SIDE +#define BINK_RUN_BLOCK_BYTES 0x30 +#define BINK_BLOCK_TYPE_BITS 4 +#define BINK_COLOR_BITS 8 +#define BINK_PATTERN_BITS 8 +#define BINK_MOTION_BITS 5 +#define BINK_DC_START_BITS 11 +#define BINK_RUN_BITS 4 +typedef BITSTYPE EXPBITSTYPE; + +#define EXP_BITS_PER_WORD BITSTYPELEN +#define EXP_LAST_BIT_INDEX (EXP_BITS_PER_WORD - 1) +#define EXP_WORD_BYTES BITSTYPEBYTES +#define EXP_U16_MASK 0xffff +#define HUFF4_SYMBOLS 16 +#define HUFF4_SYMBOL_MASK 0xf +#define HUFF4_ALL_SYMBOLS_MASK 0xffff +#define HUFF4_USED_SHIFT 4 +#define HUFF4_CODE_USED(code) ((code) >> HUFF4_USED_SHIFT) +#define HUFF4_CODE_SYMBOL(code) ((code) & HUFF4_SYMBOL_MASK) +#define HUFF4_CODE_VALUE(code, values) ((values)[HUFF4_CODE_SYMBOL(code)]) +#define HUFF4_SUBTYPE_BITS 2 +#define HUFF4_EXPLICIT_SUBTYPE_BITS 3 +#define HUFF4_PAIR_COUNT 8 +#define HUFF4_PAIR_SYMBOLS 2 +#define HUFF4_QUARTER_SYMBOLS 4 +#define 
HUFF4_HALF_SYMBOLS 8 +#define HUFF4_LAST_QUARTER_SYMBOL (HUFF4_QUARTER_SYMBOLS * 3) +#define HUFF4_LAST_PAIR_SYMBOL (HUFF4_SYMBOLS - HUFF4_PAIR_SYMBOLS) +#define HUFF4_DECODE_1BYTE_SIZE HUFF4_SYMBOLS +#define HUFF4_DECODE_2BYTE_SIZE (HUFF4_SYMBOLS * 2) +#define HUFF4_DECODE_4BYTE_SIZE (HUFF4_SYMBOLS * 4) +#define HUFF4_DECODE_8BYTE_SIZE (HUFF4_SYMBOLS * 8) +#define HUFF4_MASK_4BYTE_SIZE (HUFF4_SYMBOLS * 4) +#define HUFF4_MASK_1BYTE_SIZE HUFF4_SYMBOLS +#define HUFF4_MERGE_PAIR_SIZE 4 +#define HUFF4_RLE_LITERAL_COUNT 12 +#define HUFF8_TABLE_STATES 16 +#define BUNDLE_REPEAT_EXTRA 0x14 +#define BUNDLE_REPEAT_THRESHOLD (BUNDLE_REPEAT_EXTRA + 2) +#define BINK_SIGNED_BYTE_BIAS 0x80 +#define BINK_SIGNED_BYTE_MASK 0x7f +#define BINK_DELTA16_GROUP_MAX 8 +#define BINK_BUNDLE_MIN_BYTE_BITS 8 +#define BINK_BUNDLE_MIN_WORD_BITS 16 +#define BINK_BYTE_BITS 8 +#define BINK_WORD_ALIGN_MASK 3 +#define BINK_PLANE_WORD_BYTES sizeof(u32) +#define BINK_WORK_BLOCK_SPAN (BINK_BLOCK_SIDE * BINK_CHROMA_PLANE_SCALE) +#define BINK_DC_BYTES sizeof(s16) +#define BINK_RESIDUE_LIMIT_BITS 7 +#define BINK_DCT_QUANT_BITS 4 +#define BINK_DCT_PATTERN_BITS 4 +#define BINK_BLOCK_ROWS(rows) ((rows) >> BINK_BLOCK_SHIFT) +#define BINK_BUNDLE_COUNT_BASE(rows, pitch) (BINK_BLOCK_ROWS(rows) * (pitch) - 1) +#define BINK_BUNDLE_COUNT_BITS(width, count_base) \ + getbitlevelvar(((width) + (count_base)) & EXP_U16_MASK) +#define BINK_BUNDLE_INITIAL_VALUE(shift) (1 << ((shift) - 1)) +#define BINK_BUNDLE_STORAGE_SIZE(width, rows, bits, pitch) \ + ((((width) * (bits)) >> BINK_BLOCK_SHIFT) + \ + ((BINK_BLOCK_ROWS(rows) * (pitch) * (bits)) >> BINK_BLOCK_SHIFT)) +#define BINK_BUNDLE_ALIGN_SIZE(size) (((size) + BINK_WORD_ALIGN_MASK) & ~BINK_WORD_ALIGN_MASK) +#define BINK_BUNDLE_EMPTY_CUR(bundle) ((bundle)->data + EXP_WORD_BYTES) +#define BINK_MARK_WORK_BLOCK(work_row, work_col) ((work_row)[(work_col) >> BINK_CHROMA_SHIFT] = 1) +#define BINK_MOTION_SOURCE(old, pitch, mx, my) ((old) + (my) * (s32)(pitch) + (mx)) +#define 
BINK_DCT_PATTERN_SCAN(pattern) (patterns + (pattern) * BINK_BLOCK_PIXELS) +#define BINK_FILL_WORD(value) \ + ((value) | ((value) << BINK_BYTE_BITS) | ((value) << (BINK_BYTE_BITS * 2)) | ((value) << (BINK_BYTE_BITS * 3))) +#define BINK_BLOCK_ROW_WORD(ptr, pitch, row, word) \ + (*(u32 PTR4*)((ptr) + (pitch) * (row) + (word) * BINK_PLANE_WORD_BYTES)) +#define BINK_LINEAR_BLOCK_ROW_WORD(ptr, row, word) \ + (*(u32 PTR4*)((ptr) + (row) * BINK_BLOCK_SIDE + (word) * BINK_PLANE_WORD_BYTES)) +#define BINK_HUFF4_RLE_LENGTH(value) \ + ((u8 PTR4*)&BINK_HUFF4_RLE_LENGTHS_PACKED)[(value) - HUFF4_RLE_LITERAL_COUNT] + +enum BINKBLOCKTYPE +{ + BINK_BLOCK_SKIP, /* Copy the previous-frame 8x8 block. */ + BINK_BLOCK_SCALED, /* Decode a 16x16 block through the subblock bundle. */ + BINK_BLOCK_MOTION, /* Copy an 8x8 block from the previous frame with motion offsets. */ + BINK_BLOCK_RUN, /* Fill in scan-order color runs. */ + BINK_BLOCK_RESIDUE, /* Motion block plus residue bitplane coefficients. */ + BINK_BLOCK_INTRA, /* Intra DCT block. */ + BINK_BLOCK_FILL, /* Solid color block. */ + BINK_BLOCK_INTER, /* Motion-compensated inter DCT block. */ + BINK_BLOCK_PATTERN, /* Two-color pattern block. */ + BINK_BLOCK_RAW /* Uncoded 8x8 color block. */ +}; + +const double BINK_HUFF4_DECODE0_ALIGN = 0.0; +const double BINK_HUFF4_DECODE_TABLES_ALIGN = 0.0; +static const u8 huff4decode00[HUFF4_DECODE_1BYTE_SIZE] = "@ABCDEFGHIJKLMNO"; +static const u8 huff4decode01[HUFF4_DECODE_2BYTE_SIZE] = + "\20A\20R\20S\20T\20U\20V\20W\20X\20A\20Y\20Z\20[\20\\\20]\20^\20_"; +static const u8 huff4decode02[HUFF4_DECODE_2BYTE_SIZE] = " B!X T!\\ C!Z V!^ B!Y U!] 
C![ W!_"; +static const u8 huff4decode03[HUFF4_DECODE_2BYTE_SIZE] = " C1X E2\\ D1Z V2^ C1Y E2] D1[ W2_"; +static const u8 huff4decode04[HUFF4_DECODE_2BYTE_SIZE] = "0D2X1F3\\0E2Z1G3^0D2Y1F3]0E2[1G3_"; +static const u8 huff4decode05[HUFF4_DECODE_2BYTE_SIZE] = "0FBJ1HD\\0GCK1IE^0FBJ1HD]0GCK1IE_"; +static const u8 huff4decode06[HUFF4_DECODE_2BYTE_SIZE] = " EAI GC\\ FBZ HD^ EAI GC] FB[ HD_"; +static const u8 huff4decode07[HUFF4_DECODE_4BYTE_SIZE] = + "1S2h1U2l1T2j1f2n1S2i1U2m1T2k1g2o"; +static const u8 huff4decode08[HUFF4_DECODE_4BYTE_SIZE] = + "\20!\20R\20!\20h\20!\20d\20!\20l\20!\20S\20!\20j\20!\20f\20!\20n\20!\20R\20!\20i\20!\20e\20!\20m\20!\20S\20!\20k\20!\20g\20!\20o"; +static const u8 huff4decode09[HUFF4_DECODE_4BYTE_SIZE] = + "1TBh1VCl1UBj1WCn1TBi1VCm1UBk1WCo"; +static const u8 huff4decode10[HUFF4_DECODE_4BYTE_SIZE] = + " 2!U C!Y 2!W D!l 2!V C!j 2!X D!n 2!U C!Y 2!W D!m 2!V C!k 2!X D!o"; +static const u8 huff4decode11[HUFF4_DECODE_4BYTE_SIZE] = + "\20A\20U\20C\20Y\20B\20W\20D\20l\20A\20V\20C\20j\20B\20X\20D\20n\20A\20U\20C\20Y\20B\20W\20D\20m\20A\20V\20C\20k\20B\20X\20D\20o"; +static const u8 huff4decode12[HUFF4_DECODE_4BYTE_SIZE] = + " \"!S \"!h \"!U \"!l \"!T \"!j \"!f \"!n \"!S \"!i \"!U \"!m \"!T \"!k \"!g \"!o"; +static const u8 huff4decode13[HUFF4_DECODE_8BYTE_SIZE] = + "132d132x132f132|132e132z132g132~132d132y132f132}132e132{132g132"; +static const u8 huff4decode14[HUFF4_DECODE_8BYTE_SIZE] = + "132T132x132e132|132T132z132v132~132T132y132e132}132T132{132w132"; +static const u8 huff4decode15[HUFF4_DECODE_8BYTE_SIZE] = + " 2!4 3!e 2!4 3!i 2!4 3!g 2!4 3!| 2!4 3!f 2!4 3!z 2!4 3!h 2!4 3!~ 2!4 3!e 2!4 3!i 2!4 3!g 2!4 3!} 2!4 3!f 2!4 3!{ 2!4 3!h 2!4 3!"; +static const u8 PTR4* huff4decodes[HUFF4_SYMBOLS] = { + huff4decode00, huff4decode01, huff4decode02, huff4decode03, + huff4decode04, huff4decode05, huff4decode06, huff4decode07, + huff4decode08, huff4decode09, huff4decode10, huff4decode11, + huff4decode12, huff4decode13, huff4decode14, huff4decode15, +}; +static 
const u8 BINK_HUFF4_BITS_TO_PEEK[HUFF4_SYMBOLS] = "\4\5\5\5\5\5\5\6\6\6\6\6\6\7\7\7"; +static const u8 mask2[HUFF4_MASK_4BYTE_SIZE] = + "\0\0\0\0\377\0\0\0\0\377\0\0\377\377\0\0\0\0\377\0\377\0\377\0\0\377\377\0\377\377\377\0\0\0\0\377\377\0\0\377\0\377\0\377\377\377\0\377\0\0\377\377\377\0\377\377\0\377\377\377\377\377\377\377"; +static const u8 mask1[HUFF4_MASK_4BYTE_SIZE] = + "\377\377\377\377\0\377\377\377\377\0\377\377\0\0\377\377\377\377\0\377\0\377\0\377\377\0\0\377\0\0\0\377\377\377\377\0\0\377\377\0\377\0\377\0\0\0\377\0\377\377\0\0\0\377\0\0\377\0\0\0\0\0\0\0"; +static const u8 mask4[HUFF4_MASK_1BYTE_SIZE] = "\0\0\0\0\377\377\0\0\0\0\377\377\377\377\377\377"; +static const u8 mask3[HUFF4_MASK_1BYTE_SIZE] = "\377\377\377\377\0\0\377\377\377\377\0\0\0\0\0\0"; +/* Huff4 symbols 12..15 expand to repeated block-type runs of 4, 8, 12, and 32. */ +const u32 BINK_HUFF4_RLE_LENGTHS_PACKED = 0x04080c20; + +typedef struct READBUNDLE +{ + u8 PTR4* cur; + u8 PTR4* end; + u32 bit_size; /* Bits per direct bundle element. */ + u32 initial_value; + u8 values[HUFF4_SYMBOLS]; /* Huffman symbol to Bink symbol translation list. */ + u32 bits_to_peek; + const u8 PTR4* decode; + u32 count_bits; /* Bits used to read the next decoded-element count. */ + u8 PTR4* data; +} READBUNDLE; + +typedef struct HUFF8TABLE +{ + u8 values[HUFF8_TABLE_STATES][HUFF4_SYMBOLS]; + u32 bits_to_peek[HUFF8_TABLE_STATES]; + const u8 PTR4* decode[HUFF8_TABLE_STATES]; + /* Last decoded high nibble selects the next color high-nibble codebook. 
*/ + u32 state; +} HUFF8TABLE; + +typedef VARBITS EXPBITS; + +static void ReadHuffTable(EXPBITS PTR4* bits, const u8 PTR4* PTR4* decode, + u32 PTR4* bits_to_peek, u8 PTR4* values); + +static void OpenReadBundle(u8 PTR4* bits, READBUNDLE PTR4* rb, s32 width, u32 rows, + s32 shift, s32 pitch, s32 use_initial_value) +{ + u32 count_base; + + rb->bit_size = shift; + rb->cur = 0; + rb->end = 0; + count_base = BINK_BUNDLE_COUNT_BASE(rows, pitch); + rb->count_bits = BINK_BUNDLE_COUNT_BITS(width, count_base); + if (use_initial_value) { + rb->initial_value = BINK_BUNDLE_INITIAL_VALUE(shift); + } else { + rb->initial_value = 0; + } + rb->data = bits; +} + +static inline u32 exp_get_bits(EXPBITS PTR4* bits, u32 count) +{ + u32 bitcount; + EXPBITSTYPE bitbuf; + EXPBITSTYPE word; + u32 mask; + u32 value; + + mask = GetBitsLen(count); + bitcount = bits->bitlen; + if (bitcount > count - 1) { + bitbuf = bits->bits; + bits->bitlen = bitcount - count; + bits->bits = bitbuf >> count; + } else { + bitbuf = bits->bits; + word = *bits->cur++; + bits->bitlen = bitcount + EXP_BITS_PER_WORD - count; + bits->bits = word >> (count - bitcount); + bitbuf |= word << bitcount; + } + + value = bitbuf & mask; + return value; +} + +static inline u32 exp_get_bit(EXPBITS PTR4* bits) +{ + EXPBITSTYPE bitbuf; + + if (bits->bitlen != 0) { + bitbuf = bits->bits; + bits->bitlen--; + bits->bits = bitbuf >> 1; + } else { + bitbuf = *bits->cur++; + bits->bitlen = EXP_LAST_BIT_INDEX; + bits->bits = bitbuf >> 1; + } + + return bitbuf & 1; +} + +static void simpmergesort(EXPBITS PTR4* bits, u8 PTR4* out, u8 PTR4* left, + u8 PTR4* right, s32 count) +{ + u8 value; + s32 left_count; + + left_count = count; + for (;;) { + if (exp_get_bit(bits) != 0) { + value = *right++; + count--; + } else { + value = *left++; + left_count--; + } + + *out++ = value; + if (left_count == 0) { + break; + } + if (count == 0) { + break; + } + } + + if (left_count != 0) { + while (left_count != 0) { + *out++ = *left++; + left_count--; + 
} + } else { + while (count != 0) { + *out++ = *right++; + count--; + } + } +} + +static inline u32 exp_read_huff4(EXPBITS PTR4* bits, u32 bits_to_peek, + const u8 PTR4* decode, u8 PTR4* values) +{ + u32 bitcount; + EXPBITSTYPE bitbuf; + EXPBITSTYPE word; + u32 mask; + u8 code; + u32 used; + u32 value; + + bitcount = bits->bitlen; + mask = GetBitsLen(bits_to_peek); + if (bitcount >= bits_to_peek) { + bitbuf = bits->bits & mask; + code = decode[bitbuf]; + used = HUFF4_CODE_USED(code); + value = HUFF4_CODE_VALUE(code, values); + bits->bits >>= used; + bits->bitlen = bitcount - used; + } else { + word = *bits->cur; + bitbuf = (bits->bits | (word << bitcount)) & mask; + code = decode[bitbuf]; + used = HUFF4_CODE_USED(code); + value = HUFF4_CODE_VALUE(code, values); + if (bitcount > used - 1) { + bits->bits >>= used; + bits->bitlen = bitcount - used; + } else { + bits->bits = word >> (used - bitcount); + bits->bitlen = bitcount + EXP_BITS_PER_WORD - used; + bits->cur++; + } + } + + return value; +} + +static inline u32 exp_read_huff8(EXPBITS PTR4* bits, u32 state, HUFF8TABLE PTR4* table) +{ + return exp_read_huff4(bits, table->bits_to_peek[state], table->decode[state], + table->values[state]); +} + +static void ReadHuffTable(EXPBITS PTR4* vb, const u8 PTR4* PTR4* decode, + u32 PTR4* bits_to_peek, u8 PTR4* values) +{ + u32 mode; + u32 subtype; + u32 count; + u32 remaining; + u32 mask; + u32 j; + u32 i; + u8 order[HUFF4_SYMBOLS]; + u8 merge01[HUFF4_MERGE_PAIR_SIZE]; + u8 merge23[HUFF4_MERGE_PAIR_SIZE]; + u8 merge45[HUFF4_MERGE_PAIR_SIZE]; + u8 merge67[HUFF4_MERGE_PAIR_SIZE]; + u8 PTR4* out; + + out = values; + + /* Each table stores a 4-bit codebook index plus a 16-entry symbol remap. 
*/ + mode = exp_get_bits(vb, HUFF4_USED_SHIFT); + *decode = huff4decodes[mode]; + *bits_to_peek = (u8)BINK_HUFF4_BITS_TO_PEEK[mode]; + if (mode == 0) { + for (j = 0; j < HUFF4_SYMBOLS; ++j) { + out[j] = j; + } + return; + } + + if (exp_get_bit(vb) == 0) { + /* Compact symbol shuffling: merge adjacent pair, quarter, and half lists. */ + subtype = exp_get_bits(vb, HUFF4_SUBTYPE_BITS); + if (subtype == 0) { + i = 0; + count = HUFF4_PAIR_COUNT; + do { + if (exp_get_bit(vb) != 0) { + out[1] = i; + out[0] = i + 1; + } else { + out[0] = i; + out[1] = i + 1; + } + out += HUFF4_PAIR_SYMBOLS; + i += HUFF4_PAIR_SYMBOLS; + count--; + } while (count != 0); + } else { + u32 left; + u32 right; + + left = 0; + right = 1; + for (i = 0; i < HUFF4_PAIR_COUNT; ++i) { + if (exp_get_bit(vb) != 0) { + order[left] = right; + order[right] = left; + } else { + order[left] = left; + order[right] = right; + } + left += HUFF4_PAIR_SYMBOLS; + right += HUFF4_PAIR_SYMBOLS; + } + + if (subtype == 1) { + simpmergesort(vb, values, order, order + HUFF4_PAIR_SYMBOLS, HUFF4_PAIR_SYMBOLS); + simpmergesort(vb, values + HUFF4_QUARTER_SYMBOLS, + order + HUFF4_QUARTER_SYMBOLS, + order + HUFF4_QUARTER_SYMBOLS + HUFF4_PAIR_SYMBOLS, + HUFF4_PAIR_SYMBOLS); + simpmergesort(vb, values + HUFF4_HALF_SYMBOLS, + order + HUFF4_HALF_SYMBOLS, + order + HUFF4_HALF_SYMBOLS + HUFF4_PAIR_SYMBOLS, + HUFF4_PAIR_SYMBOLS); + simpmergesort(vb, values + HUFF4_LAST_QUARTER_SYMBOL, + order + HUFF4_LAST_QUARTER_SYMBOL, + order + HUFF4_LAST_PAIR_SYMBOL, HUFF4_PAIR_SYMBOLS); + } else { + simpmergesort(vb, merge01, order, order + HUFF4_PAIR_SYMBOLS, + HUFF4_PAIR_SYMBOLS); + simpmergesort(vb, merge23, order + HUFF4_QUARTER_SYMBOLS, + order + HUFF4_QUARTER_SYMBOLS + HUFF4_PAIR_SYMBOLS, + HUFF4_PAIR_SYMBOLS); + simpmergesort(vb, merge45, order + HUFF4_HALF_SYMBOLS, + order + HUFF4_HALF_SYMBOLS + HUFF4_PAIR_SYMBOLS, + HUFF4_PAIR_SYMBOLS); + simpmergesort(vb, merge67, order + HUFF4_LAST_QUARTER_SYMBOL, + order + HUFF4_LAST_PAIR_SYMBOL, 
HUFF4_PAIR_SYMBOLS); + if (subtype == 2) { + simpmergesort(vb, values, merge01, merge23, HUFF4_QUARTER_SYMBOLS); + simpmergesort(vb, values + HUFF4_HALF_SYMBOLS, merge45, merge67, + HUFF4_QUARTER_SYMBOLS); + } else { + simpmergesort(vb, order, merge01, merge23, HUFF4_QUARTER_SYMBOLS); + simpmergesort(vb, order + HUFF4_HALF_SYMBOLS, merge45, merge67, + HUFF4_QUARTER_SYMBOLS); + simpmergesort(vb, values, order, order + HUFF4_HALF_SYMBOLS, + HUFF4_HALF_SYMBOLS); + } + } + } + } else { + subtype = exp_get_bits(vb, HUFF4_EXPLICIT_SUBTYPE_BITS); + remaining = HUFF4_ALL_SYMBOLS_MASK; + for (count = 0; count <= subtype; ++count) { + mode = exp_get_bits(vb, HUFF4_USED_SHIFT); + values[count] = mode; + remaining &= ~(1 << mode); + } + + i = 0; + mask = remaining; + do { + if ((mask & 1) != 0) { + subtype++; + values[subtype] = i; + } + i++; + mask >>= 1; + } while (mask != 0); + } +} + +static void StartReadHuff4Bundle(READBUNDLE PTR4* rb, EXPBITS PTR4* bits) +{ + ReadHuffTable(bits, &rb->decode, &rb->bits_to_peek, rb->values); +} + +static void StartReadHuff8Bundle(READBUNDLE PTR4* rb, EXPBITS PTR4* bits, + HUFF8TABLE PTR4* huff8_table) +{ + u32 PTR4* codes; + const u8 PTR4* PTR4* huff_table; + u8 PTR4* cur; + u8 PTR4* end; + + cur = huff8_table->values[0]; + end = huff8_table->values[HUFF8_TABLE_STATES - 1]; + codes = huff8_table->bits_to_peek; + huff_table = huff8_table->decode; + do { + ReadHuffTable(bits, huff_table, codes, cur); + cur += HUFF4_SYMBOLS; + ++codes; + ++huff_table; + } while (cur <= end); + ReadHuffTable(bits, &rb->decode, &rb->bits_to_peek, rb->values); + huff8_table->state = 0; +} + +static void CheckReadRLEHuff4Bundle(READBUNDLE PTR4* bundle, EXPBITS PTR4* bits) +{ + u32 count; + u8 PTR4* dest; + u32 value; + u32 last; + u8 run; + u8 PTR4* values; + const u8 PTR4* decode; + u32 peek; + + if (bundle->cur != bundle->end) { + return; + } + + count = exp_get_bits(bits, bundle->count_bits); + if (count == 0) { + /* Empty bundles point cur past data so 
callers see no decoded elements. */ + bundle->end = bundle->data; + bundle->cur = BINK_BUNDLE_EMPTY_CUR(bundle); + return; + } + + bundle->cur = bundle->data; + bundle->end = bundle->data + count; + if (exp_get_bit(bits) == 0) { + /* Literal Huff4 values above 11 repeat the previous decoded symbol. */ + dest = bundle->data; + last = 0; + values = bundle->values; + decode = bundle->decode; + peek = (u8)bundle->bits_to_peek; + while (count != 0) { + value = exp_read_huff4(bits, peek, decode, values); + if (value > HUFF4_RLE_LITERAL_COUNT - 1) { + u32 fill; + + /* Packed word stores four copies of the last byte for the run fill. */ + fill = last | (last << BINK_BYTE_BITS); + run = BINK_HUFF4_RLE_LENGTH(value); + count -= run; + fill |= fill << BINK_BUNDLE_MIN_WORD_BITS; + do { + *(u32 PTR4*)dest = fill; + dest += EXP_WORD_BYTES; + run -= EXP_WORD_BYTES; + } while (run != 0); + } else { + *dest++ = (u8)value; + count--; + last = value; + } + } + } else { + value = exp_get_bits(bits, HUFF4_USED_SHIFT); + memset(bundle->data, value, count); + } +} + +static void CheckReadHuff8Bundle(READBUNDLE PTR4* bundle, EXPBITS PTR4* bits, + HUFF8TABLE PTR4* huff8_table) +{ + u32 count; + u8 PTR4* dest; + u8 PTR4* values; + const u8 PTR4* decode; + u32 peek; + u32 state; + u32 high; + u32 low; + u32 value; + s32 remaining; + s32 prev_remaining; + + if (bundle->cur != bundle->end) { + return; + } + + count = exp_get_bits(bits, bundle->count_bits); + if (count == 0) { + /* The empty sentinel matches the other bundle readers. */ + bundle->end = bundle->data; + bundle->cur = BINK_BUNDLE_EMPTY_CUR(bundle); + return; + } + + dest = bundle->data; + peek = bundle->bits_to_peek; + bundle->cur = dest; + bundle->end = dest + count; + decode = bundle->decode; + state = huff8_table->state; + if (exp_get_bit(bits) != 0) { + /* Negative remaining marks the old-format repeat packet variant. 
*/ + count = -(count + BUNDLE_REPEAT_EXTRA); + } + values = bundle->values; + remaining = (s32)count; + do { + prev_remaining = remaining; + high = exp_read_huff8(bits, state, huff8_table); + state = high; + low = exp_read_huff4(bits, peek, decode, values); + value = ((high & HUFF4_SYMBOL_MASK) << HUFF4_USED_SHIFT) | low; + if ((value & BINK_SIGNED_BYTE_BIAS) == 0) { + value |= BINK_SIGNED_BYTE_BIAS; + } else { + value = BINK_SIGNED_BYTE_BIAS - (value & BINK_SIGNED_BYTE_MASK); + } + *dest++ = (u8)value; + remaining = prev_remaining - 1; + } while (remaining > 0); + + if (remaining < -BUNDLE_REPEAT_THRESHOLD) { + /* Repeat packets back-fill the whole bundle with the first decoded byte. */ + memset(bundle->data, *bundle->data, -(prev_remaining + BUNDLE_REPEAT_EXTRA)); + } + huff8_table->state = state; +} + +static void NewCheckReadHuff8Bundle(READBUNDLE PTR4* bundle, EXPBITS PTR4* bits, + HUFF8TABLE PTR4* huff8_table) +{ + u32 count; + u8 PTR4* dest; + u8 PTR4* values; + const u8 PTR4* decode; + u32 peek; + u32 state; + u32 low; + s32 remaining; + s32 prev_remaining; + + if (bundle->cur != bundle->end) { + return; + } + + count = exp_get_bits(bits, bundle->count_bits); + if (count == 0) { + bundle->end = bundle->data; + bundle->cur = BINK_BUNDLE_EMPTY_CUR(bundle); + return; + } + + dest = bundle->data; + peek = bundle->bits_to_peek; + bundle->cur = dest; + bundle->end = dest + count; + decode = bundle->decode; + state = huff8_table->state; + if (exp_get_bit(bits) != 0) { + /* New-format Huff8 repeat packets keep the byte unsigned. 
*/ + count = -count - BUNDLE_REPEAT_EXTRA; + } + values = bundle->values; + remaining = (s32)count; + do { + prev_remaining = remaining; + state = exp_read_huff8(bits, state, huff8_table); + low = exp_read_huff4(bits, peek, decode, values); + *dest++ = (u8)(low | (state << HUFF4_USED_SHIFT)); + remaining = prev_remaining - 1; + } while (remaining > 0); + + if (remaining < -BUNDLE_REPEAT_THRESHOLD) { + /* Match old-format repeat handling after the one-byte payload is decoded. */ + memset(bundle->data, *bundle->data, -(prev_remaining + BUNDLE_REPEAT_EXTRA)); + } + huff8_table->state = state; +} + +static void CheckReadHuff4Bundle(READBUNDLE PTR4* bundle, EXPBITS PTR4* bits) +{ + u32 count; + u8 PTR4* dest; + u8 PTR4* values; + const u8 PTR4* decode; + u32 peek; + u32 value; + + if (bundle->cur != bundle->end) { + return; + } + + count = exp_get_bits(bits, bundle->count_bits); + if (count == 0) { + bundle->end = bundle->data; + bundle->cur = BINK_BUNDLE_EMPTY_CUR(bundle); + return; + } + + bundle->cur = bundle->data; + bundle->end = bundle->data + count; + if (exp_get_bit(bits) == 0) { + /* Direct Huff4 bundles decode one nibble-sized symbol per byte. 
*/ + dest = bundle->data; + values = bundle->values; + decode = bundle->decode; + peek = bundle->bits_to_peek; + while (count != 0) { + *dest++ = (u8)exp_read_huff4(bits, peek, decode, values); + count--; + } + } else { + value = exp_get_bits(bits, HUFF4_USED_SHIFT); + memset(bundle->data, value, count); + } +} + +static void CheckReadHuff4PairBundle(READBUNDLE PTR4* bundle, EXPBITS PTR4* bits) +{ + u32 count; + u8 PTR4* dest; + u8 PTR4* values; + const u8 PTR4* decode; + u32 peek; + u32 first; + u32 second; + + if (bundle->cur != bundle->end) { + return; + } + + count = exp_get_bits(bits, bundle->count_bits); + if (count == 0) { + bundle->end = bundle->data; + bundle->cur = BINK_BUNDLE_EMPTY_CUR(bundle); + return; + } + + dest = bundle->data; + bundle->cur = dest; + bundle->end = dest + count; + values = bundle->values; + decode = bundle->decode; + peek = bundle->bits_to_peek; + do { + /* Pair bundles pack two Huff4 symbols into each output byte. */ + count--; + first = exp_read_huff4(bits, peek, decode, values); + second = exp_read_huff4(bits, peek, decode, values); + *dest++ = (u8)(first | (second << HUFF4_USED_SHIFT)); + } while (count != 0); +} + +static void CheckReadHuff4SBundle(READBUNDLE PTR4* bundle, EXPBITS PTR4* bits) +{ + u32 count; + s8 PTR4* dest; + u8 PTR4* values; + const u8 PTR4* decode; + u32 peek; + s32 value; + + if (bundle->cur != bundle->end) { + return; + } + + count = exp_get_bits(bits, bundle->count_bits); + if (count == 0) { + bundle->end = bundle->data; + bundle->cur = BINK_BUNDLE_EMPTY_CUR(bundle); + return; + } + + bundle->cur = bundle->data; + bundle->end = bundle->data + count; + if (exp_get_bit(bits) == 0) { + /* Signed Huff4 bundles store a sign bit only for nonzero symbols. 
*/ + dest = bundle->data; + values = bundle->values; + decode = bundle->decode; + peek = bundle->bits_to_peek; + count--; + do { + value = (s32)exp_read_huff4(bits, peek, decode, values); + if (value != 0 && exp_get_bit(bits) != 0) { + value = -value; + } + *dest++ = (s8)value; + } while (count-- != 0); + } else { + value = (s32)exp_get_bits(bits, HUFF4_USED_SHIFT); + if (value != 0 && exp_get_bit(bits) != 0) { + value = -value; + } + memset(bundle->data, value, count); + } +} + +static void CheckReadDelta16Bundle(READBUNDLE PTR4* bundle, EXPBITS PTR4* bits) +{ + u32 count; + u32 remaining; + u32 group_count; + u32 bit_count; + u32 current; + u32 value; + s32 delta; + s16 PTR4* dest; + + if (bundle->cur != bundle->end) { + return; + } + + count = exp_get_bits(bits, bundle->count_bits); + if (count == 0) { + bundle->end = bundle->data; + bundle->cur = BINK_BUNDLE_EMPTY_CUR(bundle); + return; + } + + dest = (s16 PTR4*)bundle->data; + if (bundle->initial_value == 0) { + current = exp_get_bits(bits, bundle->bit_size) & EXP_U16_MASK; + } else { + current = exp_get_bits(bits, bundle->bit_size - 1) & EXP_U16_MASK; + if (current != 0 && exp_get_bit(bits) != 0) { + current = -current & EXP_U16_MASK; + } + } + + *dest++ = (s16)current; + remaining = count - 1; + bundle->cur = bundle->data; + bundle->end = bundle->data + count * sizeof(*dest); + while (remaining != 0) { + group_count = remaining; + if (group_count > BINK_DELTA16_GROUP_MAX) { + group_count = BINK_DELTA16_GROUP_MAX; + } + + bit_count = exp_get_bits(bits, HUFF4_USED_SHIFT); + if (bit_count == 0) { + radmemset16(dest, (u16)current, group_count * sizeof(*dest)); + dest += group_count; + remaining -= group_count; + } else { + remaining -= group_count; + while (group_count != 0) { + group_count--; + value = exp_get_bits(bits, bit_count) & EXP_U16_MASK; + delta = (s32)(s16)value; + if (delta != 0 && exp_get_bit(bits) != 0) { + delta = (s32)(s16)-value; + } + current = (current + delta) & EXP_U16_MASK; + *dest++ = 
(s16)current; + } + } + } +} + +static inline void expand_run_block(u8 PTR4* dest, + u32 pitch, + READBUNDLE PTR4* colors, + READBUNDLE PTR4* runs, + EXPBITS PTR4* bits) +{ + const u8 PTR4* scan; + u32 filled; + + scan = BINK_DCT_PATTERN_SCAN(exp_get_bits(bits, BINK_DCT_PATTERN_BITS)); + filled = 0; + do { + u32 count; + + count = *runs->cur++ + 1; + filled += count; + if (exp_get_bit(bits) != 0) { + u8 value; + + value = *colors->cur++; + do { + u32 offset; + + offset = *scan++; + dest[BINK_BLOCK_PATTERN_OFFSET(offset, pitch)] = value; + } while (--count != 0); + } else { + do { + u32 offset; + + offset = *scan++; + dest[BINK_BLOCK_PATTERN_OFFSET(offset, pitch)] = *colors->cur++; + } while (--count != 0); + } + } while (filled < BINK_RUN_BLOCK_LAST_PIXEL); + + if (filled == BINK_RUN_BLOCK_LAST_PIXEL) { + u32 offset; + + offset = *scan++; + dest[BINK_BLOCK_PATTERN_OFFSET(offset, pitch)] = *colors->cur++; + } +} + +static inline void expand_pattern_block(u8 PTR4* dest, + u32 pitch, + READBUNDLE PTR4* colors, + READBUNDLE PTR4* patterns_bundle) +{ + u8 color0; + u8 color1; + u32 i; + + color0 = colors->cur[0]; + color1 = colors->cur[1]; + colors->cur += 2; + for (i = 0; i < BINK_BLOCK_SIDE; ++i) { + u32 bits; + u32 j; + + bits = *patterns_bundle->cur++; + for (j = 0; j < BINK_BLOCK_SIDE; ++j) { + dest[i * pitch + j] = (bits & 1) != 0 ? color1 : color0; + bits >>= 1; + } + } +} + +static u32 getbunsize(s32 width, u32 rows, u32 bits, s32 pitch) +{ + if (bits < (BINK_BUNDLE_MIN_BYTE_BITS + 1)) { + bits = BINK_BUNDLE_MIN_BYTE_BITS; + } else if (bits < BINK_BUNDLE_MIN_WORD_BITS) { + bits = BINK_BUNDLE_MIN_WORD_BITS; + } + return BINK_BUNDLE_ALIGN_SIZE(BINK_BUNDLE_STORAGE_SIZE(width, rows, bits, pitch)); +} + +void ExpandBundleSizes(u32 PTR4* sizes, u32 rows) +{ + /* Order matches the Bink value sources: block/subblock types, colors, patterns, + motion X/Y, intra/inter DC, then run lengths. 
*/ + sizes[BINK_BUNDLE_BLOCK_TYPES] = getbunsize(BINK_BUNDLE_WIDTH, rows, BINK_BLOCK_TYPE_BITS, 1); + sizes[BINK_BUNDLE_SUBBLOCK_TYPES] = + getbunsize(BINK_BUNDLE_WIDTH, rows >> BINK_CHROMA_SHIFT, BINK_BLOCK_TYPE_BITS, 1); + sizes[BINK_BUNDLE_COLORS] = + getbunsize(BINK_BUNDLE_WIDTH, rows, BINK_COLOR_BITS, BINK_COLOR_BLOCK_BYTES); + sizes[BINK_BUNDLE_PATTERNS] = + getbunsize(BINK_BUNDLE_WIDTH, rows, BINK_PATTERN_BITS, BINK_PATTERN_BLOCK_BYTES); + sizes[BINK_BUNDLE_X_OFFSETS] = getbunsize(BINK_BUNDLE_WIDTH, rows, BINK_MOTION_BITS, 1); + sizes[BINK_BUNDLE_Y_OFFSETS] = getbunsize(BINK_BUNDLE_WIDTH, rows, BINK_MOTION_BITS, 1); + sizes[BINK_BUNDLE_INTRA_DC] = getbunsize(BINK_BUNDLE_WIDTH, rows, BINK_DC_START_BITS, 1); + sizes[BINK_BUNDLE_INTER_DC] = getbunsize(BINK_BUNDLE_WIDTH, rows, BINK_DC_START_BITS, 1); + sizes[BINK_BUNDLE_RUNS] = + getbunsize(BINK_BUNDLE_WIDTH, rows, BINK_RUN_BITS, BINK_RUN_BLOCK_BYTES); +} + +static u32 PTR4* ExpandPlane(u8 PTR4* out, + u8 PTR4* prev, + u32 width, + u32 height, + u32 pitch, + u32 PTR4* bundles, + u32 key_frame, + u8 PTR4* work, + u32 plane, + BUNDLEPOINTERS PTR4* table, + u32 flags) +{ + READBUNDLE block_types; + READBUNDLE subblock_types; + READBUNDLE colors; + READBUNDLE patterns_bundle; + READBUNDLE xoff; + READBUNDLE yoff; + READBUNDLE intra_dc; + READBUNDLE inter_dc; + READBUNDLE runs; + HUFF8TABLE huff8_table; + s16 dct_block[BINK_BLOCK_PIXELS]; + u8 motion_block[BINK_BLOCK_PIXELS]; + EXPBITS bitstate; + void (*read_huff8)(READBUNDLE PTR4*, EXPBITS PTR4*, HUFF8TABLE PTR4*); + u32 row; + u32 col; + u32 work_col; + u32 work_pitch; + enum BINKBLOCKTYPE block_type; + u32 quant; + u8 PTR4* dest; + u8 PTR4* old; + u8 PTR4* work_row; + + (void)key_frame; + + read_huff8 = + (flags & BINKOLDFRAMEFORMAT) != 0 ? 
CheckReadHuff8Bundle : NewCheckReadHuff8Bundle; + VarBitsOpen(bitstate, bundles); + + OpenReadBundle(table->typeptr, &block_types, + BINK_BUNDLE_WIDTH, width, BINK_BLOCK_TYPE_BITS, 1, 0); + OpenReadBundle(table->type16ptr, &subblock_types, + BINK_BUNDLE_WIDTH, width >> BINK_CHROMA_SHIFT, BINK_BLOCK_TYPE_BITS, 1, 0); + OpenReadBundle(table->colorptr, &colors, BINK_BUNDLE_WIDTH, width, + BINK_COLOR_BITS, BINK_COLOR_BLOCK_BYTES, 0); + OpenReadBundle(table->bits2ptr, &patterns_bundle, + BINK_BUNDLE_WIDTH, width, BINK_PATTERN_BITS, BINK_PATTERN_BLOCK_BYTES, 0); + OpenReadBundle(table->motionXptr, &xoff, BINK_BUNDLE_WIDTH, width, + BINK_MOTION_BITS, 1, 1); + OpenReadBundle(table->motionYptr, &yoff, BINK_BUNDLE_WIDTH, width, + BINK_MOTION_BITS, 1, 1); + OpenReadBundle(table->dctptr, &intra_dc, BINK_BUNDLE_WIDTH, + width, BINK_DC_START_BITS, 1, 0); + OpenReadBundle(table->mdctptr, &inter_dc, BINK_BUNDLE_WIDTH, + width, BINK_DC_START_BITS, 1, 1); + OpenReadBundle(table->patptr, &runs, BINK_BUNDLE_WIDTH, width, + BINK_RUN_BITS, BINK_RUN_BLOCK_BYTES, 0); + + StartReadHuff4Bundle(&block_types, &bitstate); + StartReadHuff4Bundle(&subblock_types, &bitstate); + StartReadHuff8Bundle(&colors, &bitstate, &huff8_table); + StartReadHuff4Bundle(&patterns_bundle, &bitstate); + StartReadHuff4Bundle(&xoff, &bitstate); + StartReadHuff4Bundle(&yoff, &bitstate); + StartReadHuff4Bundle(&runs, &bitstate); + + dest = out; + old = prev; + work_row = work; + work_pitch = pitch / (BINK_WORK_BLOCK_SPAN / plane); + row = 0; + if (height != 0) { + do { + CheckReadRLEHuff4Bundle(&block_types, &bitstate); + CheckReadRLEHuff4Bundle(&subblock_types, &bitstate); + read_huff8(&colors, &bitstate, &huff8_table); + CheckReadHuff4PairBundle(&patterns_bundle, &bitstate); + CheckReadHuff4SBundle(&xoff, &bitstate); + CheckReadHuff4SBundle(&yoff, &bitstate); + CheckReadDelta16Bundle(&intra_dc, &bitstate); + CheckReadDelta16Bundle(&inter_dc, &bitstate); + CheckReadHuff4Bundle(&runs, &bitstate); + + col = 0; + 
work_col = 0; + while (col < width) { + block_type = *block_types.cur; + block_types.cur++; + + if (BINK_BLOCK_ODD_ROW(row) && block_type == BINK_BLOCK_SCALED) { + col += BINK_BLOCK_SIDE; + dest += BINK_BLOCK_SIDE; + old += BINK_BLOCK_SIDE; + work_col += plane; + continue; + } + + switch (block_type) { + case BINK_BLOCK_SKIP: { + u32 i; + for (i = 0; i < BINK_BLOCK_SIDE; ++i) { + BINK_BLOCK_ROW_WORD(dest, pitch, i, 0) = BINK_BLOCK_ROW_WORD(old, pitch, i, 0); + BINK_BLOCK_ROW_WORD(dest, pitch, i, 1) = BINK_BLOCK_ROW_WORD(old, pitch, i, 1); + } + break; + } + case BINK_BLOCK_MOTION: { + s32 mx = *(s8 PTR4*)xoff.cur; + s32 my = *(s8 PTR4*)yoff.cur; + u8 PTR4* motion; + u32 i; + + BINK_MARK_WORK_BLOCK(work_row, work_col); + xoff.cur++; + yoff.cur++; + motion = BINK_MOTION_SOURCE(old, pitch, mx, my); + for (i = 0; i < BINK_BLOCK_SIDE; ++i) { + BINK_BLOCK_ROW_WORD(dest, pitch, i, 0) = BINK_BLOCK_ROW_WORD(motion, pitch, i, 0); + BINK_BLOCK_ROW_WORD(dest, pitch, i, 1) = BINK_BLOCK_ROW_WORD(motion, pitch, i, 1); + } + break; + } + case BINK_BLOCK_RESIDUE: { + s32 mx = *(s8 PTR4*)xoff.cur; + s32 my = *(s8 PTR4*)yoff.cur; + u8 PTR4* motion; + u32 i; + + BINK_MARK_WORK_BLOCK(work_row, work_col); + xoff.cur++; + yoff.cur++; + motion = BINK_MOTION_SOURCE(old, pitch, mx, my); + for (i = 0; i < BINK_BLOCK_SIDE; ++i) { + BINK_LINEAR_BLOCK_ROW_WORD(motion_block, i, 0) = + BINK_BLOCK_ROW_WORD(motion, pitch, i, 0); + BINK_LINEAR_BLOCK_ROW_WORD(motion_block, i, 1) = + BINK_BLOCK_ROW_WORD(motion, pitch, i, 1); + } + ReadBPLossyWithMotion((char PTR4*)dest, (s32)pitch, (BPBITSTREAM PTR4*)&bitstate, + exp_get_bits(&bitstate, BINK_RESIDUE_LIMIT_BITS), + (char PTR4*)motion_block); + break; + } + case BINK_BLOCK_INTRA: + BINK_MARK_WORK_BLOCK(work_row, work_col); + dct_block[0] = *(s16 PTR4*)intra_dc.cur; + intra_dc.cur += BINK_DC_BYTES; + ReadBPLossless(dct_block, (BPBITSTREAM PTR4*)&bitstate); + quant = exp_get_bits(&bitstate, BINK_DCT_QUANT_BITS); + FastIDCT8x8(dest, pitch, dct_block, 
quant); + break; + case BINK_BLOCK_INTER: { + s32 mx = *(s8 PTR4*)xoff.cur; + s32 my = *(s8 PTR4*)yoff.cur; + u8 PTR4* motion; + u32 i; + + BINK_MARK_WORK_BLOCK(work_row, work_col); + dct_block[0] = *(s16 PTR4*)inter_dc.cur; + inter_dc.cur += BINK_DC_BYTES; + xoff.cur++; + yoff.cur++; + motion = BINK_MOTION_SOURCE(old, pitch, mx, my); + for (i = 0; i < BINK_BLOCK_SIDE; ++i) { + BINK_LINEAR_BLOCK_ROW_WORD(motion_block, i, 0) = + BINK_BLOCK_ROW_WORD(motion, pitch, i, 0); + BINK_LINEAR_BLOCK_ROW_WORD(motion_block, i, 1) = + BINK_BLOCK_ROW_WORD(motion, pitch, i, 1); + } + ReadBPLossless(dct_block, (BPBITSTREAM PTR4*)&bitstate); + quant = exp_get_bits(&bitstate, BINK_DCT_QUANT_BITS); + FastmIDCT8x8WithMotion(dest, (s32)pitch, dct_block, quant, motion_block); + break; + } + case BINK_BLOCK_FILL: { + u8 value = *colors.cur; + u32 fill = BINK_FILL_WORD(value); + u32 i; + + BINK_MARK_WORK_BLOCK(work_row, work_col); + colors.cur++; + for (i = 0; i < BINK_BLOCK_SIDE; ++i) { + BINK_BLOCK_ROW_WORD(dest, pitch, i, 0) = fill; + BINK_BLOCK_ROW_WORD(dest, pitch, i, 1) = fill; + } + break; + } + case BINK_BLOCK_PATTERN: + BINK_MARK_WORK_BLOCK(work_row, work_col); + expand_pattern_block(dest, pitch, &colors, &patterns_bundle); + break; + case BINK_BLOCK_RAW: { + u32 i; + + BINK_MARK_WORK_BLOCK(work_row, work_col); + for (i = 0; i < BINK_BLOCK_SIDE; ++i) { + BINK_BLOCK_ROW_WORD(dest, pitch, i, 0) = + BINK_LINEAR_BLOCK_ROW_WORD(colors.cur, i, 0); + BINK_BLOCK_ROW_WORD(dest, pitch, i, 1) = + BINK_LINEAR_BLOCK_ROW_WORD(colors.cur, i, 1); + } + colors.cur += BINK_COLOR_BLOCK_BYTES; + break; + } + case BINK_BLOCK_RUN: + BINK_MARK_WORK_BLOCK(work_row, work_col); + expand_run_block(dest, pitch, &colors, &runs, &bitstate); + break; + case BINK_BLOCK_SCALED: + block_type = *subblock_types.cur; + subblock_types.cur++; + if (block_type == BINK_BLOCK_FILL) { + colors.cur++; + } else if (block_type == BINK_BLOCK_PATTERN) { + colors.cur += 2; + patterns_bundle.cur += BINK_PATTERN_BLOCK_BYTES; + } 
else if (block_type == BINK_BLOCK_RAW) { + colors.cur += BINK_COLOR_BLOCK_BYTES; + } else if (block_type == BINK_BLOCK_INTRA) { + dct_block[0] = *(s16 PTR4*)intra_dc.cur; + intra_dc.cur += BINK_DC_BYTES; + ReadBPLossless(dct_block, (BPBITSTREAM PTR4*)&bitstate); + quant = exp_get_bits(&bitstate, BINK_DCT_QUANT_BITS); + FastIDCT8x8d(dest, pitch, dct_block, quant); + } + BINK_MARK_WORK_BLOCK(work_row, work_col); + col += BINK_BLOCK_SIDE; + dest += BINK_BLOCK_SIDE; + old += BINK_BLOCK_SIDE; + work_col += plane; + BINK_MARK_WORK_BLOCK(work_row, work_col); + break; + } + + col += BINK_BLOCK_SIDE; + dest += BINK_BLOCK_SIDE; + old += BINK_BLOCK_SIDE; + work_col += plane; + } + + if (plane == BINK_LUMA_PLANE_SCALE) { + if (BINK_BLOCK_ODD_ROW(row)) { + work_row += work_pitch; + } + } else { + work_row += work_pitch; + } + row += BINK_BLOCK_SIDE; + dest = out + row * pitch; + old = prev + row * pitch; + } while (row < height); + } + + if (bitstate.bitlen != 0) { + bitstate.cur++; + } + return bitstate.cur; +} + +void ExpandBink(u8 PTR4* yout, + u8 PTR4* yprev, + u8 PTR4* aout, + u8 PTR4* aprev, + u8 PTR4* work, + u32 width, + u32 height, + u32 pitch, + u32 uvpitch, + u32 PTR4* bundles, + u32 key_frame, + BUNDLEPOINTERS PTR4* table, + u32 yflags, + u32 aflags) +{ + u32 PTR4* next; + u32 uv_width; + u32 uv_height; + u32 uv_size; + + if ((aflags & BINKALPHA) != 0) { + if ((yflags & BINKALPHA) != 0) { + ExpandPlane(aout, aprev, BINK_BLOCK_ROUND(width), BINK_BLOCK_ROUND(height), + pitch, bundles + 1, key_frame, work, BINK_LUMA_PLANE_SCALE, table, + yflags); + } + bundles = (u32 PTR4*)((u8 PTR4*)bundles + *bundles); + } + + if ((yflags & BINKOLDFRAMEFORMAT) == 0) { + bundles++; + } + + next = ExpandPlane(yout, yprev, BINK_BLOCK_ROUND(width), BINK_BLOCK_ROUND(height), + pitch, bundles, key_frame, work, BINK_LUMA_PLANE_SCALE, table, yflags); + if ((yflags & BINKOLDFRAMEFORMAT) == 0) { + next = (u32 PTR4*)((u8 PTR4*)bundles + bundles[-1] - EXP_WORD_BYTES); + } + + if ((yflags & 
BINKGRAYSCALE) == 0) { + yout = yout + pitch * uvpitch; + yprev = yprev + pitch * uvpitch; + uvpitch >>= BINK_CHROMA_SHIFT; + uv_width = BINK_BLOCK_ROUND(BINK_CHROMA_ROUND(width)); + uv_height = BINK_BLOCK_ROUND(BINK_CHROMA_ROUND(height)); + pitch >>= BINK_CHROMA_SHIFT; + next = ExpandPlane(yout, yprev, uv_width, uv_height, pitch, next, key_frame, work, + BINK_CHROMA_PLANE_SCALE, table, yflags); + uv_size = pitch * uvpitch; + ExpandPlane(yout + uv_size, yprev + uv_size, uv_width, uv_height, pitch, + next, key_frame, work, BINK_CHROMA_PLANE_SCALE, table, yflags); + } +} diff --git a/src/bink/src/sdk/decode/expand.h b/src/bink/src/sdk/decode/expand.h new file mode 100644 index 000000000..7cb9169ad --- /dev/null +++ b/src/bink/src/sdk/decode/expand.h @@ -0,0 +1,27 @@ +#ifndef BINK_SDK_DECODE_EXPAND_H +#define BINK_SDK_DECODE_EXPAND_H + +#include "bink.h" + +enum BINKBUNDLETYPE +{ + /* Bink video keeps each coded value source in a separate decoded bundle. */ + BINK_BUNDLE_BLOCK_TYPES, + BINK_BUNDLE_SUBBLOCK_TYPES, + BINK_BUNDLE_COLORS, + BINK_BUNDLE_PATTERNS, + BINK_BUNDLE_X_OFFSETS, + BINK_BUNDLE_Y_OFFSETS, + BINK_BUNDLE_INTRA_DC, + BINK_BUNDLE_INTER_DC, + BINK_BUNDLE_RUNS, + BINK_BUNDLE_COUNT +}; + +void ExpandBundleSizes(u32 PTR4* sizes, u32 rows); +void ExpandBink(u8 PTR4* yout, u8 PTR4* yprev, u8 PTR4* aout, u8 PTR4* aprev, + u8 PTR4* work, u32 width, u32 height, u32 pitch, u32 uvpitch, + u32 PTR4* bundles, u32 key_frame, BUNDLEPOINTERS PTR4* table, u32 yflags, + u32 aflags); + +#endif diff --git a/src/bink/src/sdk/decode/ngc/binkngc.c b/src/bink/src/sdk/decode/ngc/binkngc.c index 318fd7532..e5679cf35 100644 --- a/src/bink/src/sdk/decode/ngc/binkngc.c +++ b/src/bink/src/sdk/decode/ngc/binkngc.c @@ -1,49 +1,253 @@ #include "binkngc.h" #include "bink.h" #include "dolphin/os/OSAlloc.h" +#include "dolphin/os/OSTime.h" +static RADMEMALLOC usermalloc = NULL; +static RADMEMFREE userfree = NULL; +static RADMEMALLOC userarammalloc = NULL; +static RADMEMFREE useraramfree = 
NULL; -RADMEMALLOC usermalloc = NULL; -RADMEMFREE userfree = NULL; -RADMEMALLOC userarammalloc = NULL; -RADMEMFREE useraramfree = NULL; +/* OSGetTime runs at the GameCube timer clock: 40.5 MHz, or 40500 ticks/ms. */ +#define RAD_TIMER_TICKS_PER_MS 40500 +#define RAD_TIMER_HIGH_QUOTIENT 0x19e40 +/* Reciprocal multiply constants for the remaining 64-bit ticks-to-ms division. */ +#define RAD_TIMER_RECIP_MAGIC 0xCF2049A1 +#define RAD_TIMER_RECIP_SHIFT 15 +#define RAD_DIV_RECIP_NUMERATOR 0xFFFFFFFF +#define RAD_DIV_ROUND_TO_HIGH_WORD 0xFFFF +#define RAD_DIV_IS_POWER_OF_TWO(value) (((value) & ((value) - 1)) == 0) +#define RAD_DIV_HIGH_WORD_CEIL(value) (((value) + RAD_DIV_ROUND_TO_HIGH_WORD) >> 16) +#define RAD_ALLOC_ALIGNMENT 0x20 +#define RAD_ALLOC_ALIGNMENT_MASK (RAD_ALLOC_ALIGNMENT - 1) +#define RAD_ALLOC_HEADER_SIZE 0x40 +#define RAD_ALLOC_OFFSET_MASK 0xFF +#define RAD_INVALID_ALLOC_SIZE 0xFFFFFFFF +#define RAD_INVALID_USER_ALLOC ((void PTR4*)-1) +#define RAD_MEMSET16_PER_WORD 2 +#define RAD_MEMSET16_WORD_SHIFT 16 +#define RAD_TIMEBASE_HIGH_WORD 0 +#define RAD_TIMEBASE_LOW_WORD 1 +typedef enum RADAllocOwner +{ + RAD_ALLOC_SYSTEM_OWNED = 0, + RAD_ALLOC_USER_OWNED = 3 +} RADAllocOwner; + +typedef struct RADAllocPrefix +{ + RADMEMFREE custom_free; /* Only valid for user-owned allocations. */ + u16 reserved; + u8 owner; /* RAD_ALLOC_SYSTEM_OWNED or RAD_ALLOC_USER_OWNED. */ + u8 offset; /* Bytes from the raw allocation to the aligned user pointer. 
*/ +} RADAllocPrefix; + +typedef struct RADTimebase +{ + u32 high; + u32 low; +} RADTimebase; + +#define RAD_ALLOC_PREFIX(ptr) (((RADAllocPrefix PTR4*)(ptr)) - 1) +#define RAD_ALLOC_PREFIX_CUSTOM_FREE(ptr) (RAD_ALLOC_PREFIX(ptr)->custom_free) +#define RAD_ALLOC_PREFIX_OWNER(ptr) (RAD_ALLOC_PREFIX(ptr)->owner) +#define RAD_ALLOC_PREFIX_OFFSET(ptr) (RAD_ALLOC_PREFIX(ptr)->offset) +#define RAD_ALLOC_BASE(ptr) ((u8 PTR4*)(ptr) - RAD_ALLOC_PREFIX_OFFSET(ptr)) + +static inline u32 radcntlzw(u32 value) +{ + u32 result; + __asm__("cntlzw %0, %1" : "=r"(result) : "r"(value)); + return result; +} -void radmemset16(void* dest, u16 value, u32 size) { - int half_size = size >> 1; - int sprayed_value = (value << 16) | value; - u16* d16 = dest; - u32* d32 = dest; +u32 mult64anddiv(u32 left, u32 right, u32 divisor) +{ + u32 hi; + u32 lo; + u32 quotient; + + __asm__("mulhwu %0, %2, %3\n\tmullw %1, %2, %3" : "=&r"(hi), "=&r"(lo) : "r"(left), "r"(right)); + + /* Fast path for exact power-of-two divisors after the 64-bit multiply. */ + if (RAD_DIV_IS_POWER_OF_TWO(divisor)) { + u32 clz = radcntlzw(divisor); + lo >>= (31 - clz); + hi <<= (clz + 1); + return lo | hi; + } + + { + u32 recip = RAD_DIV_RECIP_NUMERATOR / divisor; + u32 upper = RAD_DIV_HIGH_WORD_CEIL(divisor); + + quotient = 0; + + /* Estimate the high-word quotient first so the correction loop is small. 
*/ + if (upper != 0) { + u32 clz = radcntlzw(hi); + u32 est = (hi << clz) / upper; + s32 adj = 16 - (s32)clz; + s32 sign = adj >> 31; + u32 rshift = (u32)(-(s32)adj) & (u32)sign; + u32 lshift = (u32)adj & ~(u32)sign; + est = (est >> rshift) << lshift; + quotient = est; + { + u32 prod_hi, prod_lo; + __asm__("mulhwu %0, %2, %3\n\tmullw %1, %2, %3" : "=&r"(prod_hi), "=&r"(prod_lo) : "r"(est), "r"(divisor)); + __asm__("subfc %0, %3, %0\n\tsubfe %1, %2, %1" : "+r"(lo), "+r"(hi) : "r"(prod_hi), "r"(prod_lo)); + } + } + + while (hi != 0) { + u32 step = recip * hi; + u32 prod_hi, prod_lo; + __asm__("mulhwu %0, %2, %3\n\tmullw %1, %2, %3" : "=&r"(prod_hi), "=&r"(prod_lo) : "r"(step), "r"(divisor)); + __asm__("subfc %0, %3, %0\n\tsubfe %1, %2, %1" : "+r"(lo), "+r"(hi) : "r"(prod_hi), "r"(prod_lo)); + quotient += step; + } + + quotient += lo / divisor; + } + + return quotient; +} + +u32 mult64andshift(u32 left, u32 right, u32 shift) +{ + u32 hi, lo; + __asm__("mulhwu %1, %2, %3\n\tmullw %0, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(left), "r"(right)); + lo >>= shift; + hi <<= (32 - shift); + return lo | hi; +} + +void radmemset16(void PTR4* dest, u16 value, u32 size) { + int half_size = size >> (RAD_MEMSET16_PER_WORD - 1); + int sprayed_value = (value << RAD_MEMSET16_WORD_SHIFT) | value; + u16 PTR4* d16 = dest; + u32 PTR4* d32 = dest; while (half_size--) { *d32++ = sprayed_value; } - d16 = (U16*)d32; + d16 = (u16 PTR4*)d32; if ((size & 1)) *d16 = value; } +u32 RADTimerRead(void) +{ + static OSTime starttime; + OSTime now; + u32 quotient; + u32 high_prod; + u32 low_prod; + + now = OSGetTime(); + + if (starttime == 0) { + starttime = now; + } + + now -= starttime; + /* Convert elapsed OS ticks to milliseconds without a full 64-bit divide. 
*/ + quotient = (u32)(now >> 32) * RAD_TIMER_HIGH_QUOTIENT; + now -= (u64)RAD_TIMER_TICKS_PER_MS * quotient; + low_prod = (u32)(((u64)(u32)now * RAD_TIMER_RECIP_MAGIC) >> 32); + high_prod = (u32)(now >> 32) * RAD_TIMER_RECIP_MAGIC; + + return quotient + ((low_prod + high_prod) >> RAD_TIMER_RECIP_SHIFT); +} + +static inline void radtimebase(RADTimebase PTR4* dest) +{ + u32 h1, l, h2; + /* Read TBU/TBL/TBU until the high word is stable across the low-word read. */ + __asm__ volatile( + "0:\n\t" + "mftb %0, 269\n\t" + "mftb %1, 268\n\t" + "mftb %2, 269\n\t" + "cmpw %0, %2\n\t" + "bne 0b\n\t" + "stw %0, 0(%3)\n\t" + "stw %1, 4(%3)" + : "=&r"(h1), "=&r"(l), "=r"(h2) + : "r"(dest) + : "memory" + ); +} + +void RADCycleTimerStartAddr(u32 PTR4* dest) +{ + RADTimebase tb; + radtimebase(&tb); + *dest = tb.low; +} + +u32 RADCycleTimerDeltaAddr(u32 PTR4* dest) +{ + RADTimebase tb; + u32 delta; + radtimebase(&tb); + delta = tb.low - *dest; + *dest = delta; + return delta; +} + +void RADCycleTimerStartAddr64(u64 PTR4* dest) +{ + radtimebase((RADTimebase PTR4*)dest); +} + +void RADCycleTimerDeltaAddr64(u64 PTR4* dest) +{ + RADTimebase tb; + u32 PTR4* delta = (u32 PTR4*)dest; + u32 prevhi, prevlo, nowhi, nowlo; + u32 dlo, dhi; + radtimebase(&tb); + nowlo = tb.low; + prevhi = delta[RAD_TIMEBASE_HIGH_WORD]; + prevlo = delta[RAD_TIMEBASE_LOW_WORD]; + nowhi = tb.high; + /* Store the unsigned 64-bit delta in the same high/low memory layout. 
*/ + __asm__("subfc %0, %2, %4\n\tsubfe %1, %3, %5" : "=r"(dlo), "=r"(dhi) : "r"(prevlo), "r"(prevhi), "r"(nowlo), "r"(nowhi)); + delta[RAD_TIMEBASE_LOW_WORD] = dlo; + delta[RAD_TIMEBASE_HIGH_WORD] = dhi; +} + +void ReadTimeBase(u32 PTR4* dest) +{ + radtimebase((RADTimebase PTR4*)dest); +} + void RADSetMemory(RADMEMALLOC malloc_fn, RADMEMFREE free_fn) { usermalloc = malloc_fn; userfree = free_fn; } -void* radmalloc(u32 size) +void PTR4* radmalloc(u32 size) { u32 request; - void *rawBlock; + void PTR4* rawBlock; u8 fromUser; u32 addr; u32 offset; - u8 *aligned; - if (size == 0 || size == 0xFFFFFFFF) + u8 PTR4* aligned; + + if (size == 0 || size == RAD_INVALID_ALLOC_SIZE) return 0; - request = size + 0x40; + /* Reserve room for the prefix and enough slack to return a 32-byte aligned pointer. */ + request = size + RAD_ALLOC_HEADER_SIZE; if (usermalloc != 0 && (rawBlock = usermalloc(request))) { - if (rawBlock != 0 && rawBlock != (void *)-1) { - fromUser = 3; + if (rawBlock != 0 && rawBlock != RAD_INVALID_USER_ALLOC) { + fromUser = RAD_ALLOC_USER_OWNED; } else { return 0; } @@ -52,34 +256,33 @@ void* radmalloc(u32 size) if (rawBlock == 0) { return 0; } - fromUser= 0; + fromUser= RAD_ALLOC_SYSTEM_OWNED; } addr = (u32)rawBlock; - offset = (u32)(0x40 - (addr & 0x1F)) & 0xFF; - aligned = (u8 *)rawBlock + offset; - aligned[-1] = (u8)offset; - aligned[-2] = fromUser; - if (fromUser == 3) - *(void **)(aligned - 8) = (void *)userfree; + offset = (u32)(RAD_ALLOC_HEADER_SIZE - (addr & RAD_ALLOC_ALIGNMENT_MASK)) & RAD_ALLOC_OFFSET_MASK; + aligned = (u8 PTR4*)rawBlock + offset; + RAD_ALLOC_PREFIX(aligned)->offset = (u8)offset; + RAD_ALLOC_PREFIX(aligned)->owner = fromUser; + if (fromUser == RAD_ALLOC_USER_OWNED) + RAD_ALLOC_PREFIX(aligned)->custom_free = userfree; return aligned; } -void radfree(void* ptr) +void radfree(void PTR4* ptr) { - u8* ptrU8 = (u8*)ptr; - u32* ptrU32 = (u32*)ptr; + u8 PTR4* ptrU8 = (u8 PTR4*)ptr; + RADMEMFREE custom_free; - void (*customFree)(void*); if 
(ptr) { - if ((ptrU8[-2]) == 3) + if (RAD_ALLOC_PREFIX_OWNER(ptrU8) == RAD_ALLOC_USER_OWNED) { - customFree = (void*)(ptrU32[-2]); - customFree(ptrU8 - ptrU8[-1]); + custom_free = RAD_ALLOC_PREFIX_CUSTOM_FREE(ptrU8); + custom_free(RAD_ALLOC_BASE(ptrU8)); } else { - OSFreeToHeap((void*)__OSCurrHeap, ptrU8 - ptrU8[-1]); + OSFreeToHeap(__OSCurrHeap, RAD_ALLOC_BASE(ptrU8)); } } } @@ -89,15 +292,63 @@ void RADSetAudioMemory(RADMEMALLOC malloc_fn, RADMEMFREE free_fn) { useraramfree = free_fn; } -void* radaudiomalloc(u32 size) { +void PTR4* radaudiomalloc(u32 size) { + /* Bink's GameCube sound path expects the client to provide ARAM allocation hooks. */ if (userarammalloc) { return userarammalloc(size); } return NULL; } -void radaudiofree(void* ptr) { +void radaudiofree(void PTR4* ptr) { if (useraramfree) { useraramfree(ptr); } } + +u32 div64(u32 high, u32 low, u32 divisor) +{ + u32 hi = high; + u32 quotient; + + /* 64-bit numerator, 32-bit divisor helper used by the Bink platform layer. */ + if (RAD_DIV_IS_POWER_OF_TWO(divisor)) { + u32 clz = radcntlzw(divisor); + return (low >> (31 - clz)) | (hi << (clz + 1)); + } + + { + u32 recip = RAD_DIV_RECIP_NUMERATOR / divisor; + u32 upper = RAD_DIV_HIGH_WORD_CEIL(divisor); + + quotient = 0; + + if (upper != 0) { + u32 clz = radcntlzw(hi); + u32 est = (hi << clz) / upper; + s32 adj = 16 - (s32)clz; + s32 sign = adj >> 31; + u32 rshift = (u32)(-(s32)adj) & (u32)sign; + u32 lshift = (u32)adj & ~(u32)sign; + est = (est >> rshift) << lshift; + quotient = est; + { + u32 prod_hi, prod_lo; + __asm__("mulhwu %0, %2, %3\n\tmullw %1, %2, %3" : "=&r"(prod_hi), "=&r"(prod_lo) : "r"(est), "r"(divisor)); + __asm__("subfc %0, %3, %0\n\tsubfe %1, %2, %1" : "+r"(low), "+r"(hi) : "r"(prod_hi), "r"(prod_lo)); + } + } + + while (hi != 0) { + u32 step = recip * hi; + u32 prod_hi, prod_lo; + __asm__("mulhwu %0, %2, %3\n\tmullw %1, %2, %3" : "=&r"(prod_hi), "=&r"(prod_lo) : "r"(step), "r"(divisor)); + __asm__("subfc %0, %3, %0\n\tsubfe %1, %2, %1" : 
"+r"(low), "+r"(hi) : "r"(prod_hi), "r"(prod_lo)); + quotient += step; + } + + quotient += low / divisor; + } + + return quotient; +} diff --git a/src/bink/src/sdk/decode/ngc/ngcfile.c b/src/bink/src/sdk/decode/ngc/ngcfile.c index e69de29bb..492ffd28f 100644 --- a/src/bink/src/sdk/decode/ngc/ngcfile.c +++ b/src/bink/src/sdk/decode/ngc/ngcfile.c @@ -0,0 +1,584 @@ +#include "bink.h" +#include "binkngc.h" +#include "ngcfile.h" +#include "ngcrgb.h" + +#include +#include +#include + +void PTR4* memmove(void PTR4* dest, const void PTR4* src, u32 len); + +typedef enum NGCBinkFileOwnership +{ + NGC_FILE_OWNS_DVD, + NGC_FILE_BORROWS_DVD +} NGCBinkFileOwnership; + +typedef struct NGCBinkIOData +{ + DVDFileInfo file; + u32 read_cursor; /* file-relative byte offset scheduled/read from DVD */ + u32 align_extra; /* extra bytes requested to satisfy DVD alignment */ + u32 consume_cursor; /* file-relative byte offset consumed by Bink */ + u8 PTR4* read_ptr; /* ring-buffer read pointer */ + u32 free_size; /* free bytes remaining in the ring buffer */ + u8 PTR4* buffer; + u8 PTR4* buffer_end; + u8 PTR4* write_ptr; /* ring-buffer write pointer for async DVD fills */ + NGCBinkFileOwnership borrowed_file; + u32 file_offset; /* base offset when reading from a borrowed DVDFileInfo */ + u32 file_size; + u32 simulate_rate; /* nonzero throttles reads to this byte/sec rate */ + s32 simulate_delay; /* accumulated milliseconds still to wait */ + BOOL cancel_read; /* suppresses async callback accounting while canceling DVD reads */ + BOOL callback_working; /* snapshot of BINKIO.Working when the async read was kicked */ + u32 read_start_time; /* RADTimerRead value for the active read */ + BINKIO PTR4* owner; /* parent BINKIO recovered from DVDFileInfo callback argument */ +} NGCBinkIOData; + +typedef char NGCBinkIODataFitsInBinkIOData + [(sizeof(NGCBinkIOData) <= sizeof(((BINKIO PTR4*)0)->iodata)) ? 
1 : -1]; + +u32 ytable[RGB_LUMA_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 ytable_x4[RGB_LUMA_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 PTR4* clamp_ytable[RGB_LUMA_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 clamptable[RGB_CLAMP_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 clamp_a4[RGB_LUMA_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 clamp_r[RGB_CLAMP_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 clamp_g[RGB_CLAMP_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 clamp_b[RGB_CLAMP_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 clamp_rh[RGB_CLAMP_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 clamp_gh[RGB_CLAMP_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 clamp_bh[RGB_CLAMP_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 clamp_rr[RGB_CLAMP_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 clamp_gg[RGB_CLAMP_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 clamp_bb[RGB_CLAMP_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 mono16[RGB_MONO_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 mono16x2[RGB_MONO_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 mono32[RGB_MONO_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +RGBContext S RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +s32 YUVTables[YUV_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; +u32 RGBshift[RGB_SHIFT_TABLE_SIZE] RAD_ATTRIBUTE_ALIGN(NGC_TABLE_ALIGNMENT) = { 0 }; + +#define NGC_DATA(io) ((NGCBinkIOData PTR4*)((io)->iodata)) +#define NGC_DVD(io) (&NGC_DATA(io)->file) +#define NGC_READ_CURSOR(io) (NGC_DATA(io)->read_cursor) +#define NGC_ALIGN_EXTRA(io) (NGC_DATA(io)->align_extra) +#define NGC_CONSUME_CURSOR(io) (NGC_DATA(io)->consume_cursor) +#define 
NGC_READ_PTR(io) (NGC_DATA(io)->read_ptr) +#define NGC_FREE_SIZE(io) (NGC_DATA(io)->free_size) +#define NGC_BUFFER(io) (NGC_DATA(io)->buffer) +#define NGC_BUFFER_END(io) (NGC_DATA(io)->buffer_end) +#define NGC_WRITE_PTR(io) (NGC_DATA(io)->write_ptr) +#define NGC_BORROWED_FILE(io) (NGC_DATA(io)->borrowed_file) +#define NGC_FILE_OFFSET(io) (NGC_DATA(io)->file_offset) +#define NGC_FILE_SIZE(io) (NGC_DATA(io)->file_size) +#define NGC_SIMULATE_RATE(io) (NGC_DATA(io)->simulate_rate) +#define NGC_SIMULATE_DELAY(io) (NGC_DATA(io)->simulate_delay) +#define NGC_CANCEL_READ(io) (NGC_DATA(io)->cancel_read) +#define NGC_CALLBACK_WORKING(io) (NGC_DATA(io)->callback_working) +#define NGC_READ_START_TIME(io) (NGC_DATA(io)->read_start_time) +#define NGC_OWNER(io) (NGC_DATA(io)->owner) +#define NGC_VOLATILE_U32(field) (*(volatile u32 PTR4*)&(field)) + +#define NGC_DVD_ALIGNMENT DVD_MIN_TRANSFER_SIZE +#define NGC_DVD_ALIGN_MASK (NGC_DVD_ALIGNMENT - 1) +#define NGC_ALIGN_UP(value, mask) (((value) + (mask)) & ~(mask)) +#define NGC_ALIGN_DOWN(value, mask) ((value) & ~(mask)) +#define NGC_BYTES_LEFT_TO_READ(io) (NGC_FILE_SIZE(io) - NGC_READ_CURSOR(io)) +#define NGC_BYTES_LEFT_TO_CONSUME(io) (NGC_FILE_SIZE(io) - NGC_CONSUME_CURSOR(io)) +#define NGC_STACK_READ_PAYLOAD_SIZE 0x400 +#define NGC_STACK_READ_BUFFER_SIZE (NGC_STACK_READ_PAYLOAD_SIZE + NGC_DVD_ALIGNMENT) +#define NGC_READ_BLOCK_SIZE 0x1000 +#define NGC_READ_BLOCK_MASK (NGC_READ_BLOCK_SIZE - 1) +#define NGC_ASYNC_WHOLE_BLOCK NGC_READ_BLOCK_SIZE +#define NGC_DVD_PRIORITY_LOW 0 +#define NGC_MILLISECONDS_PER_SECOND 1000 +#define BINK_FILE_CURRENT_OFFSET -1 +#define NGC_DVD_STATUS_FAILED(status) \ + ((status) <= DVD_STATE_IGNORED ? 
\ + (DVD_STATE_COVER_CLOSED <= (status) || (status) == DVD_STATE_FATAL_ERROR) : \ + (status) == DVD_STATE_RETRY) + +static void ReadKickoff(BINKIO PTR4* io); + +/* Byte-reverses each 32-bit word of ptr in place (lwbrx load, stw store). size is rounded up to whole words, and a zero word count returns before the loop (the .long presumably encodes beqlr - TODO confirm). */ static void intelendian(void PTR4* ptr, u32 size) +{ + register void PTR4* r3 __asm__("r3") = ptr; + register u32 r4 __asm__("r4") = size; + + __asm__ volatile( + "addi 4,4,3\n\t" + "srwi 4,4,2\n\t" + "cmpwi 4,0\n\t" + "subi 4,4,1\n\t" + ".long 0x4D820020\n\t" + "li 9,0\n\t" + "0:\n\t" + "lwbrx 0,9,3\n\t" + "cmpwi 4,0\n\t" + "stw 0,0(3)\n\t" + "subi 4,4,1\n\t" + "addi 3,3,4\n\t" + "bne 0b" + : "+r"(r3), "+r"(r4) + : + : "r0", "r9", "memory"); +} + +/* Blocking DVD read of size bytes at offset into dest, staged through an aligned bounce buffer on the stack in chunks of at most max_size. Returns the number of bytes copied, or 0 as soon as the command block reports one of the failure states checked below. */ static u32 radreadngc(DVDFileInfo PTR4* file, u32 offset, void PTR4* dest, u32 size) +{ + u8 buffer[NGC_STACK_READ_BUFFER_SIZE]; + u8 PTR4* aligned = (u8 PTR4*)NGC_ALIGN_UP((u32)buffer, NGC_DVD_ALIGN_MASK); + u32 max_size = NGC_ALIGN_DOWN(buffer + sizeof(buffer) - 1 - aligned, NGC_DVD_ALIGN_MASK); + u8 PTR4* out = dest; + u32 total = 0; + + while (size != 0) { + u32 read_size; + s32 status; + + if (size >= max_size) { + read_size = max_size; + } else { + read_size = NGC_ALIGN_UP(size, NGC_DVD_ALIGN_MASK); + } + + DVDReadAsyncPrio(file, aligned, read_size, offset, 0, NGC_DVD_PRIORITY_LOW); + + /* Poll until the command ends; give up with 0 on DVD_STATE_FATAL_ERROR or on a status in (DVD_STATE_WAITING, DVD_STATE_RETRY]. */ do { + status = DVDGetCommandBlockStatus(&file->cb); + if (status != DVD_STATE_END) { + if (status <= DVD_STATE_END) { + if (DVD_STATE_FATAL_ERROR == status) { + return 0; + } + } else if (status <= DVD_STATE_RETRY) { + if (DVD_STATE_WAITING < status) { + return 0; + } + } + } + } while (status != DVD_STATE_END); + + /* The request may have been rounded up for alignment; copy only what the caller asked for. */ if (read_size > size) { + read_size = size; + } + + memcpy(out, aligned, read_size); + out += read_size; + total += read_size; + offset += read_size; + size -= read_size; + } + + return total; +} + +/* BINKIO ReadHeader hook: optionally repositions the read cursor (offset != BINK_FILE_CURRENT_OFFSET), reads synchronously at read_cursor + file_offset, sets the consume cursor equal to the advanced read cursor, refreshes io->CurBufSize and byte-swaps dest. Returns the number of bytes read. */ static u32 BinkFileReadHeader(BINKIO PTR4* io, s32 offset, void PTR4* dest, u32 size) +{ + u32 amount; + u32 cursor; + u32 remaining; + u32 limit; + + if (offset != BINK_FILE_CURRENT_OFFSET && NGC_READ_CURSOR(io) != (u32)offset) { + NGC_READ_CURSOR(io) = offset; + }
+ + amount = radreadngc(NGC_DVD(io), NGC_READ_CURSOR(io) + NGC_FILE_OFFSET(io), dest, size); + cursor = NGC_READ_CURSOR(io) + amount; + NGC_VOLATILE_U32(NGC_READ_CURSOR(io)) = cursor; + cursor = NGC_VOLATILE_U32(NGC_READ_CURSOR(io)); + limit = io->BufSize; + remaining = NGC_FILE_SIZE(io) - cursor; + NGC_VOLATILE_U32(NGC_CONSUME_CURSOR(io)) = cursor; + + /* CurBufSize becomes the smaller of the bytes left in the file and the ring size. */ if (remaining >= limit) { + remaining = io->BufSize; + } + + io->CurBufSize = remaining; + intelendian(dest, amount); + return amount; +} + +/* Bandwidth throttle: adds the time read_size bytes should take at NGC_SIMULATE_RATE(io) bytes/sec (via mult64anddiv), minus the time already elapsed since start, to simulate_delay, then busy-waits on RADTimerRead until that delay is used up. Assumes RADTimerRead counts milliseconds - TODO confirm. */ static void dosimulate(BINKIO PTR4* io, u32 read_size, u32 start) +{ + u32 last; + u32 elapsed; + s32 delay; + + delay = mult64anddiv(read_size, NGC_MILLISECONDS_PER_SECOND, NGC_SIMULATE_RATE(io)); + last = RADTimerRead(); + elapsed = last - start; + delay -= elapsed; + NGC_SIMULATE_DELAY(io) += delay; + + while (NGC_SIMULATE_DELAY(io) > 0) { + u32 now; + + do { + now = RADTimerRead(); + } while ((s32)(now - last) < NGC_SIMULATE_DELAY(io)); + + NGC_SIMULATE_DELAY(io) -= now - last; + last = now; + } +} + +/* Completion callback passed to DVDReadAsyncPrio by ReadKickoff. Unless a cancel is in progress it validates the transfer length, trims alignment padding, advances the ring-buffer write state and the timing counters, then calls ReadKickoff to queue the next block. */ static void DVDReadCallback(s32 result, DVDFileInfo PTR4* fileInfo) +{ + /* DVDFileInfo is the first field of NGCBinkIOData, so the callback can recover owner. */ + BINKIO PTR4* io = ((NGCBinkIOData PTR4*)fileInfo)->owner; + + if (NGC_CANCEL_READ(io) == 0) { + u32 extra; + + /* Anything other than a whole block must be exactly the final tail of the file plus its alignment padding; otherwise flag a read error. */ if (result != NGC_ASYNC_WHOLE_BLOCK) { + if (NGC_BYTES_LEFT_TO_READ(io) + NGC_ALIGN_EXTRA(io) != (u32)result) { + io->DoingARead = 0; + io->ReadError = 1; + return; + } + } + + extra = NGC_ALIGN_EXTRA(io); + if (extra != 0) { + /* Final DVD reads are aligned upward; drop the leading alignment padding.
*/ + memmove(NGC_WRITE_PTR(io), NGC_WRITE_PTR(io) + extra, + NGC_BYTES_LEFT_TO_READ(io)); + extra = NGC_ALIGN_EXTRA(io); + NGC_ALIGN_EXTRA(io) = 0; + result -= extra; + } + + io->BytesRead += result; + { + u8 PTR4* write = NGC_WRITE_PTR(io) + result; + u32 cursor = NGC_VOLATILE_U32(NGC_READ_CURSOR(io)) + result; + + NGC_VOLATILE_U32(NGC_READ_CURSOR(io)) = cursor; + NGC_WRITE_PTR(io) = write; + + /* Wrap the write pointer back to the ring base once it reaches the end. */ if ((u32)write >= (u32)NGC_BUFFER_END(io)) { + NGC_WRITE_PTR(io) = NGC_BUFFER(io); + } + } + + NGC_VOLATILE_U32(NGC_FREE_SIZE(io)) = NGC_FREE_SIZE(io) - result; + io->CurBufUsed += result; + if (io->CurBufUsed > io->BufHighUsed) { + io->BufHighUsed = io->CurBufUsed; + } + + if (NGC_SIMULATE_RATE(io) != 0) { + dosimulate(io, result, NGC_READ_START_TIME(io)); + } + + { + u32 elapsed = RADTimerRead() - NGC_READ_START_TIME(io); + + /* read_start_time is reused to hold the duration here; ReadKickoff stores a fresh timestamp before it issues the next read. */ NGC_READ_START_TIME(io) = elapsed; + io->TotalTime += elapsed; + + if (NGC_CALLBACK_WORKING(io) != 0 || io->Working != 0) { + io->ThreadTime += elapsed; + } else { + io->IdleTime += elapsed; + } + } + + io->DoingARead = 0; + ReadKickoff(io); + } +} + +/* Issues the next asynchronous block read when the DVD command block is in DVD_STATE_END or DVD_STATE_CANCELED, no cancel is in progress, the ring has at least NGC_READ_BLOCK_SIZE bytes free and file data remains. Sets io->ReadError on a failed status; when no read can be issued because the ring is full or the file is exhausted, it sets io->CurBufSize to io->CurBufUsed. */ static void ReadKickoff(BINKIO PTR4* io) +{ + s32 status = DVDGetCommandBlockStatus(&NGC_DVD(io)->cb); + u32 remaining = NGC_BYTES_LEFT_TO_READ(io); + + if (NGC_DVD_STATUS_FAILED(status)) { + io->ReadError = 1; + return; + } + + if (NGC_CANCEL_READ(io) == 0 && + (status == DVD_STATE_END || status == DVD_STATE_CANCELED)) { + if (NGC_FREE_SIZE(io) < NGC_READ_BLOCK_SIZE) { + io->CurBufSize = io->CurBufUsed; + } else if (remaining != 0) { + NGC_CALLBACK_WORKING(io) = io->Working; + NGC_READ_START_TIME(io) = RADTimerRead(); + io->DoingARead = 1; + + if (remaining > NGC_READ_BLOCK_SIZE) { + remaining = NGC_READ_BLOCK_SIZE; + } else if ((remaining & NGC_DVD_ALIGN_MASK) != 0) { + /* DVD reads must be aligned, so the last request starts early and trims in callback.
*/ + NGC_ALIGN_EXTRA(io) = (-remaining) & NGC_DVD_ALIGN_MASK; + remaining += NGC_ALIGN_EXTRA(io); + } + + DVDReadAsyncPrio(NGC_DVD(io), NGC_WRITE_PTR(io), remaining, + (NGC_READ_CURSOR(io) - NGC_ALIGN_EXTRA(io)) + NGC_FILE_OFFSET(io), + DVDReadCallback, NGC_DVD_PRIORITY_LOW); + } else { + io->CurBufSize = io->CurBufUsed; + } + } +} + +/* BINKIO Idle hook: polls the DVD command block, latches io->ReadError on the failure states tested below, restarts reading through ReadKickoff when the previous command has ended, and returns io->DoingARead (or 0 if ReadError was already set on entry). */ static u32 BinkFileIdle(BINKIO PTR4* io) +{ + s32 status; + + if (io->ReadError != 0) { + return 0; + } + + status = DVDGetCommandBlockStatus(&NGC_DVD(io)->cb); + if (status > DVD_STATE_WAITING) { + if (status == DVD_STATE_CANCELED) { + return io->DoingARead; + } + if (status < DVD_STATE_CANCELED) { + goto read_error; + } + if (status == DVD_STATE_RETRY) { + goto read_error; + } + return io->DoingARead; + } + + if (status >= DVD_STATE_BUSY) { + return io->DoingARead; + } + if (status == DVD_STATE_FATAL_ERROR) { + goto read_error; + } + if (status == DVD_STATE_END) { + ReadKickoff(io); + } + return io->DoingARead; + +read_error: + io->ReadError = 1; + return io->DoingARead; +} + +/* Cancels the DVD command and spins until its status leaves the DVD_STATE_BUSY..DVD_STATE_WAITING range. cancel_read stays set while waiting so that DVDReadCallback and ReadKickoff do nothing; afterwards both cancel_read and io->DoingARead are cleared. */ static void CancelReadRequests(BINKIO PTR4* io) +{ + s32 status; + + NGC_CANCEL_READ(io) = 1; + DVDCancel(&NGC_DVD(io)->cb); + + do { + status = DVDGetCommandBlockStatus(&NGC_DVD(io)->cb); + } while ((u32)(status - DVD_STATE_BUSY) <= (DVD_STATE_WAITING - DVD_STATE_BUSY)); + + NGC_VOLATILE_U32(NGC_CANCEL_READ(io)) = 0; + io->DoingARead = 0; +} + +/* BINKIO ReadFrame hook: optionally seeks to offset, then delivers up to size bytes into dest, either straight from DVD (when no ring buffer is installed) or out of the ring buffer filled by the async reads. Updates the timing counters and io->CurBufSize, byte-swaps the delivered data and returns the byte count. Returns 0 immediately if io->ReadError is already set. frame_num is not used by this function. */ static u32 BinkFileReadFrame(BINKIO PTR4* io, u32 frame_num, s32 offset, void PTR4* dest, u32 size) +{ + u32 start_time; + u32 read_time; + u32 foreground_time; + u32 total = 0; + void PTR4* start_dest = dest; + + if (io->ReadError != 0) { + return 0; + } + + start_time = RADTimerRead(); + + if (offset != BINK_FILE_CURRENT_OFFSET && NGC_CONSUME_CURSOR(io) != (u32)offset) { + if ((u32)offset > NGC_CONSUME_CURSOR(io) && (u32)offset <= NGC_READ_CURSOR(io)) { + BOOL enabled = OSDisableInterrupts(); + u32 skip = offset - NGC_CONSUME_CURSOR(io); + u8 PTR4* ptr; + + /* A forward seek already buffered by DVD can be consumed by
advancing the ring pointer. */ + NGC_VOLATILE_U32(NGC_FREE_SIZE(io)) = NGC_FREE_SIZE(io) + skip; + ptr = NGC_READ_PTR(io) + skip; + NGC_CONSUME_CURSOR(io) = offset; + NGC_READ_PTR(io) = ptr; + io->CurBufUsed -= skip; + /* NOTE(review): this wraps only when ptr is strictly past buffer_end, whereas DVDReadCallback wraps the write pointer with >=; a read_ptr left equal to buffer_end appears to be handled by the first <= amount branch of the copy loop - confirm. */ if ((u32)ptr > (u32)NGC_BUFFER_END(io)) { + NGC_READ_PTR(io) = ptr - io->BufSize; + } + + OSRestoreInterrupts(enabled); + } else { + BOOL enabled; + + /* Seeks outside the buffered window discard pending async state and restart the ring. */ + CancelReadRequests(io); + enabled = OSDisableInterrupts(); + NGC_VOLATILE_U32(NGC_READ_CURSOR(io)) = offset; + NGC_CONSUME_CURSOR(io) = offset; + NGC_VOLATILE_U32(NGC_FREE_SIZE(io)) = io->BufSize; + io->CurBufUsed = 0; + NGC_READ_PTR(io) = NGC_BUFFER(io); + NGC_WRITE_PTR(io) = NGC_BUFFER(io); + OSRestoreInterrupts(enabled); + } + } + + if (NGC_BUFFER(io) == 0) { + /* No ring buffer installed: read synchronously straight into dest. NOTE(review): unlike BinkFileReadHeader and ReadKickoff, this call does not add NGC_FILE_OFFSET(io) to the cursor - confirm that is intended for borrowed files. */ u32 direct_start = RADTimerRead(); + u32 read; + + read = radreadngc(NGC_DVD(io), NGC_READ_CURSOR(io), dest, size); + if (read < size) { + io->ReadError = 1; + } + + NGC_VOLATILE_U32(NGC_READ_CURSOR(io)) = NGC_VOLATILE_U32(NGC_READ_CURSOR(io)) + read; + NGC_CONSUME_CURSOR(io) += read; + io->BytesRead += read; + total = read; + + if (NGC_SIMULATE_RATE(io) != 0) { + dosimulate(io, total, direct_start); + } + + read_time = RADTimerRead(); + io->TotalTime += read_time - direct_start; + foreground_time = io->ForegroundTime + (read_time - start_time); + } else { + /* Buffered path: keep the async reader fed and drain whatever has been delivered; the loop repeats, without sleeping, until size bytes are copied or io->ReadError is set. */ while (size != 0 && io->ReadError == 0) { + u32 amount; + + ReadKickoff(io); + amount = io->CurBufUsed; + if (amount != 0) { + u32 first; + + if (amount > size) { + amount = size; + } + + size -= amount; + total += amount; + NGC_CONSUME_CURSOR(io) += amount; + first = NGC_BUFFER_END(io) - NGC_READ_PTR(io); + + /* Copy to the end of the ring first, then wrap to the buffer base.
*/ + if (first <= amount) { + BOOL enabled; + + memcpy(dest, NGC_READ_PTR(io), first); + dest = (u8 PTR4*)dest + first; + amount -= first; + NGC_READ_PTR(io) = NGC_BUFFER(io); + + /* CurBufUsed and free_size are also written by DVDReadCallback, so they are updated with interrupts disabled (presumably because that callback runs from an interrupt - TODO confirm). */ enabled = OSDisableInterrupts(); + io->CurBufUsed -= first; + NGC_VOLATILE_U32(NGC_FREE_SIZE(io)) = NGC_FREE_SIZE(io) + first; + OSRestoreInterrupts(enabled); + } + + if (amount != 0) { + BOOL enabled; + + memcpy(dest, NGC_READ_PTR(io), amount); + dest = (u8 PTR4*)dest + amount; + NGC_READ_PTR(io) = NGC_READ_PTR(io) + amount; + + enabled = OSDisableInterrupts(); + io->CurBufUsed -= amount; + NGC_VOLATILE_U32(NGC_FREE_SIZE(io)) = NGC_FREE_SIZE(io) + amount; + OSRestoreInterrupts(enabled); + } + } + } + + read_time = RADTimerRead(); + foreground_time = io->ForegroundTime + (read_time - start_time); + } + io->ForegroundTime = foreground_time; + + { + /* CurBufSize is the smaller of the bytes not yet consumed and the ring size. */ u32 cur_buf_size = NGC_BYTES_LEFT_TO_CONSUME(io); + + if (cur_buf_size >= io->BufSize) { + cur_buf_size = io->BufSize; + } + + io->CurBufSize = cur_buf_size; + } + + /* If less than one more read block fits below CurBufSize, report the buffer as holding exactly what is in use. */ if (io->CurBufUsed + NGC_READ_BLOCK_SIZE > io->CurBufSize) { + io->CurBufSize = io->CurBufUsed; + } + + intelendian(start_dest, total); + return total; +} + +/* BINKIO GetBufferSize hook: rounds size up to a multiple of NGC_READ_BLOCK_SIZE; io is not used. */ static u32 BinkFileGetBufferSize(BINKIO PTR4* io, u32 size) +{ + return NGC_ALIGN_UP(size, NGC_READ_BLOCK_MASK); +} + +/* BINKIO SetInfo hook: with interrupts disabled, installs buf as the ring buffer (size rounded down to a multiple of NGC_READ_BLOCK_SIZE), records file_size and the simulate rate, stores the owner back-pointer used by DVDReadCallback, and resets the read/write pointers and io->CurBufUsed. */ static void BinkFileSetInfo(BINKIO PTR4* io, void PTR4* buf, u32 size, u32 file_size, u32 simulate) +{ + u32 aligned_size; + BOOL enabled = OSDisableInterrupts(); + + aligned_size = NGC_ALIGN_DOWN(size, NGC_READ_BLOCK_MASK); + NGC_VOLATILE_U32(NGC_FREE_SIZE(io)) = aligned_size; + io->BufSize = aligned_size; + NGC_FILE_SIZE(io) = file_size; + NGC_SIMULATE_RATE(io) = simulate; + NGC_OWNER(io) = io; + NGC_BUFFER(io) = (u8 PTR4*)buf; + NGC_READ_PTR(io) = (u8 PTR4*)buf; + NGC_WRITE_PTR(io) = (u8 PTR4*)buf; + io->CurBufUsed = 0; + NGC_BUFFER_END(io) = (u8 PTR4*)buf + aligned_size; + + OSRestoreInterrupts(enabled); +} + +/* BINKIO Close hook: stops outstanding reads, then closes the DVD file only when this layer opened it (NGC_FILE_OWNS_DVD). */ static void BinkFileClose(BINKIO PTR4* io) +{ + CancelReadRequests(io); + + if
(NGC_BORROWED_FILE(io) == NGC_FILE_OWNS_DVD) { + DVDClose(NGC_DVD(io)); + } +} + +/* Prepares io for streaming a Bink file from DVD and installs the BINKIO hooks. With BINKFILEHANDLE set in flags, name is treated as a pointer to a DVDFileInfo that is copied and marked borrowed, and the base offset is taken from its cb.userData; otherwise name is a DVD path. Returns 1 on success, 0 if the path cannot be resolved or opened. */ s32 BinkFileOpen(BINKIO PTR4* io, const char PTR4* name, u32 flags) +{ + /* Zeroing io also leaves borrowed_file at NGC_FILE_OWNS_DVD, the first enumerator. */ memset(io, 0, sizeof(*io)); + + if ((flags & BINKFILEHANDLE) != 0) { + *NGC_DVD(io) = *(DVDFileInfo PTR4*)name; + NGC_BORROWED_FILE(io) = NGC_FILE_BORROWS_DVD; + NGC_FILE_OFFSET(io) = (u32)NGC_DVD(io)->cb.userData; + } else { + s32 entry = DVDConvertPathToEntrynum(name); + + if (entry == -1 || DVDFastOpen(entry, NGC_DVD(io)) == 0) { + return 0; + } + } + + io->ReadHeader = BinkFileReadHeader; + io->ReadFrame = BinkFileReadFrame; + io->GetBufferSize = BinkFileGetBufferSize; + io->SetInfo = BinkFileSetInfo; + io->Idle = BinkFileIdle; + io->Close = BinkFileClose; + + return 1; +} diff --git a/src/bink/src/sdk/decode/ngc/ngcfile.h b/src/bink/src/sdk/decode/ngc/ngcfile.h new file mode 100644 index 000000000..5bb90754c --- /dev/null +++ b/src/bink/src/sdk/decode/ngc/ngcfile.h @@ -0,0 +1,8 @@ +#ifndef BINK_DECODE_NGC_NGCFILE_H +#define BINK_DECODE_NGC_NGCFILE_H + +#include "bink.h" + +s32 BinkFileOpen(BINKIO PTR4* io, const char PTR4* name, u32 flags); + +#endif diff --git a/src/bink/src/sdk/decode/ngc/ngcrgb.c b/src/bink/src/sdk/decode/ngc/ngcrgb.c index e69de29bb..455ae9578 100644 --- a/src/bink/src/sdk/decode/ngc/ngcrgb.c +++ b/src/bink/src/sdk/decode/ngc/ngcrgb.c @@ -0,0 +1,1570 @@ +#include "bink.h" +#include "ngcrgb.h" + +// GameCube texture memory is tiled in four-row groups. These helpers convert +// the linear Bink row pointers in S into the swizzled destination addresses +// used by the RGB and alpha 4x2 core kernels.
+#define RGB_TILE_ROWS 4 +#define RGB_TILE_ROW_BITS 2 +#define RGB_TILE_ROW_MASK (RGB_TILE_ROWS - 1) +#define RGB_TILE_ROW_SHIFT 3 +#define RGB_16BIT_TILE_ALIGN_MASK 0x1f +#define RGB_32BIT_TILE_ALIGN_MASK 0x3f +#define RGB_BYTES_PER_PIXEL32 4 +#define RGB_16_4X2_ROW_BYTES 8 +#define RGB_16_X2_4X2_ROW_BYTES 16 +#define RGB_32_4X2_ROW_BYTES 16 +#define RGB_32_X2_4X2_ROW_BYTES 32 +#define RGB_TILE_HALF_BLOCK_WORDS 8 +#define RGB_TILE_BLOCK_WORDS 16 +#define RGB_TILE_X2_BLOCK_WORDS 32 +#define RGB_TILE_PITCH16(pitch) (((pitch) * RGB_BYTES_PER_PIXEL32 + RGB_16BIT_TILE_ALIGN_MASK) & ~RGB_16BIT_TILE_ALIGN_MASK) +#define RGB_TILE_PITCH32(pitch) (((pitch) * RGB_BYTES_PER_PIXEL32 + RGB_32BIT_TILE_ALIGN_MASK) & ~RGB_32BIT_TILE_ALIGN_MASK) +#define RGB_TILE_ROW(ptr, base, pitch) ((s32)((u8 PTR4*)(ptr) - (base)) / (s32)(pitch)) +#define RGB_TILE_ROW_START(base, row, pitch) ((base) + (row) * (pitch)) +#define RGB_TILE_LOC(base, ptr, pitch, tilePitch, row) \ + ((base) + (tilePitch) * ((u32)(row) >> RGB_TILE_ROW_BITS) + \ + (((u32)(row) & RGB_TILE_ROW_MASK) << RGB_TILE_ROW_SHIFT) + \ + (((u8 PTR4*)(ptr) - RGB_TILE_ROW_START((base), (row), (pitch))) << RGB_TILE_ROW_BITS)) +#define RGB_BYTE_MASK 0xff +#define RGB_WORD_BYTE3(word) (((word) >> 24) & RGB_BYTE_MASK) +#define RGB_WORD_BYTE2(word) (((word) >> 16) & RGB_BYTE_MASK) +#define RGB_WORD_BYTE1(word) (((word) >> 8) & RGB_BYTE_MASK) +#define RGB_WORD_BYTE0(word) ((word) & RGB_BYTE_MASK) +#define RGB_WORD_LO_MASK 0x0000ffff +#define RGB_WORD_HI_MASK 0xffff0000 +#define RGB_ALPHA0_MASK 0xff000000 +#define RGB_ALPHA2_MASK 0x0000ff00 + +#define RGB_CLAMP_BIAS 0x100 +#define YUV_V_TO_GB_OFFSET 0x100 +#define YUV_U_TO_GB_OFFSET 0x200 +#define YUV_V_TO_R_OFFSET 0x300 + +#define RGB565(y, r, g, b) \ + ((u16)clamp_b[RGB_CLAMP_BIAS + (y) + (b)] | (u16)clamp_r[RGB_CLAMP_BIAS + (y) + (r)] | \ + (u16)clamp_g[RGB_CLAMP_BIAS + (y) + (g)]) + +#define RGB565_BIASED(cr, cg, cb, y, r, g, b) \ + ((u16)(cb)[(y) + (r)] | (u16)(cr)[(y) + (b)] | 
(u16)(cg)[(y) + (g)]) /* NOTE(review): in RGB565_BIASED the parameter named r indexes cb and b indexes cr; the 16-bit kernels pass the U-derived term as r and the V-derived term as b, so the lookups pair up correctly despite the names. */ +#define RGB565_PAIR(pixel) (((pixel) << 16) | (pixel)) +#define RGB565_A4(y, r, g, b, a) (RGB565((y), (r), (g), (b)) | (u16)clamp_a4[(a)]) +#define RGB565_A4_BIASED(cr, cg, cb, ca, y, r, g, b, a) \ + ((u16)(cb)[(y) + (r)] | (u16)(cr)[(y) + (b)] | (u16)(cg)[(y) + (g)] | (u16)(ca)[(a)]) + +// Core kernels consume two luma rows and one chroma row, producing a 4x2 tile +// chunk. The monochrome paths use mono16/mono32 directly, while color paths +// bias through the YUV contribution tables prepared by YUV_init. +/* Colour kernel, 32-bit output. Each iteration converts 4x2 pixels: one u32 of luma from each of S.y0/S.y1 plus one u16 each from S.u/S.v, so one chroma pair covers two horizontal pixels on both rows. The r lookups are written to dest[n] and the gb/b lookups to dest[n + 8] (presumably the two halves of a tiled 32-bit texel block - TODO confirm). Advances S.dest0/S.dest1 and stores the source pointers back into S. count is the number of 4-pixel groups and must be nonzero because the loop is do/while. */ void YUV_32_4x2_even(u32 count) +{ + u16 vword; + u16 uword; + u32 yv0; + u32 yv1; + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y1; + u32 PTR4* y0; + u16 PTR4* u; + u16 PTR4* v; + u32 pitch; + u8 PTR4* base; + s32 row0; + s32 row1; + u32 tiledPitch; + s32 PTR4* u_to_b; + s32 PTR4* v_to_gb; + s32 PTR4* u_to_gb; + s32 PTR4* v_to_r; + + dest0 = (u32 PTR4*)S.dest0; + base = S.base; + dest1 = (u32 PTR4*)S.dest1; + pitch = S.pitch; + tiledPitch = RGB_TILE_PITCH32(pitch); + + row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 += count * RGB_32_4X2_ROW_BYTES; + S.dest1 += count * RGB_32_4X2_ROW_BYTES; + y1 = S.y1; + y0 = S.y0; + u = S.u; + v = S.v; + u_to_b = YUVTables; + v_to_gb = YUVTables + YUV_V_TO_GB_OFFSET; + u_to_gb = YUVTables + YUV_U_TO_GB_OFFSET; + v_to_r = YUVTables + YUV_V_TO_R_OFFSET; + + do { + u32 vhi; + u32 uhi; + u32 vlo; + u32 ulo; + u32 PTR4* y00; + u32 PTR4* y01; + u32 PTR4* y10; + u32 PTR4* y11; + s32 r; + s32 b; + s32 gb; + + vword = *v++; + yv0 = *y0++; + vhi = RGB_WORD_BYTE1(vword); + uword = *u++; + uhi = RGB_WORD_BYTE1(uword); + + y00 = clamp_ytable[RGB_WORD_BYTE3(yv0)]; + y01 = clamp_ytable[RGB_WORD_BYTE2(yv0)]; + gb = v_to_gb[vhi] + u_to_gb[uhi]; + r = v_to_r[vhi]; + b = u_to_b[uhi]; + yv1 = *y1++; + dest0[0] = (y00[r] << 16)
| y01[r]; + dest0[8] = (y00[gb] << 24) | (y00[b] << 16) | (y01[gb] << 8) | y01[b]; + + y10 = clamp_ytable[RGB_WORD_BYTE3(yv1)]; + y11 = clamp_ytable[RGB_WORD_BYTE2(yv1)]; + dest1[0] = (y10[r] << 16) | y11[r]; + dest1[8] = (y10[gb] << 24) | (y10[b] << 16) | (y11[gb] << 8) | y11[b]; + + /* Second pixel pair of the group: low chroma bytes with luma bytes 1 and 0. */ vlo = RGB_WORD_BYTE0(vword); + ulo = RGB_WORD_BYTE0(uword); + y00 = clamp_ytable[RGB_WORD_BYTE1(yv0)]; + y01 = clamp_ytable[RGB_WORD_BYTE0(yv0)]; + gb = v_to_gb[vlo] + u_to_gb[ulo]; + r = v_to_r[vlo]; + b = u_to_b[ulo]; + dest0[1] = (y00[r] << 16) | y01[r]; + dest0[9] = (y00[gb] << 24) | (y00[b] << 16) | (y01[gb] << 8) | y01[b]; + dest0 += RGB_TILE_BLOCK_WORDS; + + y10 = clamp_ytable[RGB_WORD_BYTE1(yv1)]; + y11 = clamp_ytable[RGB_WORD_BYTE0(yv1)]; + dest1[1] = (y10[r] << 16) | y11[r]; + dest1[9] = (y10[gb] << 24) | (y10[b] << 16) | (y11[gb] << 8) | y11[b]; + dest1 += RGB_TILE_BLOCK_WORDS; + + count--; + } while (count != 0); + + S.u = u; + S.v = v; + S.y0 = y0; + S.y1 = y1; +} +/* Colour kernel, 32-bit output with 2x horizontal scale: same table lookups as YUV_32_4x2_even, but every source pixel is written twice side by side, so each group of 4 source pixels advances the destination by RGB_TILE_X2_BLOCK_WORDS words per row. count must be nonzero because the loop is do/while. */ void YUV_32x2_4x2_even(u32 count) +{ + u16 vword; + u16 uword; + u32 yv0; + u32 yv1; + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y1; + u32 PTR4* y0; + u16 PTR4* u; + u16 PTR4* v; + u32 pitch; + u8 PTR4* base; + s32 row0; + s32 row1; + u32 tiledPitch; + s32 PTR4* u_to_b; + s32 PTR4* v_to_gb; + s32 PTR4* u_to_gb; + s32 PTR4* v_to_r; + + dest0 = (u32 PTR4*)S.dest0; + base = S.base; + dest1 = (u32 PTR4*)S.dest1; + pitch = S.pitch; + tiledPitch = RGB_TILE_PITCH32(pitch); + + row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 += count * RGB_32_X2_4X2_ROW_BYTES; + S.dest1 += count * RGB_32_X2_4X2_ROW_BYTES; + y1 = S.y1; + y0 = S.y0; + u = S.u; + v = S.v; + u_to_b = YUVTables; + v_to_gb = YUVTables + YUV_V_TO_GB_OFFSET; + u_to_gb = YUVTables + YUV_U_TO_GB_OFFSET; + v_to_r = YUVTables + YUV_V_TO_R_OFFSET; + + do { + u32 vhi; +
u32 uhi; + u32 vlo; + u32 ulo; + u32 PTR4* y00; + u32 PTR4* y01; + u32 PTR4* y10; + u32 PTR4* y11; + s32 r; + s32 b; + s32 gb; + + vword = *v++; + yv0 = *y0++; + uword = *u++; + vhi = RGB_WORD_BYTE1(vword); + uhi = RGB_WORD_BYTE1(uword); + + y00 = clamp_ytable[RGB_WORD_BYTE3(yv0)]; + y01 = clamp_ytable[RGB_WORD_BYTE2(yv0)]; + r = v_to_r[vhi]; + gb = v_to_gb[vhi] + u_to_gb[uhi]; + b = u_to_b[uhi]; + yv1 = *y1++; + + /* Each lookup is used for both halves of an output word, which is what doubles every pixel horizontally. */ dest0[0] = (y00[r] << 16) | y00[r]; + dest0[1] = (y01[r] << 16) | y01[r]; + dest0[8] = (y00[gb] << 24) | (y00[b] << 16) | (y00[gb] << 8) | y00[b]; + dest0[9] = (y01[gb] << 24) | (y01[b] << 16) | (y01[gb] << 8) | y01[b]; + + y10 = clamp_ytable[RGB_WORD_BYTE3(yv1)]; + y11 = clamp_ytable[RGB_WORD_BYTE2(yv1)]; + dest1[0] = (y10[r] << 16) | y10[r]; + dest1[1] = (y11[r] << 16) | y11[r]; + dest1[8] = (y10[gb] << 24) | (y10[b] << 16) | (y10[gb] << 8) | y10[b]; + dest1[9] = (y11[gb] << 24) | (y11[b] << 16) | (y11[gb] << 8) | y11[b]; + + vlo = RGB_WORD_BYTE0(vword); + ulo = RGB_WORD_BYTE0(uword); + y00 = clamp_ytable[RGB_WORD_BYTE1(yv0)]; + y01 = clamp_ytable[RGB_WORD_BYTE0(yv0)]; + r = v_to_r[vlo]; + gb = v_to_gb[vlo] + u_to_gb[ulo]; + b = u_to_b[ulo]; + dest0[16] = (y00[r] << 16) | y00[r]; + dest0[17] = (y01[r] << 16) | y01[r]; + dest0[24] = (y00[gb] << 24) | (y00[b] << 16) | (y00[gb] << 8) | y00[b]; + dest0[25] = (y01[gb] << 24) | (y01[b] << 16) | (y01[gb] << 8) | y01[b]; + dest0 += RGB_TILE_X2_BLOCK_WORDS; + + y10 = clamp_ytable[RGB_WORD_BYTE1(yv1)]; + y11 = clamp_ytable[RGB_WORD_BYTE0(yv1)]; + dest1[16] = (y10[r] << 16) | y10[r]; + dest1[17] = (y11[r] << 16) | y11[r]; + dest1[24] = (y10[gb] << 24) | (y10[b] << 16) | (y10[gb] << 8) | y10[b]; + dest1[25] = (y11[gb] << 24) | (y11[b] << 16) | (y11[gb] << 8) | y11[b]; + dest1 += RGB_TILE_X2_BLOCK_WORDS; + + count--; + } while (count != 0); + + S.u = u; + S.v = v; + S.y0 = y0; + S.y1 = y1; +} + +/* Monochrome kernel, 32-bit output: luma bytes index mono32 only (S.u/S.v are not read). The upper halfwords of two adjacent entries are packed into dest[n] and their lower halfwords into dest[n + 8]. Each iteration handles 4x2 pixels; count must be nonzero because the loop is do/while. */ void YUV_32m_4x2(u32 count) +{ + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u32 pitch; + u8
PTR4* base; + u8 PTR4* dest0_start; + u8 PTR4* dest1_start; + s32 row0; + s32 row1; + u32 tiledPitch; + + dest0_start = S.dest0; + base = S.base; + dest1_start = S.dest1; + dest0 = (u32 PTR4*)dest0_start; + dest1 = (u32 PTR4*)dest1_start; + pitch = S.pitch; + tiledPitch = RGB_TILE_PITCH32(pitch); + + /* Translate the linear row pointers into tiled destination addresses before advancing the linear pointers kept in S. */ row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 = dest0_start + count * RGB_32_4X2_ROW_BYTES; + S.dest1 = dest1_start + count * RGB_32_4X2_ROW_BYTES; + + y0 = S.y0; + y1 = S.y1; + + do { + u32 yv0 = *y0++; + u32 yv1 = *y1++; + u32 a = mono32[RGB_WORD_BYTE3(yv0)]; + u32 b = mono32[RGB_WORD_BYTE2(yv0)]; + u32 c = mono32[RGB_WORD_BYTE1(yv0)]; + u32 d = mono32[RGB_WORD_BYTE0(yv0)]; + + dest0[0] = (a & RGB_WORD_HI_MASK) | (b >> 16); + dest0[1] = (c & RGB_WORD_HI_MASK) | (d >> 16); + dest0[8] = (a << 16) | (b & RGB_WORD_LO_MASK); + dest0[9] = (c << 16) | (d & RGB_WORD_LO_MASK); + + a = mono32[RGB_WORD_BYTE3(yv1)]; + b = mono32[RGB_WORD_BYTE2(yv1)]; + c = mono32[RGB_WORD_BYTE1(yv1)]; + d = mono32[RGB_WORD_BYTE0(yv1)]; + dest1[0] = (a & RGB_WORD_HI_MASK) | (b >> 16); + dest1[1] = (c & RGB_WORD_HI_MASK) | (d >> 16); + dest1[8] = (a << 16) | (b & RGB_WORD_LO_MASK); + dest1[9] = (c << 16) | (d & RGB_WORD_LO_MASK); + + dest0 += RGB_TILE_BLOCK_WORDS; + dest1 += RGB_TILE_BLOCK_WORDS; + --count; + } while (count != 0); + + S.y0 = y0; + S.y1 = y1; +} + +/* Monochrome kernel, 32-bit output with 2x horizontal scale: as YUV_32m_4x2, but each mono32 entry supplies both halves of its output words, so every source pixel appears twice side by side. S.u/S.v are not read; count must be nonzero because the loop is do/while. */ void YUV_32mx2_4x2(u32 count) +{ + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u32 pitch; + u8 PTR4* base; + u8 PTR4* dest0_start; + u8 PTR4* dest1_start; + s32 row0; + s32 row1; + u32 tiledPitch; + + dest0_start = S.dest0; + base = S.base; + dest1_start = S.dest1; + dest0 = (u32 PTR4*)dest0_start; + dest1 = (u32 PTR4*)dest1_start; + pitch = S.pitch; + tiledPitch = RGB_TILE_PITCH32(pitch); + + row0 = RGB_TILE_ROW(dest0,
base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 = dest0_start + count * RGB_32_X2_4X2_ROW_BYTES; + S.dest1 = dest1_start + count * RGB_32_X2_4X2_ROW_BYTES; + + y0 = S.y0; + y1 = S.y1; + + do { + u32 yv0 = *y0++; + u32 yv1 = *y1++; + u32 a = mono32[RGB_WORD_BYTE3(yv0)]; + u32 b = mono32[RGB_WORD_BYTE2(yv0)]; + u32 c = mono32[RGB_WORD_BYTE1(yv0)]; + u32 d = mono32[RGB_WORD_BYTE0(yv0)]; + + /* Upper halfword of each entry goes to dest[n] (twice), lower halfword to dest[n + 8] (twice). */ dest0[0] = (a & RGB_WORD_HI_MASK) | (a >> 16); + dest0[1] = (b & RGB_WORD_HI_MASK) | (b >> 16); + dest0[8] = (a << 16) | (a & RGB_WORD_LO_MASK); + dest0[9] = (b << 16) | (b & RGB_WORD_LO_MASK); + dest0[16] = (c & RGB_WORD_HI_MASK) | (c >> 16); + dest0[17] = (d & RGB_WORD_HI_MASK) | (d >> 16); + dest0[24] = (c << 16) | (c & RGB_WORD_LO_MASK); + dest0[25] = (d << 16) | (d & RGB_WORD_LO_MASK); + + a = mono32[RGB_WORD_BYTE3(yv1)]; + b = mono32[RGB_WORD_BYTE2(yv1)]; + c = mono32[RGB_WORD_BYTE1(yv1)]; + d = mono32[RGB_WORD_BYTE0(yv1)]; + dest1[0] = (a & RGB_WORD_HI_MASK) | (a >> 16); + dest1[1] = (b & RGB_WORD_HI_MASK) | (b >> 16); + dest1[8] = (a << 16) | (a & RGB_WORD_LO_MASK); + dest1[9] = (b << 16) | (b & RGB_WORD_LO_MASK); + dest1[16] = (c & RGB_WORD_HI_MASK) | (c >> 16); + dest1[17] = (d & RGB_WORD_HI_MASK) | (d >> 16); + dest1[24] = (c << 16) | (c & RGB_WORD_LO_MASK); + dest1[25] = (d << 16) | (d & RGB_WORD_LO_MASK); + + dest0 += RGB_TILE_X2_BLOCK_WORDS; + dest1 += RGB_TILE_X2_BLOCK_WORDS; + --count; + } while (count != 0); + + S.y0 = y0; + S.y1 = y1; +} + +/* Colour kernel, 16-bit output. Each iteration converts 4x2 pixels (one u32 of luma per row, one u16 each from S.u/S.v) and packs two 16-bit pixels built by RGB565_BIASED into every 32-bit destination word. The locals rb and bb hold the U-derived and V-derived terms respectively, which is the order RGB565_BIASED expects. count must be nonzero because the loop is do/while. */ void YUV_16_4x2_even(u32 count) +{ + u16 uword; + u16 vword; + u32 yv0; + u32 yv1; + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u16 PTR4* u; + u16 PTR4* v; + u32 pitch; + u8 PTR4* base; + s32 row0; + s32 row1; + u32 tiledPitch; + s32 PTR4* u_to_b; + s32 PTR4* v_to_gb; + s32 PTR4* u_to_gb; + s32 PTR4* v_to_r; + u32 PTR4* clamp_r_base; +
u32 PTR4* clamp_g_base; + u32 PTR4* clamp_b_base; + + dest0 = (u32 PTR4*)S.dest0; + dest1 = (u32 PTR4*)S.dest1; + y0 = S.y0; + y1 = S.y1; + u = S.u; + v = S.v; + pitch = S.pitch; + base = S.base; + tiledPitch = RGB_TILE_PITCH16(pitch); + u_to_b = YUVTables; + v_to_gb = YUVTables + YUV_V_TO_GB_OFFSET; + u_to_gb = YUVTables + YUV_U_TO_GB_OFFSET; + v_to_r = YUVTables + YUV_V_TO_R_OFFSET; + clamp_r_base = clamp_r + RGB_CLAMP_BIAS; + clamp_g_base = clamp_g + RGB_CLAMP_BIAS; + clamp_b_base = clamp_b + RGB_CLAMP_BIAS; + + row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 += count * RGB_16_4X2_ROW_BYTES; + S.dest1 += count * RGB_16_4X2_ROW_BYTES; + + do { + u32 uhi; + u32 vhi; + u32 ulo; + u32 vlo; + s32 rb; + s32 gb; + s32 bb; + s32 ya; + s32 yb; + + uword = *u++; + vword = *v++; + yv0 = *y0++; + uhi = RGB_WORD_BYTE1(uword); + vhi = RGB_WORD_BYTE1(vword); + rb = u_to_b[uhi]; + gb = u_to_gb[uhi] + v_to_gb[vhi]; + bb = v_to_r[vhi]; + ya = ytable[RGB_WORD_BYTE3(yv0)]; + yb = ytable[RGB_WORD_BYTE2(yv0)]; + yv1 = *y1++; + dest0[0] = (RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, ya, rb, gb, bb) << 16) | + RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, yb, rb, gb, bb); + + ya = ytable[RGB_WORD_BYTE3(yv1)]; + yb = ytable[RGB_WORD_BYTE2(yv1)]; + dest1[0] = (RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, ya, rb, gb, bb) << 16) | + RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, yb, rb, gb, bb); + + ulo = RGB_WORD_BYTE0(uword); + vlo = RGB_WORD_BYTE0(vword); + rb = u_to_b[ulo]; + gb = u_to_gb[ulo] + v_to_gb[vlo]; + bb = v_to_r[vlo]; + ya = ytable[RGB_WORD_BYTE1(yv0)]; + yb = ytable[RGB_WORD_BYTE0(yv0)]; + dest0[1] = (RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, ya, rb, gb, bb) << 16) | + RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, yb, 
rb, gb, bb); + dest0 += RGB_TILE_HALF_BLOCK_WORDS; + + ya = ytable[RGB_WORD_BYTE1(yv1)]; + yb = ytable[RGB_WORD_BYTE0(yv1)]; + dest1[1] = (RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, ya, rb, gb, bb) << 16) | + RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, yb, rb, gb, bb); + dest1 += RGB_TILE_HALF_BLOCK_WORDS; + + count--; + } while (count != 0); + + S.u = u; + S.v = v; + S.y0 = y0; + S.y1 = y1; +} + +/* Colour kernel, 16-bit output with 2x horizontal scale: same lookups as YUV_16_4x2_even, but each converted pixel is duplicated into both halves of a word with RGB565_PAIR, so a group of 4 source pixels advances the destination by RGB_TILE_BLOCK_WORDS words per row. count must be nonzero because the loop is do/while. */ void YUV_16x2_4x2_even(u32 count) +{ + u16 uword; + u16 vword; + u32 yv0; + u32 yv1; + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u16 PTR4* u; + u16 PTR4* v; + u32 pitch; + u8 PTR4* base; + s32 row0; + s32 row1; + u32 tiledPitch; + s32 PTR4* u_to_b; + s32 PTR4* v_to_gb; + s32 PTR4* u_to_gb; + s32 PTR4* v_to_r; + u32 PTR4* clamp_r_base; + u32 PTR4* clamp_g_base; + u32 PTR4* clamp_b_base; + + dest0 = (u32 PTR4*)S.dest0; + dest1 = (u32 PTR4*)S.dest1; + y0 = S.y0; + y1 = S.y1; + u = S.u; + v = S.v; + pitch = S.pitch; + base = S.base; + tiledPitch = RGB_TILE_PITCH16(pitch); + u_to_b = YUVTables; + v_to_gb = YUVTables + YUV_V_TO_GB_OFFSET; + u_to_gb = YUVTables + YUV_U_TO_GB_OFFSET; + v_to_r = YUVTables + YUV_V_TO_R_OFFSET; + /* Pre-biased table bases so that a signed luma + chroma sum can index the clamp tables directly. */ clamp_r_base = clamp_r + RGB_CLAMP_BIAS; + clamp_g_base = clamp_g + RGB_CLAMP_BIAS; + clamp_b_base = clamp_b + RGB_CLAMP_BIAS; + + row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 += count * RGB_16_X2_4X2_ROW_BYTES; + S.dest1 += count * RGB_16_X2_4X2_ROW_BYTES; + + do { + u32 uhi; + u32 vhi; + u32 ulo; + u32 vlo; + u32 pix; + s32 rb; + s32 gb; + s32 bb; + s32 ya; + s32 yb; + + uword = *u++; + vword = *v++; + yv0 = *y0++; + uhi = RGB_WORD_BYTE1(uword); + vhi = RGB_WORD_BYTE1(vword); + rb = u_to_b[uhi]; + gb = u_to_gb[uhi] + v_to_gb[vhi]; + bb = v_to_r[vhi]; + ya = ytable[RGB_WORD_BYTE3(yv0)]; + yb =
ytable[RGB_WORD_BYTE2(yv0)]; + yv1 = *y1++; + pix = RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, ya, rb, gb, bb); + dest0[0] = RGB565_PAIR(pix); + pix = RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, yb, rb, gb, bb); + dest0[1] = RGB565_PAIR(pix); + + ya = ytable[RGB_WORD_BYTE3(yv1)]; + yb = ytable[RGB_WORD_BYTE2(yv1)]; + pix = RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, ya, rb, gb, bb); + dest1[0] = RGB565_PAIR(pix); + pix = RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, yb, rb, gb, bb); + dest1[1] = RGB565_PAIR(pix); + + ulo = RGB_WORD_BYTE0(uword); + vlo = RGB_WORD_BYTE0(vword); + rb = u_to_b[ulo]; + gb = u_to_gb[ulo] + v_to_gb[vlo]; + bb = v_to_r[vlo]; + ya = ytable[RGB_WORD_BYTE1(yv0)]; + yb = ytable[RGB_WORD_BYTE0(yv0)]; + pix = RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, ya, rb, gb, bb); + dest0[8] = RGB565_PAIR(pix); + pix = RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, yb, rb, gb, bb); + dest0[9] = RGB565_PAIR(pix); + dest0 += RGB_TILE_BLOCK_WORDS; + + ya = ytable[RGB_WORD_BYTE1(yv1)]; + yb = ytable[RGB_WORD_BYTE0(yv1)]; + pix = RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, ya, rb, gb, bb); + dest1[8] = RGB565_PAIR(pix); + pix = RGB565_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, yb, rb, gb, bb); + dest1[9] = RGB565_PAIR(pix); + dest1 += RGB_TILE_BLOCK_WORDS; + + count--; + } while (count != 0); + + S.u = u; + S.v = v; + S.y0 = y0; + S.y1 = y1; +} + +void YUV_16m_4x2(u32 count) +{ + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u32 pitch; + u8 PTR4* base; + u8 PTR4* dest0_start; + u8 PTR4* dest1_start; + s32 row0; + s32 row1; + u32 tiledPitch; + u32 PTR4* table; + + dest0_start = S.dest0; + dest1_start = S.dest1; + dest0 = (u32 PTR4*)dest0_start; + dest1 = (u32 PTR4*)dest1_start; + y0 = S.y0; + y1 = S.y1; + pitch = S.pitch; + base = S.base; + tiledPitch = RGB_TILE_PITCH16(pitch); + table = mono16; + + row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = 
RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 = dest0_start + count * RGB_16_4X2_ROW_BYTES; + S.dest1 = dest1_start + count * RGB_16_4X2_ROW_BYTES; + + do { + u32 yv0 = *y0++; + u32 yv1 = *y1++; + u32 a = (u16)table[RGB_WORD_BYTE3(yv0)]; + u32 b = (u16)table[RGB_WORD_BYTE2(yv0)]; + u32 c = (u16)table[RGB_WORD_BYTE1(yv0)]; + u32 d = (u16)table[RGB_WORD_BYTE0(yv0)]; + + dest0[0] = (a << 16) | b; + dest0[1] = (c << 16) | d; + + a = (u16)table[RGB_WORD_BYTE3(yv1)]; + b = (u16)table[RGB_WORD_BYTE2(yv1)]; + c = (u16)table[RGB_WORD_BYTE1(yv1)]; + d = (u16)table[RGB_WORD_BYTE0(yv1)]; + dest1[0] = (a << 16) | b; + dest1[1] = (c << 16) | d; + + dest0 += RGB_TILE_HALF_BLOCK_WORDS; + dest1 += RGB_TILE_HALF_BLOCK_WORDS; + --count; + } while (count != 0); + + S.y0 = y0; + S.y1 = y1; +} + +void YUV_16mx2_4x2(u32 count) +{ + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u32 pitch; + u8 PTR4* base; + u8 PTR4* dest0_start; + u8 PTR4* dest1_start; + s32 row0; + s32 row1; + u32 tiledPitch; + u32 PTR4* table; + + dest0_start = S.dest0; + dest1_start = S.dest1; + dest0 = (u32 PTR4*)dest0_start; + dest1 = (u32 PTR4*)dest1_start; + y0 = S.y0; + y1 = S.y1; + pitch = S.pitch; + base = S.base; + tiledPitch = RGB_TILE_PITCH16(pitch); + table = mono16; + + row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 = dest0_start + count * RGB_16_X2_4X2_ROW_BYTES; + S.dest1 = dest1_start + count * RGB_16_X2_4X2_ROW_BYTES; + + do { + u32 yv0 = *y0++; + u32 yv1 = *y1++; + u32 a = (u16)table[RGB_WORD_BYTE3(yv0)]; + u32 b = (u16)table[RGB_WORD_BYTE2(yv0)]; + u32 c = (u16)table[RGB_WORD_BYTE1(yv0)]; + u32 d = (u16)table[RGB_WORD_BYTE0(yv0)]; + + dest0[0] 
= (a << 16) | a; + dest0[1] = (b << 16) | b; + dest0[8] = (c << 16) | c; + dest0[9] = (d << 16) | d; + + a = (u16)table[RGB_WORD_BYTE3(yv1)]; + b = (u16)table[RGB_WORD_BYTE2(yv1)]; + c = (u16)table[RGB_WORD_BYTE1(yv1)]; + d = (u16)table[RGB_WORD_BYTE0(yv1)]; + dest1[0] = (a << 16) | a; + dest1[1] = (b << 16) | b; + dest1[8] = (c << 16) | c; + dest1[9] = (d << 16) | d; + + dest0 += RGB_TILE_BLOCK_WORDS; + dest1 += RGB_TILE_BLOCK_WORDS; + --count; + } while (count != 0); + + S.y0 = y0; + S.y1 = y1; +} + +void YUV_32a_4x2_even(u32 count) +{ + u16 vword; + u16 uword; + u32 yv0; + u32 yv1; + u32 av0; + u32 av1; + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y1; + u32 PTR4* y0; + u16 PTR4* u; + u16 PTR4* v; + u32 PTR4* a0; + u32 PTR4* a1; + u32 pitch; + u8 PTR4* base; + s32 row0; + s32 row1; + u32 tiledPitch; + s32 PTR4* u_to_b; + s32 PTR4* v_to_gb; + s32 PTR4* u_to_gb; + s32 PTR4* v_to_r; + + dest0 = (u32 PTR4*)S.dest0; + dest1 = (u32 PTR4*)S.dest1; + y1 = S.y1; + y0 = S.y0; + u = S.u; + v = S.v; + a0 = S.a0; + a1 = S.a1; + pitch = S.pitch; + base = S.base; + tiledPitch = RGB_TILE_PITCH32(pitch); + u_to_b = YUVTables; + v_to_gb = YUVTables + YUV_V_TO_GB_OFFSET; + u_to_gb = YUVTables + YUV_U_TO_GB_OFFSET; + v_to_r = YUVTables + YUV_V_TO_R_OFFSET; + + row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 += count * RGB_32_4X2_ROW_BYTES; + S.dest1 += count * RGB_32_4X2_ROW_BYTES; + + do { + u32 vhi; + u32 uhi; + u32 vlo; + u32 ulo; + u32 PTR4* y00; + u32 PTR4* y01; + u32 PTR4* y10; + u32 PTR4* y11; + s32 r; + s32 b; + s32 gb; + + yv0 = *y0++; + vword = *v++; + av0 = *a0++; + uword = *u++; + av1 = *a1++; + yv1 = *y1++; + vhi = RGB_WORD_BYTE1(vword); + uhi = RGB_WORD_BYTE1(uword); + + y00 = clamp_ytable[RGB_WORD_BYTE3(yv0)]; + y01 = clamp_ytable[RGB_WORD_BYTE2(yv0)]; + r = v_to_r[vhi]; + 
gb = v_to_gb[vhi] + u_to_gb[uhi]; + b = u_to_b[uhi]; + dest0[0] = (av0 & RGB_ALPHA0_MASK) | (y00[r] << 16) | ((av0 >> 8) & RGB_ALPHA2_MASK) | y01[r]; + dest0[8] = (y00[gb] << 24) | (y00[b] << 16) | (y01[gb] << 8) | y01[b]; + + y10 = clamp_ytable[RGB_WORD_BYTE3(yv1)]; + y11 = clamp_ytable[RGB_WORD_BYTE2(yv1)]; + dest1[0] = (av1 & RGB_ALPHA0_MASK) | (y10[r] << 16) | ((av1 >> 8) & RGB_ALPHA2_MASK) | y11[r]; + dest1[8] = (y10[gb] << 24) | (y10[b] << 16) | (y11[gb] << 8) | y11[b]; + + vlo = RGB_WORD_BYTE0(vword); + ulo = RGB_WORD_BYTE0(uword); + y00 = clamp_ytable[RGB_WORD_BYTE1(yv0)]; + y01 = clamp_ytable[RGB_WORD_BYTE0(yv0)]; + r = v_to_r[vlo]; + gb = v_to_gb[vlo] + u_to_gb[ulo]; + b = u_to_b[ulo]; + dest0[1] = ((av0 & RGB_ALPHA2_MASK) | y00[r]) << 16 | (RGB_WORD_BYTE0(av0) << 8) | y01[r]; + dest0[9] = (y00[gb] << 24) | (y00[b] << 16) | (y01[gb] << 8) | y01[b]; + dest0 += RGB_TILE_BLOCK_WORDS; + + y10 = clamp_ytable[RGB_WORD_BYTE1(yv1)]; + y11 = clamp_ytable[RGB_WORD_BYTE0(yv1)]; + dest1[1] = ((av1 & RGB_ALPHA2_MASK) | y10[r]) << 16 | (RGB_WORD_BYTE0(av1) << 8) | y11[r]; + dest1[9] = (y10[gb] << 24) | (y10[b] << 16) | (y11[gb] << 8) | y11[b]; + dest1 += RGB_TILE_BLOCK_WORDS; + + count--; + } while (count != 0); + + S.v = v; + S.u = u; + S.y1 = y1; + S.y0 = y0; + S.a0 = a0; + S.a1 = a1; +} + +void YUV_32ax2_4x2_even(u32 count) +{ + u16 vword; + u16 uword; + u32 yv0; + u32 yv1; + u32 av0; + u32 av1; + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y1; + u32 PTR4* y0; + u16 PTR4* u; + u16 PTR4* v; + u32 PTR4* a0; + u32 PTR4* a1; + u32 pitch; + u8 PTR4* base; + s32 row0; + s32 row1; + u32 tiledPitch; + s32 PTR4* u_to_b; + s32 PTR4* v_to_gb; + s32 PTR4* u_to_gb; + s32 PTR4* v_to_r; + + dest0 = (u32 PTR4*)S.dest0; + base = S.base; + dest1 = (u32 PTR4*)S.dest1; + pitch = S.pitch; + tiledPitch = RGB_TILE_PITCH32(pitch); + + row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, 
row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 += count * RGB_32_X2_4X2_ROW_BYTES; + S.dest1 += count * RGB_32_X2_4X2_ROW_BYTES; + y1 = S.y1; + y0 = S.y0; + u = S.u; + v = S.v; + a0 = S.a0; + a1 = S.a1; + u_to_b = YUVTables; + v_to_gb = YUVTables + YUV_V_TO_GB_OFFSET; + u_to_gb = YUVTables + YUV_U_TO_GB_OFFSET; + v_to_r = YUVTables + YUV_V_TO_R_OFFSET; + + do { + u32 vhi; + u32 uhi; + u32 vlo; + u32 ulo; + u32 PTR4* y00; + u32 PTR4* y01; + u32 PTR4* y10; + u32 PTR4* y11; + u32 alpha0; + u32 alpha1; + s32 r; + s32 b; + s32 gb; + + vword = *v++; + yv0 = *y0++; + uword = *u++; + av0 = *a0++; + av1 = *a1++; + yv1 = *y1++; + vhi = RGB_WORD_BYTE1(vword); + uhi = RGB_WORD_BYTE1(uword); + alpha0 = av0 & RGB_ALPHA0_MASK; + alpha1 = RGB_WORD_BYTE2(av0); + + y00 = clamp_ytable[RGB_WORD_BYTE3(yv0)]; + y01 = clamp_ytable[RGB_WORD_BYTE2(yv0)]; + r = v_to_r[vhi]; + gb = v_to_gb[vhi] + u_to_gb[uhi]; + b = u_to_b[uhi]; + + dest0[0] = alpha0 | (y00[r] << 16) | (RGB_WORD_BYTE3(av0) << 8) | y00[r]; + dest0[1] = (alpha1 << 24) | (y01[r] << 16) | (alpha1 << 8) | y01[r]; + dest0[8] = (y00[gb] << 24) | (y00[b] << 16) | (y00[gb] << 8) | y00[b]; + dest0[9] = (y01[gb] << 24) | (y01[b] << 16) | (y01[gb] << 8) | y01[b]; + + alpha0 = av1 & RGB_ALPHA0_MASK; + alpha1 = RGB_WORD_BYTE2(av1); + y10 = clamp_ytable[RGB_WORD_BYTE3(yv1)]; + y11 = clamp_ytable[RGB_WORD_BYTE2(yv1)]; + dest1[0] = alpha0 | (y10[r] << 16) | (RGB_WORD_BYTE3(av1) << 8) | y10[r]; + dest1[1] = (alpha1 << 24) | (y11[r] << 16) | (alpha1 << 8) | y11[r]; + dest1[8] = (y10[gb] << 24) | (y10[b] << 16) | (y10[gb] << 8) | y10[b]; + dest1[9] = (y11[gb] << 24) | (y11[b] << 16) | (y11[gb] << 8) | y11[b]; + + vlo = RGB_WORD_BYTE0(vword); + ulo = RGB_WORD_BYTE0(uword); + y00 = clamp_ytable[RGB_WORD_BYTE1(yv0)]; + y01 = clamp_ytable[RGB_WORD_BYTE0(yv0)]; + r = v_to_r[vlo]; + gb = v_to_gb[vlo] + u_to_gb[ulo]; + b = u_to_b[ulo]; + + alpha0 = RGB_WORD_BYTE1(av0); + alpha1 = RGB_WORD_BYTE0(av0); + 
dest0[16] = (alpha0 << 24) | (y00[r] << 16) | (alpha0 << 8) | y00[r]; + dest0[17] = (alpha1 << 24) | (y01[r] << 16) | (alpha1 << 8) | y01[r]; + dest0[24] = (y00[gb] << 24) | (y00[b] << 16) | (y00[gb] << 8) | y00[b]; + dest0[25] = (y01[gb] << 24) | (y01[b] << 16) | (y01[gb] << 8) | y01[b]; + dest0 += RGB_TILE_X2_BLOCK_WORDS; + + alpha0 = RGB_WORD_BYTE1(av1); + alpha1 = RGB_WORD_BYTE0(av1); + y10 = clamp_ytable[RGB_WORD_BYTE1(yv1)]; + y11 = clamp_ytable[RGB_WORD_BYTE0(yv1)]; + dest1[16] = (alpha0 << 24) | (y10[r] << 16) | (alpha0 << 8) | y10[r]; + dest1[17] = (alpha1 << 24) | (y11[r] << 16) | (alpha1 << 8) | y11[r]; + dest1[24] = (y10[gb] << 24) | (y10[b] << 16) | (y10[gb] << 8) | y10[b]; + dest1[25] = (y11[gb] << 24) | (y11[b] << 16) | (y11[gb] << 8) | y11[b]; + dest1 += RGB_TILE_X2_BLOCK_WORDS; + + count--; + } while (count != 0); + + S.y0 = y0; + S.y1 = y1; + S.u = u; + S.v = v; + S.a0 = a0; + S.a1 = a1; +} + +void YUV_32am_4x2(u32 count) +{ + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u32 PTR4* a0; + u32 PTR4* a1; + u32 pitch; + u8 PTR4* base; + s32 row0; + s32 row1; + u32 tiledPitch; + + dest0 = (u32 PTR4*)S.dest0; + base = S.base; + dest1 = (u32 PTR4*)S.dest1; + pitch = S.pitch; + tiledPitch = RGB_TILE_PITCH32(pitch); + + row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 += count * RGB_32_4X2_ROW_BYTES; + S.dest1 += count * RGB_32_4X2_ROW_BYTES; + + y0 = S.y0; + y1 = S.y1; + a0 = S.a0; + a1 = S.a1; + + do { + u32 yv0 = *y0++; + u32 av0 = *a0++; + u32 yv1 = *y1++; + u32 av1 = *a1++; + u32 p0 = mono32[RGB_WORD_BYTE3(yv0)]; + u32 p1 = mono32[RGB_WORD_BYTE2(yv0)]; + u32 p2 = mono32[RGB_WORD_BYTE1(yv0)]; + u32 p3 = mono32[RGB_WORD_BYTE0(yv0)]; + + dest0[0] = (av0 & RGB_ALPHA0_MASK) | (p0 & RGB_WORD_HI_MASK) | (p1 >> 16) | ((av0 >> 8) & 
RGB_ALPHA2_MASK); + dest0[1] = + ((av0 & RGB_ALPHA2_MASK) << 16) | (p2 & RGB_WORD_HI_MASK) | (p3 >> 16) | (RGB_WORD_BYTE0(av0) << 8); + dest0[8] = (p0 << 16) | (p1 & RGB_WORD_LO_MASK); + dest0[9] = (p2 << 16) | (p3 & RGB_WORD_LO_MASK); + + p0 = mono32[RGB_WORD_BYTE3(yv1)]; + p1 = mono32[RGB_WORD_BYTE2(yv1)]; + p2 = mono32[RGB_WORD_BYTE1(yv1)]; + p3 = mono32[RGB_WORD_BYTE0(yv1)]; + dest1[0] = (av1 & RGB_ALPHA0_MASK) | (p0 & RGB_WORD_HI_MASK) | (p1 >> 16) | ((av1 >> 8) & RGB_ALPHA2_MASK); + dest1[1] = + ((av1 & RGB_ALPHA2_MASK) << 16) | (p2 & RGB_WORD_HI_MASK) | (p3 >> 16) | (RGB_WORD_BYTE0(av1) << 8); + dest1[8] = (p0 << 16) | (p1 & RGB_WORD_LO_MASK); + dest1[9] = (p2 << 16) | (p3 & RGB_WORD_LO_MASK); + + dest0 += RGB_TILE_BLOCK_WORDS; + dest1 += RGB_TILE_BLOCK_WORDS; + --count; + } while (count != 0); + + S.y0 = y0; + S.y1 = y1; + S.a0 = a0; + S.a1 = a1; +} + +void YUV_32amx2_4x2(u32 count) +{ + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u32 PTR4* a0; + u32 PTR4* a1; + u32 pitch; + u8 PTR4* base; + s32 row0; + s32 row1; + u32 tiledPitch; + + dest0 = (u32 PTR4*)S.dest0; + base = S.base; + dest1 = (u32 PTR4*)S.dest1; + pitch = S.pitch; + tiledPitch = RGB_TILE_PITCH32(pitch); + + row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 += count * RGB_32_X2_4X2_ROW_BYTES; + S.dest1 += count * RGB_32_X2_4X2_ROW_BYTES; + + y0 = S.y0; + y1 = S.y1; + a0 = S.a0; + a1 = S.a1; + + do { + u32 yv0 = *y0++; + u32 av0 = *a0++; + u32 yv1 = *y1++; + u32 av1 = *a1++; + u32 p0 = mono32[RGB_WORD_BYTE3(yv0)]; + u32 p1 = mono32[RGB_WORD_BYTE2(yv0)]; + u32 p2 = mono32[RGB_WORD_BYTE1(yv0)]; + u32 p3 = mono32[RGB_WORD_BYTE0(yv0)]; + u32 a_hi0 = av0 & RGB_ALPHA0_MASK; + u32 a_hi1 = RGB_WORD_BYTE2(av0) << 24; + u32 a_lo0 = RGB_WORD_BYTE3(av0) << 8; + u32 a_lo1 = RGB_WORD_BYTE2(av0) 
<< 8; + + dest0[0] = a_hi0 | (p0 & RGB_WORD_HI_MASK) | (p0 >> 16) | a_lo0; + dest0[1] = a_hi1 | (p1 & RGB_WORD_HI_MASK) | (p1 >> 16) | a_lo1; + dest0[8] = (p0 << 16) | (p0 & RGB_WORD_LO_MASK); + dest0[9] = (p1 << 16) | (p1 & RGB_WORD_LO_MASK); + dest0[16] = a_hi0 | (p2 & RGB_WORD_HI_MASK) | (p2 >> 16) | a_lo0; + dest0[17] = a_hi1 | (p3 & RGB_WORD_HI_MASK) | (p3 >> 16) | a_lo1; + dest0[24] = (p2 << 16) | (p2 & RGB_WORD_LO_MASK); + dest0[25] = (p3 << 16) | (p3 & RGB_WORD_LO_MASK); + + p0 = mono32[RGB_WORD_BYTE3(yv1)]; + p1 = mono32[RGB_WORD_BYTE2(yv1)]; + p2 = mono32[RGB_WORD_BYTE1(yv1)]; + p3 = mono32[RGB_WORD_BYTE0(yv1)]; + a_hi0 = av1 & RGB_ALPHA0_MASK; + a_hi1 = RGB_WORD_BYTE2(av1) << 24; + a_lo0 = RGB_WORD_BYTE3(av1) << 8; + a_lo1 = RGB_WORD_BYTE2(av1) << 8; + dest1[0] = a_hi0 | (p0 & RGB_WORD_HI_MASK) | (p0 >> 16) | a_lo0; + dest1[1] = a_hi1 | (p1 & RGB_WORD_HI_MASK) | (p1 >> 16) | a_lo1; + dest1[8] = (p0 << 16) | (p0 & RGB_WORD_LO_MASK); + dest1[9] = (p1 << 16) | (p1 & RGB_WORD_LO_MASK); + dest1[16] = a_hi0 | (p2 & RGB_WORD_HI_MASK) | (p2 >> 16) | a_lo0; + dest1[17] = a_hi1 | (p3 & RGB_WORD_HI_MASK) | (p3 >> 16) | a_lo1; + dest1[24] = (p2 << 16) | (p2 & RGB_WORD_LO_MASK); + dest1[25] = (p3 << 16) | (p3 & RGB_WORD_LO_MASK); + + dest0 += RGB_TILE_X2_BLOCK_WORDS; + dest1 += RGB_TILE_X2_BLOCK_WORDS; + --count; + } while (count != 0); + + S.y0 = y0; + S.y1 = y1; + S.a0 = a0; + S.a1 = a1; +} + +void YUV_16a4_4x2_even(u32 count) +{ + u16 uword; + u16 vword; + u32 yv0; + u32 yv1; + u32 av0; + u32 av1; + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u16 PTR4* u; + u16 PTR4* v; + u32 PTR4* a0; + u32 PTR4* a1; + u32 pitch; + u8 PTR4* base; + s32 row0; + s32 row1; + u32 tiledPitch; + s32 PTR4* u_to_b; + s32 PTR4* v_to_gb; + s32 PTR4* u_to_gb; + s32 PTR4* v_to_r; + + dest0 = (u32 PTR4*)S.dest0; + dest1 = (u32 PTR4*)S.dest1; + y0 = S.y0; + y1 = S.y1; + u = S.u; + v = S.v; + a0 = S.a0; + a1 = S.a1; + pitch = S.pitch; + base = S.base; + tiledPitch = 
RGB_TILE_PITCH16(pitch); + u_to_b = YUVTables; + v_to_gb = YUVTables + YUV_V_TO_GB_OFFSET; + u_to_gb = YUVTables + YUV_U_TO_GB_OFFSET; + v_to_r = YUVTables + YUV_V_TO_R_OFFSET; + + row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 += count * RGB_16_4X2_ROW_BYTES; + S.dest1 += count * RGB_16_4X2_ROW_BYTES; + + do { + u32 uhi; + u32 vhi; + u32 ulo; + u32 vlo; + s32 rb; + s32 gb; + s32 bb; + s32 ya; + s32 yb; + + uword = *u++; + vword = *v++; + yv0 = *y0++; + av0 = *a0++; + uhi = RGB_WORD_BYTE1(uword); + vhi = RGB_WORD_BYTE1(vword); + rb = u_to_b[uhi]; + gb = u_to_gb[uhi] + v_to_gb[vhi]; + bb = v_to_r[vhi]; + ya = ytable[RGB_WORD_BYTE3(yv0)]; + yb = ytable[RGB_WORD_BYTE2(yv0)]; + yv1 = *y1++; + av1 = *a1++; + dest0[0] = (RGB565_A4(ya, rb, gb, bb, RGB_WORD_BYTE3(av0)) << 16) | + RGB565_A4(yb, rb, gb, bb, RGB_WORD_BYTE2(av0)); + + ya = ytable[RGB_WORD_BYTE3(yv1)]; + yb = ytable[RGB_WORD_BYTE2(yv1)]; + dest1[0] = (RGB565_A4(ya, rb, gb, bb, RGB_WORD_BYTE3(av1)) << 16) | + RGB565_A4(yb, rb, gb, bb, RGB_WORD_BYTE2(av1)); + + ulo = RGB_WORD_BYTE0(uword); + vlo = RGB_WORD_BYTE0(vword); + rb = u_to_b[ulo]; + gb = u_to_gb[ulo] + v_to_gb[vlo]; + bb = v_to_r[vlo]; + ya = ytable[RGB_WORD_BYTE1(yv0)]; + yb = ytable[RGB_WORD_BYTE0(yv0)]; + dest0[1] = (RGB565_A4(ya, rb, gb, bb, RGB_WORD_BYTE1(av0)) << 16) | + RGB565_A4(yb, rb, gb, bb, RGB_WORD_BYTE0(av0)); + dest0 += RGB_TILE_HALF_BLOCK_WORDS; + + ya = ytable[RGB_WORD_BYTE1(yv1)]; + yb = ytable[RGB_WORD_BYTE0(yv1)]; + dest1[1] = (RGB565_A4(ya, rb, gb, bb, RGB_WORD_BYTE1(av1)) << 16) | + RGB565_A4(yb, rb, gb, bb, RGB_WORD_BYTE0(av1)); + dest1 += RGB_TILE_HALF_BLOCK_WORDS; + + count--; + } while (count != 0); + + S.u = u; + S.v = v; + S.y0 = y0; + S.y1 = y1; + S.a0 = a0; + S.a1 = a1; +} + +void YUV_16a4x2_4x2_even(u32 count) +{ + u16 uword; + u16 
vword; + u32 yv0; + u32 yv1; + u32 av0; + u32 av1; + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u16 PTR4* u; + u16 PTR4* v; + u32 PTR4* a0; + u32 PTR4* a1; + u32 pitch; + u8 PTR4* base; + s32 row0; + s32 row1; + u32 tiledPitch; + s32 PTR4* u_to_b; + s32 PTR4* v_to_gb; + s32 PTR4* u_to_gb; + s32 PTR4* v_to_r; + u32 PTR4* clamp_r_base; + u32 PTR4* clamp_g_base; + u32 PTR4* clamp_b_base; + u32 PTR4* clamp_a4_base; + + dest0 = (u32 PTR4*)S.dest0; + dest1 = (u32 PTR4*)S.dest1; + y0 = S.y0; + y1 = S.y1; + u = S.u; + v = S.v; + a0 = S.a0; + a1 = S.a1; + pitch = S.pitch; + base = S.base; + tiledPitch = RGB_TILE_PITCH16(pitch); + u_to_b = YUVTables; + v_to_gb = YUVTables + YUV_V_TO_GB_OFFSET; + u_to_gb = YUVTables + YUV_U_TO_GB_OFFSET; + v_to_r = YUVTables + YUV_V_TO_R_OFFSET; + clamp_r_base = clamp_r + RGB_CLAMP_BIAS; + clamp_g_base = clamp_g + RGB_CLAMP_BIAS; + clamp_b_base = clamp_b + RGB_CLAMP_BIAS; + clamp_a4_base = clamp_a4; + + row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 += count * RGB_16_X2_4X2_ROW_BYTES; + S.dest1 += count * RGB_16_X2_4X2_ROW_BYTES; + + do { + u32 uhi; + u32 vhi; + u32 ulo; + u32 vlo; + u32 pix; + s32 rb; + s32 gb; + s32 bb; + s32 ya; + s32 yb; + + uword = *u++; + vword = *v++; + yv0 = *y0++; + av0 = *a0++; + uhi = RGB_WORD_BYTE1(uword); + vhi = RGB_WORD_BYTE1(vword); + rb = u_to_b[uhi]; + gb = u_to_gb[uhi] + v_to_gb[vhi]; + bb = v_to_r[vhi]; + ya = ytable[RGB_WORD_BYTE3(yv0)]; + yb = ytable[RGB_WORD_BYTE2(yv0)]; + yv1 = *y1++; + av1 = *a1++; + + pix = RGB565_A4_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, clamp_a4_base, ya, rb, gb, bb, RGB_WORD_BYTE3(av0)); + dest0[0] = RGB565_PAIR(pix); + pix = RGB565_A4_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, clamp_a4_base, yb, rb, gb, bb, RGB_WORD_BYTE2(av0)); + dest0[1] = 
RGB565_PAIR(pix); + + ya = ytable[RGB_WORD_BYTE3(yv1)]; + yb = ytable[RGB_WORD_BYTE2(yv1)]; + pix = RGB565_A4_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, clamp_a4_base, ya, rb, gb, bb, RGB_WORD_BYTE3(av1)); + dest1[0] = RGB565_PAIR(pix); + pix = RGB565_A4_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, clamp_a4_base, yb, rb, gb, bb, RGB_WORD_BYTE2(av1)); + dest1[1] = RGB565_PAIR(pix); + + ulo = RGB_WORD_BYTE0(uword); + vlo = RGB_WORD_BYTE0(vword); + rb = u_to_b[ulo]; + gb = u_to_gb[ulo] + v_to_gb[vlo]; + bb = v_to_r[vlo]; + ya = ytable[RGB_WORD_BYTE1(yv0)]; + yb = ytable[RGB_WORD_BYTE0(yv0)]; + pix = RGB565_A4_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, clamp_a4_base, ya, rb, gb, bb, RGB_WORD_BYTE1(av0)); + dest0[8] = RGB565_PAIR(pix); + pix = RGB565_A4_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, clamp_a4_base, yb, rb, gb, bb, RGB_WORD_BYTE0(av0)); + dest0[9] = RGB565_PAIR(pix); + dest0 += RGB_TILE_BLOCK_WORDS; + + ya = ytable[RGB_WORD_BYTE1(yv1)]; + yb = ytable[RGB_WORD_BYTE0(yv1)]; + pix = RGB565_A4_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, clamp_a4_base, ya, rb, gb, bb, RGB_WORD_BYTE1(av1)); + dest1[8] = RGB565_PAIR(pix); + pix = RGB565_A4_BIASED(clamp_r_base, clamp_g_base, clamp_b_base, clamp_a4_base, yb, rb, gb, bb, RGB_WORD_BYTE0(av1)); + dest1[9] = RGB565_PAIR(pix); + dest1 += RGB_TILE_BLOCK_WORDS; + + count--; + } while (count != 0); + + S.u = u; + S.v = v; + S.y0 = y0; + S.y1 = y1; + S.a0 = a0; + S.a1 = a1; +} + +void YUV_16a4m_4x2(u32 count) +{ + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u32 PTR4* a0; + u32 PTR4* a1; + u32 pitch; + u8 PTR4* base; + u8 PTR4* dest0_start; + u8 PTR4* dest1_start; + s32 row0; + s32 row1; + u32 tiledPitch; + + dest0_start = S.dest0; + dest1_start = S.dest1; + dest0 = (u32 PTR4*)dest0_start; + dest1 = (u32 PTR4*)dest1_start; + y0 = S.y0; + y1 = S.y1; + a0 = S.a0; + a1 = S.a1; + pitch = S.pitch; + base = S.base; + tiledPitch = RGB_TILE_PITCH16(pitch); + + row0 = 
RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 = dest0_start + count * RGB_16_4X2_ROW_BYTES; + S.dest1 = dest1_start + count * RGB_16_4X2_ROW_BYTES; + + do { + u32 av0 = *a0++; + u32 yv0 = *y0++; + u32 av1 = *a1++; + u32 yv1 = *y1++; + u32 p0 = (u16)clamp_a4[RGB_WORD_BYTE3(av0)] | (u16)mono16[RGB_WORD_BYTE3(yv0)]; + u32 p1 = (u16)clamp_a4[RGB_WORD_BYTE2(av0)] | (u16)mono16[RGB_WORD_BYTE2(yv0)]; + u32 p2 = (u16)clamp_a4[RGB_WORD_BYTE1(av0)] | (u16)mono16[RGB_WORD_BYTE1(yv0)]; + u32 p3 = (u16)clamp_a4[RGB_WORD_BYTE0(av0)] | (u16)mono16[RGB_WORD_BYTE0(yv0)]; + + dest0[0] = (p0 << 16) | p1; + dest0[1] = (p2 << 16) | p3; + + p0 = (u16)clamp_a4[RGB_WORD_BYTE3(av1)] | (u16)mono16[RGB_WORD_BYTE3(yv1)]; + p1 = (u16)clamp_a4[RGB_WORD_BYTE2(av1)] | (u16)mono16[RGB_WORD_BYTE2(yv1)]; + p2 = (u16)clamp_a4[RGB_WORD_BYTE1(av1)] | (u16)mono16[RGB_WORD_BYTE1(yv1)]; + p3 = (u16)clamp_a4[RGB_WORD_BYTE0(av1)] | (u16)mono16[RGB_WORD_BYTE0(yv1)]; + dest1[0] = (p0 << 16) | p1; + dest1[1] = (p2 << 16) | p3; + + dest0 += RGB_TILE_HALF_BLOCK_WORDS; + dest1 += RGB_TILE_HALF_BLOCK_WORDS; + --count; + } while (count != 0); + + S.y0 = y0; + S.y1 = y1; + S.a0 = a0; + S.a1 = a1; +} + +void YUV_16a4mx2_4x2(u32 count) +{ + u32 PTR4* dest0; + u32 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u32 PTR4* a0; + u32 PTR4* a1; + u32 pitch; + u8 PTR4* base; + u8 PTR4* dest0_start; + u8 PTR4* dest1_start; + s32 row0; + s32 row1; + u32 tiledPitch; + + dest0_start = S.dest0; + dest1_start = S.dest1; + dest0 = (u32 PTR4*)dest0_start; + dest1 = (u32 PTR4*)dest1_start; + y0 = S.y0; + y1 = S.y1; + a0 = S.a0; + a1 = S.a1; + pitch = S.pitch; + base = S.base; + tiledPitch = RGB_TILE_PITCH16(pitch); + + row0 = RGB_TILE_ROW(dest0, base, pitch); + row1 = RGB_TILE_ROW(dest1, base, pitch); + dest0 = (u32 PTR4*)RGB_TILE_LOC(base, dest0, 
pitch, tiledPitch, row0); + dest1 = (u32 PTR4*)RGB_TILE_LOC(base, dest1, pitch, tiledPitch, row1); + + S.dest0 = dest0_start + count * RGB_16_X2_4X2_ROW_BYTES; + S.dest1 = dest1_start + count * RGB_16_X2_4X2_ROW_BYTES; + + do { + u32 av0 = *a0++; + u32 yv0 = *y0++; + u32 av1 = *a1++; + u32 yv1 = *y1++; + u32 p0 = (u16)clamp_a4[RGB_WORD_BYTE3(av0)] | (u16)mono16[RGB_WORD_BYTE3(yv0)]; + u32 p1 = (u16)clamp_a4[RGB_WORD_BYTE2(av0)] | (u16)mono16[RGB_WORD_BYTE2(yv0)]; + u32 p2 = (u16)clamp_a4[RGB_WORD_BYTE1(av0)] | (u16)mono16[RGB_WORD_BYTE1(yv0)]; + u32 p3 = (u16)clamp_a4[RGB_WORD_BYTE0(av0)] | (u16)mono16[RGB_WORD_BYTE0(yv0)]; + + dest0[0] = (p0 << 16) | p0; + dest0[1] = (p1 << 16) | p1; + dest0[8] = (p2 << 16) | p2; + dest0[9] = (p3 << 16) | p3; + + p0 = (u16)clamp_a4[RGB_WORD_BYTE3(av1)] | (u16)mono16[RGB_WORD_BYTE3(yv1)]; + p1 = (u16)clamp_a4[RGB_WORD_BYTE2(av1)] | (u16)mono16[RGB_WORD_BYTE2(yv1)]; + p2 = (u16)clamp_a4[RGB_WORD_BYTE1(av1)] | (u16)mono16[RGB_WORD_BYTE1(yv1)]; + p3 = (u16)clamp_a4[RGB_WORD_BYTE0(av1)] | (u16)mono16[RGB_WORD_BYTE0(yv1)]; + dest1[0] = (p0 << 16) | p0; + dest1[1] = (p1 << 16) | p1; + dest1[8] = (p2 << 16) | p2; + dest1[9] = (p3 << 16) | p3; + + dest0 += RGB_TILE_BLOCK_WORDS; + dest1 += RGB_TILE_BLOCK_WORDS; + --count; + } while (count != 0); + + S.y0 = y0; + S.y1 = y1; + S.a0 = a0; + S.a1 = a1; +} + +void YUV_16_4x2_odd(void) +{ +} + +void YUV_32_4x2_odd(void) +{ +} + +void YUV_16x2_4x2_odd(void) +{ +} + +void YUV_16a4x2_4x2_odd(void) +{ +} + +void YUV_32x2_4x2_odd(void) +{ +} + +void YUV_16a4_4x2_odd(void) +{ +} + +void YUV_32a_4x2_odd(void) +{ +} + +void YUV_32ax2_4x2_odd(void) +{ +} + +void PTR4* GetTiledRgbLoc(void PTR4* ptr, u32 tilePitch) +{ + s32 row; + + row = RGB_TILE_ROW(ptr, S.base, S.pitch); + + return RGB_TILE_LOC(S.base, ptr, S.pitch, tilePitch, row); +} diff --git a/src/bink/src/sdk/decode/ngc/ngcrgb.h b/src/bink/src/sdk/decode/ngc/ngcrgb.h new file mode 100644 index 000000000..d3120548e --- /dev/null +++ 
b/src/bink/src/sdk/decode/ngc/ngcrgb.h @@ -0,0 +1,61 @@ +#ifndef BINK_DECODE_NGC_NGCRGB_H +#define BINK_DECODE_NGC_NGCRGB_H + +#include "bink.h" + +#define RGB_CONTEXT_RESERVED_SIZE 0x18 +#define RGB_LUMA_TABLE_SIZE 0x104 +#define RGB_CLAMP_TABLE_SIZE 0x304 +#define RGB_MONO_TABLE_SIZE 0x100 +#define YUV_TABLE_SIZE 0x400 +#define RGB_SHIFT_TABLE_SIZE 0xc +#define NGC_TABLE_ALIGNMENT 32 + +typedef struct RGBContext +{ + u8 PTR4* dest0; + u8 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u16 PTR4* u; + u16 PTR4* v; + u32 PTR4* a0; + u32 PTR4* a1; + s32 gb; + s32 b; + s32 r; + u8 pad[RGB_CONTEXT_RESERVED_SIZE]; + s32 pitch; + u8 PTR4* base; +} RGBContext; + +#ifdef __cplusplus +extern "C" { +#endif + +extern u32 ytable[RGB_LUMA_TABLE_SIZE]; +extern u32 ytable_x4[RGB_LUMA_TABLE_SIZE]; +extern u32 PTR4* clamp_ytable[RGB_LUMA_TABLE_SIZE]; +extern u32 clamptable[RGB_CLAMP_TABLE_SIZE]; +extern u32 clamp_a4[RGB_LUMA_TABLE_SIZE]; +extern u32 clamp_r[RGB_CLAMP_TABLE_SIZE]; +extern u32 clamp_g[RGB_CLAMP_TABLE_SIZE]; +extern u32 clamp_b[RGB_CLAMP_TABLE_SIZE]; +extern u32 clamp_rh[RGB_CLAMP_TABLE_SIZE]; +extern u32 clamp_gh[RGB_CLAMP_TABLE_SIZE]; +extern u32 clamp_bh[RGB_CLAMP_TABLE_SIZE]; +extern u32 clamp_rr[RGB_CLAMP_TABLE_SIZE]; +extern u32 clamp_gg[RGB_CLAMP_TABLE_SIZE]; +extern u32 clamp_bb[RGB_CLAMP_TABLE_SIZE]; +extern u32 mono16[RGB_MONO_TABLE_SIZE]; +extern u32 mono16x2[RGB_MONO_TABLE_SIZE]; +extern u32 mono32[RGB_MONO_TABLE_SIZE]; +extern RGBContext S; +extern s32 YUVTables[YUV_TABLE_SIZE]; +extern u32 RGBshift[RGB_SHIFT_TABLE_SIZE]; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/bink/src/sdk/decode/ngc/ngcsnd.c b/src/bink/src/sdk/decode/ngc/ngcsnd.c index e69de29bb..ec9c11f9b 100644 --- a/src/bink/src/sdk/decode/ngc/ngcsnd.c +++ b/src/bink/src/sdk/decode/ngc/ngcsnd.c @@ -0,0 +1,1042 @@ +#include "bink.h" +#include "binkngc.h" +#include "ngcsnd.h" +#include +#include +#include + +f32 powf(f32 x, f32 y); + +#define NGC_SOUND_MONO_CHANNELS 1 +#define 
NGC_SOUND_STEREO_CHANNELS 2 +#define NGC_SOUND_LOCK_BUFFER_COUNT 2 +#define NGC_SOUND_ARQ_TASK_COUNT (NGC_SOUND_LOCK_BUFFER_COUNT * NGC_SOUND_STEREO_CHANNELS) +#define NGC_SOUND_LAST_LOCK_INDEX (NGC_SOUND_LOCK_BUFFER_COUNT - 1) +#define NGC_SOUND_RIGHT_TASK_OFFSET NGC_SOUND_STEREO_CHANNELS +/* ARQRequest.owner keeps the NGCSoundState pointer with bit 0 as an in-flight latch. */ +#define NGC_TASK_BUSY_FLAG 1 +#define NGC_TASK_OWNER_MASK (~NGC_TASK_BUSY_FLAG) +#define NGC_SOUND_BITS_16 16 +#define NGC_SOUND_FRAME_ALIGN ARQ_DMA_ALIGNMENT +#define NGC_SOUND_FRAME_ALIGN_MASK (NGC_SOUND_FRAME_ALIGN - 1) +#define NGC_SOUND_BUFFER_ALIGN 0x40 +#define NGC_SOUND_BUFFER_ALIGN_MASK (NGC_SOUND_BUFFER_ALIGN - 1) +#define NGC_SOUND_FRAMES_PER_SECOND 25 +#define NGC_SOUND_BUFFER_MILLISECONDS 800 +#define NGC_SOUND_MILLISECONDS_PER_SECOND 1000 +#define NGC_SOUND_PERCENT_SCALE 100 +#define NGC_SOUND_STARVATION_PERCENT 90 +#define NGC_DEFAULT_STARVATION_TIME 0x2d0 +#define NGC_SOUND_MAX_BUSY_POLLS 99999 +#define NGC_SOUND_BITS_TO_BYTES_SHIFT 3 +#define NGC_SOUND_BEST_SIZE_SHIFT 5 + +#define BINK_NGC_VOLUME_MAX 0x7fff +#define BINK_NGC_PAN_MAX 0x10000 + +#define AX_VOICE_PRIORITY_BINK 0x1f +#define AX_SYNC_VOLUME_MIX (AX_SYNC_FLAG_COPYVOL | AX_SYNC_FLAG_COPYAXPBMIX | AX_SYNC_FLAG_COPYMXRCTRL) + +#define NGC_SAMPLE_HALF_SHIFT 16 +#define NGC_SAMPLE_BYTE_SHIFT 8 +#define NGC_SAMPLE_HIGH_HALF_MASK 0xffff0000 +#define NGC_SAMPLE_LOW_HALF_MASK 0xffff +#define NGC_SAMPLE_HIGH_BYTE_MASK 0xff00 +#define NGC_SAMPLE_LOW_BYTE_MASK 0xff +#define NGC_SAMPLE_HIGH_HALF(value) ((value) >> NGC_SAMPLE_HALF_SHIFT) +#define NGC_SAMPLE_LOW_HALF(value) ((value) & NGC_SAMPLE_LOW_HALF_MASK) +#define NGC_SAMPLE_LEFT_8_PAIR(value) \ + ((((value) >> NGC_SAMPLE_HALF_SHIFT) & NGC_SAMPLE_HIGH_BYTE_MASK) | \ + (((value) >> NGC_SAMPLE_BYTE_SHIFT) & NGC_SAMPLE_LOW_BYTE_MASK)) +#define NGC_SAMPLE_RIGHT_8_PAIR(value) \ + ((((value) >> NGC_SAMPLE_BYTE_SHIFT) & NGC_SAMPLE_HIGH_BYTE_MASK) | \ + ((value) & 
NGC_SAMPLE_LOW_BYTE_MASK)) +#define NGC_SAMPLE_LEFT_16_PAIR(first, second) \ + (NGC_SAMPLE_HIGH_HALF(first) | ((second) & NGC_SAMPLE_HIGH_HALF_MASK)) +#define NGC_SAMPLE_RIGHT_16_PAIR(first, second) \ + (NGC_SAMPLE_LOW_HALF(first) | ((second) << NGC_SAMPLE_HALF_SHIFT)) +#define NGC_SOUND_HALF_BUFFER_SHIFT 1 +#define NGC_STEREO16_GROUP_SHIFT 3 +#define NGC_STEREO16_TAIL_SHIFT 2 +#define NGC_STEREO8_GROUP_SHIFT 2 +#define NGC_STEREO8_TAIL_SHIFT 1 + +typedef struct NGCSoundState NGCSoundState; + +typedef enum NGCPlayState +{ + NGC_PLAY_STATE_STOPPED, + NGC_PLAY_STATE_STARVED, + NGC_PLAY_STATE_RUNNING +} NGCPlayState; + +typedef enum NGCSoundOnOff +{ + NGC_SOUND_OFF, + NGC_SOUND_ON +} NGCSoundOnOff; + +typedef enum NGCSoundPauseState +{ + NGC_SOUND_UNPAUSED, + NGC_SOUND_PAUSED +} NGCSoundPauseState; + +struct NGCSoundState +{ + s32 volume; + f32 pan; + NGCPlayState play_state; + NGCSoundPauseState paused; + s32 lock_index; + u32 play_cursor; /* ARAM write cursor for the next decoded frame */ + u32 pending_end; /* delayed voice end address when the cursor wraps */ + u32 frame_size; /* bytes submitted per Bink audio frame */ + u32 channel_stride; /* bytes reserved per channel in ARAM */ + u32 starvation_threshold; + u8 PTR4* decode_buffer; /* MRAM staging buffers for ARQ uploads */ + u8 PTR4* stereo_buffer; /* temporary split buffer for interleaved stereo */ + u32 last_ready_time; + u32 starvation_time; + ARQRequest tasks[NGC_SOUND_ARQ_TASK_COUNT]; /* two lock buffers by left/right ARQ uploads */ + AXVPB PTR4* left_voice; + AXVPB PTR4* right_voice; + u8 PTR4* audio_buffer; /* ARAM ring buffer base */ + u32 address_shift; /* AX addresses are samples for 16-bit, bytes for 8-bit */ +}; + +typedef BINKSND NGCBinkSound; +typedef char NGCSoundStateFitsInBinkSndData + [(sizeof(NGCSoundState) <= sizeof(((BINKSND PTR4*)0)->snddata)) ? 
1 : -1]; + +#define NGC_STATE(ptr) ((NGCSoundState PTR4*)(ptr)) +#define NGC_SND(snd) ((NGCBinkSound PTR4*)(snd)) +#define NGC_SOUND_STATE(snd) ((NGCSoundState PTR4*)NGC_SND(snd)->snddata) +#define NGC_TASK(state, index) (&NGC_STATE(state)->tasks[(index)]) +#define NGC_TASK_FOR_INDEX(state, index, side) NGC_TASK(state, (side) + ((index) << 1)) +#define NGC_LEFT_VOICE(ptr) (NGC_STATE(ptr)->left_voice) +#define NGC_RIGHT_VOICE(ptr) (NGC_STATE(ptr)->right_voice) +#define NGC_CHANNEL_STRIDE(ptr) (NGC_STATE(ptr)->channel_stride) +#define NGC_ADDRESS_SHIFT(ptr) (NGC_STATE(ptr)->address_shift) +#define NGC_ADVANCE_U32_BYTES(ptr, bytes) ((u32 PTR4*)((u8 PTR4*)(ptr) + (bytes))) +#define NGC_ADVANCE_U16_BYTES(ptr, bytes) ((u16 PTR4*)((u8 PTR4*)(ptr) + (bytes))) +#define NGC_ALIGN_UP(value, mask) (((value) + (mask)) & ~(mask)) +#define NGC_SOUND_RATE_BYTES(sound) (((sound)->freq * (sound)->bits) >> NGC_SOUND_BITS_TO_BYTES_SHIFT) +#define NGC_SOUND_BEST_SIZE_MASK(chans) (-((chans) << NGC_SOUND_BEST_SIZE_SHIFT)) +#define NGC_SOUND_PLAY_LIMIT(state) \ + (((u32)(state)->audio_buffer + (state)->channel_stride) - (state)->frame_size) +#define NGC_SOUND_RIGHT_CURSOR(state) ((state)->play_cursor + (state)->channel_stride) +#define NGC_AX_ADDR(addr, shift) ((addr) >> (shift)) +#define NGC_AX_END_ADDR(addr, shift) (NGC_AX_ADDR(addr, shift) - 1) +#define NGC_AX_RIGHT_ADDR(state, addr) \ + (((addr) + NGC_CHANNEL_STRIDE(state)) >> NGC_ADDRESS_SHIFT(state)) +#define NGC_AX_RIGHT_END_ADDR(state, addr) (NGC_AX_RIGHT_ADDR(state, addr) - 1) +#define NGC_AX_CURRENT_CURSOR(voice, shift) (AX_VOICE_CURRENT_ADDR(voice) << (shift)) +#define NGC_AX_END_CURSOR(voice, shift) (AX_VOICE_END_ADDR(voice) << (shift)) + +#define AX_VOICE_END_ADDR(voice) (*(u32 PTR4*)&(voice)->pb.addr.endAddressHi) +#define AX_VOICE_CURRENT_ADDR(voice) (*(u32 PTR4*)&(voice)->pb.addr.currentAddressHi) +#define AX_VOICE_MIX(voice) (&(voice)->pb.mix) +#define AX_VOICE_VOLUME(voice) ((voice)->pb.ve.currentVolume) +#define 
AX_VOICE_VOLUME_DELTA(voice) ((voice)->pb.ve.currentDelta) +#define AX_VOICE_MIX_MODE(voice) ((voice)->pb.mixerCtrl) +#define AX_VOICE_SYNC_FLAGS(voice) ((voice)->sync) + +extern const f64 BINK_NGC_SOUND_U32_TO_F64_BIAS; +extern const f64 BINK_NGC_SOUND_SIGN_BIAS; +/* Pan curve and mixing constants used by NGC_SoundVolume(), NGC_SoundInit(), Pan() and Open(). */ +const f32 BINK_NGC_PAN_ONE = 1.0f; +const f32 BINK_NGC_PAN_EXPONENT = 0.3f; +const f32 BINK_NGC_MIX_SCALE = 65535.0f; +const f32 BINK_NGC_AX_SAMPLE_RATE = 32000.0f; +const f32 BINK_NGC_PAN_TO_FLOAT = 0.0000152587890625f; /* 1 / 65536 */ +const f32 BINK_NGC_PAN_CENTER = 0.5f; + +#define NGC_SOUND_PAN_ONE BINK_NGC_PAN_ONE +#define NGC_SOUND_PAN_EXPONENT BINK_NGC_PAN_EXPONENT +#define NGC_SOUND_MIX_SCALE BINK_NGC_MIX_SCALE +#define NGC_SOUND_AX_SAMPLE_RATE BINK_NGC_AX_SAMPLE_RATE +#define NGC_SOUND_PAN_TO_FLOAT BINK_NGC_PAN_TO_FLOAT +#define NGC_SOUND_PAN_CENTER BINK_NGC_PAN_CENTER + +static void NGC_SoundPlay(BINKSND PTR4* snd, u32 index, u32 size); +static void NGC_StarvedClear(BINKSND PTR4* snd); +static void NGC_SoundVolume(BINKSND PTR4* snd); + +const char BINK_ERROR_OPENING_FILE[] = "Error opening file."; +const char BINK_ERROR_NOT_BINK[20] = "Not a Bink file."; +const char BINK_ERROR_NO_COMPRESSED_FRAMES[] = "The file doesn't contain any compressed frames yet."; +const char BINK_ERROR_OUT_OF_MEMORY[20] = "Out of memory."; + +const f64 BINK_NGC_SOUND_U32_TO_F64_BIAS = 4503599627370496.0; /* 2^52 */ +const f64 BINK_NGC_SOUND_SIGN_BIAS = 2147483648.0; /* 2^31 */ +/* ARQ completion callback installed by NGC_SoundPlay() on the left-channel upload. `task` is the ARQRequest pointer; the owning NGCSoundState is recovered from its owner word. Starts the acquired voices if playback is still stopped. */ +static void startVoices(u32 task) +{ + ARQRequest PTR4* arq_task = (ARQRequest PTR4*)task; + NGCSoundState PTR4* state = (NGCSoundState PTR4*)(arq_task->owner & NGC_TASK_OWNER_MASK); + + if (state != 0 && state->play_state == NGC_PLAY_STATE_STOPPED) { + AXVPB PTR4* voice = NGC_LEFT_VOICE(state); + + if (voice != 0) { + AXSetVoiceState(voice, AX_PB_STATE_RUN); + } + + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceState(voice, AX_PB_STATE_RUN); + } + + state->play_state = NGC_PLAY_STATE_RUNNING; + } + + arq_task->owner &= NGC_TASK_OWNER_MASK; /* clear the 
in-flight latch after the left upload */ +} +static void NGC_SoundPlay(BINKSND PTR4* snd, u32 index, u32 size) +{ /* Upload `size` staged bytes of lock buffer `index` to ARAM at play_cursor (right channel first, when a right voice exists), then advance play_cursor and refresh last_ready_time. */ + NGCSoundState PTR4* state = NGC_SOUND_STATE(snd); + u32 end_pos = NGC_SOUND_STATE(snd)->play_cursor + size; + ARQRequest PTR4* task = NGC_TASK(state, index); + ARQRequest PTR4* second_task; + + if (NGC_SOUND_STATE(snd)->right_voice != 0) { + second_task = &NGC_SOUND_STATE(snd)->tasks[NGC_SOUND_RIGHT_TASK_OFFSET]; + second_task += index; + + DCFlushRange((void PTR4*)second_task->source, size); + ARQPostRequest(second_task, 0, ARQ_TYPE_MRAM_TO_ARAM, ARQ_PRIORITY_HIGH, (u32)second_task->source, + NGC_SOUND_RIGHT_CURSOR(NGC_SOUND_STATE(snd)), size, 0); + } + /* Only the left upload carries the owner word and the startVoices completion callback. */ + DCFlushRange((void PTR4*)task->source, size); + ARQPostRequest(task, task->owner, ARQ_TYPE_MRAM_TO_ARAM, ARQ_PRIORITY_HIGH, (u32)task->source, + NGC_SOUND_STATE(snd)->play_cursor, size, startVoices); + /* Less than one frame would be left before the end of the channel buffer: end the voices at end_pos and wrap the cursor, or defer that through pending_end while AX is still playing beyond end_pos. */ + if (end_pos > NGC_SOUND_PLAY_LIMIT(NGC_SOUND_STATE(snd))) { + AXVPB PTR4* voice = NGC_LEFT_VOICE(state); + u32 shift = NGC_ADDRESS_SHIFT(state); + + if (NGC_AX_CURRENT_CURSOR(voice, shift) > end_pos) { + NGC_SOUND_STATE(snd)->pending_end = end_pos; + } else { + AXSetVoiceEndAddr(voice, NGC_AX_END_ADDR(end_pos, shift)); + + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceEndAddr(voice, NGC_AX_RIGHT_END_ADDR(state, end_pos)); + } + + end_pos = (u32)NGC_SOUND_STATE(snd)->audio_buffer; + } + } + + NGC_SOUND_STATE(snd)->play_cursor = end_pos; + NGC_SOUND_STATE(snd)->last_ready_time = RADTimerRead(); +} +/* Stop the voices and rewind the stream: cursor, current, loop and end addresses are reset to span the whole per-channel ARAM buffer, busy ARQ requests are removed and every owner word is reseeded. Always returns 1. */ +static s32 NGC_SoundReinit(BINKSND PTR4* snd) +{ + NGCSoundState PTR4* state; + AXVPB PTR4* voice; + u32 addr; + u32 end; + u32 i; + + state = NGC_SOUND_STATE(snd); + voice = NGC_LEFT_VOICE(state); + if (voice != 0) { + AXSetVoiceState(voice, AX_PB_STATE_STOP); + } + + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceState(voice, AX_PB_STATE_STOP); + } + + addr = (u32)NGC_SOUND_STATE(snd)->audio_buffer; + NGC_SOUND_STATE(snd)->lock_index = -1; + 
NGC_SOUND_STATE(snd)->play_state = NGC_PLAY_STATE_STOPPED; + NGC_SOUND_STATE(snd)->play_cursor = addr; + /* The left voice is used without a null check from here on. */ + AXSetVoiceCurrentAddr(NGC_LEFT_VOICE(state), NGC_AX_ADDR(addr, NGC_ADDRESS_SHIFT(state))); + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceCurrentAddr(voice, NGC_AX_RIGHT_ADDR(state, addr)); + } + + addr = NGC_SOUND_STATE(snd)->play_cursor; + AXSetVoiceLoopAddr(NGC_LEFT_VOICE(state), NGC_AX_ADDR(addr, NGC_ADDRESS_SHIFT(state))); + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceLoopAddr(voice, NGC_AX_RIGHT_ADDR(state, addr)); + } + + end = NGC_SOUND_STATE(snd)->play_cursor + NGC_SOUND_STATE(snd)->channel_stride; + AXSetVoiceEndAddr(NGC_LEFT_VOICE(state), NGC_AX_END_ADDR(end, NGC_ADDRESS_SHIFT(state))); + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceEndAddr(voice, NGC_AX_RIGHT_END_ADDR(state, end)); + } + + NGC_SOUND_STATE(snd)->pending_end = 0; + for (i = 0; i < NGC_SOUND_ARQ_TASK_COUNT; ++i) { + if ((state->tasks[i].owner & NGC_TASK_BUSY_FLAG) != 0) { + ARQRemoveRequest(&state->tasks[i]); + } + + state->tasks[i].owner = (u32)state; /* reset owner and clear the busy latch */ + } + + return 1; +} +/* Write the volume and the left/right mix levels directly into the voice's parameter block (every other mix field is zeroed) and raise the sync flags so the change is picked up. */ +static void SetStreamVolumePan(AXVPB PTR4* voice, u32 volume, u32 left, u32 right) +{ + AXPBMIX PTR4* mix = AX_VOICE_MIX(voice); + + memset(mix, 0, sizeof(*mix)); + mix->vL = left; + mix->vR = right; + AX_VOICE_VOLUME(voice) = volume; + AX_VOICE_VOLUME_DELTA(voice) = 0; + AX_VOICE_MIX_MODE(voice) = AX_MIX_MODE_DEFAULT; + AX_VOICE_SYNC_FLAGS(voice) |= AX_SYNC_VOLUME_MIX; +} +/* Recompute the gains as powf(1 - pan, exponent) and powf(pan, exponent) scaled by NGC_SOUND_MIX_SCALE and apply them with the current volume. A mono stream drives both outputs from the left voice; otherwise the left voice feeds only the left output and the right voice only the right. */ +static void NGC_SoundVolume(BINKSND PTR4* snd) +{ + s32 left = (s32)(powf(NGC_SOUND_PAN_ONE - NGC_SOUND_STATE(snd)->pan, NGC_SOUND_PAN_EXPONENT) * NGC_SOUND_MIX_SCALE); + s32 right = (s32)(powf(NGC_SOUND_STATE(snd)->pan, NGC_SOUND_PAN_EXPONENT) * NGC_SOUND_MIX_SCALE); + AXVPB PTR4* voice = NGC_SOUND_STATE(snd)->left_voice; + + if (voice != 0) { + SetStreamVolumePan(voice, NGC_SOUND_STATE(snd)->volume, left, + NGC_SND(snd)->chans == 
NGC_SOUND_MONO_CHANNELS ? right : 0); + } + + voice = NGC_SOUND_STATE(snd)->right_voice; + if (voice != 0) { + SetStreamVolumePan(voice, NGC_SOUND_STATE(snd)->volume, 0, right); + } +} +/* Size the buffers from the stream format, allocate the MRAM staging and ARAM buffers, then acquire and configure one AX voice per channel. Returns 1 on success and 0 on failure. NOTE(review): the failure paths release nothing allocated here; presumably the caller invokes Close() afterwards - confirm. */ +static s32 NGC_SoundInit(BINKSND PTR4* snd) +{ + u32 rate_bytes; + u32 frame_size; + u32 buf_size; + u32 i; + u32 start; + u32 end; + AXPBADDR addr; + AXPBSRC src; + AXVPB PTR4** voices; + NGCSoundState PTR4* owner; + + NGC_SOUND_STATE(snd)->starvation_time = NGC_DEFAULT_STARVATION_TIME; + NGC_SOUND_STATE(snd)->lock_index = -1; + NGC_SOUND_STATE(snd)->play_state = NGC_PLAY_STATE_STOPPED; + /* Per-channel frame and buffer sizes are derived from the stream's byte rate and rounded up to their alignment masks. */ + rate_bytes = NGC_SOUND_RATE_BYTES(NGC_SND(snd)); + frame_size = NGC_ALIGN_UP(rate_bytes / NGC_SOUND_FRAMES_PER_SECOND, NGC_SOUND_FRAME_ALIGN_MASK); + buf_size = NGC_ALIGN_UP((rate_bytes * NGC_SOUND_BUFFER_MILLISECONDS) / NGC_SOUND_MILLISECONDS_PER_SECOND, + NGC_SOUND_BUFFER_ALIGN_MASK); + + NGC_SOUND_STATE(snd)->frame_size = frame_size; + NGC_SOUND_STATE(snd)->channel_stride = buf_size; + NGC_SND(snd)->BestSizeIn16 = frame_size * NGC_SND(snd)->chans; + NGC_SOUND_STATE(snd)->starvation_threshold = buf_size - (buf_size * NGC_SOUND_STARVATION_PERCENT) / NGC_SOUND_PERCENT_SCALE; + if (NGC_SND(snd)->bits != NGC_SOUND_BITS_16) { + NGC_SND(snd)->BestSizeIn16 = frame_size * NGC_SND(snd)->chans * sizeof(s16); /* non-16-bit streams report twice the byte count */ + } + + NGC_SND(snd)->BestSizeMask = NGC_SOUND_BEST_SIZE_MASK(NGC_SND(snd)->chans); + NGC_SOUND_STATE(snd)->decode_buffer = (u8 PTR4*)radmalloc(NGC_SOUND_STATE(snd)->channel_stride * NGC_SND(snd)->chans); + if (NGC_SOUND_STATE(snd)->decode_buffer == 0) { + return 0; + } + /* Stereo streams also need a scratch buffer for the interleaved data that Unlock() splits. */ + if (NGC_SND(snd)->chans == NGC_SOUND_STEREO_CHANNELS) { + NGC_SOUND_STATE(snd)->stereo_buffer = (u8 PTR4*)radmalloc(NGC_SOUND_STATE(snd)->channel_stride); + if (NGC_SOUND_STATE(snd)->stereo_buffer == 0) { + return 0; + } + } + + NGC_SOUND_STATE(snd)->audio_buffer = (u8 PTR4*)radaudiomalloc(NGC_SOUND_STATE(snd)->channel_stride * NGC_SND(snd)->chans); + if (NGC_SOUND_STATE(snd)->audio_buffer == 0) { + return 0; + } + + owner = 
NGC_SOUND_STATE(snd); + NGC_SOUND_STATE(snd)->address_shift = (NGC_SND(snd)->bits == NGC_SOUND_BITS_16); /* 1 for 16-bit streams, 0 otherwise */ + + for (i = 0; i < NGC_SOUND_ARQ_TASK_COUNT; ++i) { + owner->tasks[i].owner = (u32)owner; /* seed owner and clear the busy latch */ + } + + NGC_SOUND_STATE(snd)->play_cursor = (u32)NGC_SOUND_STATE(snd)->audio_buffer; + NGC_SOUND_STATE(snd)->pending_end = 0; + /* left_voice and right_voice are adjacent members, so they are indexed here as a two-entry array. */ + voices = &owner->left_voice; + for (i = 0; i < NGC_SND(snd)->chans; ++i) { + voices[i] = AXAcquireVoice(AX_VOICE_PRIORITY_BINK, 0, 0); + if (voices[i] == 0) { + return 0; + } + /* Channel i loops over its own channel_stride-sized slice of the ARAM buffer. */ + start = ((u32)owner->audio_buffer + (i * owner->channel_stride)) >> owner->address_shift; + end = (((u32)owner->audio_buffer + ((i + 1) * owner->channel_stride)) >> + owner->address_shift) - + 1; + + addr.loopFlag = 1; + addr.format = (NGC_SND(snd)->bits == NGC_SOUND_BITS_16) ? AX_PB_FORMAT_PCM16 : AX_PB_FORMAT_PCM8; + addr.loopAddressHi = start >> AX_ADDR_HIGH_SHIFT; + addr.loopAddressLo = start; + addr.endAddressHi = end >> AX_ADDR_HIGH_SHIFT; + addr.endAddressLo = end; + addr.currentAddressHi = addr.loopAddressHi; + addr.currentAddressLo = addr.loopAddressLo; + AXSetVoiceAddr(voices[i], &addr); + /* Resample linearly unless the stream frequency equals AX_SAMPLE_RATE. */ + if (NGC_SND(snd)->freq == AX_SAMPLE_RATE) { + AXSetVoiceSrcType(voices[i], AX_SRC_TYPE_NONE); + } else { + AXSetVoiceSrcType(voices[i], AX_SRC_TYPE_LINEAR); + src.ratioHi = 1; + src.ratioLo = 0; + src.currentAddressFrac = 0; + src.last_samples[0] = 0; + src.last_samples[1] = 0; + src.last_samples[2] = 0; + src.last_samples[3] = 0; + AXSetVoiceSrc(voices[i], &src); + AXSetVoiceSrcRatio(voices[i], (f32)NGC_SND(snd)->freq / NGC_SOUND_AX_SAMPLE_RATE); + } + } + + NGC_SoundVolume(snd); + return 1; +} +/* Stop whichever AX voices are acquired; cursors and buffers are left untouched. */ +static void NGC_SoundPause(BINKSND PTR4* snd) +{ + NGCSoundState PTR4* state = NGC_SOUND_STATE(snd); + AXVPB PTR4* voice = NGC_LEFT_VOICE(state); + + if (voice != 0) { + AXSetVoiceState(voice, AX_PB_STATE_STOP); + } + + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceState(voice, AX_PB_STATE_STOP); + } +} +/* Counterpart of NGC_SoundPause: put whichever AX voices are acquired back into the run state. */ +static void 
NGC_SoundResume(BINKSND PTR4* snd) +{ + NGCSoundState PTR4* state = NGC_SOUND_STATE(snd); + AXVPB PTR4* voice = NGC_LEFT_VOICE(state); + + if (voice != 0) { + AXSetVoiceState(voice, AX_PB_STATE_RUN); + } + + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceState(voice, AX_PB_STATE_RUN); + } +} +/* Mark the stream paused, stop and free both voices, and release the staging, stereo and ARAM buffers. Every pointer is cleared after release, so a repeated call finds nothing left to free. */ +static void NGC_SoundShutdown(BINKSND PTR4* snd) +{ + u32 i; + void PTR4* ptr; + AXVPB PTR4* voice; + AXVPB PTR4** voices; + NGCSoundState PTR4* state; + + NGC_SOUND_STATE(snd)->paused = 1; /* same value as NGC_SOUND_PAUSED */ + + state = NGC_SOUND_STATE(snd); + voice = NGC_LEFT_VOICE(state); + if (voice != 0) { + AXSetVoiceState(voice, AX_PB_STATE_STOP); + } + + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceState(voice, AX_PB_STATE_STOP); + } + /* left_voice and right_voice are walked as a two-entry array. */ + i = 0; + voices = &state->left_voice; + for (; i < NGC_SOUND_STEREO_CHANNELS; ++i) { + voice = voices[i]; + if (voice != 0) { + AXFreeVoice(voice); + voices[i] = 0; + } + } + + ptr = NGC_SOUND_STATE(snd)->decode_buffer; + if (ptr != 0) { + radfree(ptr); + NGC_SOUND_STATE(snd)->decode_buffer = 0; + } + + ptr = NGC_SOUND_STATE(snd)->stereo_buffer; + if (ptr != 0) { + radfree(ptr); + NGC_SOUND_STATE(snd)->stereo_buffer = 0; + } + + ptr = NGC_SOUND_STATE(snd)->audio_buffer; + if (ptr != 0) { + radaudiofree(ptr); + NGC_SOUND_STATE(snd)->audio_buffer = 0; + } +} +/* BINKSND Lock callback. When Ready() selected a staging buffer (lock_index >= 0), stores in *addr the buffer to decode into and in *len the byte count (all channels) and returns 1; returns 0 when nothing is locked. */ +static s32 Lock(BINKSND PTR4* snd, u8 PTR4* PTR4* addr, u32 PTR4* len) +{ + NGCBinkSound PTR4* ngc_snd; + NGCSoundState PTR4* state; + u32 writable; + u32 play_pos; + u32 half_size; + u32 block_size; + u32 chans; + ARQRequest PTR4* task; + ARQRequest PTR4* right_task; + u8 PTR4* out; + u8 PTR4* out_addr; + + ngc_snd = NGC_SND(snd); + if (NGC_SOUND_STATE(ngc_snd)->lock_index >= 0) { + state = NGC_SOUND_STATE(ngc_snd); + writable = state->play_cursor; + play_pos = NGC_AX_CURRENT_CURSOR(NGC_LEFT_VOICE(state), NGC_ADDRESS_SHIFT(state)); + /* Contiguous room per channel: up to the end of the channel buffer, or up to one byte behind the AX play position. */ + if (writable >= play_pos) { + block_size = state->channel_stride; + writable = ((u32)state->audio_buffer + block_size) - writable; + } else { + 
block_size = state->channel_stride; + writable = (play_pos - writable) - 1; + } + /* Clamp to at most half a channel buffer and at least one frame. */ + half_size = block_size >> NGC_SOUND_HALF_BUFFER_SHIFT; + if (writable > half_size) { + writable = half_size; + } else if (writable < state->frame_size) { + writable = state->frame_size; + } + + /* The lock index selects one half of the MRAM staging buffer. */ + task = NGC_TASK(state, state->lock_index); + out = state->decode_buffer + ((state->lock_index * block_size) >> NGC_SOUND_HALF_BUFFER_SHIFT); + chans = ngc_snd->chans; + writable *= chans; /* the reported length covers every channel */ + task->source = (u32)out; + out_addr = out; + right_task = NGC_TASK(state, state->lock_index + NGC_SOUND_RIGHT_TASK_OFFSET); + right_task->source = (u32)(out + state->channel_stride); /* the right-channel staging area sits channel_stride bytes after the left one */ + + if (ngc_snd->chans == NGC_SOUND_STEREO_CHANNELS) { + /* Bink decodes interleaved stereo; AX/ARAM playback uses split left/right channels. */ + out_addr = state->stereo_buffer; + } + + *addr = out_addr; + *len = writable; + + return 1; + } + + return 0; +} +/* BINKSND Unlock callback: `filled` bytes were written to the buffer handed out by Lock(). Marks the left request busy, de-interleaves stereo data, zero-pads each channel up to the frame alignment and queues the upload through NGC_SoundPlay(). Returns 0 if no buffer was locked, 1 otherwise. */ +static s32 Unlock(BINKSND PTR4* snd, u32 filled) +{ + NGCBinkSound PTR4* ngc_snd; + NGCSoundState PTR4* state; + u32 padded; + + ngc_snd = NGC_SND(snd); + if (NGC_SOUND_STATE(ngc_snd)->lock_index == -1) { + return 0; + } + + state = NGC_SOUND_STATE(ngc_snd); + NGC_TASK(state, state->lock_index)->owner |= NGC_TASK_BUSY_FLAG; + + if (ngc_snd->chans == NGC_SOUND_STEREO_CHANNELS) { + /* Split the temporary interleaved stereo buffer into the two ARQ upload buffers. 
*/ + ARQRequest PTR4* task = NGC_TASK(state, state->lock_index); + u8 PTR4* left = (u8 PTR4*)task->source; + u8 PTR4* right = (u8 PTR4*)NGC_TASK(state, state->lock_index + + NGC_SOUND_RIGHT_TASK_OFFSET)->source; + u8 PTR4* src = state->stereo_buffer; + + if (ngc_snd->bits == NGC_SOUND_BITS_16) { + u32 i; + u32 PTR4* src32 = (u32 PTR4*)src; + u32 PTR4* left32 = (u32 PTR4*)left; + u32 PTR4* right32 = (u32 PTR4*)right; + u32 groups = filled >> NGC_STEREO16_GROUP_SHIFT; + /* Main loop: two interleaved 16-bit pairs (8 bytes) in, one u32 per channel out. */ + for (i = 0; i < groups; ++i) { + u32 first = src32[0]; + u32 second = src32[1]; + src32 += 2; + + *left32++ = NGC_SAMPLE_LEFT_16_PAIR(first, second); + *right32++ = NGC_SAMPLE_RIGHT_16_PAIR(first, second); + } + /* Tail: one remaining 16-bit pair at a time; bytes short of a full pair are dropped. */ + groups = (filled - (groups << NGC_STEREO16_GROUP_SHIFT)) >> NGC_STEREO16_TAIL_SHIFT; + for (i = 0; i < groups; ++i) { + u32 value = *src32++; + + *(u16 PTR4*)left32 = (u16)NGC_SAMPLE_HIGH_HALF(value); + *(u16 PTR4*)right32 = (u16)value; + left32 = NGC_ADVANCE_U32_BYTES(left32, 2); + right32 = NGC_ADVANCE_U32_BYTES(right32, 2); + } + } else { + u32 i; + u32 PTR4* src32 = (u32 PTR4*)src; + u16 PTR4* left16 = (u16 PTR4*)left; + u16 PTR4* right16 = (u16 PTR4*)right; + u32 groups = filled >> NGC_STEREO8_GROUP_SHIFT; + /* Main loop: two interleaved 8-bit pairs (4 bytes) in, one u16 per channel out. */ + for (i = 0; i < groups; ++i) { + u32 value = *src32++; + + *left16++ = (u16)NGC_SAMPLE_LEFT_8_PAIR(value); + *right16++ = (u16)NGC_SAMPLE_RIGHT_8_PAIR(value); + } + /* Tail: one remaining 8-bit pair at a time; an odd final byte is dropped. */ + groups = (filled - (groups << NGC_STEREO8_GROUP_SHIFT)) >> NGC_STEREO8_TAIL_SHIFT; + for (i = 0; i < groups; ++i) { + u16 value = *(u16 PTR4*)src32; + + src32 = NGC_ADVANCE_U32_BYTES(src32, 2); + *(u8 PTR4*)left16 = (u8)(value >> NGC_SAMPLE_BYTE_SHIFT); + *(u8 PTR4*)right16 = (u8)value; + left16 = NGC_ADVANCE_U16_BYTES(left16, 1); + right16 = NGC_ADVANCE_U16_BYTES(right16, 1); + } + } + + filled >>= NGC_SOUND_HALF_BUFFER_SHIFT; /* from here on `filled` counts bytes per channel */ + } + /* Zero-fill each channel's staging data up to the next frame-alignment boundary. */ + padded = filled; + if ((filled & NGC_SOUND_FRAME_ALIGN_MASK) != 0) { + u32 i; + + padded = NGC_ALIGN_UP(filled, NGC_SOUND_FRAME_ALIGN_MASK); + for (i = 0; i < ngc_snd->chans; ++i) { 
+ memset((u8 PTR4*)NGC_TASK_FOR_INDEX(state, i, state->lock_index)->source + filled, 0, + padded - filled); + } + } + /* While stopped, only record the length; once the last lock buffer has been filled, buffers 0 and 1 are queued together and startVoices() starts playback when the left upload completes. */ + if (state->play_state == NGC_PLAY_STATE_STOPPED) { + NGC_TASK(state, state->lock_index)->length = padded; + if (state->lock_index == NGC_SOUND_LAST_LOCK_INDEX) { + NGC_SoundPlay(snd, 0, NGC_TASK(state, 0)->length); + NGC_SoundPlay(snd, 1, NGC_TASK(state, 1)->length); + } + } else { + NGC_SoundPlay(snd, state->lock_index, padded); + } + + return 1; +} +/* Starvation handler: upload one frame of silence at play_cursor and restrict the voices' loop to that frame. Polls the staging buffers for one that is not busy and returns without doing anything if none frees up within NGC_SOUND_MAX_BUSY_POLLS + 1 checks. */ +static void NGC_StarvedClear(BINKSND PTR4* snd) +{ + u32 i; + u32 side; + u32 start; + u32 end; + NGCBinkSound PTR4* ngc_snd; + NGCSoundState PTR4* state; + ARQRequest PTR4* task; + ARQRequest PTR4* right_task; + u8 PTR4* out; + AXVPB PTR4* voice; + + ngc_snd = NGC_SND(snd); + state = NGC_SOUND_STATE(ngc_snd); + i = 0; +check_busy: + /* Wait for one staging half to be free before injecting silence. */ + side = i & NGC_SOUND_LAST_LOCK_INDEX; + if ((NGC_TASK(state, side)->owner & NGC_TASK_BUSY_FLAG) != 0) { + goto busy; + } + start = state->play_cursor; + end = start + state->frame_size; + if (ngc_snd->chans == NGC_SOUND_STEREO_CHANNELS) { + out = state->stereo_buffer; + } else { + out = state->decode_buffer + + ((side * state->channel_stride) >> NGC_SOUND_HALF_BUFFER_SHIFT); + } + /* Both channel requests upload the same zeroed frame. */ + memset(out, 0, state->frame_size); + task = NGC_TASK(state, side); + right_task = task + NGC_SOUND_RIGHT_TASK_OFFSET; + right_task->source = (u32)out; + task->source = (u32)out; + NGC_SoundPlay(snd, side, state->frame_size); + + /* Re-anchor playback to the silent frame if AX has already passed it. 
*/ + voice = NGC_LEFT_VOICE(state); + if (NGC_AX_CURRENT_CURSOR(voice, NGC_ADDRESS_SHIFT(state)) > end) { + AXSetVoiceCurrentAddr(voice, NGC_AX_ADDR(start, NGC_ADDRESS_SHIFT(state))); + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceCurrentAddr(voice, NGC_AX_RIGHT_ADDR(state, start)); + } + } + /* Loop the voices over just the silent frame, from start up to end. */ + AXSetVoiceLoopAddr(NGC_LEFT_VOICE(state), NGC_AX_ADDR(start, NGC_ADDRESS_SHIFT(state))); + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceLoopAddr(voice, NGC_AX_RIGHT_ADDR(state, start)); + } + + AXSetVoiceEndAddr(NGC_LEFT_VOICE(state), NGC_AX_END_ADDR(end, NGC_ADDRESS_SHIFT(state))); + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceEndAddr(voice, NGC_AX_RIGHT_END_ADDR(state, end)); + } + return; + +busy: + ++i; + if ((s32)i <= NGC_SOUND_MAX_BUSY_POLLS) { + goto check_busy; + } +} +/* BINKSND Ready callback: returns nonzero when a staging buffer can be locked and records it in lock_index (-1 otherwise). While running it also applies a deferred end address and checks for starvation - too little queued audio, or too long since the previous call - in which case silence is injected and SoundDroppedOut is set. Returns 0 when paused, switched off or without a left voice. */ +static s32 Ready(BINKSND PTR4* snd) +{ + s32 index; + u32 now; + NGCSoundState PTR4* state; + AXVPB PTR4* voice; + u32 play_pos; + u32 end_pos; + + index = -1; + if (NGC_SOUND_STATE(snd)->paused != 0 || NGC_SND(snd)->OnOff == NGC_SOUND_OFF || + NGC_SOUND_STATE(snd)->left_voice == 0) { + return 0; + } + + now = RADTimerRead(); + voice = NGC_SOUND_STATE(snd)->left_voice; + state = NGC_SOUND_STATE(snd); + play_pos = NGC_AX_CURRENT_CURSOR(voice, NGC_ADDRESS_SHIFT(state)); + if (NGC_SOUND_STATE(snd)->play_state == NGC_PLAY_STATE_RUNNING) { + end_pos = NGC_AX_END_CURSOR(voice, NGC_ADDRESS_SHIFT(state)); + /* A pending wrapped end address becomes valid once playback crosses the wrap. 
*/ + if (play_pos < NGC_SOUND_STATE(snd)->pending_end) { + end_pos = NGC_SOUND_STATE(snd)->pending_end; + AXSetVoiceEndAddr(voice, NGC_AX_END_ADDR(end_pos, NGC_ADDRESS_SHIFT(state))); + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceEndAddr(voice, NGC_AX_RIGHT_END_ADDR(state, end_pos)); + } + NGC_SOUND_STATE(snd)->pending_end = 0; + NGC_SOUND_STATE(snd)->play_cursor = (u32)NGC_SOUND_STATE(snd)->audio_buffer; + } + /* Healthy only if the previous Ready()/NGC_SoundPlay() was recent enough and enough bytes are still queued ahead of the play position. */ + if ((now - NGC_SOUND_STATE(snd)->last_ready_time) < NGC_SOUND_STATE(snd)->starvation_time) { + u32 pending; + /* Bytes between the AX play position and the write cursor, going around the wrap when the cursor is behind. */ + pending = NGC_SOUND_STATE(snd)->play_cursor; + if (play_pos < pending) { + pending -= play_pos; + } else { + pending = (end_pos - play_pos) + (pending - (u32)NGC_SOUND_STATE(snd)->audio_buffer); + } + if (pending >= NGC_SOUND_STATE(snd)->starvation_threshold) { + goto check_tasks; + } + } + /* Starved: inject silence and flag the dropout. */ + NGC_StarvedClear(snd); + snd->SoundDroppedOut = 1; + NGC_SOUND_STATE(snd)->play_state = NGC_PLAY_STATE_STARVED; + } + +check_tasks: + if (NGC_SOUND_STATE(snd)->pending_end == 0) { + /* Only offer a lock when the writer is safely ahead of the AX cursor. 
*/ + if (NGC_SOUND_STATE(snd)->play_cursor >= play_pos || play_pos - NGC_SOUND_STATE(snd)->play_cursor > NGC_SOUND_STATE(snd)->frame_size) { + index = 0; + for (;;) { /* pick the first lock buffer whose left request is not busy */ + if ((NGC_TASK(state, index)->owner & NGC_TASK_BUSY_FLAG) == 0) { + break; + } + ++index; + if (index > NGC_SOUND_LAST_LOCK_INDEX) { + index = -1; + break; + } + } + } + } + + NGC_SOUND_STATE(snd)->last_ready_time = now; + NGC_SOUND_STATE(snd)->lock_index = index; + return index != -1; +} +/* BINKSND Volume callback: clamp to [0, BINK_NGC_VOLUME_MAX], store it and reapply the mix. */ +static void Volume(BINKSND PTR4* snd, s32 volume) +{ + if (volume < 0) { + volume = 0; + } + if (volume > BINK_NGC_VOLUME_MAX) { + volume = BINK_NGC_VOLUME_MAX; + } + + NGC_SOUND_STATE(snd)->volume = volume; + NGC_SoundVolume(snd); +} +/* BINKSND Pan callback: clamp to [0, BINK_NGC_PAN_MAX], store it scaled by NGC_SOUND_PAN_TO_FLOAT and reapply the mix. */ +static void Pan(BINKSND PTR4* snd, s32 pan) +{ + if (pan < 0) { + pan = 0; + } + if (pan > BINK_NGC_PAN_MAX) { + pan = BINK_NGC_PAN_MAX; + } + + NGC_SOUND_STATE(snd)->pan = (f32)pan * NGC_SOUND_PAN_TO_FLOAT; + NGC_SoundVolume(snd); +} +/* BINKSND SetOnOff callback: switching on rewinds the stream through NGC_SoundReinit(); switching off stops the voices. A request matching the current state changes nothing. Returns the resulting OnOff value. */ +static s32 SetOnOff(BINKSND PTR4* snd, s32 status) +{ + if (status == NGC_SOUND_ON && NGC_SND(snd)->OnOff == NGC_SOUND_OFF) { + if (NGC_SoundReinit(snd) == 0) { + return NGC_SND(snd)->OnOff; + } + NGC_SND(snd)->OnOff = status; + } else if (status == NGC_SOUND_OFF && NGC_SND(snd)->OnOff == NGC_SOUND_ON) { + AXVPB PTR4* voice; + NGCSoundState PTR4* state = NGC_SOUND_STATE(snd); + + voice = NGC_LEFT_VOICE(state); + if (voice != 0) { + AXSetVoiceState(voice, AX_PB_STATE_STOP); + } + + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceState(voice, AX_PB_STATE_STOP); + } + + NGC_SND(snd)->OnOff = status; + } + + return NGC_SND(snd)->OnOff; +} +/* BINKSND Pause callback: a nonzero status stops the voices, zero restarts them. Returns the new paused flag. */ +static s32 Pause(BINKSND PTR4* snd, s32 status) +{ + if (status) { + NGC_SoundPause(snd); + NGC_SOUND_STATE(snd)->paused = NGC_SOUND_PAUSED; + } else { + NGC_SoundResume(snd); + NGC_SOUND_STATE(snd)->paused = status; + } + + return NGC_SOUND_STATE(snd)->paused; +} +/* BINKSND Close callback: release the voices and all buffers. */ +static void Close(BINKSND PTR4* snd) +{ + NGC_SoundShutdown(snd); +} +/* Open function handed out by BinkOpenNGCSound(): zero *snd, record the stream format, install the driver callbacks and run NGC_SoundInit(). Returns 1 on success and 0 on failure; flags and bink are not used. */ +static s32 Open(BINKSND PTR4* snd, u32 freq, s32 bits, s32 
chans, u32 flags, HBINK bink) +{ + s32 result; + + memset(snd, 0, sizeof(*snd)); + /* Defaults: unpaused, full volume, centered pan, sound on. */ + NGC_SND(snd)->freq = freq; + NGC_SND(snd)->bits = bits; + NGC_SND(snd)->chans = chans; + NGC_SND(snd)->SoundDroppedOut = 0; + NGC_SOUND_STATE(snd)->paused = NGC_SOUND_UNPAUSED; + NGC_SOUND_STATE(snd)->volume = BINK_NGC_VOLUME_MAX; + NGC_SOUND_STATE(snd)->pan = NGC_SOUND_PAN_CENTER; + NGC_SND(snd)->OnOff = NGC_SOUND_ON; + /* Install this driver's callbacks; this happens before NGC_SoundInit() runs, so they are set even when it fails. */ + snd->Ready = Ready; + snd->Lock = Lock; + snd->Unlock = Unlock; + snd->Volume = Volume; + snd->Pan = Pan; + snd->Pause = Pause; + snd->SetOnOff = SetOnOff; + snd->Close = Close; + + if (NGC_SoundInit(snd) != 0) { + NGC_SND(snd)->NoThreadService = 0; + result = 1; + } else { + result = 0; + } + + return result; +} +/* Public entry point: returns this driver's open function; param is ignored. */ +BINKSNDOPEN BinkOpenNGCSound(u32 param) +{ + (void)param; + return Open; +} +/* Set the current address of both voices from a left-channel ARAM byte address; the right voice, when present, gets addr + channel_stride. The left voice is not null-checked. */ +void MyAXSetVoiceCurrentAddr(NGCSoundState PTR4* state, u32 addr) +{ + u32 shift = NGC_ADDRESS_SHIFT(state); + AXVPB PTR4* voice = NGC_LEFT_VOICE(state); + + AXSetVoiceCurrentAddr(voice, NGC_AX_ADDR(addr, shift)); + + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceCurrentAddr(voice, NGC_AX_RIGHT_ADDR(state, addr)); + } +} +/* Same as MyAXSetVoiceCurrentAddr, but for the loop address. */ +void MyAXSetVoiceLoopAddr(NGCSoundState PTR4* state, u32 addr) +{ + u32 shift = NGC_ADDRESS_SHIFT(state); + AXVPB PTR4* voice = NGC_LEFT_VOICE(state); + + AXSetVoiceLoopAddr(voice, NGC_AX_ADDR(addr, shift)); + + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceLoopAddr(voice, NGC_AX_RIGHT_ADDR(state, addr)); + } +} +/* Set the end address of both voices; addr is the exclusive byte end, and the AX value written is one unit less. The left voice is not null-checked. */ +void MyAXSetVoiceEndAddr(NGCSoundState PTR4* state, u32 addr) +{ + u32 shift = NGC_ADDRESS_SHIFT(state); + AXVPB PTR4* voice = NGC_LEFT_VOICE(state); + + AXSetVoiceEndAddr(voice, NGC_AX_END_ADDR(addr, shift)); + + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceEndAddr(voice, NGC_AX_RIGHT_END_ADDR(state, addr)); + } +} +/* Apply voice_state to each voice that has been acquired. */ +void MyAXSetVoiceState(NGCSoundState PTR4* state, u16 voice_state) +{ + AXVPB PTR4* voice = NGC_LEFT_VOICE(state); + + if (voice != 0) { + AXSetVoiceState(voice, 
voice_state); + } + + voice = NGC_RIGHT_VOICE(state); + if (voice != 0) { + AXSetVoiceState(voice, voice_state); + } +} +/* Left voice's current AX address shifted back into ARAM byte units. */ +u32 get_play_pos(NGCSoundState PTR4* state) +{ + AXVPB PTR4* voice = NGC_LEFT_VOICE(state); + u32 shift = NGC_ADDRESS_SHIFT(state); + + return NGC_AX_CURRENT_CURSOR(voice, shift); +} +/* Left voice's AX end address shifted back into ARAM byte units. */ +u32 get_end_pos(NGCSoundState PTR4* state) +{ + AXVPB PTR4* voice = NGC_LEFT_VOICE(state); + u32 shift = NGC_ADDRESS_SHIFT(state); + + return NGC_AX_END_CURSOR(voice, shift); +} +/* Returns &state->tasks[side + index * 2]. */ +ARQRequest PTR4* get_task(NGCSoundState PTR4* state, u32 index, u32 side) +{ + return NGC_TASK_FOR_INDEX(state, index, side); +} +/* De-interleave `bytes` bytes of 8-bit stereo from src into left/right: two sample pairs per u32 in the main loop, then single pairs for the remainder. An odd final byte is ignored. */ +void ConvDataToStereo8(u32 PTR4* src, u16 PTR4* left, u16 PTR4* right, u32 bytes) +{ + u32 i; + u32 total = bytes; + u32 count; + + count = total >> NGC_STEREO8_GROUP_SHIFT; + + for (i = 0; i < count; ++i) { + u32 value = *src++; + + *left++ = (u16)NGC_SAMPLE_LEFT_8_PAIR(value); + *right++ = (u16)NGC_SAMPLE_RIGHT_8_PAIR(value); + } + /* Tail: the high byte of each remaining u16 goes left, the low byte right. */ + count = (total - (count << NGC_STEREO8_GROUP_SHIFT)) >> NGC_STEREO8_TAIL_SHIFT; + for (i = 0; i < count; ++i) { + u16 value = *(u16 PTR4*)src; + src = NGC_ADVANCE_U32_BYTES(src, 2); + *(u8 PTR4*)left = (u8)(value >> NGC_SAMPLE_BYTE_SHIFT); + *(u8 PTR4*)right = (u8)value; + left = NGC_ADVANCE_U16_BYTES(left, 1); + right = NGC_ADVANCE_U16_BYTES(right, 1); + } +} +/* 16-bit counterpart of ConvDataToStereo8: two sample pairs (8 bytes) per main-loop iteration, then single pairs; bytes short of a full pair are ignored. */ +void ConvDataToStereo16(u32 PTR4* src, u32 PTR4* left, u32 PTR4* right, u32 bytes) +{ + u32 i; + u32 total = bytes; + u32 count; + + count = total >> NGC_STEREO16_GROUP_SHIFT; + + for (i = 0; i < count; ++i) { + u32 first = src[0]; + u32 second = src[1]; + src += 2; + + *left++ = NGC_SAMPLE_LEFT_16_PAIR(first, second); + *right++ = NGC_SAMPLE_RIGHT_16_PAIR(first, second); + } + /* Tail: the high half of each remaining u32 goes left, the low half right. */ + count = (total - (count << NGC_STEREO16_GROUP_SHIFT)) >> NGC_STEREO16_TAIL_SHIFT; + for (i = 0; i < count; ++i) { + u32 value = *src++; + *(u16 PTR4*)left = (u16)NGC_SAMPLE_HIGH_HALF(value); + *(u16 PTR4*)right = (u16)value; + left = NGC_ADVANCE_U32_BYTES(left, 2); + right = 
NGC_ADVANCE_U32_BYTES(right, 2); + } +} diff --git a/src/bink/src/sdk/decode/ngc/ngcsnd.h b/src/bink/src/sdk/decode/ngc/ngcsnd.h new file mode 100644 index 000000000..1f2617a60 --- /dev/null +++ b/src/bink/src/sdk/decode/ngc/ngcsnd.h @@ -0,0 +1,8 @@ +#ifndef BINK_DECODE_NGC_NGCSND_H +#define BINK_DECODE_NGC_NGCSND_H + +#include "bink.h" +/* Returns the open function of the GameCube AX sound driver; param is ignored. */ +BINKSNDOPEN BinkOpenNGCSound(u32 param); + +#endif diff --git a/src/bink/src/sdk/decode/ngc/ngcyuy2.c b/src/bink/src/sdk/decode/ngc/ngcyuy2.c index e69de29bb..f76f1db79 100644 --- a/src/bink/src/sdk/decode/ngc/ngcyuy2.c +++ b/src/bink/src/sdk/decode/ngc/ngcyuy2.c @@ -0,0 +1,410 @@ +#include "bink.h" +#include "ngcrgb.h" + +// GameCube YUY2 stores four luma samples as a big-endian word and splices U/V +// bytes between adjacent luma values. The x2 paths duplicate each luma sample +// into two output pixels while reusing the same chroma byte. +#define YUY2_LUMA16_DUP 0x10001 /* multiplier that copies a value in the low 16 bits into the high 16 bits as well */ +#define YUY2_NEUTRAL_CHROMA 0x800080 /* 0x80 in both the U lane (bits 16-23) and the V lane (bits 0-7) */ +#define YUY2_CHROMA16_MASK 0x00ff0000 +#define YUY2_CHROMA8_MASK 0x000000ff +#define YUY2_CHROMA_LOW16_MASK 0x0000ff00 +#define YUY2_WORDS_PER_BLOCK 2 /* output words per 4-luma block */ +#define YUY2_X2_WORDS_PER_BLOCK 4 /* output words per 4-luma block when doubled horizontally */ +#define YUY2_WORD_BYTES 4 +#define YUY2_PAIR_STRIDE 2 +#define YUY2_BLOCK_PAIRS(count) ((s32)(count) >> 1) /* blocks are processed two at a time */ +#define YUY2_HAS_TAIL_BLOCK(count) (((count) & 1) != 0) /* an odd count leaves one block for the tail path */ +#define YUY2_ROW_BYTES(count) ((count) * YUY2_WORDS_PER_BLOCK * YUY2_WORD_BYTES) +#define YUY2_X2_ROW_BYTES(count) ((count) * YUY2_X2_WORDS_PER_BLOCK * YUY2_WORD_BYTES) +#define YUY2_Y0_MASK 0xff000000 +#define YUY2_Y1_MASK 0x00ff0000 +#define YUY2_Y2_MASK 0x0000ff00 +#define YUY2_Y3_MASK 0x000000ff +#define YUY2_PACK_4Y01(y, chroma0, chroma1) ((y) & YUY2_Y0_MASK) + (chroma0) + (((y) >> 8) & YUY2_Y2_MASK) + (chroma1) /* output word: Y0 U Y1 V */ +#define YUY2_PACK_4Y23(y, chroma0, chroma1) (((y) & YUY2_Y2_MASK) << 16) + (chroma0) + (((y) & YUY2_Y3_MASK) << 8) + (chroma1) /* output word: Y2 U Y3 V */ +#define YUY2_PACK_M4Y01(y) ((y) & YUY2_Y0_MASK) + (((y) >> 8) & YUY2_Y2_MASK) + YUY2_NEUTRAL_CHROMA +#define YUY2_PACK_M4Y23(y) (((y) & 
YUY2_Y2_MASK) << 16) + (((y) & YUY2_Y3_MASK) << 8) + YUY2_NEUTRAL_CHROMA +#define YUY2_PACK_X2Y0(y, chroma) ((y) & YUY2_Y0_MASK) + (((y) & YUY2_Y0_MASK) >> 16) + (chroma) /* output word: Y0 U Y0 V */ +#define YUY2_PACK_X2Y1(y, chroma) (((y) & YUY2_Y1_MASK) << 8) + (((y) & YUY2_Y1_MASK) >> 8) + (chroma) /* output word: Y1 U Y1 V */ +#define YUY2_PACK_X2Y2(y, chroma) (((y) & YUY2_Y2_MASK) * YUY2_LUMA16_DUP) + (chroma) /* output word: Y2 U Y2 V */ +#define YUY2_PACK_X2Y3(y, chroma) (((y) & YUY2_Y3_MASK) << 24) + (((y) & YUY2_Y3_MASK) << 8) + (chroma) /* output word: Y3 U Y3 V */ +#define YUY2_CHROMA0_U(u) (((u) >> 8) & YUY2_CHROMA16_MASK) /* CHROMAn_U / CHROMAn_V: move byte n (most significant first) of a packed 4-sample word into the U lane (bits 16-23) or the V lane (bits 0-7) */ +#define YUY2_CHROMA0_V(v) ((v) >> 24) +#define YUY2_CHROMA1_U(u) ((u) & YUY2_CHROMA16_MASK) +#define YUY2_CHROMA1_V(v) (((v) >> 16) & YUY2_CHROMA8_MASK) +#define YUY2_CHROMA2_U(u) (((u) & YUY2_CHROMA_LOW16_MASK) << 8) +#define YUY2_CHROMA2_V(v) (((v) >> 8) & YUY2_CHROMA8_MASK) +#define YUY2_CHROMA3_U(u) (((u) & YUY2_CHROMA8_MASK) << 16) +#define YUY2_CHROMA3_V(v) ((v) & YUY2_CHROMA8_MASK) +#define YUY2_TAIL_CHROMA0_U(u) (((u) & YUY2_CHROMA_LOW16_MASK) << 8) /* TAIL_*: the same lanes, taken from a 16-bit value that holds two samples */ +#define YUY2_TAIL_CHROMA0_V(v) ((v) >> 8) +#define YUY2_TAIL_CHROMA1_U(u) (((u) & YUY2_CHROMA8_MASK) << 16) +#define YUY2_TAIL_CHROMA1_V(v) ((v) & YUY2_CHROMA8_MASK) +#define YUY2_CHROMA0(u, v) (((u) >> 8) & YUY2_CHROMA16_MASK) + ((v) >> 24) /* combined U + V lanes for sample n */ +#define YUY2_CHROMA1(u, v) ((u) & YUY2_CHROMA16_MASK) + (((v) >> 16) & YUY2_CHROMA8_MASK) +#define YUY2_CHROMA2(u, v) (((u) & YUY2_CHROMA_LOW16_MASK) << 8) + (((v) >> 8) & YUY2_CHROMA8_MASK) +#define YUY2_CHROMA3(u, v) (((u) & YUY2_CHROMA8_MASK) << 16) + ((v) & YUY2_CHROMA8_MASK) +#define YUY2_TAIL_CHROMA0(u, v) (((u) & YUY2_CHROMA_LOW16_MASK) << 8) + ((v) >> 8) +#define YUY2_TAIL_CHROMA1(u, v) (((u) & YUY2_CHROMA8_MASK) << 16) + ((v) & YUY2_CHROMA8_MASK) /* NOTE(review): the PACK and combined CHROMA macro bodies have no outer parentheses, so use them only as a complete right-hand side */ + +// Helpers operate on one destination row; the public 4x2 entry points repeat the +// same packing logic for S.dest0 and S.dest1, then advance the shared context. 
+void YUY2_4x2Helper(u32 count, u32 PTR4* dest, const u32 PTR4* y, const u32 PTR4* u, + const u32 PTR4* v); +void YUY2_x2_4x2Helper(u32 count, u32 PTR4* dest, const u32 PTR4* y, const u32 PTR4* u, + const u32 PTR4* v); +void YUY2_m_4x2Helper(u32 count, u32 PTR4* dest, const u32 PTR4* y); +void YUY2_mx2_4x2Helper(u32 count, u32 PTR4* dest, const u32 PTR4* y); +/* Convert `count` blocks of four luma samples on each of two rows (S.y0 to S.dest0, S.y1 to S.dest1) into packed YUY2. Both rows read the same S.u / S.v chroma, and every pointer in S is advanced afterwards. */ +void YUY2_4x2(u32 count) +{ + s32 pairs; + u32 PTR4* dest; + const u32 PTR4* y; + const u16 PTR4* u; + const u16 PTR4* v; + + pairs = YUY2_BLOCK_PAIRS(count); + dest = (u32 PTR4*)S.dest0; + y = S.y0; + u = S.u; + v = S.v; + while (pairs-- != 0) { /* two blocks per iteration: one 32-bit load of U and of V covers both luma words */ + u32 y0 = y[0]; + u32 u0 = *(u32 PTR4*)u; + u32 v0 = *(u32 PTR4*)v; + + *dest++ = YUY2_PACK_4Y01(y0, YUY2_CHROMA0_U(u0), YUY2_CHROMA0_V(v0)); + *dest++ = YUY2_PACK_4Y23(y0, YUY2_CHROMA1_U(u0), YUY2_CHROMA1_V(v0)); + + y0 = y[1]; + u += YUY2_PAIR_STRIDE; + v += YUY2_PAIR_STRIDE; + *dest++ = YUY2_PACK_4Y01(y0, YUY2_CHROMA2_U(u0), YUY2_CHROMA2_V(v0)); + y += YUY2_PAIR_STRIDE; + *dest++ = YUY2_PACK_4Y23(y0, YUY2_CHROMA3_U(u0), YUY2_CHROMA3_V(v0)); + + } + /* Odd count: the last block uses a 16-bit chroma load. */ + if (YUY2_HAS_TAIL_BLOCK(count)) { + u32 y0 = y[0]; + u16 u0 = *u; + u16 v0 = *v; + + *dest++ = YUY2_PACK_4Y01(y0, YUY2_TAIL_CHROMA0_U(u0), YUY2_TAIL_CHROMA0_V(v0)); + *dest++ = YUY2_PACK_4Y23(y0, YUY2_TAIL_CHROMA1_U(u0), YUY2_TAIL_CHROMA1_V(v0)); + } + /* Second row: luma from S.y1, chroma re-read from the start of S.u / S.v. */ + pairs = YUY2_BLOCK_PAIRS(count); + dest = (u32 PTR4*)S.dest1; + y = S.y1; + u = S.u; + v = S.v; + while (pairs-- != 0) { + u32 y0 = y[0]; + u32 u0 = *(u32 PTR4*)u; + u32 v0 = *(u32 PTR4*)v; + + *dest++ = YUY2_PACK_4Y01(y0, YUY2_CHROMA0_U(u0), YUY2_CHROMA0_V(v0)); + *dest++ = YUY2_PACK_4Y23(y0, YUY2_CHROMA1_U(u0), YUY2_CHROMA1_V(v0)); + + y0 = y[1]; + u += YUY2_PAIR_STRIDE; + v += YUY2_PAIR_STRIDE; + *dest++ = YUY2_PACK_4Y01(y0, YUY2_CHROMA2_U(u0), YUY2_CHROMA2_V(v0)); + y += YUY2_PAIR_STRIDE; + *dest++ = YUY2_PACK_4Y23(y0, YUY2_CHROMA3_U(u0), YUY2_CHROMA3_V(v0)); + + } + + if (YUY2_HAS_TAIL_BLOCK(count)) { + u32 y0 = y[0]; + u16 u0 = *u; + u16 v0 = *v; + + *dest++ = 
YUY2_PACK_4Y01(y0, YUY2_TAIL_CHROMA0_U(u0), YUY2_TAIL_CHROMA0_V(v0)); + *dest++ = YUY2_PACK_4Y23(y0, YUY2_TAIL_CHROMA1_U(u0), YUY2_TAIL_CHROMA1_V(v0)); + } + /* Advance the shared context past the converted blocks. */ + S.dest0 += YUY2_ROW_BYTES(count); + S.dest1 += YUY2_ROW_BYTES(count); + S.y0 += count; + S.y1 += count; + S.u += count; + S.v += count; +} +/* Horizontally doubled variant of YUY2_4x2: each luma sample fills both pixels of an output word, so every block yields four words per row instead of two. */ +void YUY2_x2_4x2(u32 count) +{ + s32 pairs; + u32 PTR4* dest; + const u32 PTR4* y; + const u16 PTR4* u; + const u16 PTR4* v; + + pairs = YUY2_BLOCK_PAIRS(count); + dest = (u32 PTR4*)S.dest0; + y = S.y0; + u = S.u; + v = S.v; + while (pairs-- != 0) { + u32 y0 = y[0]; + u32 u0 = *(u32 PTR4*)u; + u32 v0 = *(u32 PTR4*)v; + u32 chroma0 = YUY2_CHROMA0(u0, v0); + u32 chroma1 = YUY2_CHROMA1(u0, v0); + u32 chroma2 = YUY2_CHROMA2(u0, v0); + u32 chroma3 = YUY2_CHROMA3(u0, v0); + /* Each chroma pair is shared by two consecutive output words. */ + *dest++ = YUY2_PACK_X2Y0(y0, chroma0); + *dest++ = YUY2_PACK_X2Y1(y0, chroma0); + *dest++ = YUY2_PACK_X2Y2(y0, chroma1); + *dest++ = YUY2_PACK_X2Y3(y0, chroma1); + + y0 = y[1]; + *dest++ = YUY2_PACK_X2Y0(y0, chroma2); + *dest++ = YUY2_PACK_X2Y1(y0, chroma2); + *dest++ = YUY2_PACK_X2Y2(y0, chroma3); + *dest++ = YUY2_PACK_X2Y3(y0, chroma3); + + y += YUY2_PAIR_STRIDE; + u += YUY2_PAIR_STRIDE; + v += YUY2_PAIR_STRIDE; + } + /* Odd count: the last block uses a 16-bit chroma load. */ + if (YUY2_HAS_TAIL_BLOCK(count)) { + u32 y0 = y[0]; + u16 u0 = *u; + u16 v0 = *v; + u32 chroma0 = YUY2_TAIL_CHROMA0(u0, v0); + u32 chroma1 = YUY2_TAIL_CHROMA1(u0, v0); + + *dest++ = YUY2_PACK_X2Y0(y0, chroma0); + *dest++ = YUY2_PACK_X2Y1(y0, chroma0); + *dest++ = YUY2_PACK_X2Y2(y0, chroma1); + *dest++ = YUY2_PACK_X2Y3(y0, chroma1); + } + /* Second row: luma from S.y1, chroma re-read from the start of S.u / S.v. */ + pairs = YUY2_BLOCK_PAIRS(count); + dest = (u32 PTR4*)S.dest1; + y = S.y1; + u = S.u; + v = S.v; + while (pairs-- != 0) { + u32 y0 = y[0]; + u32 u0 = *(u32 PTR4*)u; + u32 v0 = *(u32 PTR4*)v; + u32 chroma0 = YUY2_CHROMA0(u0, v0); + u32 chroma1 = YUY2_CHROMA1(u0, v0); + u32 chroma2 = YUY2_CHROMA2(u0, v0); + u32 chroma3 = YUY2_CHROMA3(u0, v0); + + *dest++ = YUY2_PACK_X2Y0(y0, chroma0); + *dest++ = YUY2_PACK_X2Y1(y0, chroma0); + *dest++ = 
YUY2_PACK_X2Y2(y0, chroma1); + *dest++ = YUY2_PACK_X2Y3(y0, chroma1); + + y0 = y[1]; + *dest++ = YUY2_PACK_X2Y0(y0, chroma2); + *dest++ = YUY2_PACK_X2Y1(y0, chroma2); + *dest++ = YUY2_PACK_X2Y2(y0, chroma3); + *dest++ = YUY2_PACK_X2Y3(y0, chroma3); + + y += YUY2_PAIR_STRIDE; + u += YUY2_PAIR_STRIDE; + v += YUY2_PAIR_STRIDE; + } + + if (YUY2_HAS_TAIL_BLOCK(count)) { + u32 y0 = y[0]; + u16 u0 = *u; + u16 v0 = *v; + u32 chroma0 = YUY2_TAIL_CHROMA0(u0, v0); + u32 chroma1 = YUY2_TAIL_CHROMA1(u0, v0); + + *dest++ = YUY2_PACK_X2Y0(y0, chroma0); + *dest++ = YUY2_PACK_X2Y1(y0, chroma0); + *dest++ = YUY2_PACK_X2Y2(y0, chroma1); + *dest++ = YUY2_PACK_X2Y3(y0, chroma1); + } + + S.dest0 += YUY2_X2_ROW_BYTES(count); + S.dest1 += YUY2_X2_ROW_BYTES(count); + S.y0 += count; + S.y1 += count; + S.u += count; + S.v += count; +} + +void YUY2_m_4x2(u32 count) +{ + u32 n; + u32 PTR4* dest; + const u32 PTR4* y; + + n = count; + dest = (u32 PTR4*)S.dest0; + y = S.y0; + while (n-- != 0) { + u32 y0 = *y++; + + *dest++ = YUY2_PACK_M4Y01(y0); + *dest++ = YUY2_PACK_M4Y23(y0); + } + + n = count; + dest = (u32 PTR4*)S.dest1; + y = S.y1; + while (n-- != 0) { + u32 y0 = *y++; + + *dest++ = YUY2_PACK_M4Y01(y0); + *dest++ = YUY2_PACK_M4Y23(y0); + } + + S.dest0 += YUY2_ROW_BYTES(count); + S.dest1 += YUY2_ROW_BYTES(count); + S.y0 += count; + S.y1 += count; +} + +void YUY2_mx2_4x2(u32 count) +{ + u32 n; + u32 PTR4* dest; + const u32 PTR4* y; + + n = count; + dest = (u32 PTR4*)S.dest0; + y = S.y0; + while (n-- != 0) { + u32 y0 = *y++; + + *dest++ = YUY2_PACK_X2Y0(y0, YUY2_NEUTRAL_CHROMA); + *dest++ = YUY2_PACK_X2Y1(y0, YUY2_NEUTRAL_CHROMA); + *dest++ = YUY2_PACK_X2Y2(y0, YUY2_NEUTRAL_CHROMA); + *dest++ = YUY2_PACK_X2Y3(y0, YUY2_NEUTRAL_CHROMA); + } + + n = count; + dest = (u32 PTR4*)S.dest1; + y = S.y1; + while (n-- != 0) { + u32 y0 = *y++; + + *dest++ = YUY2_PACK_X2Y0(y0, YUY2_NEUTRAL_CHROMA); + *dest++ = YUY2_PACK_X2Y1(y0, YUY2_NEUTRAL_CHROMA); + *dest++ = YUY2_PACK_X2Y2(y0, YUY2_NEUTRAL_CHROMA); + 
*dest++ = YUY2_PACK_X2Y3(y0, YUY2_NEUTRAL_CHROMA); + } + + S.dest0 += YUY2_X2_ROW_BYTES(count); + S.dest1 += YUY2_X2_ROW_BYTES(count); + S.y0 += count; + S.y1 += count; +} + +void YUY2_4x2Helper(u32 count, u32 PTR4* dest, const u32 PTR4* y, const u32 PTR4* u, + const u32 PTR4* v) +{ + s32 pairs; + + pairs = YUY2_BLOCK_PAIRS(count); + for (; pairs != 0; --pairs) { + u32 y0 = y[0]; + u32 u0 = *u; + u32 v0 = *v; + + *dest++ = YUY2_PACK_4Y01(y0, YUY2_CHROMA0_U(u0), YUY2_CHROMA0_V(v0)); + *dest++ = YUY2_PACK_4Y23(y0, YUY2_CHROMA1_U(u0), YUY2_CHROMA1_V(v0)); + + y0 = y[1]; + ++u; + ++v; + *dest++ = YUY2_PACK_4Y01(y0, YUY2_CHROMA2_U(u0), YUY2_CHROMA2_V(v0)); + y += YUY2_PAIR_STRIDE; + *dest++ = YUY2_PACK_4Y23(y0, YUY2_CHROMA3_U(u0), YUY2_CHROMA3_V(v0)); + + } + + if (YUY2_HAS_TAIL_BLOCK(count)) { + u32 y0 = y[0]; + u16 u0 = *(const u16 PTR4*)u; + u16 v0 = *(const u16 PTR4*)v; + + *dest++ = YUY2_PACK_4Y01(y0, YUY2_TAIL_CHROMA0_U(u0), YUY2_TAIL_CHROMA0_V(v0)); + *dest++ = YUY2_PACK_4Y23(y0, YUY2_TAIL_CHROMA1_U(u0), YUY2_TAIL_CHROMA1_V(v0)); + } +} + +void YUY2_x2_4x2Helper(u32 count, u32 PTR4* dest, const u32 PTR4* y, const u32 PTR4* u, + const u32 PTR4* v) +{ + s32 pairs; + + pairs = YUY2_BLOCK_PAIRS(count); + while (pairs-- != 0) { + u32 u0 = *u; + u32 v0 = *v; + u32 y0 = y[0]; + u32 chroma0 = YUY2_CHROMA0(u0, v0); + u32 chroma1 = YUY2_CHROMA1(u0, v0); + u32 chroma2 = YUY2_CHROMA2(u0, v0); + u32 chroma3 = YUY2_CHROMA3(u0, v0); + + ++u; + *dest++ = YUY2_PACK_X2Y0(y0, chroma0); + *dest++ = YUY2_PACK_X2Y1(y0, chroma0); + *dest++ = YUY2_PACK_X2Y2(y0, chroma1); + *dest++ = YUY2_PACK_X2Y3(y0, chroma1); + + y0 = y[1]; + ++v; + *dest++ = YUY2_PACK_X2Y0(y0, chroma2); + *dest++ = YUY2_PACK_X2Y1(y0, chroma2); + *dest++ = YUY2_PACK_X2Y2(y0, chroma3); + *dest++ = YUY2_PACK_X2Y3(y0, chroma3); + + y += YUY2_PAIR_STRIDE; + } + + if (YUY2_HAS_TAIL_BLOCK(count)) { + u32 y0 = y[0]; + u16 u0 = *(const u16 PTR4*)u; + u16 v0 = *(const u16 PTR4*)v; + u32 chroma0 = YUY2_TAIL_CHROMA0(u0, v0); + 
u32 chroma1 = YUY2_TAIL_CHROMA1(u0, v0); + + *dest++ = YUY2_PACK_X2Y0(y0, chroma0); + *dest++ = YUY2_PACK_X2Y1(y0, chroma0); + *dest++ = YUY2_PACK_X2Y2(y0, chroma1); + *dest++ = YUY2_PACK_X2Y3(y0, chroma1); + } +} + +void YUY2_m_4x2Helper(u32 count, u32 PTR4* dest, const u32 PTR4* y) +{ + if (count != 0) { + do { + u32 y0 = *y++; + + *dest++ = YUY2_PACK_M4Y01(y0); + *dest++ = YUY2_PACK_M4Y23(y0); + + } while (--count != 0); + } +} + +void YUY2_mx2_4x2Helper(u32 count, u32 PTR4* dest, const u32 PTR4* y) +{ + while (count-- != 0) { + u32 y0 = *y++; + + *dest++ = YUY2_PACK_X2Y0(y0, YUY2_NEUTRAL_CHROMA); + *dest++ = YUY2_PACK_X2Y1(y0, YUY2_NEUTRAL_CHROMA); + *dest++ = YUY2_PACK_X2Y2(y0, YUY2_NEUTRAL_CHROMA); + *dest++ = YUY2_PACK_X2Y3(y0, YUY2_NEUTRAL_CHROMA); + + } +} diff --git a/src/bink/src/sdk/decode/yuv.cpp b/src/bink/src/sdk/decode/yuv.cpp index e69de29bb..799db84e0 100644 --- a/src/bink/src/sdk/decode/yuv.cpp +++ b/src/bink/src/sdk/decode/yuv.cpp @@ -0,0 +1,3890 @@ +#include "bink.h" +#include "ngc/ngcrgb.h" +#include "yuv.h" + +typedef void (*CoreBlitFn)(u32); +typedef void (*RowBlitFn)(u32, u32); +typedef u32 (*ColBlitFn)(u32, s32); + +#define YUV_TABLE_PLANE_SIZE 0x100 +#define RGB_CLAMP_BIAS 0x100 +#define RGB_CLAMP_HIGH_OFFSET 0x200 +#define YUV_V_TO_GB_OFFSET 0x100 +#define YUV_U_TO_GB_OFFSET 0x200 +#define YUV_V_TO_R_OFFSET 0x300 +#define YUY2_NEUTRAL_CHROMA 0x80008000U +#define YUY2_CHROMA_MASK 0xff00ff00U +#define YUV_MASK_BLOCK_PIXELS 0x10 +#define YUV_MASK_BLOCK_SHIFT 4 +#define YUV_MASK_BLOCK_PAIR_PIXELS 0x20 +#define YUV_MASK_BLOCK_MASK (YUV_MASK_BLOCK_PIXELS - 1) +#define YUV_MASK_HALF_BLOCK_ROWS 8 +#define YUV_MASK_HALF_BLOCKS 4 +#define YUV_MASK_FULL_BLOCKS 8 +#define YUV_CHROMA_BLOCK_BYTES 8 +#define YUV_CHROMA_SHIFT 1 +#define BINKSURFACE_INVALID 0xffffffffU +#define YUV_ROUND_SHIFT 15 +#define YUV_ROUND_BIAS ((1 << YUV_ROUND_SHIFT) - 1) +#define YUV_LUMA_BLACK 0x10 +#define YUV_LUMA_BLACK_CUTOFF (YUV_LUMA_BLACK + 1) +#define 
YUV_LUMA_WHITE_CUTOFF 0xeb +#define YUV_LUMA_MAX 0xfe +#define YUV_LUMA_RANGE 0xdb +#define YUV_CHROMA_CENTER 0x80 +#define RGB_CHANNEL_MAX 0xff +#define RGB_MONO_WHITE 0xffffffU +#define YUV_COEFF_Y_TO_RGB 0x950a +#define YUV_COEFF_V_TO_GB 0x680f +#define YUV_COEFF_U_TO_GB 0x3225 +#define YUV_COEFF_V_TO_R 0xcc4b +#define YUV_COEFF_U_TO_B 0x10235 +#define RGB_DUP16 0x10001 +#define RGB_HIGH16 0x10000 +#define RGB_A4_SOURCE_MASK 0x1ffffe0 +#define RGB_A4_SHIFT 7 +#define YUV_ZOOM_ALIGN 0x20 +#define YUV_ZOOM_ALIGN_MASK (YUV_ZOOM_ALIGN - 1) +#define YUV_ZOOM_ALIGN_SIZE(width) (((width) + YUV_ZOOM_ALIGN_MASK) & ~YUV_ZOOM_ALIGN_MASK) +#define YUV_ZOOM_BUFFER_COUNT 2 +#define YUV_PACKED_WORD_BYTES 4 +#define YUV_TESTING_WORDS 2 +#define YUV_BLIT_PAD_WORDS 3 +#define YUV_BYTES_PER_PIXEL_32 4 +#define YUV_BYTES_PER_PIXEL_24 3 +#define YUV_BYTES_PER_PIXEL_16 2 +#define YUV_CORE_4X2_STEP 2 +#define YUV_BLIT_ROW_BYTES(width, blits) ((width) * (blits)->bytes_per_pixel) +#define YUV_BLIT_ROW_BYTES_X2(width, blits) (YUV_BLIT_ROW_BYTES((width), (blits)) * 2) +#define YUV_BLIT_SCALED_PIXEL_BYTES(blits, scale) ((blits)->bytes_per_pixel * (scale)) +#define YUV_BLIT_SCALED_ROW_BYTES(width, blits, scale) \ + ((width) * YUV_BLIT_SCALED_PIXEL_BYTES((blits), (scale))) + +enum YUVTableOrder { + YUV_TABLE_ORDER_NORMAL, + YUV_TABLE_ORDER_RB_INVERTED +}; + +// Per-surface dispatch: packed core kernels handle aligned 4x2 blocks, while +// row/column helpers cover grayscale, scaled, masked, and odd-edge cases. 
+struct BLITS {
+    u32 bytes_per_pixel; // destination bytes per pixel; multiplied by widths in YUV_BLIT_ROW_BYTES
+    u32 even_step; // setup_scaling takes the larger of even_step/odd_step as alignshift (align = 1 << alignshift)
+    CoreBlitFn even; // YUV_blit calls EVEN when (phase & 1) == 0, ODD otherwise
+    u32 odd_step;
+    CoreBlitFn odd;
+    u32 masked_step;
+    CoreBlitFn masked; // used for both EVEN and ODD when BINKGRAYSCALE is set
+    u32 even_x2_step;
+    CoreBlitFn even_x2; // x2 kernels are selected for BINKCOPY2XW, BINKCOPY2XWHI and BINKCOPY2XWH
+    u32 odd_x2_step;
+    CoreBlitFn odd_x2;
+    u32 masked_x2_step;
+    CoreBlitFn masked_x2;
+    RowBlitFn row; // row/col become dounalignedrow/dounalignedcol for unscaled color copies
+    ColBlitFn col;
+    RowBlitFn rowm; // "m" variants are chosen when BINKGRAYSCALE is set
+    ColBlitFn colm;
+    RowBlitFn row2w; // 2w: BINKCOPY2XW / BINKCOPY2XWHI
+    ColBlitFn col2w;
+    RowBlitFn rowm2w;
+    ColBlitFn colm2w;
+    RowBlitFn row2h; // 2h: BINKCOPY2XH
+    ColBlitFn col2h;
+    RowBlitFn rowm2h;
+    ColBlitFn colm2h;
+    RowBlitFn row2wh; // 2wh: BINKCOPY2XWH
+    ColBlitFn col2wh;
+    RowBlitFn rowm2wh;
+    ColBlitFn colm2wh;
+    u32 pad[YUV_BLIT_PAD_WORDS]; // zero-initialized in every table; not read by the code visible here
+};
+// C-linkage helpers used below: radmalloc/radfree in checkzoombufs, mult64anddiv in YUV_blit_mask.
+extern "C" void PTR4* radmalloc(u32 size);
+extern "C" void radfree(void PTR4* ptr);
+extern "C" u32 mult64anddiv(u32 left, u32 right, u32 divisor);
+// Snapshot of YUVTables taken by YUV_init; setup_scaling copies planes back out of it when the plane order changes.
+static s32 origYUVTables[YUV_TABLE_SIZE];
+// whichyuv: plane order currently in YUVTables. rgb_layout: surface value the clamp tables were last built for (YUV_init).
+static YUVTableOrder whichyuv = YUV_TABLE_ORDER_NORMAL;
+extern "C" {
+u32 rgb_layout = BINKSURFACE_INVALID;
+}
+// Shift right by YUV_ROUND_SHIFT, biasing negative inputs first so they round toward zero (assumes >> on a negative s32 is arithmetic).
+static inline s32 yuv_round15(s32 value)
+{
+    if (value < 0) {
+        value += YUV_ROUND_BIAS;
+    }
+
+    return value >> YUV_ROUND_SHIFT;
+}
+// Packed-pixel builders: index the clamp_*/mono* tables (declared outside this chunk, presumably in ngc/ngcrgb.h) and OR the fields together.
+#define RGB565(y, r, g, b) \
+    ((u16)clamp_r[RGB_CLAMP_BIAS + (r) + (y)] | (u16)clamp_g[RGB_CLAMP_BIAS + (g) + (y)] | (u16)clamp_b[RGB_CLAMP_BIAS + (b) + (y)])
+
+#define RGB565_A4(y, r, g, b, a) (RGB565((y), (r), (g), (b)) | (u16)clamp_a4[(a)])
+#define RGB565_M(y) ((u16)mono16[(y)])
+#define RGB565_M_A4(y, a) (RGB565_M((y)) | (u16)clamp_a4[(a)])
+#define RGB32_M(y) (mono32[(y)])
+#define RGB32_M_A(y, a) (RGB32_M((y)) | ((u32)(a) << 24))
+#define YUY2_M(y0, y1) ((u32)(y0) | YUY2_NEUTRAL_CHROMA | ((u32)(y1) << 16))
+// Core 4x2 kernels referenced by the BLITS tables; DECL_CORE declares each with C linkage and a single u32 argument.
+#define DECL_CORE(name) extern "C" void name(u32)
+DECL_CORE(YUV_32_4x2_even); // blits32 kernels
+DECL_CORE(YUV_32_4x2_odd);
+DECL_CORE(YUV_32m_4x2);
+DECL_CORE(YUV_32x2_4x2_even);
+DECL_CORE(YUV_32x2_4x2_odd);
+DECL_CORE(YUV_32mx2_4x2);
+DECL_CORE(YUV_32a_4x2_even); // blits32a kernels
+DECL_CORE(YUV_32a_4x2_odd);
+DECL_CORE(YUV_32am_4x2);
+DECL_CORE(YUV_32ax2_4x2_even);
+DECL_CORE(YUV_32ax2_4x2_odd);
+DECL_CORE(YUV_32amx2_4x2);
+DECL_CORE(YUV_16_4x2_even); // blits16 kernels (DECL_CORE is defined just above this list)
+DECL_CORE(YUV_16_4x2_odd);
+DECL_CORE(YUV_16m_4x2);
+DECL_CORE(YUV_16x2_4x2_even);
+DECL_CORE(YUV_16x2_4x2_odd);
+DECL_CORE(YUV_16mx2_4x2);
+DECL_CORE(YUV_16a4_4x2_even); // blits16a4 kernels
+DECL_CORE(YUV_16a4_4x2_odd);
+DECL_CORE(YUV_16a4m_4x2);
+DECL_CORE(YUV_16a4x2_4x2_even);
+DECL_CORE(YUV_16a4x2_4x2_odd);
+DECL_CORE(YUV_16a4mx2_4x2);
+DECL_CORE(YUY2_4x2); // blitsyuy2 kernels; the same function fills both the even and odd slots
+DECL_CORE(YUY2_m_4x2);
+DECL_CORE(YUY2_x2_4x2);
+DECL_CORE(YUY2_mx2_4x2);
+#undef DECL_CORE
+// Forward declarations of the static scalar edge helpers: DECL_ROW matches RowBlitFn (phase, count), DECL_COL matches ColBlitFn (count, phase).
+#define DECL_ROW(name) static void name(u32 phase, u32 count)
+#define DECL_COL(name) static u32 name(u32 count, s32 phase)
+DECL_ROW(dounaligned32rowm2w); // 32-bit helpers referenced by blits32
+DECL_COL(dounaligned32colm2w);
+DECL_ROW(dounaligned32rowm2h);
+DECL_COL(dounaligned32colm2h);
+DECL_ROW(dounaligned32rowm2wh);
+DECL_COL(dounaligned32colm2wh);
+DECL_ROW(dounaligned32rowm);
+DECL_COL(dounaligned32colm);
+DECL_ROW(dounaligned32row);
+DECL_COL(dounaligned32col);
+DECL_ROW(dounaligned32row2w);
+DECL_COL(dounaligned32col2w);
+DECL_ROW(dounaligned32row2h);
+DECL_COL(dounaligned32col2h);
+DECL_ROW(dounaligned32row2wh);
+DECL_COL(dounaligned32col2wh);
+DECL_ROW(dounaligned32arowm2w); // 32-bit "a" helpers referenced by blits32a
+DECL_COL(dounaligned32acolm2w);
+DECL_ROW(dounaligned32arowm2h);
+DECL_COL(dounaligned32acolm2h);
+DECL_ROW(dounaligned32arowm2wh);
+DECL_COL(dounaligned32acolm2wh);
+DECL_ROW(dounaligned32arowm);
+DECL_COL(dounaligned32acolm);
+DECL_ROW(dounaligned32arow);
+DECL_COL(dounaligned32acol);
+DECL_ROW(dounaligned32arow2w);
+DECL_COL(dounaligned32acol2w);
+DECL_ROW(dounaligned32arow2h);
+DECL_COL(dounaligned32acol2h);
+DECL_ROW(dounaligned32arow2wh);
+DECL_COL(dounaligned32acol2wh);
+DECL_ROW(dounaligned16rowm2h); // 16-bit helpers referenced by blits16
+DECL_COL(dounaligned16colm2h);
+DECL_ROW(dounaligned16rowm2w);
+DECL_COL(dounaligned16colm2w);
+DECL_ROW(dounaligned16rowm2wh);
+DECL_COL(dounaligned16colm2wh);
+DECL_ROW(dounaligned16rowm);
+DECL_COL(dounaligned16colm);
+DECL_ROW(dounaligned16row);
+DECL_COL(dounaligned16col);
+DECL_ROW(dounaligned16row2h);
+DECL_COL(dounaligned16col2h);
+DECL_ROW(dounaligned16row2w); +DECL_COL(dounaligned16col2w); +DECL_ROW(dounaligned16row2wh); +DECL_COL(dounaligned16col2wh); +DECL_ROW(dounaligned16a4rowm2h); +DECL_COL(dounaligned16a4colm2h); +DECL_ROW(dounaligned16a4rowm2w); +DECL_COL(dounaligned16a4colm2w); +DECL_ROW(dounaligned16a4rowm2wh); +DECL_COL(dounaligned16a4colm2wh); +DECL_ROW(dounaligned16a4rowm); +DECL_COL(dounaligned16a4colm); +DECL_ROW(dounaligned16a4row); +DECL_COL(dounaligned16a4col); +DECL_ROW(dounaligned16a4row2h); +DECL_COL(dounaligned16a4col2h); +DECL_ROW(dounaligned16a4row2w); +DECL_COL(dounaligned16a4col2w); +DECL_ROW(dounaligned16a4row2wh); +DECL_COL(dounaligned16a4col2wh); +DECL_ROW(dounalignedYUY2rowm2wh); +DECL_COL(dounalignedYUY2colm2wh); +DECL_ROW(dounalignedYUY2rowm2w); +DECL_COL(dounalignedYUY2colm2w); +DECL_ROW(dounalignedYUY2rowm2h); +DECL_COL(dounalignedYUY2colm2h); +DECL_ROW(dounalignedYUY2row2wh); +DECL_COL(dounalignedYUY2col2wh); +DECL_ROW(dounalignedYUY2row2w); +DECL_COL(dounalignedYUY2col2w); +DECL_ROW(dounalignedYUY2row2h); +DECL_COL(dounalignedYUY2col2h); +DECL_ROW(dounalignedYUY2rowm); +DECL_COL(dounalignedYUY2colm); +DECL_ROW(dounalignedYUY2row); +DECL_COL(dounalignedYUY2col); +#undef DECL_ROW +#undef DECL_COL + +static u32 z2hsize = 0; +static void PTR4* z2hbuf1 = 0; +static void PTR4* z2hbuf2 = 0; +static u32 donetables = 0; +static void (*EVEN)(u32); +static void (*ODD)(u32); +static void (*EVENx)(u32); +static void (*ODDx)(u32); +static void (*dounalignedrow)(u32, u32); +static u32 (*dounalignedcol)(u32, s32); +static u32 align; +static u32 alignm1; +static u32 alignshift; +static u32 testing[YUV_TESTING_WORDS]; +static BLITS blits32 = { YUV_BYTES_PER_PIXEL_32, + YUV_CORE_4X2_STEP, YUV_32_4x2_even, + YUV_CORE_4X2_STEP, YUV_32_4x2_odd, + YUV_CORE_4X2_STEP, YUV_32m_4x2, + YUV_CORE_4X2_STEP, YUV_32x2_4x2_even, + YUV_CORE_4X2_STEP, YUV_32x2_4x2_odd, + YUV_CORE_4X2_STEP, YUV_32mx2_4x2, + dounaligned32row, dounaligned32col, + dounaligned32rowm, dounaligned32colm, + 
dounaligned32row2w, dounaligned32col2w, + dounaligned32rowm2w, dounaligned32colm2w, + dounaligned32row2h, dounaligned32col2h, + dounaligned32rowm2h, dounaligned32colm2h, + dounaligned32row2wh, dounaligned32col2wh, + dounaligned32rowm2wh, dounaligned32colm2wh, + { 0, 0, 0 } }; +static BLITS blits32a = { YUV_BYTES_PER_PIXEL_32, + YUV_CORE_4X2_STEP, YUV_32a_4x2_even, + YUV_CORE_4X2_STEP, YUV_32a_4x2_odd, + YUV_CORE_4X2_STEP, YUV_32am_4x2, + YUV_CORE_4X2_STEP, YUV_32ax2_4x2_even, + YUV_CORE_4X2_STEP, YUV_32ax2_4x2_odd, + YUV_CORE_4X2_STEP, YUV_32amx2_4x2, + dounaligned32arow, dounaligned32acol, + dounaligned32arowm, dounaligned32acolm, + dounaligned32arow2w, dounaligned32acol2w, + dounaligned32arowm2w, dounaligned32acolm2w, + dounaligned32arow2h, dounaligned32acol2h, + dounaligned32arowm2h, dounaligned32acolm2h, + dounaligned32arow2wh, dounaligned32acol2wh, + dounaligned32arowm2wh, dounaligned32acolm2wh, + { 0, 0, 0 } }; +static BLITS blits16 = { YUV_BYTES_PER_PIXEL_16, + YUV_CORE_4X2_STEP, YUV_16_4x2_even, + YUV_CORE_4X2_STEP, YUV_16_4x2_odd, + YUV_CORE_4X2_STEP, YUV_16m_4x2, + YUV_CORE_4X2_STEP, YUV_16x2_4x2_even, + YUV_CORE_4X2_STEP, YUV_16x2_4x2_odd, + YUV_CORE_4X2_STEP, YUV_16mx2_4x2, + dounaligned16row, dounaligned16col, + dounaligned16rowm, dounaligned16colm, + dounaligned16row2w, dounaligned16col2w, + dounaligned16rowm2w, dounaligned16colm2w, + dounaligned16row2h, dounaligned16col2h, + dounaligned16rowm2h, dounaligned16colm2h, + dounaligned16row2wh, dounaligned16col2wh, + dounaligned16rowm2wh, dounaligned16colm2wh, + { 0, 0, 0 } }; +static BLITS blits16a4 = { YUV_BYTES_PER_PIXEL_16, + YUV_CORE_4X2_STEP, YUV_16a4_4x2_even, + YUV_CORE_4X2_STEP, YUV_16a4_4x2_odd, + YUV_CORE_4X2_STEP, YUV_16a4m_4x2, + YUV_CORE_4X2_STEP, YUV_16a4x2_4x2_even, + YUV_CORE_4X2_STEP, YUV_16a4x2_4x2_odd, + YUV_CORE_4X2_STEP, YUV_16a4mx2_4x2, + dounaligned16a4row, dounaligned16a4col, + dounaligned16a4rowm, dounaligned16a4colm, + dounaligned16a4row2w, dounaligned16a4col2w, + 
dounaligned16a4rowm2w, dounaligned16a4colm2w, + dounaligned16a4row2h, dounaligned16a4col2h, + dounaligned16a4rowm2h, dounaligned16a4colm2h, + dounaligned16a4row2wh, dounaligned16a4col2wh, + dounaligned16a4rowm2wh, dounaligned16a4colm2wh, + { 0, 0, 0 } }; +static BLITS blitsyuy2 = { YUV_BYTES_PER_PIXEL_16, + YUV_CORE_4X2_STEP, YUY2_4x2, + YUV_CORE_4X2_STEP, YUY2_4x2, + YUV_CORE_4X2_STEP, YUY2_m_4x2, + YUV_CORE_4X2_STEP, YUY2_x2_4x2, + YUV_CORE_4X2_STEP, YUY2_x2_4x2, + YUV_CORE_4X2_STEP, YUY2_mx2_4x2, + dounalignedYUY2row, dounalignedYUY2col, + dounalignedYUY2rowm, dounalignedYUY2colm, + dounalignedYUY2row2w, dounalignedYUY2col2w, + dounalignedYUY2rowm2w, dounalignedYUY2colm2w, + dounalignedYUY2row2h, dounalignedYUY2col2h, + dounalignedYUY2rowm2h, dounalignedYUY2colm2h, + dounalignedYUY2row2wh, dounalignedYUY2col2wh, + dounalignedYUY2rowm2wh, dounalignedYUY2colm2wh, + { 0, 0, 0 } }; + +#pragma dont_inline on +static void checkzoombufs(u32 width) +{ + u32 rounded; + u32 size; + + rounded = YUV_ZOOM_ALIGN_SIZE(width); + size = rounded * YUV_ZOOM_BUFFER_COUNT; + if (size > z2hsize) { + if (z2hbuf1 != 0) { + radfree(z2hbuf1); + z2hbuf1 = 0; + } + z2hbuf1 = radmalloc(size); + z2hsize = size; + } + z2hbuf2 = (u8 PTR4*)z2hbuf1 + rounded; +} + +static void zoom2heven(s32 count) +{ + u8 PTR4* dest0; + u8 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u16 PTR4* u; + u16 PTR4* v; + u32 PTR4* a0; + u32 PTR4* a1; + s32 pitch_delta; + + dest1 = S.dest1; + dest0 = S.dest0; + y0 = S.y0; + y1 = S.y1; + a0 = S.a0; + a1 = S.a1; + u = S.u; + v = S.v; + EVENx(count); + S.dest0 = dest0 + S.pitch; + pitch_delta = S.dest1 - dest1; + S.dest1 = dest1 + S.pitch; + S.y0 = y0; + S.y1 = y1; + S.a0 = a0; + S.a1 = a1; + S.u = u; + S.v = v; + EVENx(count); + S.dest0 = dest0 + pitch_delta; + S.dest1 = dest1 + pitch_delta; +} + +static void zoom2hodd(s32 count) +{ + u8 PTR4* dest0; + u8 PTR4* dest1; + u32 PTR4* y0; + u32 PTR4* y1; + u16 PTR4* u; + u16 PTR4* v; + u32 PTR4* a0; + u32 PTR4* a1; + s32 
pitch_delta; + + dest1 = S.dest1; + dest0 = S.dest0; + y0 = S.y0; + y1 = S.y1; + a0 = S.a0; + a1 = S.a1; + u = S.u; + v = S.v; + ODDx(count); + S.dest0 = dest0 + S.pitch; + pitch_delta = S.dest1 - dest1; + S.dest1 = dest1 + S.pitch; + S.y0 = y0; + S.y1 = y1; + S.a0 = a0; + S.a1 = a1; + S.u = u; + S.v = v; + ODDx(count); + S.dest0 = dest0 + pitch_delta; + S.dest1 = dest1 + pitch_delta; +} + +static void setup_scaling(u32 flags, u32 PTR4* pitch, u32 width, u32 srcpitch, BLITS PTR4* blits, + u32 PTR4* pitch_delta) +{ + u32 mode; + u32 step; + + testing[0] = 0; + S.pitch = *pitch; + + // BINKRBINVERT swaps the U/V contribution tables so the packed RGB helpers + // can use the same per-pixel math for both channel orders. + if ((flags & BINKRBINVERT) != 0) { + if (whichyuv != YUV_TABLE_ORDER_RB_INVERTED) { + whichyuv = YUV_TABLE_ORDER_RB_INVERTED; + memcpy(YUVTables + YUV_V_TO_R_OFFSET, origYUVTables, + YUV_TABLE_PLANE_SIZE * sizeof(s32)); + memcpy(YUVTables + YUV_V_TO_GB_OFFSET, origYUVTables + YUV_U_TO_GB_OFFSET, + YUV_TABLE_PLANE_SIZE * sizeof(s32)); + memcpy(YUVTables + YUV_U_TO_GB_OFFSET, origYUVTables + YUV_V_TO_GB_OFFSET, + YUV_TABLE_PLANE_SIZE * sizeof(s32)); + memcpy(YUVTables, origYUVTables + YUV_V_TO_R_OFFSET, + YUV_TABLE_PLANE_SIZE * sizeof(s32)); + } + } else if (whichyuv != YUV_TABLE_ORDER_NORMAL) { + whichyuv = YUV_TABLE_ORDER_NORMAL; + memcpy(YUVTables, origYUVTables, sizeof(origYUVTables)); + } + + mode = flags & BINKCOPYNOSCALING; + + if (mode == BINKCOPY2XH) { + checkzoombufs(YUV_BLIT_ROW_BYTES(width, blits)); + *pitch *= 2; + *pitch_delta = *pitch - YUV_BLIT_ROW_BYTES(width, blits); + if ((flags & BINKGRAYSCALE) == 0) { + step = blits->odd_step; + EVENx = blits->even; + ODDx = blits->odd; + EVEN = (CoreBlitFn)zoom2heven; + ODD = (CoreBlitFn)zoom2hodd; + dounalignedrow = blits->row2h; + dounalignedcol = blits->col2h; + if (step < blits->even_step) { + step = blits->even_step; + } + } else { + step = blits->masked_step; + EVENx = blits->masked; + ODDx = 
EVENx; + EVEN = (CoreBlitFn)zoom2heven; + ODD = (CoreBlitFn)zoom2hodd; + dounalignedrow = blits->rowm2h; + dounalignedcol = blits->colm2h; + } + } else if (mode == BINKCOPY2XW || mode == BINKCOPY2XWHI) { + *pitch_delta = *pitch - YUV_BLIT_ROW_BYTES_X2(width, blits); + if ((flags & BINKGRAYSCALE) == 0) { + step = blits->odd_x2_step; + EVEN = blits->even_x2; + ODD = blits->odd_x2; + dounalignedrow = blits->row2w; + dounalignedcol = blits->col2w; + if (step < blits->even_x2_step) { + step = blits->even_x2_step; + } + } else { + step = blits->masked_x2_step; + EVEN = blits->masked_x2; + ODD = EVEN; + dounalignedrow = blits->rowm2w; + dounalignedcol = blits->colm2w; + } + } else if (mode == BINKCOPY2XWH) { + checkzoombufs(YUV_BLIT_ROW_BYTES_X2(width, blits)); + *pitch *= 2; + *pitch_delta = *pitch - YUV_BLIT_ROW_BYTES_X2(width, blits); + if ((flags & BINKGRAYSCALE) == 0) { + step = blits->odd_x2_step; + EVENx = blits->even_x2; + ODDx = blits->odd_x2; + EVEN = (CoreBlitFn)zoom2heven; + ODD = (CoreBlitFn)zoom2hodd; + dounalignedrow = blits->row2wh; + dounalignedcol = blits->col2wh; + if (step < blits->even_x2_step) { + step = blits->even_x2_step; + } + } else { + step = blits->masked_x2_step; + EVENx = blits->masked_x2; + ODDx = EVENx; + EVEN = (CoreBlitFn)zoom2heven; + ODD = (CoreBlitFn)zoom2hodd; + dounalignedrow = blits->rowm2wh; + dounalignedcol = blits->colm2wh; + } + } else { + *pitch_delta = *pitch - YUV_BLIT_ROW_BYTES(width, blits); + if ((flags & BINKGRAYSCALE) == 0) { + step = blits->odd_step; + EVEN = blits->even; + ODD = blits->odd; + dounalignedrow = blits->row; + dounalignedcol = blits->col; + if (step < blits->even_step) { + step = blits->even_step; + } + } else { + step = blits->masked_step; + EVEN = blits->masked; + ODD = EVEN; + dounalignedrow = blits->rowm; + dounalignedcol = blits->colm; + } + } + + alignshift = step; + align = 1 << alignshift; + alignm1 = align - 1; +} + +extern "C" { +// Generic rectangle blitter: scalar row/column helpers cover odd 
edges, then +// the aligned interior is handed to the 4x2 core kernels in the BLITS table. +static void YUV_blit(void PTR4* dest, + u32 destx, + u32 desty, + u32 destpitch, + void PTR4* src, + u32 srcx, + u32 srcy, + u32 srcw, + u32 srch, + u32 srcpitch, + u32 srcheight, + u32 flags, + void PTR4* alpha, + BLITS PTR4* blits) +{ + u32 pitch; + u32 pitch_delta; + u32 chroma_pitch; + u32 mode; + u32 endy; + u32 align_count; + s32 y_delta; + s32 c_delta; + s32 a_delta; + u8 PTR4* ybase; + u8 PTR4* abase; + u8 PTR4* cbase; + + pitch = destpitch; + mode = flags & BINKCOPYNOSCALING; + + if (mode == BINKCOPY1XI) { + pitch *= 2; + srcy >>= 1; + srch >>= 1; + srcheight >>= 1; + srcpitch *= 2; + } + + S.base = (u8 PTR4*)dest; + if (blits->bytes_per_pixel == YUV_BYTES_PER_PIXEL_16) { + if ((((u32)dest) & 3) == 2) { + destx++; + dest = (void PTR4*)(((u32)dest) & ~3); + } + } else if (blits->bytes_per_pixel == YUV_BYTES_PER_PIXEL_24) { + if ((((u32)dest - 3) & 3) == 0) { + destx++; + dest = (void PTR4*)((u32)dest - 3); + } else if ((((u32)dest - 6) & 3) == 0) { + destx += 2; + dest = (void PTR4*)((u32)dest - 6); + } else if ((((u32)dest - 9) & 3) == 0) { + destx += 3; + dest = (void PTR4*)((u32)dest - 9); + } + } + + S.dest0 = (u8 PTR4*)dest + desty * destpitch + YUV_BLIT_ROW_BYTES(destx, blits); + if (mode == BINKCOPY2XHI || mode == BINKCOPY2XWHI) { + pitch *= 2; + } + + setup_scaling(flags, &pitch, srcw, srch, blits, &pitch_delta); + + S.dest1 = S.dest0 + pitch; + ybase = (u8 PTR4*)src + srcx + srcy * srcpitch; + S.y0 = (u32 PTR4*)ybase; + S.y1 = (u32 PTR4*)(ybase + srcpitch); + + if (alpha != 0) { + abase = (u8 PTR4*)alpha + srcx + srcy * srcpitch; + S.a0 = (u32 PTR4*)abase; + S.a1 = (u32 PTR4*)(abase + srcpitch); + } else { + S.a0 = 0; + S.a1 = 0; + } + + chroma_pitch = srcpitch >> YUV_CHROMA_SHIFT; + cbase = (u8 PTR4*)src + srcpitch * srcheight; + if ((flags & BINKRBINVERT) != 0) { + S.u = (u16 PTR4*)cbase; + S.v = (u16 PTR4*)(cbase + chroma_pitch * (srcheight >> 
YUV_CHROMA_SHIFT)); + } else { + S.v = (u16 PTR4*)cbase; + S.u = (u16 PTR4*)(cbase + chroma_pitch * (srcheight >> YUV_CHROMA_SHIFT)); + } + + S.u = (u16 PTR4*)((u8 PTR4*)S.u + (srcx >> YUV_CHROMA_SHIFT) + + (srcy >> YUV_CHROMA_SHIFT) * chroma_pitch); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + (srcx >> YUV_CHROMA_SHIFT) + + (srcy >> YUV_CHROMA_SHIFT) * chroma_pitch); + + y_delta = srcpitch - srcw; + a_delta = y_delta; + c_delta = chroma_pitch - (srcw >> YUV_CHROMA_SHIFT); + if ((srcw & 1) != 0 && (srcx & 1) != 0) { + c_delta--; + } + + endy = srcy + srch - 1; + if ((srcy & 1) != 0 && (s32)srcy <= (s32)endy) { + srcy++; + dounalignedrow(srcx, srcw); + S.dest0 += pitch_delta; + S.dest1 = S.dest0 + pitch; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + y_delta); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y0 + srcpitch); + S.u = (u16 PTR4*)((u8 PTR4*)S.u + c_delta); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + c_delta); + if (alpha != 0) { + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + a_delta); + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a0 + srcpitch); + } + } + + align_count = (4 - (destx & 3)) & 3; + if (srcw < align_count) { + align_count = srcw; + } + + while ((s32)srcy < (s32)endy) { + u32 phase; + u32 count; + u32 aligned; + u32 blocks; + u32 tail; + + phase = srcx; + count = srcw; + if (align_count != 0) { + count -= align_count; + phase = dounalignedcol(align_count, phase); + } + + aligned = count & ~alignm1; + blocks = aligned >> 2; + tail = count - blocks * 4; + + if (blocks != 0) { + if ((phase & 1) == 0) { + EVEN(blocks); + } else { + ODD(blocks); + } + } + + if (tail != 0) { + dounalignedcol(tail, phase + (aligned & ~3)); + } + + srcy += 2; + S.dest0 = S.dest1 + pitch_delta; + S.dest1 = S.dest0 + pitch; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y1 + y_delta); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y0 + srcpitch); + S.u = (u16 PTR4*)((u8 PTR4*)S.u + c_delta); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + c_delta); + if (alpha != 0) { + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a1 + a_delta); + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a0 + srcpitch); + } + 
} + + if ((s32)srcy <= (s32)endy) { + dounalignedrow(srcx, srcw); + } +} +} + +static inline void blit_mask_block(u32 count, s32 y_delta, s32 c_delta, s32 pitch_delta, u32 srcpitch) +{ + RGBContext saved; + s32 i; + + saved = S; + i = YUV_MASK_HALF_BLOCK_ROWS; + do { + EVEN(count); + i--; + S.dest0 = S.dest1 + pitch_delta; + S.dest1 = S.dest0 + S.pitch; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y1 + y_delta); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y0 + srcpitch); + S.u = (u16 PTR4*)((u8 PTR4*)S.u + c_delta); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + c_delta); + if (S.a0 != 0) { + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a1 + y_delta); + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a0 + srcpitch); + } + } while (i != 0); + S = saved; +} + +extern "C" { +// Dirty-mask blitter works in 16x16 mask blocks and falls back to the generic +// rectangle path for partial right and bottom edges. +static void YUV_blit_mask(void PTR4* dest, + u32 destx, + u32 desty, + u32 destpitch, + u8 PTR4* mask, + u32 maskpitch, + void PTR4* src, + u32 srcx, + u32 srcy, + u32 srcw, + u32 srch, + u32 srcpitch, + u32 srcheight, + u32 flags, + void PTR4* alpha, + BLITS PTR4* blits) +{ + u32 pitch16; + u32 pitch32; + u32 pitch_delta16; + u32 pitch_delta32; + u32 mode; + u32 mask_step; + u32 xscale; + u32 chroma_pitch; + u32 inner_x; + u32 inner_y; + u32 end_x; + u32 end_y; + u32 full_w; + u32 full_h; + u32 old_srcw; + u32 old_srch; + u32 old_srcheight; + u32 old_srcpitch; + u8 PTR4* maskp; + u8 PTR4* ybase; + u8 PTR4* abase; + u8 PTR4* cbase; + s32 y_delta16; + s32 y_delta32; + s32 c_delta16; + s32 c_delta32; + s32 row_skip; + s32 mask_row_skip; + + old_srcw = srcw; + old_srch = srch; + old_srcheight = srcheight; + old_srcpitch = srcpitch; + + pitch16 = destpitch; + mode = flags & BINKCOPYNOSCALING; + if (mode == BINKCOPY1XI) { + pitch16 *= 2; + srch >>= 1; + srcy >>= 1; + srcheight >>= 1; + srcpitch *= 2; + maskpitch *= 2; + mask_step = maskpitch >> YUV_CHROMA_SHIFT; + } else { + mask_step = 0; + } + + S.base = (u8 PTR4*)dest; + S.dest0 = 
(u8 PTR4*)dest + desty * destpitch + YUV_BLIT_ROW_BYTES(destx, blits); + if (mode == BINKCOPY2XHI || mode == BINKCOPY2XWHI) { + pitch16 *= 2; + } + + setup_scaling(flags, &pitch16, YUV_MASK_BLOCK_PIXELS, srch, blits, &pitch_delta16); + pitch32 = pitch16; + setup_scaling(flags, &pitch32, YUV_MASK_BLOCK_PAIR_PIXELS, srch, blits, &pitch_delta32); + + if (mode == BINKCOPY2XW || mode == BINKCOPY2XWHI || mode == BINKCOPY2XWH) { + xscale = 2; + } else { + xscale = 1; + } + + chroma_pitch = srcpitch >> YUV_CHROMA_SHIFT; + S.dest1 = S.dest0 + pitch32; + ybase = (u8 PTR4*)src + srcx + srcy * srcpitch; + S.y0 = (u32 PTR4*)ybase; + S.y1 = (u32 PTR4*)(ybase + srcpitch); + if (alpha != 0) { + abase = (u8 PTR4*)alpha + srcx + srcy * srcpitch; + S.a0 = (u32 PTR4*)abase; + S.a1 = (u32 PTR4*)(abase + srcpitch); + } else { + S.a0 = 0; + S.a1 = 0; + } + + cbase = (u8 PTR4*)src + srcpitch * srcheight; + if ((flags & BINKRBINVERT) == 0) { + S.v = (u16 PTR4*)cbase; + S.u = (u16 PTR4*)(cbase + chroma_pitch * (srcheight >> YUV_CHROMA_SHIFT)); + } else { + S.u = (u16 PTR4*)cbase; + S.v = (u16 PTR4*)(cbase + chroma_pitch * (srcheight >> YUV_CHROMA_SHIFT)); + } + S.u = (u16 PTR4*)((u8 PTR4*)S.u + (srcx >> YUV_CHROMA_SHIFT) + + (srcy >> YUV_CHROMA_SHIFT) * chroma_pitch); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + (srcx >> YUV_CHROMA_SHIFT) + + (srcy >> YUV_CHROMA_SHIFT) * chroma_pitch); + + y_delta16 = srcpitch - YUV_MASK_BLOCK_PIXELS; + y_delta32 = srcpitch - YUV_MASK_BLOCK_PAIR_PIXELS; + c_delta16 = chroma_pitch - YUV_CHROMA_BLOCK_BYTES; + c_delta32 = chroma_pitch - YUV_MASK_BLOCK_PIXELS; + row_skip = (pitch16 * YUV_MASK_BLOCK_MASK - YUV_BLIT_SCALED_ROW_BYTES(srcw, blits, xscale)) + + YUV_BLIT_SCALED_ROW_BYTES(srcw & YUV_MASK_BLOCK_MASK, blits, xscale); + mask_row_skip = maskpitch - (srcw >> YUV_MASK_BLOCK_SHIFT); + + end_y = srcy + srch - 1; + end_x = srcx + srcw - 1; + maskp = mask + maskpitch * (srcy >> YUV_MASK_BLOCK_SHIFT) + + (((s32)srcx < 0 ? 
srcx + YUV_MASK_BLOCK_MASK : srcx) >> YUV_MASK_BLOCK_SHIFT); + + inner_y = srcy; + while ((s32)(inner_y + YUV_MASK_BLOCK_MASK) <= (s32)end_y) { + u32 next_y; + u32 x; + + next_y = inner_y + YUV_MASK_BLOCK_PIXELS; + x = srcx; + while ((s32)(x + (YUV_MASK_BLOCK_PAIR_PIXELS - 1)) <= (s32)end_x) { + u8 bits; + + bits = (maskp[0] != 0) ? 1 : 0; + if (maskp[1] != 0) { + bits += 2; + } + if (mask_step != 0) { + u8 lower; + lower = 0; + if (maskp[mask_step] != 0) { + lower += 1; + } + if (maskp[mask_step + 1] != 0) { + lower += 2; + } + bits |= lower; + } + + if (bits == 1) { + blit_mask_block(YUV_MASK_HALF_BLOCKS, y_delta16, c_delta16, pitch_delta16, srcpitch); + } else if (bits == 2) { + RGBContext saved = S; + S.dest0 += YUV_MASK_BLOCK_PIXELS; + S.dest1 += YUV_MASK_BLOCK_PIXELS; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + YUV_MASK_BLOCK_PIXELS); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + YUV_MASK_BLOCK_PIXELS); + S.u = (u16 PTR4*)((u8 PTR4*)S.u + YUV_CHROMA_BLOCK_BYTES); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + YUV_CHROMA_BLOCK_BYTES); + blit_mask_block(YUV_MASK_HALF_BLOCKS, y_delta16, c_delta16, pitch_delta16, srcpitch); + S = saved; + } else if (bits == 3) { + blit_mask_block(YUV_MASK_FULL_BLOCKS, y_delta32, c_delta32, pitch_delta32, srcpitch); + } + + maskp += 2; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + YUV_MASK_BLOCK_PAIR_PIXELS); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + YUV_MASK_BLOCK_PAIR_PIXELS); + S.u = (u16 PTR4*)((u8 PTR4*)S.u + YUV_MASK_BLOCK_PIXELS); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + YUV_MASK_BLOCK_PIXELS); + S.dest0 += YUV_BLIT_SCALED_ROW_BYTES(YUV_MASK_BLOCK_PAIR_PIXELS, blits, xscale); + S.dest1 += YUV_BLIT_SCALED_ROW_BYTES(YUV_MASK_BLOCK_PAIR_PIXELS, blits, xscale); + x += YUV_MASK_BLOCK_PAIR_PIXELS; + } + + if ((s32)(x + YUV_MASK_BLOCK_MASK) <= (s32)end_x) { + u8 bits = *maskp++; + if (mask_step != 0) { + bits |= maskp[mask_step - 1]; + } + if (bits != 0) { + blit_mask_block(YUV_MASK_HALF_BLOCKS, y_delta16, c_delta16, pitch_delta16, srcpitch); + } + S.y0 = (u32 PTR4*)((u8 
PTR4*)S.y0 + YUV_MASK_BLOCK_PIXELS); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + YUV_MASK_BLOCK_PIXELS); + S.u = (u16 PTR4*)((u8 PTR4*)S.u + YUV_CHROMA_BLOCK_BYTES); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + YUV_CHROMA_BLOCK_BYTES); + S.dest0 += YUV_BLIT_SCALED_ROW_BYTES(YUV_MASK_BLOCK_PIXELS, blits, xscale); + S.dest1 += YUV_BLIT_SCALED_ROW_BYTES(YUV_MASK_BLOCK_PIXELS, blits, xscale); + } + + S.dest0 = S.dest1 + row_skip; + S.dest1 = S.dest0 + pitch16; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y1 + y_delta16 + (srcpitch * (YUV_MASK_BLOCK_PIXELS - 2))); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y0 + srcpitch); + S.u = (u16 PTR4*)((u8 PTR4*)S.u + c_delta16 + (chroma_pitch * 7)); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + c_delta16 + (chroma_pitch * 7)); + maskp += mask_row_skip; + inner_y = next_y; + } + + if ((old_srcw & YUV_MASK_BLOCK_MASK) != 0) { + full_w = old_srcw & ~YUV_MASK_BLOCK_MASK; + YUV_blit(dest, destx + full_w * xscale, desty, destpitch, src, full_w, 0, old_srcw - full_w, old_srch, + old_srcpitch, old_srcheight, flags, alpha, blits); + } + + if ((srch & YUV_MASK_BLOCK_MASK) != 0) { + full_h = srch & ~YUV_MASK_BLOCK_MASK; + YUV_blit(dest, destx, desty + mult64anddiv(full_h, pitch16 * old_srcheight, destpitch * srcpitch), + destpitch, src, 0, full_h, old_srcw, srch - full_h, old_srcpitch, old_srcheight, flags, alpha, blits); + } +} +} + +extern "C" void YUV_init(u32 flags) +{ + s32 i; + s32 y; + s32 uv; + u32 red_bits; + u32 green_bits; + u32 blue_bits; + u32 blue_shift; + u32 red_down; + u32 green_down; + u32 blue_down; + u32 red_mask; + u32 green_mask; + u32 blue_mask; + u32 white; + + // Build luma/chroma contribution tables once, then rebuild the packed RGB + // clamp tables when the destination surface format changes. 
+ if (donetables == 0) { + for (i = 0; i < YUV_TABLE_PLANE_SIZE; i++) { + if (i < YUV_LUMA_BLACK_CUTOFF) { + y = 0; + } else if (i < YUV_LUMA_WHITE_CUTOFF) { + y = i - YUV_LUMA_BLACK; + y = yuv_round15(y * YUV_COEFF_Y_TO_RGB); + } else { + y = YUV_LUMA_MAX; + } + + ytable[i] = y; + + uv = i - YUV_CHROMA_CENTER; + ytable_x4[i] = y << 2; + YUVTables[YUV_V_TO_GB_OFFSET + i] = -yuv_round15(uv * YUV_COEFF_V_TO_GB); + YUVTables[YUV_U_TO_GB_OFFSET + i] = -yuv_round15(uv * YUV_COEFF_U_TO_GB); + YUVTables[YUV_V_TO_R_OFFSET + i] = yuv_round15(uv * YUV_COEFF_V_TO_R); + YUVTables[i] = yuv_round15(uv * YUV_COEFF_U_TO_B); + } + + for (i = 0; i < YUV_TABLE_PLANE_SIZE; i++) { + clamptable[i] = 0; + clamptable[RGB_CLAMP_BIAS + i] = i; + clamptable[RGB_CLAMP_HIGH_OFFSET + i] = RGB_CHANNEL_MAX; + clamp_ytable[i] = &clamptable[RGB_CLAMP_BIAS + ytable[i]]; + } + + donetables = 1; + + memcpy(origYUVTables, YUVTables, sizeof(origYUVTables)); + } + + if (rgb_layout == flags) { + return; + } + + if (flags == BINKSURFACE_INVALID) { + return; + } + + rgb_layout = flags; + + if (flags > BINKSURFACE555) { + if (flags == BINKSURFACE655) { + red_bits = 6; + green_bits = 5; + blue_bits = 5; + blue_shift = 11; + } else if (flags < BINKSURFACE655) { + red_bits = 5; + green_bits = 6; + blue_bits = 5; + blue_shift = 11; + } else if (flags == BINKSURFACE664) { + red_bits = 6; + green_bits = 6; + blue_bits = 4; + blue_shift = 12; + } else { + red_bits = 0; + green_bits = 0; + blue_bits = 0; + blue_shift = 0; + } + } else if (flags >= BINKSURFACE5551) { + red_bits = 5; + green_bits = 5; + blue_bits = 5; + blue_shift = 10; + } else if (flags == BINKSURFACE4444) { + red_bits = 4; + green_bits = 4; + blue_bits = 4; + blue_shift = 8; + } else { + red_bits = 0; + green_bits = 0; + blue_bits = 0; + blue_shift = 0; + } + + red_down = 8 - red_bits; + green_down = 8 - green_bits; + blue_down = 8 - blue_bits; + red_mask = (1 << red_bits) - 1; + green_mask = ((1 << green_bits) - 1) << red_bits; + blue_mask = ((1 
<< blue_bits) - 1) << blue_shift; + white = (RGB_CHANNEL_MAX >> red_down) | ((RGB_CHANNEL_MAX >> green_down) << red_bits) | + ((RGB_CHANNEL_MAX >> blue_down) << blue_shift); + + RGBshift[0] = 0; + RGBshift[1] = 0; + RGBshift[2] = red_bits; + RGBshift[3] = 0; + RGBshift[4] = blue_shift; + RGBshift[5] = 0; + RGBshift[6] = red_down; + RGBshift[7] = green_down; + RGBshift[8] = blue_down; + RGBshift[9] = 0; + RGBshift[10] = 0; + RGBshift[11] = 0; + for (i = 0; i < YUV_TABLE_PLANE_SIZE; i++) { + u32 mono; + u32 red; + u32 green; + u32 blue; + + if (i < YUV_LUMA_BLACK_CUTOFF) { + mono16[i] = 0; + mono16x2[i] = 0; + mono32[i] = 0; + } else if (i < YUV_LUMA_WHITE_CUTOFF) { + mono = ((i - YUV_LUMA_BLACK) * RGB_CHANNEL_MAX) / YUV_LUMA_RANGE; + y = (mono >> red_down) | ((mono >> green_down) << red_bits) | ((mono >> blue_down) << blue_shift); + mono16x2[i] = y | (y << 16); + mono32[i] = (mono << 16) | (mono << 8) | mono; + mono16[i] = y; + } else { + mono16[i] = white; + mono16x2[i] = white | (white << 16); + mono32[i] = RGB_MONO_WHITE; + } + + clamp_r[i] = 0; + clamp_g[i] = 0; + clamp_b[i] = 0; + + red = i >> red_down; + green = (i >> green_down) << red_bits; + blue = (i >> blue_down) << blue_shift; + + clamp_r[RGB_CLAMP_BIAS + i] = red; + clamp_g[RGB_CLAMP_BIAS + i] = green; + clamp_b[RGB_CLAMP_BIAS + i] = blue; + clamp_r[RGB_CLAMP_HIGH_OFFSET + i] = red_mask; + clamp_g[RGB_CLAMP_HIGH_OFFSET + i] = green_mask; + clamp_b[RGB_CLAMP_HIGH_OFFSET + i] = blue_mask; + + clamp_rh[i] = clamp_r[i] << 16; + clamp_gh[i] = clamp_g[i] << 16; + clamp_bh[i] = clamp_b[i] << 16; + clamp_a4[i] = (i & RGB_A4_SOURCE_MASK) << RGB_A4_SHIFT; + + clamp_rr[i] = 0; + clamp_gg[i] = 0; + clamp_bb[i] = 0; + clamp_rr[RGB_CLAMP_BIAS + i] = red * RGB_DUP16; + clamp_gg[RGB_CLAMP_BIAS + i] = green * RGB_DUP16; + clamp_bb[RGB_CLAMP_BIAS + i] = blue * RGB_DUP16; + clamp_rh[RGB_CLAMP_BIAS + i] = clamp_r[RGB_CLAMP_BIAS + i] << 16; + clamp_gh[RGB_CLAMP_BIAS + i] = clamp_g[RGB_CLAMP_BIAS + i] << 16; + 
clamp_bh[RGB_CLAMP_BIAS + i] = clamp_b[RGB_CLAMP_BIAS + i] << 16; + + clamp_rr[RGB_CLAMP_HIGH_OFFSET + i] = red_mask * RGB_DUP16; + clamp_gg[RGB_CLAMP_HIGH_OFFSET + i] = green_mask * RGB_DUP16; + clamp_bb[RGB_CLAMP_HIGH_OFFSET + i] = blue_mask * RGB_DUP16; + clamp_rh[RGB_CLAMP_HIGH_OFFSET + i] = red_mask * RGB_HIGH16; + clamp_gh[RGB_CLAMP_HIGH_OFFSET + i] = green_mask * RGB_HIGH16; + clamp_bh[RGB_CLAMP_HIGH_OFFSET + i] = blue_mask * RGB_HIGH16; + } +} + +static void dounaligned32rowm2w(u32 phase, u32 count) +{ + const u32 PTR4* table; + u8 y; + u32 pixel; + + (void)phase; + if (count-- == 0) { + return; + } + + table = mono32; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = table[y]; + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + S.dest0 += 8; + } while (count-- != 0); +} + +static u32 dounaligned32colm2w(u32 count, s32 phase) +{ + const u32 PTR4* table; + u32 remaining; + u8 y; + u32 pixel; + + table = mono32; + remaining = count; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = table[y]; + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + pixel = table[y]; + ((u32 PTR4*)S.dest1)[0] = pixel; + ((u32 PTR4*)S.dest1)[1] = pixel; + S.dest0 += 8; + S.dest1 += 8; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned32rowm2h(u32 phase, u32 count) +{ + const u32 PTR4* table; + u8 y; + u32 pixel; + + (void)phase; + if (count-- == 0) { + return; + } + + table = mono32; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = table[y]; + *(u32 PTR4*)S.dest0 = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + } while (count-- != 0); +} + +static u32 dounaligned32colm2h(u32 count, s32 phase) +{ + const u32 PTR4* table; + u32 remaining; + u8 y; + u32 pixel; + + table = mono32; + remaining = count; 
+ do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = table[y]; + *(u32 PTR4*)S.dest0 = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + pixel = table[y]; + *(u32 PTR4*)S.dest1 = pixel; + *(u32 PTR4*)(S.dest1 + S.pitch) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned32rowm2wh(u32 phase, u32 count) +{ + const u32 PTR4* table; + u8 y; + u32 pixel; + + (void)phase; + if (count-- == 0) { + return; + } + + table = mono32; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = table[y]; + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel; + S.dest0 += 8; + } while (count-- != 0); +} + +static u32 dounaligned32colm2wh(u32 count, s32 phase) +{ + const u32 PTR4* table; + u32 remaining; + u8 y; + u32 pixel; + + table = mono32; + remaining = count; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = table[y]; + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + pixel = table[y]; + ((u32 PTR4*)S.dest1)[0] = pixel; + ((u32 PTR4*)S.dest1)[1] = pixel; + *(u32 PTR4*)(S.dest1 + S.pitch) = pixel; + *(u32 PTR4*)(S.dest1 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel; + S.dest0 += 8; + S.dest1 += 8; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned32row2w(u32 phase, u32 count) +{ + u8 y; + const u32 PTR4* ytable; + u32 pixel; + + if (count-- == 0) { + return; + } + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = 
YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ytable = clamp_ytable[y]; + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16); + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + S.dest0 += 8; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (count-- != 0); +} + +static u32 dounaligned32col2w(u32 count, s32 phase) +{ + u8 y; + const u32 PTR4* ytable; + u32 pixel; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ytable = clamp_ytable[y]; + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16); + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + ytable = clamp_ytable[y]; + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16); + ((u32 PTR4*)S.dest1)[0] = pixel; + ((u32 PTR4*)S.dest1)[1] = pixel; + S.dest0 += 8; + S.dest1 += 8; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + count--; + } while (count != 0); + + return phase; +} + +static void dounaligned32row2h(u32 phase, u32 count) +{ + u8 y; + const u32 PTR4* ytable; + u32 pixel; + + if (count-- == 0) { + return; + } + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ytable = clamp_ytable[y]; + pixel = 
ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16); + *(u32 PTR4*)S.dest0 = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (count-- != 0); +} + +static u32 dounaligned32col2h(u32 count, s32 phase) +{ + u8 y; + const u32 PTR4* ytable; + u32 pixel; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ytable = clamp_ytable[y]; + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16); + *(u32 PTR4*)S.dest0 = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + ytable = clamp_ytable[y]; + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16); + *(u32 PTR4*)S.dest1 = pixel; + *(u32 PTR4*)(S.dest1 + S.pitch) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + count--; + } while (count != 0); + + return phase; +} + +static void dounaligned32row2wh(u32 phase, u32 count) +{ + u8 y; + const u32 PTR4* ytable; + u32 pixel; + + if (count-- == 0) { + return; + } + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ytable = clamp_ytable[y]; + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16); + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u32 
PTR4*)(S.dest0 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel; + S.dest0 += 8; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (count-- != 0); +} + +static u32 dounaligned32col2wh(u32 count, s32 phase) +{ + u8 y; + const u32 PTR4* ytable; + u32 pixel; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ytable = clamp_ytable[y]; + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16); + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + ytable = clamp_ytable[y]; + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16); + ((u32 PTR4*)S.dest1)[0] = pixel; + ((u32 PTR4*)S.dest1)[1] = pixel; + *(u32 PTR4*)(S.dest1 + S.pitch) = pixel; + *(u32 PTR4*)(S.dest1 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel; + S.dest0 += 8; + S.dest1 += 8; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + count--; + } while (count != 0); + + return phase; +} + +static void dounaligned32rowm(u32 phase, u32 count) +{ + u8 y; + + (void)phase; + if (count-- == 0) { + return; + } + + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + *(u32 PTR4*)S.dest0 = RGB32_M(y); + S.dest0 += YUV_PACKED_WORD_BYTES; + } while (count-- != 0); +} + +static u32 dounaligned32colm(u32 count, s32 phase) +{ + u32 remaining; + u8 y; + + remaining = count; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + *(u32 PTR4*)S.dest0 = RGB32_M(y); + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + 
*(u32 PTR4*)S.dest1 = RGB32_M(y); + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +/* dounaligned32row: emit `count` packed 32-bit pixels for the single row read through S.y0 and written through S.dest0. Each iteration reloads the chroma terms S.b, S.gb and S.r from YUVTables using the bytes at S.u / S.v, selects the per-luma table clamp_ytable[y], and stores ytable[S.b] | ytable[S.gb] << 8 | ytable[S.r] << 16. `phase` is incremented once per pixel. Returns immediately when count is 0. */ +static void dounaligned32row(u32 phase, u32 count) +{ + u8 y; + const u32 PTR4* ytable; + + if (count-- == 0) { + return; + } + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ytable = clamp_ytable[y]; + *(u32 PTR4*)S.dest0 = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16); + S.dest0 += YUV_PACKED_WORD_BYTES; + /* U/V step one byte only when the incremented phase is even, i.e. on every second pixel. */ + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (count-- != 0); +} + +/* dounaligned32col: same conversion as dounaligned32row but for two rows per iteration (S.y0 -> S.dest0 and S.y1 -> S.dest1), both using the chroma terms loaded at the top of the iteration. Returns the advanced phase. The loop body runs before `count` is tested, so a count of 0 would wrap the u32 counter; callers presumably always pass count >= 1 (TODO confirm). */ +static u32 dounaligned32col(u32 count, s32 phase) +{ + u8 y; + const u32 PTR4* ytable; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ytable = clamp_ytable[y]; + *(u32 PTR4*)S.dest0 = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16); + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + ytable = clamp_ytable[y]; + *(u32 PTR4*)S.dest1 = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16); + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + /* Step chroma one byte at a time, as dounaligned32row and the scaled/alpha col helpers do; ++ on these u16-typed pointers would move two bytes and skip a chroma sample. NOTE(review): re-check against the target object if byte-matching of this function matters. */ + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + count--; + } while (count != 0); + + return phase; +} + +extern "C" void YUV_blit_32bpp(void PTR4* dest, + u32 destx, + u32 desty, + u32 destpitch, + void PTR4* src, + u32 srcx, + u32 srcy, + u32 srcw, + u32 srch, + u32 srcpitch, + u32 srcheight, + u32 flags) +{ + YUV_blit(dest, destx, desty, destpitch, src, srcx, srcy, srcw, srch,
srcpitch, srcheight, flags, 0, &blits32); +} + +static void dounaligned32arowm2w(u32 phase, u32 count) +{ + const u32 PTR4* table; + u8 PTR4* yptr; + u8 PTR4* aptr; + u8 y; + u8 a; + u32 pixel; + + (void)phase; + if (count-- == 0) { + return; + } + + table = mono32; + do { + yptr = (u8 PTR4*)S.y0; + aptr = (u8 PTR4*)S.a0; + y = *yptr++; + a = *aptr++; + S.y0 = (u32 PTR4*)yptr; + S.a0 = (u32 PTR4*)aptr; + pixel = table[y] | (a << 24); + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + S.dest0 += 8; + } while (count-- != 0); +} + +static u32 dounaligned32acolm2w(u32 count, s32 phase) +{ + const u32 PTR4* table; + u32 remaining; + u32 y; + u32 a; + u32 pixel; + + table = mono32; + remaining = count; + do { + y = *(u8 PTR4*)S.y0; + a = *(u8 PTR4*)S.a0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + pixel = table[y] | (a << 24); + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + y = *(u8 PTR4*)S.y1; + a = *(u8 PTR4*)S.a1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + pixel = table[y] | (a << 24); + ((u32 PTR4*)S.dest1)[0] = pixel; + ((u32 PTR4*)S.dest1)[1] = pixel; + S.dest0 += 8; + S.dest1 += 8; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned32arowm2h(u32 phase, u32 count) +{ + const u32 PTR4* table; + u8 PTR4* yptr; + u8 PTR4* aptr; + u8 y; + u8 a; + u32 pixel; + + (void)phase; + if (count-- == 0) { + return; + } + + table = mono32; + do { + yptr = (u8 PTR4*)S.y0; + aptr = (u8 PTR4*)S.a0; + y = *yptr++; + a = *aptr++; + S.y0 = (u32 PTR4*)yptr; + S.a0 = (u32 PTR4*)aptr; + pixel = table[y] | (a << 24); + *(u32 PTR4*)S.dest0 = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + } while (count-- != 0); +} + +static u32 dounaligned32acolm2h(u32 count, s32 phase) +{ + const u32 PTR4* table; + u32 remaining; + u32 y; + u32 a; + u32 pixel; + + table = mono32; + remaining 
= count; + do { + y = *(u8 PTR4*)S.y0; + a = *(u8 PTR4*)S.a0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + pixel = table[y] | (a << 24); + *(u32 PTR4*)S.dest0 = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + y = *(u8 PTR4*)S.y1; + a = *(u8 PTR4*)S.a1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + pixel = table[y] | (a << 24); + *(u32 PTR4*)S.dest1 = pixel; + *(u32 PTR4*)(S.dest1 + S.pitch) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned32arowm2wh(u32 phase, u32 count) +{ + const u32 PTR4* table; + u8 PTR4* yptr; + u8 PTR4* aptr; + u8 y; + u8 a; + u32 pixel; + + (void)phase; + if (count-- == 0) { + return; + } + + table = mono32; + do { + yptr = (u8 PTR4*)S.y0; + aptr = (u8 PTR4*)S.a0; + y = *yptr++; + a = *aptr++; + S.y0 = (u32 PTR4*)yptr; + S.a0 = (u32 PTR4*)aptr; + pixel = table[y] | (a << 24); + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel; + S.dest0 += 8; + } while (count-- != 0); +} + +static u32 dounaligned32acolm2wh(u32 count, s32 phase) +{ + const u32 PTR4* table; + u32 remaining; + u32 y; + u32 a; + u32 pixel; + + table = mono32; + remaining = count; + do { + y = *(u8 PTR4*)S.y0; + a = *(u8 PTR4*)S.a0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + pixel = table[y] | (a << 24); + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel; + y = *(u8 PTR4*)S.y1; + a = *(u8 PTR4*)S.a1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + pixel = table[y] | (a << 24); + ((u32 PTR4*)S.dest1)[0] = pixel; + ((u32 PTR4*)S.dest1)[1] = 
pixel; + *(u32 PTR4*)(S.dest1 + S.pitch) = pixel; + *(u32 PTR4*)(S.dest1 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel; + S.dest0 += 8; + S.dest1 += 8; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned32arow2w(u32 phase, u32 count) +{ + u32 remaining; + u8 PTR4* yptr; + u8 PTR4* aptr; + u8 y; + u8 a; + const u32 PTR4* ytable; + u32 pixel; + + remaining = count; + if (remaining-- == 0) { + return; + } + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + yptr = (u8 PTR4*)S.y0; + aptr = (u8 PTR4*)S.a0; + y = *yptr++; + S.y0 = (u32 PTR4*)yptr; + ytable = clamp_ytable[y]; + a = *aptr++; + S.a0 = (u32 PTR4*)aptr; + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16) | ((u32)a << 24); + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + S.dest0 += 8; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (remaining-- != 0); +} + +static u32 dounaligned32acol2w(u32 count, s32 phase) +{ + u8 y; + u8 a; + const u32 PTR4* ytable; + u32 pixel; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ytable = clamp_ytable[y]; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16) | ((u32)a << 24); + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + ytable = clamp_ytable[y]; + a = *(u8 PTR4*)S.a1; + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + pixel = ytable[S.b] | (ytable[S.gb] << 8) | 
(ytable[S.r] << 16) | ((u32)a << 24); + ((u32 PTR4*)S.dest1)[0] = pixel; + ((u32 PTR4*)S.dest1)[1] = pixel; + S.dest0 += 8; + S.dest1 += 8; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + count--; + } while (count != 0); + + return phase; +} + +static void dounaligned32arow2h(u32 phase, u32 count) +{ + u32 remaining; + u8 PTR4* yptr; + u8 PTR4* aptr; + u8 y; + u8 a; + const u32 PTR4* ytable; + u32 pixel; + + remaining = count; + if (remaining-- == 0) { + return; + } + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + yptr = (u8 PTR4*)S.y0; + aptr = (u8 PTR4*)S.a0; + y = *yptr++; + S.y0 = (u32 PTR4*)yptr; + ytable = clamp_ytable[y]; + a = *aptr++; + S.a0 = (u32 PTR4*)aptr; + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16) | ((u32)a << 24); + *(u32 PTR4*)S.dest0 = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (remaining-- != 0); +} + +static u32 dounaligned32acol2h(u32 count, s32 phase) +{ + u8 y; + u8 a; + const u32 PTR4* ytable; + u32 pixel; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ytable = clamp_ytable[y]; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16) | ((u32)a << 24); + *(u32 PTR4*)S.dest0 = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + ytable = clamp_ytable[y]; 
+ a = *(u8 PTR4*)S.a1; + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16) | ((u32)a << 24); + *(u32 PTR4*)S.dest1 = pixel; + *(u32 PTR4*)(S.dest1 + S.pitch) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + count--; + } while (count != 0); + + return phase; +} + +static void dounaligned32arow2wh(u32 phase, u32 count) +{ + u32 remaining; + u8 PTR4* yptr; + u8 PTR4* aptr; + u8 y; + u8 a; + const u32 PTR4* ytable; + u32 pixel; + + remaining = count; + if (remaining-- == 0) { + return; + } + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + yptr = (u8 PTR4*)S.y0; + aptr = (u8 PTR4*)S.a0; + y = *yptr++; + S.y0 = (u32 PTR4*)yptr; + ytable = clamp_ytable[y]; + a = *aptr++; + S.a0 = (u32 PTR4*)aptr; + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16) | ((u32)a << 24); + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel; + S.dest0 += 8; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (remaining-- != 0); +} + +static u32 dounaligned32acol2wh(u32 count, s32 phase) +{ + u8 y; + u8 a; + const u32 PTR4* ytable; + u32 pixel; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ytable = clamp_ytable[y]; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + pixel 
= ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16) | ((u32)a << 24); + ((u32 PTR4*)S.dest0)[0] = pixel; + ((u32 PTR4*)S.dest0)[1] = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + ytable = clamp_ytable[y]; + a = *(u8 PTR4*)S.a1; + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + pixel = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16) | ((u32)a << 24); + ((u32 PTR4*)S.dest1)[0] = pixel; + ((u32 PTR4*)S.dest1)[1] = pixel; + *(u32 PTR4*)(S.dest1 + S.pitch) = pixel; + *(u32 PTR4*)(S.dest1 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel; + S.dest0 += 8; + S.dest1 += 8; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + count--; + } while (count != 0); + + return phase; +} + +static void dounaligned32arowm(u32 phase, u32 count) +{ + u32 remaining; + u8 y; + u8 a; + + (void)phase; + remaining = count; + if (remaining-- == 0) { + return; + } + + do { + y = *(u8 PTR4*)S.y0; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + *(u32 PTR4*)S.dest0 = RGB32_M_A(y, a); + S.dest0 += YUV_PACKED_WORD_BYTES; + } while (remaining-- != 0); +} + +static u32 dounaligned32acolm(u32 count, s32 phase) +{ + u32 remaining; + u8 PTR4* yptr; + u8 PTR4* aptr; + u8 y; + u8 a; + + remaining = count; + do { + yptr = (u8 PTR4*)S.y0; + aptr = (u8 PTR4*)S.a0; + y = *yptr++; + a = *aptr++; + S.y0 = (u32 PTR4*)yptr; + S.a0 = (u32 PTR4*)aptr; + *(u32 PTR4*)S.dest0 = RGB32_M_A(y, a); + yptr = (u8 PTR4*)S.y1; + aptr = (u8 PTR4*)S.a1; + y = *yptr++; + a = *aptr++; + S.y1 = (u32 PTR4*)yptr; + S.a1 = (u32 PTR4*)aptr; + *(u32 PTR4*)S.dest1 = RGB32_M_A(y, a); + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned32arow(u32 phase, u32 
count) +{ + u32 remaining; + u8 PTR4* yptr; + u8 PTR4* aptr; + u8 y; + u8 a; + const u32 PTR4* ytable; + + remaining = count; + if (remaining-- == 0) { + return; + } + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + yptr = (u8 PTR4*)S.y0; + aptr = (u8 PTR4*)S.a0; + y = *yptr++; + S.y0 = (u32 PTR4*)yptr; + ytable = clamp_ytable[y]; + a = *aptr++; + S.a0 = (u32 PTR4*)aptr; + *(u32 PTR4*)S.dest0 = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16) | ((u32)a << 24); + S.dest0 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (remaining-- != 0); +} + +static u32 dounaligned32acol(u32 count, s32 phase) +{ + u8 y; + u8 a; + const u32 PTR4* ytable; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ytable = clamp_ytable[y]; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + *(u32 PTR4*)S.dest0 = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16) | ((u32)a << 24); + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + ytable = clamp_ytable[y]; + a = *(u8 PTR4*)S.a1; + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + *(u32 PTR4*)S.dest1 = ytable[S.b] | (ytable[S.gb] << 8) | (ytable[S.r] << 16) | ((u32)a << 24); + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + count--; + } while (count != 0); + + return phase; +} + +extern "C" void YUV_blit_32abpp(void PTR4* dest, + u32 destx, + u32 desty, + u32 destpitch, + void 
PTR4* src, + u32 srcx, + u32 srcy, + u32 srcw, + u32 srch, + u32 srcpitch, + u32 srcheight, + void PTR4* alpha, + u32 flags) +{ + YUV_blit(dest, destx, desty, destpitch, src, srcx, srcy, srcw, srch, srcpitch, srcheight, flags, alpha, &blits32a); +} + +static void dounaligned16rowm2h(u32 phase, u32 count) +{ + const u32 PTR4* table; + u8 y; + u16 pixel; + + (void)phase; + if (count-- == 0) { + return; + } + + table = mono16; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = (u16)table[y]; + *(u16 PTR4*)S.dest0 = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + S.dest0 += 2; + } while (count-- != 0); +} + +static u32 dounaligned16colm2h(u32 count, s32 phase) +{ + const u32 PTR4* table; + u32 remaining; + u32 y; + u16 pixel; + + table = mono16; + remaining = count; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = (u16)table[y]; + *(u16 PTR4*)S.dest0 = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + pixel = (u16)table[y]; + *(u16 PTR4*)S.dest1 = pixel; + *(u16 PTR4*)(S.dest1 + S.pitch) = pixel; + S.dest0 += 2; + S.dest1 += 2; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned16rowm2w(u32 phase, u32 count) +{ + const u32 PTR4* table; + u8 y; + u16 pixel; + + (void)phase; + if (count-- == 0) { + return; + } + + table = mono16; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = (u16)table[y]; + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + } while (count-- != 0); +} + +static u32 dounaligned16colm2w(u32 count, s32 phase) +{ + const u32 PTR4* table; + u32 remaining; + u32 y; + u16 pixel; + + table = mono16; + remaining = count; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = (u16)table[y]; + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + y = *(u8 
PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + pixel = (u16)table[y]; + ((u16 PTR4*)S.dest1)[0] = pixel; + ((u16 PTR4*)S.dest1)[1] = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned16rowm2wh(u32 phase, u32 count) +{ + const u32 PTR4* table; + u8 y; + u16 pixel; + + (void)phase; + if (count-- == 0) { + return; + } + + table = mono16; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = (u16)table[y]; + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch + 2) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + } while (count-- != 0); +} + +static u32 dounaligned16colm2wh(u32 count, s32 phase) +{ + const u32 PTR4* table; + u32 remaining; + u32 y; + u16 pixel; + + table = mono16; + remaining = count; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = (u16)table[y]; + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch + 2) = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + pixel = (u16)table[y]; + ((u16 PTR4*)S.dest1)[0] = pixel; + ((u16 PTR4*)S.dest1)[1] = pixel; + *(u16 PTR4*)(S.dest1 + S.pitch) = pixel; + *(u16 PTR4*)(S.dest1 + S.pitch + 2) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned16row2h(u32 phase, u32 count) +{ + u32 remaining; + u8 y; + u32 ybase; + u16 pixel; + + if (count == 0) { + return; + } + remaining = count - 1; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + 
y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ybase = ytable[y]; + pixel = RGB565(ybase, S.r, S.gb, S.b); + *(u16 PTR4*)S.dest0 = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + S.dest0 += 2; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (remaining-- != 0); +} + +static u32 dounaligned16col2h(u32 count, s32 phase) +{ + u32 remaining; + u8 y; + u32 ybase; + u16 pixel; + + remaining = count; + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ybase = ytable[y]; + pixel = RGB565(ybase, S.r, S.gb, S.b); + *(u16 PTR4*)S.dest0 = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + ybase = ytable[y]; + pixel = RGB565(ybase, S.r, S.gb, S.b); + *(u16 PTR4*)S.dest1 = pixel; + *(u16 PTR4*)(S.dest1 + S.pitch) = pixel; + S.dest0 += 2; + S.dest1 += 2; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + remaining--; + } while (remaining != 0); + + return phase; +} + +static void dounaligned16row2w(u32 phase, u32 count) +{ + u32 remaining; + u8 y; + u32 ybase; + u16 pixel; + + if (count == 0) { + return; + } + remaining = count - 1; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ybase = ytable[y]; + pixel = RGB565(ybase, S.r, S.gb, S.b); + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 
PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (remaining-- != 0); +} + +static u32 dounaligned16col2w(u32 count, s32 phase) +{ + u32 remaining; + u8 y; + u32 ybase; + u16 pixel; + + remaining = count; + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ybase = ytable[y]; + pixel = RGB565(ybase, S.r, S.gb, S.b); + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + ybase = ytable[y]; + pixel = RGB565(ybase, S.r, S.gb, S.b); + ((u16 PTR4*)S.dest1)[0] = pixel; + ((u16 PTR4*)S.dest1)[1] = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + remaining--; + } while (remaining != 0); + + return phase; +} + +static void dounaligned16row2wh(u32 phase, u32 count) +{ + u32 remaining; + u8 y; + u32 ybase; + u16 pixel; + + if (count == 0) { + return; + } + remaining = count - 1; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ybase = ytable[y]; + pixel = RGB565(ybase, S.r, S.gb, S.b); + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch + 2) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (remaining-- != 0); +} + +static u32 dounaligned16col2wh(u32 count, s32 
phase) +{ + u32 remaining; + u8 y; + u32 ybase; + u16 pixel; + + remaining = count; + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ybase = ytable[y]; + pixel = RGB565(ybase, S.r, S.gb, S.b); + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch + 2) = pixel; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + ybase = ytable[y]; + pixel = RGB565(ybase, S.r, S.gb, S.b); + ((u16 PTR4*)S.dest1)[0] = pixel; + ((u16 PTR4*)S.dest1)[1] = pixel; + *(u16 PTR4*)(S.dest1 + S.pitch) = pixel; + *(u16 PTR4*)(S.dest1 + S.pitch + 2) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + remaining--; + } while (remaining != 0); + + return phase; +} + +static void dounaligned16rowm(u32 phase, u32 count) +{ + const u32 PTR4* table; + u8 y; + + (void)phase; + if (count-- == 0) { + return; + } + + table = mono16; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + *(u16 PTR4*)S.dest0 = (u16)table[y]; + S.dest0 += 2; + } while (count-- != 0); +} + +static u32 dounaligned16colm(u32 count, s32 phase) +{ + const u32 PTR4* table; + u32 remaining; + u32 y; + + table = mono16; + remaining = count; + do { + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + *(u16 PTR4*)S.dest0 = (u16)table[y]; + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + *(u16 PTR4*)S.dest1 = (u16)table[y]; + S.dest0 += 2; + S.dest1 += 2; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned16row(u32 phase, u32 count) +{ + u32 remaining; + u8 y; + u32 
ybase; + + if (count == 0) { + return; + } + remaining = count - 1; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ybase = ytable[y]; + *(u16 PTR4*)S.dest0 = RGB565(ybase, S.r, S.gb, S.b); + S.dest0 += 2; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (remaining-- != 0); +} + +static u32 dounaligned16col(u32 count, s32 phase) +{ + u32 remaining; + u8 y; + u32 ybase; + + remaining = count; + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + y = *(u8 PTR4*)S.y0; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + ybase = ytable[y]; + *(u16 PTR4*)S.dest0 = RGB565(ybase, S.r, S.gb, S.b); + y = *(u8 PTR4*)S.y1; + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + ybase = ytable[y]; + *(u16 PTR4*)S.dest1 = RGB565(ybase, S.r, S.gb, S.b); + S.dest0 += 2; + S.dest1 += 2; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + remaining--; + } while (remaining != 0); + + return phase; +} + +extern "C" void YUV_blit_16bpp(void PTR4* dest, + u32 destx, + u32 desty, + u32 destpitch, + void PTR4* src, + u32 srcx, + u32 srcy, + u32 srcw, + u32 srch, + u32 srcpitch, + u32 srcheight, + u32 flags) +{ + YUV_blit(dest, destx, desty, destpitch, src, srcx, srcy, srcw, srch, srcpitch, srcheight, flags, 0, &blits16); +} + +static void dounaligned16a4rowm2h(u32 phase, u32 count) +{ + const u32 PTR4* ytable; + const u32 PTR4* atable; + u8 y; + u8 a; + u16 pixel; + + (void)phase; + if (count-- == 0) { + return; + } + + ytable = mono16; + atable = clamp_a4; + do { + y = *(u8 
PTR4*)S.y0; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = (u16)atable[a] | (u16)ytable[y]; + *(u16 PTR4*)S.dest0 = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + S.dest0 += 2; + } while (count-- != 0); +} + +static u32 dounaligned16a4colm2h(u32 count, s32 phase) +{ + const u32 PTR4* ytable; + const u32 PTR4* atable; + u32 remaining; + u8 y; + u8 a; + u16 pixel; + + ytable = mono16; + atable = clamp_a4; + remaining = count; + do { + y = *(u8 PTR4*)S.y0; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = (u16)atable[a] | (u16)ytable[y]; + *(u16 PTR4*)S.dest0 = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + y = *(u8 PTR4*)S.y1; + a = *(u8 PTR4*)S.a1; + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + pixel = (u16)atable[a] | (u16)ytable[y]; + *(u16 PTR4*)S.dest1 = pixel; + *(u16 PTR4*)(S.dest1 + S.pitch) = pixel; + S.dest0 += 2; + S.dest1 += 2; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned16a4rowm2w(u32 phase, u32 count) +{ + u8 y; + u8 a; + u16 pixel; + + (void)phase; + if (count-- == 0) { + return; + } + + do { + y = *(u8 PTR4*)S.y0; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = RGB565_M_A4(y, a); + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + } while (count-- != 0); +} + +static u32 dounaligned16a4colm2w(u32 count, s32 phase) +{ + const u32 PTR4* ytable; + const u32 PTR4* atable; + u32 remaining; + u8 y; + u8 a; + u16 pixel; + + ytable = mono16; + atable = clamp_a4; + remaining = count; + do { + y = *(u8 PTR4*)S.y0; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = (u16)ytable[y] | (u16)atable[a]; + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 
PTR4*)S.dest0)[1] = pixel; + y = *(u8 PTR4*)S.y1; + a = *(u8 PTR4*)S.a1; + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + pixel = (u16)ytable[y] | (u16)atable[a]; + ((u16 PTR4*)S.dest1)[0] = pixel; + ((u16 PTR4*)S.dest1)[1] = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned16a4rowm2wh(u32 phase, u32 count) +{ + u8 y; + u8 a; + u16 pixel; + + (void)phase; + if (count-- == 0) { + return; + } + + do { + y = *(u8 PTR4*)S.y0; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = RGB565_M_A4(y, a); + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch + 2) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + } while (count-- != 0); +} + +static u32 dounaligned16a4colm2wh(u32 count, s32 phase) +{ + u32 remaining; + u8 y; + u8 a; + u16 pixel; + + remaining = count; + do { + y = *(u8 PTR4*)S.y0; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + pixel = RGB565_M_A4(y, a); + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch + 2) = pixel; + y = *(u8 PTR4*)S.y1; + a = *(u8 PTR4*)S.a1; + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + pixel = RGB565_M_A4(y, a); + ((u16 PTR4*)S.dest1)[0] = pixel; + ((u16 PTR4*)S.dest1)[1] = pixel; + *(u16 PTR4*)(S.dest1 + S.pitch) = pixel; + *(u16 PTR4*)(S.dest1 + S.pitch + 2) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned16a4row2h(u32 phase, u32 count) +{ + u32 remaining; + u8 PTR4* yptr; + u8 PTR4* aptr; + u8 y; + u8 a; 
+ u32 ybase; + u16 pixel; + + if (count == 0) { + return; + } + remaining = count - 1; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + yptr = (u8 PTR4*)S.y0; + aptr = (u8 PTR4*)S.a0; + y = *yptr++; + S.y0 = (u32 PTR4*)yptr; + ybase = ytable[y]; + a = *aptr++; + S.a0 = (u32 PTR4*)aptr; + pixel = RGB565_A4(ybase, S.r, S.gb, S.b, a); + *(u16 PTR4*)S.dest0 = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + S.dest0 += 2; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (remaining-- != 0); +} + +static u32 dounaligned16a4col2h(u32 count, s32 phase) +{ + u8 PTR4* yptr; + u8 y; + u8 a; + u32 ybase; + u16 pixel; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + yptr = (u8 PTR4*)S.y0; + y = *yptr++; + S.y0 = (u32 PTR4*)yptr; + ybase = ytable[y]; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + pixel = RGB565_A4(ybase, S.r, S.gb, S.b, a); + *(u16 PTR4*)S.dest0 = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + yptr = (u8 PTR4*)S.y1; + y = *yptr++; + S.y1 = (u32 PTR4*)yptr; + ybase = ytable[y]; + a = *(u8 PTR4*)S.a1; + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + pixel = RGB565_A4(ybase, S.r, S.gb, S.b, a); + *(u16 PTR4*)S.dest1 = pixel; + *(u16 PTR4*)(S.dest1 + S.pitch) = pixel; + S.dest0 += 2; + S.dest1 += 2; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + count--; + } while (count != 0); + + return phase; +} + +static void dounaligned16a4row2w(u32 phase, u32 count) +{ + u32 remaining; + u8 PTR4* yptr; + u8 PTR4* aptr; + u8 y; + u8 a; + u32 ybase; + u16 pixel; + + if (count == 
0) { + return; + } + remaining = count - 1; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + yptr = (u8 PTR4*)S.y0; + aptr = (u8 PTR4*)S.a0; + y = *yptr++; + S.y0 = (u32 PTR4*)yptr; + ybase = ytable[y]; + a = *aptr++; + S.a0 = (u32 PTR4*)aptr; + pixel = RGB565_A4(ybase, S.r, S.gb, S.b, a); + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (remaining-- != 0); +} + +static u32 dounaligned16a4col2w(u32 count, s32 phase) +{ + u8 PTR4* yptr; + u8 y; + u8 a; + u32 ybase; + u16 pixel; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + yptr = (u8 PTR4*)S.y0; + y = *yptr++; + S.y0 = (u32 PTR4*)yptr; + ybase = ytable[y]; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + pixel = RGB565_A4(ybase, S.r, S.gb, S.b, a); + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + yptr = (u8 PTR4*)S.y1; + y = *yptr++; + S.y1 = (u32 PTR4*)yptr; + ybase = ytable[y]; + a = *(u8 PTR4*)S.a1; + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + pixel = RGB565_A4(ybase, S.r, S.gb, S.b, a); + ((u16 PTR4*)S.dest1)[0] = pixel; + ((u16 PTR4*)S.dest1)[1] = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + count--; + } while (count != 0); + + return phase; +} + +static void dounaligned16a4row2wh(u32 phase, u32 count) +{ + u32 remaining; + u8 PTR4* yptr; + u8 PTR4* aptr; + u8 y; + u8 a; + u32 ybase; + u16 pixel; + + if 
(count == 0) { + return; + } + remaining = count - 1; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + yptr = (u8 PTR4*)S.y0; + aptr = (u8 PTR4*)S.a0; + y = *yptr++; + S.y0 = (u32 PTR4*)yptr; + ybase = ytable[y]; + a = *aptr++; + S.a0 = (u32 PTR4*)aptr; + pixel = RGB565_A4(ybase, S.r, S.gb, S.b, a); + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch + 2) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (remaining-- != 0); +} + +static u32 dounaligned16a4col2wh(u32 count, s32 phase) +{ + u8 PTR4* yptr; + u8 PTR4* aptr; + u8 y; + u8 a; + u32 ybase; + u16 pixel; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + yptr = (u8 PTR4*)S.y0; + aptr = (u8 PTR4*)S.a0; + y = *yptr++; + S.y0 = (u32 PTR4*)yptr; + ybase = ytable[y]; + a = *aptr++; + S.a0 = (u32 PTR4*)aptr; + pixel = RGB565_A4(ybase, S.r, S.gb, S.b, a); + ((u16 PTR4*)S.dest0)[0] = pixel; + ((u16 PTR4*)S.dest0)[1] = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch) = pixel; + *(u16 PTR4*)(S.dest0 + S.pitch + 2) = pixel; + yptr = (u8 PTR4*)S.y1; + aptr = (u8 PTR4*)S.a1; + y = *yptr++; + S.y1 = (u32 PTR4*)yptr; + ybase = ytable[y]; + a = *aptr++; + S.a1 = (u32 PTR4*)aptr; + pixel = RGB565_A4(ybase, S.r, S.gb, S.b, a); + ((u16 PTR4*)S.dest1)[0] = pixel; + ((u16 PTR4*)S.dest1)[1] = pixel; + *(u16 PTR4*)(S.dest1 + S.pitch) = pixel; + *(u16 PTR4*)(S.dest1 + S.pitch + 2) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + if (((phase ^ 1) & 1) != 0) { + S.u = 
(u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + count--; + } while (count != 0); + + return phase; +} + +static void dounaligned16a4rowm(u32 phase, u32 count) +{ + u8 y; + u8 a; + + (void)phase; + if (count-- == 0) { + return; + } + + do { + y = *(u8 PTR4*)S.y0; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + *(u16 PTR4*)S.dest0 = RGB565_M_A4(y, a); + S.dest0 += 2; + } while (count-- != 0); +} + +static u32 dounaligned16a4colm(u32 count, s32 phase) +{ + const u32 PTR4* ytable; + const u32 PTR4* atable; + u32 remaining; + u8 y; + u8 a; + + ytable = mono16; + atable = clamp_a4; + remaining = count; + do { + y = *(u8 PTR4*)S.y0; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 1); + *(u16 PTR4*)S.dest0 = (u16)ytable[y] | (u16)atable[a]; + y = *(u8 PTR4*)S.y1; + a = *(u8 PTR4*)S.a1; + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 1); + *(u16 PTR4*)S.dest1 = (u16)ytable[y] | (u16)atable[a]; + S.dest0 += 2; + S.dest1 += 2; + remaining--; + } while (remaining != 0); + + return phase + count; +} + +static void dounaligned16a4row(u32 phase, u32 count) +{ + u32 remaining; + u8 PTR4* yptr; + u8 PTR4* aptr; + u8 y; + u8 a; + u32 ybase; + + if (count == 0) { + return; + } + remaining = count - 1; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + yptr = (u8 PTR4*)S.y0; + aptr = (u8 PTR4*)S.a0; + y = *yptr++; + S.y0 = (u32 PTR4*)yptr; + ybase = ytable[y]; + a = *aptr++; + S.a0 = (u32 PTR4*)aptr; + *(u16 PTR4*)S.dest0 = RGB565_A4(ybase, S.r, S.gb, S.b, a); + S.dest0 += 2; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + } while (remaining-- != 0); +} + +static u32 
dounaligned16a4col(u32 count, s32 phase) +{ + u8 PTR4* yptr; + u8 y; + u8 a; + u32 ybase; + + do { + phase++; + S.b = YUVTables[*(u8 PTR4*)S.u]; + S.gb = YUVTables[YUV_U_TO_GB_OFFSET + *(u8 PTR4*)S.u] + YUVTables[YUV_V_TO_GB_OFFSET + *(u8 PTR4*)S.v]; + S.r = YUVTables[YUV_V_TO_R_OFFSET + *(u8 PTR4*)S.v]; + yptr = (u8 PTR4*)S.y0; + y = *yptr++; + S.y0 = (u32 PTR4*)yptr; + ybase = ytable[y]; + a = *(u8 PTR4*)S.a0; + S.a0 = (u32 PTR4*)((u8 PTR4*)S.a0 + 1); + *(u16 PTR4*)S.dest0 = RGB565_A4(ybase, S.r, S.gb, S.b, a); + yptr = (u8 PTR4*)S.y1; + y = *yptr++; + S.y1 = (u32 PTR4*)yptr; + ybase = ytable[y]; + a = *(u8 PTR4*)S.a1; + S.a1 = (u32 PTR4*)((u8 PTR4*)S.a1 + 1); + *(u16 PTR4*)S.dest1 = RGB565_A4(ybase, S.r, S.gb, S.b, a); + S.dest0 += 2; + S.dest1 += 2; + if (((phase ^ 1) & 1) != 0) { + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + } + count--; + } while (count != 0); + + return phase; +} + +extern "C" void YUV_blit_16a4bpp(void PTR4* dest, + u32 destx, + u32 desty, + u32 destpitch, + void PTR4* src, + u32 srcx, + u32 srcy, + u32 srcw, + u32 srch, + u32 srcpitch, + u32 srcheight, + void PTR4* alpha, + u32 flags) +{ + YUV_blit(dest, destx, desty, destpitch, src, srcx, srcy, srcw, srch, srcpitch, srcheight, flags, alpha, &blits16a4); +} + +static void dounalignedYUY2rowm2wh(u32 phase, u32 count) +{ + s32 remaining; + u8 y0; + u8 y1; + u32 pixel0; + u32 pixel1; + + (void)phase; + remaining = count; + remaining -= 2; + if (remaining < 0) { + return; + } + + do { + y0 = *(u8 PTR4*)S.y0; + y1 = *((u8 PTR4*)S.y0 + 1); + pixel0 = YUY2_NEUTRAL_CHROMA | y0 | ((u32)y0 << 16); + pixel1 = YUY2_NEUTRAL_CHROMA | y1 | ((u32)y1 << 16); + ((u32 PTR4*)S.dest0)[0] = pixel0; + ((u32 PTR4*)S.dest0)[1] = pixel1; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel0; + *(u32 PTR4*)(S.dest0 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel1; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + S.dest0 += 8; + remaining -= 2; + } while (remaining >= 0); +} + +static u32 
dounalignedYUY2colm2wh(u32 count, s32 phase) +{ + s32 remaining; + u8 y0; + u8 y1; + u32 pixel0; + u32 pixel1; + + remaining = count; + do { + remaining -= 2; + y0 = *(u8 PTR4*)S.y0; + y1 = *((u8 PTR4*)S.y0 + 1); + pixel0 = YUY2_NEUTRAL_CHROMA | y0 | ((u32)y0 << 16); + pixel1 = YUY2_NEUTRAL_CHROMA | y1 | ((u32)y1 << 16); + ((u32 PTR4*)S.dest0)[0] = pixel0; + ((u32 PTR4*)S.dest0)[1] = pixel1; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel0; + *(u32 PTR4*)(S.dest0 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel1; + y0 = *(u8 PTR4*)S.y1; + y1 = *((u8 PTR4*)S.y1 + 1); + pixel0 = YUY2_NEUTRAL_CHROMA | y0 | ((u32)y0 << 16); + pixel1 = YUY2_NEUTRAL_CHROMA | y1 | ((u32)y1 << 16); + ((u32 PTR4*)S.dest1)[0] = pixel0; + ((u32 PTR4*)S.dest1)[1] = pixel1; + *(u32 PTR4*)(S.dest1 + S.pitch) = pixel0; + *(u32 PTR4*)(S.dest1 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel1; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 2); + S.dest0 += 8; + S.dest1 += 8; + } while (remaining > 0); + + return phase + count; +} + +static void dounalignedYUY2rowm2w(u32 phase, u32 count) +{ + s32 remaining; + u8 y0; + u8 y1; + u32 pixel; + + (void)phase; + remaining = count; + remaining -= 2; + if (remaining < 0) { + return; + } + + do { + y0 = *(u8 PTR4*)S.y0; + pixel = YUY2_NEUTRAL_CHROMA | y0 | ((u32)y0 << 16); + ((u32 PTR4*)S.dest0)[0] = pixel; + y1 = *((u8 PTR4*)S.y0 + 1); + pixel = YUY2_NEUTRAL_CHROMA | y1 | ((u32)y1 << 16); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + ((u32 PTR4*)S.dest0)[1] = pixel; + S.dest0 += 8; + remaining -= 2; + } while (remaining >= 0); +} + +static u32 dounalignedYUY2colm2w(u32 count, s32 phase) +{ + s32 remaining; + u8 y0; + u8 y1; + u32 pixel; + + remaining = count; + do { + remaining -= 2; + y0 = *(u8 PTR4*)S.y0; + pixel = YUY2_NEUTRAL_CHROMA | y0 | ((u32)y0 << 16); + ((u32 PTR4*)S.dest0)[0] = pixel; + y1 = *((u8 PTR4*)S.y0 + 1); + pixel = YUY2_NEUTRAL_CHROMA | y1 | ((u32)y1 << 16); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + ((u32 PTR4*)S.dest0)[1] 
= pixel; + y0 = *(u8 PTR4*)S.y1; + pixel = YUY2_NEUTRAL_CHROMA | y0 | ((u32)y0 << 16); + ((u32 PTR4*)S.dest1)[0] = pixel; + y1 = *((u8 PTR4*)S.y1 + 1); + pixel = YUY2_NEUTRAL_CHROMA | y1 | ((u32)y1 << 16); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 2); + ((u32 PTR4*)S.dest1)[1] = pixel; + S.dest0 += 8; + S.dest1 += 8; + } while (remaining > 0); + + return phase + count; +} + +static void dounalignedYUY2rowm2h(u32 phase, u32 count) +{ + s32 remaining; + u8 y0; + u8 y1; + u32 pixel; + + (void)phase; + remaining = count; + remaining -= 2; + if (remaining < 0) { + return; + } + + do { + y0 = *(u8 PTR4*)S.y0; + y1 = *((u8 PTR4*)S.y0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + pixel = YUY2_M(y0, y1); + *(u32 PTR4*)S.dest0 = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + remaining -= 2; + } while (remaining >= 0); +} + +static u32 dounalignedYUY2colm2h(u32 count, s32 phase) +{ + s32 remaining; + u8 y0; + u8 y1; + u32 pixel; + + remaining = count; + do { + remaining -= 2; + y0 = *(u8 PTR4*)S.y0; + y1 = *((u8 PTR4*)S.y0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + pixel = YUY2_M(y0, y1); + *(u32 PTR4*)S.dest0 = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + y0 = *(u8 PTR4*)S.y1; + y1 = *((u8 PTR4*)S.y1 + 1); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 2); + pixel = YUY2_M(y0, y1); + *(u32 PTR4*)S.dest1 = pixel; + *(u32 PTR4*)(S.dest1 + S.pitch) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + } while (remaining > 0); + + return phase + count; +} + +static void dounalignedYUY2row2wh(u32 phase, u32 count) +{ + s32 remaining; + u8 y0; + u8 y1; + u8 u; + u8 v; + u32 pixel0; + u32 pixel1; + + (void)phase; + remaining = count; + remaining -= 2; + if (remaining < 0) { + return; + } + + do { + u = *(u8 PTR4*)S.u; + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + y0 = *(u8 PTR4*)S.y0; + v = *(u8 PTR4*)S.v; + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + pixel0 = y0 | ((u32)u << 8) | ((u32)y0 << 16) | ((u32)v << 24); + 
((u32 PTR4*)S.dest0)[0] = pixel0; + y1 = *((u8 PTR4*)S.y0 + 1); + pixel1 = (pixel0 & YUY2_CHROMA_MASK) | y1 | ((u32)y1 << 16); + ((u32 PTR4*)S.dest0)[1] = pixel1; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel0; + *(u32 PTR4*)(S.dest0 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel1; + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + S.dest0 += 8; + remaining -= 2; + } while (remaining >= 0); +} + +static u32 dounalignedYUY2col2wh(u32 count, s32 phase) +{ + s32 remaining; + u8 y0; + u8 y1; + u8 u; + u8 v; + u32 pixel0; + u32 pixel1; + u32 uv0; + u32 uv1; + + remaining = count; + do { + remaining -= 2; + u = *(u8 PTR4*)S.u; + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + v = *(u8 PTR4*)S.v; + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + y0 = *(u8 PTR4*)S.y0; + pixel0 = y0 | ((u32)u << 8) | ((u32)y0 << 16) | ((u32)v << 24); + y1 = *((u8 PTR4*)S.y0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + pixel1 = (pixel0 & YUY2_CHROMA_MASK) | y1 | ((u32)y1 << 16); + ((u32 PTR4*)S.dest0)[0] = pixel0; + ((u32 PTR4*)S.dest0)[1] = pixel1; + uv0 = pixel0 & YUY2_CHROMA_MASK; + uv1 = pixel1 & YUY2_CHROMA_MASK; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel0; + *(u32 PTR4*)(S.dest0 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel1; + y0 = *(u8 PTR4*)S.y1; + pixel0 = uv0 | y0 | ((u32)y0 << 16); + y1 = *((u8 PTR4*)S.y1 + 1); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 2); + pixel1 = uv1 | y1 | ((u32)y1 << 16); + ((u32 PTR4*)S.dest1)[0] = pixel0; + ((u32 PTR4*)S.dest1)[1] = pixel1; + *(u32 PTR4*)(S.dest1 + S.pitch) = pixel0; + *(u32 PTR4*)(S.dest1 + S.pitch + YUV_PACKED_WORD_BYTES) = pixel1; + S.dest0 += 8; + S.dest1 += 8; + } while (remaining > 0); + + return phase + count; +} + +static void dounalignedYUY2row2w(u32 phase, u32 count) +{ + s32 remaining; + u8 y0; + u8 y1; + u8 u; + u8 v; + u32 pixel; + + (void)phase; + remaining = count; + remaining -= 2; + if (remaining < 0) { + return; + } + + do { + u = *(u8 PTR4*)S.u; + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + y0 = *(u8 PTR4*)S.y0; + v = *(u8 PTR4*)S.v; + S.v = (u16 PTR4*)((u8 
PTR4*)S.v + 1); + pixel = y0 | ((u32)u << 8) | ((u32)y0 << 16) | ((u32)v << 24); + ((u32 PTR4*)S.dest0)[0] = pixel; + y1 = *((u8 PTR4*)S.y0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + ((u32 PTR4*)S.dest0)[1] = (pixel & YUY2_CHROMA_MASK) | y1 | ((u32)y1 << 16); + S.dest0 += 8; + remaining -= 2; + } while (remaining >= 0); +} + +static u32 dounalignedYUY2col2w(u32 count, s32 phase) +{ + s32 remaining; + u8 y0; + u8 y1; + u8 u; + u8 v; + u32 pixel; + + remaining = count; + do { + remaining -= 2; + u = *(u8 PTR4*)S.u; + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + y0 = *(u8 PTR4*)S.y0; + v = *(u8 PTR4*)S.v; + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + pixel = y0 | ((u32)u << 8) | ((u32)y0 << 16) | ((u32)v << 24); + ((u32 PTR4*)S.dest0)[0] = pixel; + y1 = *((u8 PTR4*)S.y0 + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + pixel = (pixel & YUY2_CHROMA_MASK) | y1 | ((u32)y1 << 16); + ((u32 PTR4*)S.dest0)[1] = pixel; + y0 = *(u8 PTR4*)S.y1; + pixel = (pixel & YUY2_CHROMA_MASK) | y0 | ((u32)y0 << 16); + ((u32 PTR4*)S.dest1)[0] = pixel; + y1 = *((u8 PTR4*)S.y1 + 1); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 2); + ((u32 PTR4*)S.dest1)[1] = (pixel & YUY2_CHROMA_MASK) | y1 | ((u32)y1 << 16); + S.dest0 += 8; + S.dest1 += 8; + } while (remaining > 0); + + return phase + count; +} + +static void dounalignedYUY2row2h(u32 phase, u32 count) +{ + s32 remaining; + u8 y0; + u8 y1; + u8 u; + u8 v; + u32 pixel; + + (void)phase; + remaining = count; + remaining -= 2; + if (remaining < 0) { + return; + } + + do { + u = *(u8 PTR4*)S.u; + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + y0 = *(u8 PTR4*)S.y0; + y1 = *((u8 PTR4*)S.y0 + 1); + v = *(u8 PTR4*)S.v; + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + pixel = y0 | ((u32)u << 8) | ((u32)y1 << 16) | ((u32)v << 24); + *(u32 PTR4*)S.dest0 = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + remaining -= 2; + } while (remaining >= 0); +} + +static u32 dounalignedYUY2col2h(u32 count, s32 
phase) +{ + s32 remaining; + u8 y0; + u8 y1; + u8 u; + u8 v; + u32 pixel; + + remaining = count; + do { + remaining -= 2; + u = *(u8 PTR4*)S.u; + y0 = *(u8 PTR4*)S.y0; + y1 = *((u8 PTR4*)S.y0 + 1); + v = *(u8 PTR4*)S.v; + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + pixel = y0 | ((u32)u << 8) | ((u32)y1 << 16) | ((u32)v << 24); + *(u32 PTR4*)S.dest0 = pixel; + *(u32 PTR4*)(S.dest0 + S.pitch) = pixel; + pixel &= YUY2_CHROMA_MASK; + y0 = *(u8 PTR4*)S.y1; + y1 = *((u8 PTR4*)S.y1 + 1); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 2); + pixel = pixel | y0 | ((u32)y1 << 16); + *(u32 PTR4*)S.dest1 = pixel; + *(u32 PTR4*)(S.dest1 + S.pitch) = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + } while (remaining > 0); + + return phase + count; +} + +static void dounalignedYUY2rowm(u32 phase, u32 count) +{ + s32 remaining; + u8 y0; + u8 y1; + u32 pixel; + + (void)phase; + remaining = count; + remaining -= 2; + if (remaining < 0) { + return; + } + + do { + y1 = *((u8 PTR4*)S.y0 + 1); + y0 = *(u8 PTR4*)S.y0; + pixel = YUY2_NEUTRAL_CHROMA | y0 | ((u32)y1 << 16); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + *(u32 PTR4*)S.dest0 = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + remaining -= 2; + } while (remaining >= 0); +} + +static u32 dounalignedYUY2colm(u32 count, s32 phase) +{ + s32 remaining; + u8 y0; + u8 y1; + u32 pixel; + + remaining = count; + do { + remaining -= 2; + y1 = *((u8 PTR4*)S.y0 + 1); + y0 = *(u8 PTR4*)S.y0; + pixel = YUY2_NEUTRAL_CHROMA | y0 | ((u32)y1 << 16); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + *(u32 PTR4*)S.dest0 = pixel; + y1 = *((u8 PTR4*)S.y1 + 1); + y0 = *(u8 PTR4*)S.y1; + pixel = YUY2_NEUTRAL_CHROMA | y0 | ((u32)y1 << 16); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 2); + *(u32 PTR4*)S.dest1 = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + } while (remaining > 0); + + return phase + count; +} + +static void 
dounalignedYUY2row(u32 phase, u32 count) +{ + s32 remaining; + u8 y0; + u8 y1; + u8 u; + u8 v; + u32 pixel; + + (void)phase; + remaining = count; + remaining -= 2; + if (remaining < 0) { + return; + } + + do { + u = *(u8 PTR4*)S.u; + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + y0 = *(u8 PTR4*)S.y0; + y1 = *((u8 PTR4*)S.y0 + 1); + v = *(u8 PTR4*)S.v; + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + pixel = y0 | ((u32)u << 8) | ((u32)y1 << 16) | ((u32)v << 24); + *(u32 PTR4*)S.dest0 = pixel; + S.dest0 += YUV_PACKED_WORD_BYTES; + remaining -= 2; + } while (remaining >= 0); +} + +static u32 dounalignedYUY2col(u32 count, s32 phase) +{ + s32 remaining; + u8 y0; + u8 y1; + u8 u; + u8 v; + u32 pixel; + + remaining = count; + do { + remaining -= 2; + u = *(u8 PTR4*)S.u; + y0 = *(u8 PTR4*)S.y0; + y1 = *((u8 PTR4*)S.y0 + 1); + v = *(u8 PTR4*)S.v; + S.u = (u16 PTR4*)((u8 PTR4*)S.u + 1); + S.v = (u16 PTR4*)((u8 PTR4*)S.v + 1); + S.y0 = (u32 PTR4*)((u8 PTR4*)S.y0 + 2); + pixel = y0 | ((u32)u << 8) | ((u32)y1 << 16) | ((u32)v << 24); + *(u32 PTR4*)S.dest0 = pixel; + pixel &= YUY2_CHROMA_MASK; + y0 = *(u8 PTR4*)S.y1; + y1 = *((u8 PTR4*)S.y1 + 1); + S.y1 = (u32 PTR4*)((u8 PTR4*)S.y1 + 2); + *(u32 PTR4*)S.dest1 = pixel | y0 | ((u32)y1 << 16); + S.dest0 += YUV_PACKED_WORD_BYTES; + S.dest1 += YUV_PACKED_WORD_BYTES; + } while (remaining > 0); + + return phase + count; +} + +extern "C" void YUV_blit_YUY2(void PTR4* dest, + u32 destx, + u32 desty, + u32 destpitch, + void PTR4* src, + u32 srcx, + u32 srcy, + u32 srcw, + u32 srch, + u32 srcpitch, + u32 srcheight, + u32 flags) +{ + if ((((u32)dest) & 3) == 2) { + destx++; + dest = (void PTR4*)(((u32)dest) & ~3); + } + + if ((destx & 1) != 0) { + if ((srcx & 1) != 0) { + srcx++; + } + destx++; + srcw--; + } else if ((srcx & 1) != 0) { + srcx++; + srcw--; + } + + if ((srcw & 1) != 0) { + srcw--; + } + + YUV_blit(dest, destx, desty, destpitch, src, srcx, srcy, srcw, srch, srcpitch, srcheight, flags, 0, 
&blitsyuy2); +} + +extern "C" void YUV_blit_32bpp_mask(void PTR4* dest, + u32 destx, + u32 desty, + u32 destpitch, + u8 PTR4* mask, + u32 maskpitch, + void PTR4* src, + u32 srcx, + u32 srcy, + u32 srcw, + u32 srch, + u32 srcpitch, + u32 srcheight, + u32 flags) +{ + YUV_blit_mask(dest, destx, desty, destpitch, mask, maskpitch, src, srcx, srcy, srcw, srch, srcpitch, srcheight, flags, + 0, &blits32); +} + +extern "C" void YUV_blit_32abpp_mask(void PTR4* dest, + u32 destx, + u32 desty, + u32 destpitch, + u8 PTR4* mask, + u32 maskpitch, + void PTR4* src, + u32 srcx, + u32 srcy, + u32 srcw, + u32 srch, + u32 srcpitch, + u32 srcheight, + void PTR4* alpha, + u32 flags) +{ + YUV_blit_mask(dest, destx, desty, destpitch, mask, maskpitch, src, srcx, srcy, srcw, srch, srcpitch, srcheight, flags, + alpha, &blits32a); +} + +extern "C" void YUV_blit_16bpp_mask(void PTR4* dest, + u32 destx, + u32 desty, + u32 destpitch, + u8 PTR4* mask, + u32 maskpitch, + void PTR4* src, + u32 srcx, + u32 srcy, + u32 srcw, + u32 srch, + u32 srcpitch, + u32 srcheight, + u32 flags) +{ + YUV_blit_mask(dest, destx, desty, destpitch, mask, maskpitch, src, srcx, srcy, srcw, srch, srcpitch, srcheight, flags, + 0, &blits16); +} + +extern "C" void YUV_blit_16a4bpp_mask(void PTR4* dest, + u32 destx, + u32 desty, + u32 destpitch, + u8 PTR4* mask, + u32 maskpitch, + void PTR4* src, + u32 srcx, + u32 srcy, + u32 srcw, + u32 srch, + u32 srcpitch, + u32 srcheight, + void PTR4* alpha, + u32 flags) +{ + YUV_blit_mask(dest, destx, desty, destpitch, mask, maskpitch, src, srcx, srcy, srcw, srch, srcpitch, srcheight, flags, + alpha, &blits16a4); +} + +extern "C" void YUV_blit_YUY2_mask(void PTR4* dest, + u32 destx, + u32 desty, + u32 destpitch, + u8 PTR4* mask, + u32 maskpitch, + void PTR4* src, + u32 srcx, + u32 srcy, + u32 srcw, + u32 srch, + u32 srcpitch, + u32 srcheight, + u32 flags) +{ + YUV_blit_mask(dest, destx, desty, destpitch, mask, maskpitch, src, srcx, srcy, srcw, srch, srcpitch, srcheight, flags, + 0, 
&blitsyuy2); +} + +#pragma dont_inline reset diff --git a/src/bink/src/sdk/decode/yuv.h b/src/bink/src/sdk/decode/yuv.h new file mode 100644 index 000000000..cd0616594 --- /dev/null +++ b/src/bink/src/sdk/decode/yuv.h @@ -0,0 +1,48 @@ +#ifndef BINK_DECODE_YUV_H +#define BINK_DECODE_YUV_H + +#include "bink.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void YUV_init(u32 flags); +void YUV_blit_32bpp(void PTR4* dest, u32 destx, u32 desty, u32 destpitch, + void PTR4* src, u32 srcx, u32 srcy, u32 srcw, u32 srch, u32 srcpitch, + u32 srcheight, u32 flags); +void YUV_blit_32abpp(void PTR4* dest, u32 destx, u32 desty, u32 destpitch, + void PTR4* src, u32 srcx, u32 srcy, u32 srcw, u32 srch, u32 srcpitch, + u32 srcheight, void PTR4* alpha, u32 flags); +void YUV_blit_16bpp(void PTR4* dest, u32 destx, u32 desty, u32 destpitch, + void PTR4* src, u32 srcx, u32 srcy, u32 srcw, u32 srch, u32 srcpitch, + u32 srcheight, u32 flags); +void YUV_blit_16a4bpp(void PTR4* dest, u32 destx, u32 desty, u32 destpitch, + void PTR4* src, u32 srcx, u32 srcy, u32 srcw, u32 srch, u32 srcpitch, + u32 srcheight, void PTR4* alpha, u32 flags); +void YUV_blit_YUY2(void PTR4* dest, u32 destx, u32 desty, u32 destpitch, + void PTR4* src, u32 srcx, u32 srcy, u32 srcw, u32 srch, u32 srcpitch, + u32 srcheight, u32 flags); +void YUV_blit_32bpp_mask(void PTR4* dest, u32 destx, u32 desty, u32 destpitch, + u8 PTR4* mask, u32 maskpitch, void PTR4* src, u32 srcx, u32 srcy, + u32 srcw, u32 srch, u32 srcpitch, u32 srcheight, u32 flags); +void YUV_blit_32abpp_mask(void PTR4* dest, u32 destx, u32 desty, u32 destpitch, + u8 PTR4* mask, u32 maskpitch, void PTR4* src, u32 srcx, u32 srcy, + u32 srcw, u32 srch, u32 srcpitch, u32 srcheight, + void PTR4* alpha, u32 flags); +void YUV_blit_16bpp_mask(void PTR4* dest, u32 destx, u32 desty, u32 destpitch, + u8 PTR4* mask, u32 maskpitch, void PTR4* src, u32 srcx, u32 srcy, + u32 srcw, u32 srch, u32 srcpitch, u32 srcheight, u32 flags); +void YUV_blit_16a4bpp_mask(void PTR4* dest, 
u32 destx, u32 desty, u32 destpitch, + u8 PTR4* mask, u32 maskpitch, void PTR4* src, u32 srcx, u32 srcy, + u32 srcw, u32 srch, u32 srcpitch, u32 srcheight, + void PTR4* alpha, u32 flags); +void YUV_blit_YUY2_mask(void PTR4* dest, u32 destx, u32 desty, u32 destpitch, + u8 PTR4* mask, u32 maskpitch, void PTR4* src, u32 srcx, u32 srcy, + u32 srcw, u32 srch, u32 srcpitch, u32 srcheight, u32 flags); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/bink/src/sdk/fft.c b/src/bink/src/sdk/fft.c index e69de29bb..08da7e868 100644 --- a/src/bink/src/sdk/fft.c +++ b/src/bink/src/sdk/fft.c @@ -0,0 +1,2428 @@ +#include "bink.h" +#include "fft.h" + +extern const u32 BINK_RDFT_INVERSE_SCALE_BITS[]; +extern const u32 BINK_FFT_TRIG_ONE_BITS[]; +extern const u32 BINK_FFT_HALF_SECANT_SCALE_BITS[]; +extern const u32 BINK_FFT_SIX_BITS[]; +extern const u32 BINK_FFT_THREE_BITS[]; +extern const u32 BINK_FFT_HALF_RECIP_SCALE_BITS[]; +extern const u32 BINK_DCT_TRIG_ONE_BITS[]; +extern const u32 BINK_DCT_CENTER_SCALE_BITS[]; +extern const u32 BINK_DCT_HALF_SCALE_BITS[]; +extern const u32 BINK_CFT_ROT_ONE_BITS[]; +extern const u32 BINK_CFT_ROT_ZERO_BITS[]; +extern const u32 BINK_CFT_INV_ROT_ONE_BITS[]; +extern const u32 BINK_CFT_INV_ROT_ZERO_BITS[]; +extern const u32 BINK_RFT_HALF_SCALE_BITS[]; +extern const u32 BINK_RFT_INV_HALF_SCALE_BITS[]; + +f32 atanf(f32 x); +f32 cosf(f32 x); +f32 sinf(f32 x); + +#define F32CONST(label) (*(const f32 PTR4*)(label)) +#define F64CONST(label) (*(const f64*)(label)) + +#define RDFT_INVERSE_SCALE F32CONST(BINK_RDFT_INVERSE_SCALE_BITS) +#define FFT_TRIG_ONE F32CONST(BINK_FFT_TRIG_ONE_BITS) +#define FFT_HALF_SECANT_SCALE F64CONST(BINK_FFT_HALF_SECANT_SCALE_BITS) +#define FFT_SIX F32CONST(BINK_FFT_SIX_BITS) +#define FFT_THREE F32CONST(BINK_FFT_THREE_BITS) +#define FFT_HALF_RECIP_SCALE F32CONST(BINK_FFT_HALF_RECIP_SCALE_BITS) +#define DCT_TRIG_ONE F32CONST(BINK_DCT_TRIG_ONE_BITS) +#define DCT_CENTER_SCALE F32CONST(BINK_DCT_CENTER_SCALE_BITS) +#define 
DCT_HALF_SCALE F64CONST(BINK_DCT_HALF_SCALE_BITS) +#define CFT_ROT_ONE F32CONST(BINK_CFT_ROT_ONE_BITS) +#define CFT_ROT_ZERO F32CONST(BINK_CFT_ROT_ZERO_BITS) +#define CFT_INV_ROT_ONE F32CONST(BINK_CFT_INV_ROT_ONE_BITS) +#define CFT_INV_ROT_ZERO F32CONST(BINK_CFT_INV_ROT_ZERO_BITS) +#define RFT_HALF_SCALE F32CONST(BINK_RFT_HALF_SCALE_BITS) +#define RFT_INV_HALF_SCALE F32CONST(BINK_RFT_INV_HALF_SCALE_BITS) + +#define FFT_WORK_INDEX_OFFSET 2 +#define FFT_EIGHTH_SIZE(n) ((n) >> 3) +#define FFT_QUARTER_SIZE(n) ((s32)(n) >> 2) +#define FFT_HALF_SIZE(n) ((n) >> 1) +#define FFT_TABLE_FULL_SIZE(n) ((n) << 2) +#define FFT_TABLE_DOUBLE_SIZE(n) ((n) << 1) +#define FFT_CFT_16_REAL_SIZE 32 +#define FFT_CFT_8_REAL_SIZE 16 +#define FFT_CFT_4_REAL_SIZE 8 +#define FFT_CFT_2_REAL_SIZE 4 +#define FFT_CFT_RECURSION_LIMIT 0x200 + +/* Bink audio uses Ooura-style RDFT/DCT work arrays: ip stores table sizes, + followed by bit-reversal work indices, while w holds trig tables. */ +static void makewt(s32 nw, s32 PTR4* ip, f32 PTR4* w); +static void makect(s32 nc, s32 PTR4* ip, f32 PTR4* c); +void cftfsub(s32 n, f32 PTR4* a, s32 PTR4* ip, s32 nw, f32 PTR4* w); +static void cftbsub(s32 n, f32 PTR4* a, s32 PTR4* ip, s32 nw, f32 PTR4* w); +static void bitrv2(s32 n, s32 PTR4* ip, f32 PTR4* a); +static void bitrv2conj(s32 n, s32 PTR4* ip, f32 PTR4* a); +static void bitrv216(f32 PTR4* a); +static void bitrv216neg(f32 PTR4* a); +static void bitrv208(f32 PTR4* a); +static void bitrv208neg(f32 PTR4* a); +static void cftf1st(s32 n, f32 PTR4* a, f32 PTR4* w); +static void cftb1st(s32 n, f32 PTR4* a, f32 PTR4* w); +void cftrec1(s32 n, f32 PTR4* a, s32 nw, f32 PTR4* w); +void cftrec2(s32 n, f32 PTR4* a, s32 nw, f32 PTR4* w); +static void cftexp1(s32 n, f32 PTR4* a, s32 nw, f32 PTR4* w); +static void cftexp2(s32 n, f32 PTR4* a, s32 nw, f32 PTR4* w); +static void cftmdl1(s32 n, f32 PTR4* a, f32 PTR4* w); +static void cftmdl2(s32 n, f32 PTR4* a, f32 PTR4* w); +static void cftfx41(s32 n, f32 PTR4* a, s32 nw, 
f32 PTR4* w); +static void cftfx42(s32 n, f32 PTR4* a, s32 nw, f32 PTR4* w); +static void cftf161(f32 PTR4* a, f32 PTR4* w); +static void cftf162(f32 PTR4* a, f32 PTR4* w); +static void cftf081(f32 PTR4* a, f32 PTR4* w); +static void cftf082(f32 PTR4* a, f32 PTR4* w); +static void cftf040(f32 PTR4* a); +static void cftb040(f32 PTR4* a); +static void cftx020(f32 PTR4* a); +static void rftfsub(s32 n, f32 PTR4* a, s32 nc, f32 PTR4* c); +static void rftbsub(s32 n, f32 PTR4* a, s32 nc, f32 PTR4* c); +static void dctsub(s32 n, f32 PTR4* a, s32 nc, f32 PTR4* c); + +/* Real discrete Fourier transform used by legacy Bink audio streams. */ +void rdft(u32 n, s32 isgn, f32 PTR4* a, s32 PTR4* ip, f32 PTR4* w) +{ + f32 xi; + s32 nw; + s32 nc; + + nw = ip[0]; + if ((s32)n > FFT_TABLE_FULL_SIZE(nw)) { + nw = FFT_QUARTER_SIZE(n); + makewt(nw, ip, w); + } + + nc = ip[1]; + if ((s32)n > FFT_TABLE_FULL_SIZE(nc)) { + nc = FFT_QUARTER_SIZE(n); + makect(nc, ip, w + nw); + } + + if (isgn >= 0) { + if ((s32)n > FFT_CFT_2_REAL_SIZE) { + cftfsub(n, a, ip + FFT_WORK_INDEX_OFFSET, nw, w); + rftfsub(n, a, nc, w + nw); + } else if (n == FFT_CFT_2_REAL_SIZE) { + cftfsub(FFT_CFT_2_REAL_SIZE, a, ip + FFT_WORK_INDEX_OFFSET, nw, w); + } + + xi = a[0] - a[1]; + a[0] += a[1]; + a[1] = xi; + } else { + a[1] = (a[0] - a[1]) * RDFT_INVERSE_SCALE; + a[0] -= a[1]; + + if ((s32)n > FFT_CFT_2_REAL_SIZE) { + rftbsub(n, a, nc, w + nw); + cftbsub(n, a, ip + FFT_WORK_INDEX_OFFSET, nw, w); + } else if (n == FFT_CFT_2_REAL_SIZE) { + cftbsub(FFT_CFT_2_REAL_SIZE, a, ip + FFT_WORK_INDEX_OFFSET, nw, w); + } + } +} + +/* Discrete cosine transform path used by newer Bink audio streams. 
*/ +void ddct(u32 n, s32 isgn, f32 PTR4* a, s32 PTR4* ip, f32 PTR4* w) +{ + f32 xr; + f32 xi; + f32 yr; + s32 j; + s32 nw; + s32 nc; + + nw = ip[0]; + if ((s32)n > FFT_TABLE_FULL_SIZE(nw)) { + nw = FFT_QUARTER_SIZE(n); + makewt(nw, ip, w); + } + + nc = ip[1]; + if ((s32)n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + + if (isgn < 0) { + j = n - 2; + xr = a[n - 1]; + if (1 < j) { + f32 PTR4* p = a + j; + do { + f32 tmp; + f32 sum; + f32 diff; + + xi = p[0]; + j -= 2; + tmp = p[-1]; + diff = xi - tmp; + p[1] = diff; + sum = xi + tmp; + p[0] = sum; + p -= 2; + } while (1 < j); + } + + xi = a[0]; + yr = xi + xr; + a[0] = yr; + xr = xi - xr; + a[1] = xr; + + if ((s32)n > FFT_CFT_2_REAL_SIZE) { + rftbsub(n, a, nc, w + nw); + cftbsub(n, a, ip + FFT_WORK_INDEX_OFFSET, nw, w); + } else if (n == FFT_CFT_2_REAL_SIZE) { + cftbsub(FFT_CFT_2_REAL_SIZE, a, ip + FFT_WORK_INDEX_OFFSET, nw, w); + } + } + + dctsub(n, a, nc, w + nw); + + if (isgn >= 0) { + if ((s32)n > FFT_CFT_2_REAL_SIZE) { + cftfsub(n, a, ip + FFT_WORK_INDEX_OFFSET, nw, w); + rftfsub(n, a, nc, w + nw); + } else if (n == FFT_CFT_2_REAL_SIZE) { + cftfsub(FFT_CFT_2_REAL_SIZE, a, ip + FFT_WORK_INDEX_OFFSET, nw, w); + } + + xr = a[0]; + j = 2; + xi = a[1]; + a[0] = xr + xi; + xi = xr - xi; + if (j < (s32)n) { + f32 PTR4* p = a + 2; + do { + f32 tmp; + f32 sum; + f32 diff; + + yr = p[0]; + j += 2; + tmp = p[1]; + diff = yr - tmp; + p[-1] = diff; + sum = yr + tmp; + p[0] = sum; + p += 2; + } while (j < (s32)n); + } + a[n - 1] = xi; + } +} + +#pragma dont_inline on +/* Build complex FFT twiddle factors and the matching bit-reversal table. 
*/ +static void makewt(s32 nw, s32 PTR4* ip, f32 PTR4* w) +{ + s32 j; + s32 nwh; + s32 offset; + s32 next; + f32 delta; + f32 three; + f32 x; + f32 cos_nwh; + f32 half_recip; + + ip[1] = 1; + ip[0] = nw; + + if (nw > 2) { + nwh = FFT_HALF_SIZE(nw); + delta = atanf(FFT_TRIG_ONE) / (f32)nwh; + x = cosf(delta * (f32)nwh); + cos_nwh = x; + w[1] = x; + w[0] = FFT_TRIG_ONE; + + if (nwh > 3) { + w[2] = FFT_HALF_SECANT_SCALE / cosf(delta + delta); + w[3] = FFT_HALF_SECANT_SCALE / cosf(delta * FFT_SIX); + } + + j = 4; + if (j < nwh) { + f32 PTR4* p = w + 4; + three = FFT_THREE; + do { + x = delta * (f32)j; + p[0] = cosf(x); + p[1] = sinf(x); + x = (delta * three) * (f32)j; + p[2] = cosf(x); + p[3] = sinf(x); + j += 4; + p += 4; + } while (j < nwh); + } + + offset = 0; + while (nwh > 2) { + half_recip = FFT_HALF_RECIP_SCALE; + next = offset + nwh; + nwh >>= 1; + w[next] = FFT_TRIG_ONE; + w[next + 1] = cos_nwh; + + if (nwh > 3) { + f32 x6; + f32 x4; + + x6 = w[offset + 6]; + x4 = w[offset + 4]; + w[next + 3] = half_recip / x6; + w[next + 2] = half_recip / x4; + } + + j = 4; + if (j < nwh) { + f32 PTR4* src = w + offset + 8; + f32 PTR4* dst = w + next + 4; + do { + f32 x0; + f32 x1; + f32 x2; + f32 x3; + + j += 4; + x0 = src[0]; + x1 = src[1]; + x2 = src[2]; + x3 = src[3]; + dst[0] = x0; + dst[1] = x1; + dst[2] = x2; + dst[3] = x3; + src += 8; + dst += 4; + } while (j < nwh); + } + + offset = next; + } + } +} + +/* Build the cosine table used by RDFT/DCT pre/post-processing. 
*/ +static void makect(s32 nc, s32 PTR4* ip, f32 PTR4* c) +{ + s32 j; + s32 nch; + f32 delta; + f32 x; + f64 half; + + ip[1] = nc; + + if (nc > 1) { + nch = FFT_HALF_SIZE(nc); + j = 1; + delta = atanf(DCT_TRIG_ONE) / (f32)nch; + x = cosf(delta * (f32)nch); + c[0] = x; + c[nch] = x * DCT_CENTER_SCALE; + + if (j < nch) { + f32 PTR4* fwd = c + 1; + f32 PTR4* rev = c + nc - 1; + half = DCT_HALF_SCALE; + do { + x = delta * (f32)j; + ++j; + *fwd = cosf(x) * half; + ++fwd; + *rev = sinf(x) * half; + --rev; + } while (j < nch); + } + } +} +void cftfsub(s32 n, f32 PTR4* a, s32 PTR4* ip, s32 nw, f32 PTR4* w) +{ + s32 n4; + + if (n > FFT_CFT_16_REAL_SIZE) { + n4 = FFT_QUARTER_SIZE(n); + cftf1st(n, a, &w[nw - n4]); + if (n > FFT_CFT_RECURSION_LIMIT) { + cftrec1(n4, a, nw, w); + cftrec2(n4, a + n4, nw, w); + cftrec1(n4, a + n4 * 2, nw, w); + cftrec1(n4, a + n4 * 3, nw, w); + } else if (n4 > FFT_CFT_16_REAL_SIZE) { + cftexp1(n, a, nw, w); + } else { + cftfx41(n, a, nw, w); + } + bitrv2(n, ip, a); + } else if (n > FFT_CFT_4_REAL_SIZE) { + if (n == FFT_CFT_16_REAL_SIZE) { + cftf161(a, &w[nw - 8]); + bitrv216(a); + } else { + cftf081(a, w); + bitrv208(a); + } + } else if (n == FFT_CFT_4_REAL_SIZE) { + cftf040(a); + } else if (n == FFT_CFT_2_REAL_SIZE) { + cftx020(a); + } +} + +static void cftbsub(s32 n, f32 PTR4* a, s32 PTR4* ip, s32 nw, f32 PTR4* w) +{ + s32 n4; + + if (n > FFT_CFT_16_REAL_SIZE) { + n4 = FFT_QUARTER_SIZE(n); + cftb1st(n, a, &w[nw - n4]); + if (n > FFT_CFT_RECURSION_LIMIT) { + cftrec1(n4, a, nw, w); + cftrec2(n4, a + n4, nw, w); + cftrec1(n4, a + n4 * 2, nw, w); + cftrec1(n4, a + n4 * 3, nw, w); + } else if (n4 > FFT_CFT_16_REAL_SIZE) { + cftexp1(n, a, nw, w); + } else { + cftfx41(n, a, nw, w); + } + bitrv2conj(n, ip, a); + } else if (n > FFT_CFT_4_REAL_SIZE) { + if (n == FFT_CFT_16_REAL_SIZE) { + cftf161(a, &w[nw - 8]); + bitrv216neg(a); + } else { + cftf081(a, w); + bitrv208neg(a); + } + } else if (n == FFT_CFT_4_REAL_SIZE) { + cftb040(a); + } else if (n == 
FFT_CFT_2_REAL_SIZE) { + cftx020(a); + } +} + +static void bitrv2(s32 n, s32 PTR4* ip, f32 PTR4* a) +{ + f32 xr; + f32 xi; + f32 yr; + s32 j; + s32 k; + s32 l; + s32 m; + s32 m2; + s32 j1; + s32 k1; + + ip[0] = 0; + l = n; + m = 1; + while ((m << 3) < l) { + l >>= 1; + for (j = 0; j < m; ++j) { + ip[m + j] = ip[j] + l; + } + m2 = FFT_TABLE_DOUBLE_SIZE(m); + m = m2; + } + + m2 = FFT_TABLE_DOUBLE_SIZE(m); + if ((m << 3) == l) { + k = 0; + if (k < m) { + do { + j = 0; + if (j < k) { + do { + j1 = k * 2 + ip[j]; + k1 = j * 2 + ip[k]; + xr = a[k1]; + xi = a[k1 + 1]; + yr = a[j1 + 1]; + a[k1] = a[j1]; + a[k1 + 1] = yr; + a[j1] = xr; + a[j1 + 1] = xi; + + k1 += m2; + j1 += m * 4; + xr = a[k1]; + xi = a[k1 + 1]; + yr = a[j1 + 1]; + a[k1] = a[j1]; + a[k1 + 1] = yr; + a[j1] = xr; + a[j1 + 1] = xi; + + k1 += m2; + j1 -= m2; + xr = a[k1]; + xi = a[k1 + 1]; + yr = a[j1 + 1]; + a[k1] = a[j1]; + a[k1 + 1] = yr; + a[j1] = xr; + a[j1 + 1] = xi; + + k1 += m2; + j1 += m * 4; + xr = a[k1]; + xi = a[k1 + 1]; + yr = a[j1 + 1]; + a[k1] = a[j1]; + a[k1 + 1] = yr; + a[j1] = xr; + a[j1 + 1] = xi; + + ++j; + } while (j < k); + } + + j1 = k * 2 + m2 + ip[k]; + k1 = j1 + m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1 + 1]; + a[j1] = a[k1]; + a[j1 + 1] = yr; + a[k1] = xr; + a[k1 + 1] = xi; + ++k; + } while (k < m); + } + } else { + k = 1; + if (k < m) { + do { + j = 0; + if (j < k) { + do { + j1 = k * 2 + ip[j]; + k1 = j * 2 + ip[k]; + xr = a[k1]; + xi = a[k1 + 1]; + yr = a[j1 + 1]; + a[k1] = a[j1]; + a[k1 + 1] = yr; + a[j1] = xr; + a[j1 + 1] = xi; + + k1 += m2; + j1 += m2; + xr = a[k1]; + xi = a[k1 + 1]; + yr = a[j1 + 1]; + a[k1] = a[j1]; + a[k1 + 1] = yr; + a[j1] = xr; + a[j1 + 1] = xi; + ++j; + } while (j < k); + } + ++k; + } while (k < m); + } + } +} +static void bitrv2conj(s32 n, s32 PTR4* ip, f32 PTR4* a) +{ + f32 xr; + f32 xi; + f32 yr; + s32 j; + s32 k; + s32 l; + s32 m; + s32 m2; + s32 j1; + s32 k1; + + ip[0] = 0; + l = n; + m = 1; + while ((m << 3) < l) { + l >>= 1; + m2 = 
FFT_TABLE_DOUBLE_SIZE(m); + for (j = 0; j < m; ++j) { + ip[m + j] = ip[j] + l; + } + m = m2; + } + + m2 = FFT_TABLE_DOUBLE_SIZE(m); + if ((m << 3) == l) { + k = 0; + if (k < m) { + do { + j = 0; + if (j < k) { + do { + j1 = k * 2 + ip[j]; + k1 = j * 2 + ip[k]; + xr = a[k1]; + xi = a[k1 + 1]; + yr = a[j1 + 1]; + a[k1] = a[j1]; + a[k1 + 1] = -yr; + a[j1] = xr; + a[j1 + 1] = -xi; + + k1 += m2; + j1 += m * 4; + xr = a[k1]; + xi = a[k1 + 1]; + yr = a[j1 + 1]; + a[k1] = a[j1]; + a[k1 + 1] = -yr; + a[j1] = xr; + a[j1 + 1] = -xi; + + k1 += m2; + j1 -= m2; + xr = a[k1]; + xi = a[k1 + 1]; + yr = a[j1 + 1]; + a[k1] = a[j1]; + a[k1 + 1] = -yr; + a[j1] = xr; + a[j1 + 1] = -xi; + + k1 += m2; + j1 += m * 4; + xr = a[k1]; + xi = a[k1 + 1]; + yr = a[j1 + 1]; + a[k1] = a[j1]; + a[k1 + 1] = -yr; + a[j1] = xr; + a[j1 + 1] = -xi; + + ++j; + } while (j < k); + } + + j1 = k * 2 + ip[k]; + a[j1 + 1] = -a[j1 + 1]; + k1 = j1 + m2; + j1 = k1 + m2; + xr = a[k1]; + xi = a[k1 + 1]; + yr = a[j1 + 1]; + a[k1] = a[j1]; + a[k1 + 1] = -yr; + a[j1] = xr; + a[j1 + 1] = -xi; + j1 += m2; + a[j1 + 1] = -a[j1 + 1]; + ++k; + } while (k < m); + } + } else { + a[1] = -a[1]; + a[m2 + 1] = -a[m2 + 1]; + k = 1; + if (k < m) { + do { + j = 0; + if (j < k) { + do { + j1 = k * 2 + ip[j]; + k1 = j * 2 + ip[k]; + xr = a[k1]; + xi = a[k1 + 1]; + yr = a[j1 + 1]; + a[k1] = a[j1]; + a[k1 + 1] = -yr; + a[j1] = xr; + a[j1 + 1] = -xi; + + k1 += m2; + j1 += m2; + xr = a[k1]; + xi = a[k1 + 1]; + yr = a[j1 + 1]; + a[k1] = a[j1]; + a[k1 + 1] = -yr; + a[j1] = xr; + a[j1 + 1] = -xi; + ++j; + } while (j < k); + } + + j1 = k * 2 + ip[k]; + a[j1 + 1] = -a[j1 + 1]; + j1 += m2; + a[j1 + 1] = -a[j1 + 1]; + ++k; + } while (k < m); + } + } +} +static void bitrv216(f32 PTR4* a) +{ + f32 x4; + f32 x5; + f32 x8; + f32 x9; + f32 x16; + f32 x17; + f32 x20; + f32 x24; + f32 x25; + f32 x2; + f32 x3; + f32 x6; + f32 x7; + f32 x10; + f32 x21; + f32 x11; + f32 x14; + f32 x15; + f32 x22; + f32 x23; + f32 x26; + f32 x27; + f32 x28; + f32 x29; + + 
x4 = a[4]; + x5 = a[5]; + x8 = a[8]; + x9 = a[9]; + x16 = a[16]; + x17 = a[17]; + x20 = a[20]; + x24 = a[24]; + x25 = a[25]; + x2 = a[2]; + x3 = a[3]; + x6 = a[6]; + x7 = a[7]; + x10 = a[10]; + + a[2] = x16; + a[3] = x17; + a[4] = x8; + a[5] = x9; + a[6] = x24; + a[7] = x25; + a[8] = x4; + a[9] = x5; + a[10] = x20; + + x21 = a[21]; + x11 = a[11]; + x14 = a[14]; + x15 = a[15]; + x22 = a[22]; + x23 = a[23]; + x26 = a[26]; + x27 = a[27]; + x28 = a[28]; + x29 = a[29]; + + a[11] = x21; + a[14] = x28; + a[15] = x29; + a[16] = x2; + a[17] = x3; + a[20] = x10; + a[21] = x11; + a[22] = x26; + a[23] = x27; + a[24] = x6; + a[25] = x7; + a[26] = x22; + a[27] = x23; + a[28] = x14; + a[29] = x15; +} + +static void bitrv216neg(f32 PTR4* a) +{ + f32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i; + f32 x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i; + f32 x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i; + f32 x13r, x13i, x14r, x14i, x15r, x15i; + + x1r = a[2]; + x1i = a[3]; + x2r = a[4]; + x2i = a[5]; + x3r = a[6]; + x3i = a[7]; + x4r = a[8]; + x4i = a[9]; + x5r = a[10]; + x5i = a[11]; + x6r = a[12]; + x6i = a[13]; + x7r = a[14]; + x7i = a[15]; + x8r = a[16]; + x8i = a[17]; + x9r = a[18]; + x9i = a[19]; + x10r = a[20]; + x10i = a[21]; + x11r = a[22]; + x11i = a[23]; + x12r = a[24]; + x12i = a[25]; + x13r = a[26]; + x13i = a[27]; + x14r = a[28]; + x14i = a[29]; + x15r = a[30]; + x15i = a[31]; + + a[2] = x15r; + a[3] = x15i; + a[4] = x7r; + a[5] = x7i; + a[6] = x11r; + a[7] = x11i; + a[8] = x3r; + a[9] = x3i; + a[10] = x13r; + a[11] = x13i; + a[12] = x5r; + a[13] = x5i; + a[14] = x9r; + a[15] = x9i; + a[16] = x1r; + a[17] = x1i; + a[18] = x14r; + a[19] = x14i; + a[20] = x6r; + a[21] = x6i; + a[22] = x10r; + a[23] = x10i; + a[24] = x2r; + a[25] = x2i; + a[26] = x12r; + a[27] = x12i; + a[28] = x4r; + a[29] = x4i; + a[30] = x8r; + a[31] = x8i; +} + +static void bitrv208(f32 PTR4* a) +{ + f32 x2; + f32 x3; + f32 x6; + f32 x7; + f32 x8; + f32 x9; + f32 x12; + f32 x13; + + x2 = a[2]; + x3 = a[3]; + x6 = 
a[6]; + x7 = a[7]; + x8 = a[8]; + x9 = a[9]; + x12 = a[12]; + x13 = a[13]; + + a[2] = x8; + a[3] = x9; + a[6] = x12; + a[7] = x13; + a[8] = x2; + a[9] = x3; + a[12] = x6; + a[13] = x7; +} + +static void bitrv208neg(f32 PTR4* a) +{ + f32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i; + f32 x5r, x5i, x6r, x6i, x7r, x7i; + + x1r = a[2]; + x1i = a[3]; + x2r = a[4]; + x2i = a[5]; + x3r = a[6]; + x3i = a[7]; + x4r = a[8]; + x4i = a[9]; + x5r = a[10]; + x5i = a[11]; + x6r = a[12]; + x6i = a[13]; + x7r = a[14]; + x7i = a[15]; + + a[2] = x7r; + a[3] = x7i; + a[4] = x3r; + a[5] = x3i; + a[6] = x5r; + a[7] = x5i; + a[8] = x1r; + a[9] = x1i; + a[10] = x6r; + a[11] = x6i; + a[12] = x2r; + a[13] = x2i; + a[14] = x4r; + a[15] = x4i; +} +static void cftf1st(s32 n, f32 PTR4* a, f32 PTR4* w) +{ + int j, j0, j1, j2, j3, k, m, mh; + f32 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, + wd1r, wd1i, wd3r, wd3i; + f32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; + + mh = FFT_EIGHTH_SIZE(n); + m = 2 * mh; + j1 = m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[0] + a[j2]; + x0i = a[1] + a[j2 + 1]; + x1r = a[0] - a[j2]; + x1i = a[1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + a[j2] = x1r - x3i; + a[j2 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + wn4r = w[1]; + csc1 = w[2]; + csc3 = w[3]; + wd1r = CFT_ROT_ONE; + wd1i = CFT_ROT_ZERO; + wd3r = CFT_ROT_ONE; + wd3i = CFT_ROT_ZERO; + k = 0; + for (j = 2; j < mh - 2; j += 4) + { + k += 4; + wk1r = csc1 * (wd1r + w[k]); + wk1i = csc1 * (wd1i + w[k + 1]); + wk3r = csc3 * (wd3r + w[k + 2]); + wk3i = csc3 * (wd3i - w[k + 3]); + wd1r = w[k]; + wd1i = w[k + 1]; + wd3r = w[k + 2]; + wd3i = -w[k + 3]; + j1 = j + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j] + a[j2]; + x0i = a[j + 1] + a[j2 + 1]; + x1r = a[j] - a[j2]; + x1i = a[j + 1] - a[j2 + 1]; + y0r = 
a[j + 2] + a[j2 + 2]; + y0i = a[j + 3] + a[j2 + 3]; + y1r = a[j + 2] - a[j2 + 2]; + y1i = a[j + 3] - a[j2 + 3]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + y2r = a[j1 + 2] + a[j3 + 2]; + y2i = a[j1 + 3] + a[j3 + 3]; + y3r = a[j1 + 2] - a[j3 + 2]; + y3i = a[j1 + 3] - a[j3 + 3]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j + 2] = y0r + y2r; + a[j + 3] = y0i + y2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + a[j1 + 2] = y0r - y2r; + a[j1 + 3] = y0i - y2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wk1r * x0r - wk1i * x0i; + a[j2 + 1] = wk1r * x0i + wk1i * x0r; + x0r = y1r - y3i; + x0i = y1i + y3r; + a[j2 + 2] = wd1r * x0r - wd1i * x0i; + a[j2 + 3] = wd1r * x0i + wd1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r + wk3i * x0i; + a[j3 + 1] = wk3r * x0i - wk3i * x0r; + x0r = y1r + y3i; + x0i = y1i - y3r; + a[j3 + 2] = wd3r * x0r + wd3i * x0i; + a[j3 + 3] = wd3r * x0i - wd3i * x0r; + j0 = m - j; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] + a[j2]; + x0i = a[j0 + 1] + a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = a[j0 + 1] - a[j2 + 1]; + y0r = a[j0 - 2] + a[j2 - 2]; + y0i = a[j0 - 1] + a[j2 - 1]; + y1r = a[j0 - 2] - a[j2 - 2]; + y1i = a[j0 - 1] - a[j2 - 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + y2r = a[j1 - 2] + a[j3 - 2]; + y2i = a[j1 - 1] + a[j3 - 1]; + y3r = a[j1 - 2] - a[j3 - 2]; + y3i = a[j1 - 1] - a[j3 - 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j0 - 2] = y0r + y2r; + a[j0 - 1] = y0i + y2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + a[j1 - 2] = y0r - y2r; + a[j1 - 1] = y0i - y2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wk1i * x0r - wk1r * x0i; + a[j2 + 1] = wk1i * x0i + wk1r * x0r; + x0r = y1r - y3i; + x0i = y1i + y3r; + a[j2 - 2] = wd1i * x0r - wd1r * x0i; + a[j2 - 1] = wd1i * x0i + wd1r * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3i * x0r + wk3r * 
x0i; + a[j3 + 1] = wk3i * x0i - wk3r * x0r; + x0r = y1r + y3i; + x0i = y1i - y3r; + a[j3 - 2] = wd3i * x0r + wd3r * x0i; + a[j3 - 1] = wd3i * x0i - wd3r * x0r; + } + wk1r = csc1 * (wd1r + wn4r); + wk1i = csc1 * (wd1i + wn4r); + wk3r = csc3 * (wd3r - wn4r); + wk3i = csc3 * (wd3i - wn4r); + j0 = mh; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0 - 2] + a[j2 - 2]; + x0i = a[j0 - 1] + a[j2 - 1]; + x1r = a[j0 - 2] - a[j2 - 2]; + x1i = a[j0 - 1] - a[j2 - 1]; + x2r = a[j1 - 2] + a[j3 - 2]; + x2i = a[j1 - 1] + a[j3 - 1]; + x3r = a[j1 - 2] - a[j3 - 2]; + x3i = a[j1 - 1] - a[j3 - 1]; + a[j0 - 2] = x0r + x2r; + a[j0 - 1] = x0i + x2i; + a[j1 - 2] = x0r - x2r; + a[j1 - 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2 - 2] = wk1r * x0r - wk1i * x0i; + a[j2 - 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3 - 2] = wk3r * x0r + wk3i * x0i; + a[j3 - 1] = wk3r * x0i - wk3i * x0r; + x0r = a[j0] + a[j2]; + x0i = a[j0 + 1] + a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = a[j0 + 1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wn4r * (x0r - x0i); + a[j2 + 1] = wn4r * (x0i + x0r); + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = -wn4r * (x0r + x0i); + a[j3 + 1] = -wn4r * (x0i - x0r); + x0r = a[j0 + 2] + a[j2 + 2]; + x0i = a[j0 + 3] + a[j2 + 3]; + x1r = a[j0 + 2] - a[j2 + 2]; + x1i = a[j0 + 3] - a[j2 + 3]; + x2r = a[j1 + 2] + a[j3 + 2]; + x2i = a[j1 + 3] + a[j3 + 3]; + x3r = a[j1 + 2] - a[j3 + 2]; + x3i = a[j1 + 3] - a[j3 + 3]; + a[j0 + 2] = x0r + x2r; + a[j0 + 3] = x0i + x2i; + a[j1 + 2] = x0r - x2r; + a[j1 + 3] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2 + 2] = wk1i * x0r - wk1r * x0i; + a[j2 + 3] = wk1i * x0i + wk1r * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3 + 2] = wk3i * x0r + wk3r * x0i; + a[j3 + 3] = wk3i * 
x0i - wk3r * x0r; +} + +static void cftb1st(s32 n, f32 PTR4* a, f32 PTR4* w) +{ + int j, j0, j1, j2, j3, k, m, mh; + f32 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, + wd1r, wd1i, wd3r, wd3i; + f32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; + + mh = FFT_EIGHTH_SIZE(n); + m = 2 * mh; + j1 = m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[0] + a[j2]; + x0i = -a[1] - a[j2 + 1]; + x1r = a[0] - a[j2]; + x1i = -a[1] + a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[0] = x0r + x2r; + a[1] = x0i - x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i + x2i; + a[j2] = x1r + x3i; + a[j2 + 1] = x1i + x3r; + a[j3] = x1r - x3i; + a[j3 + 1] = x1i - x3r; + wn4r = w[1]; + csc1 = w[2]; + csc3 = w[3]; + wd1r = CFT_INV_ROT_ONE; + wd1i = CFT_INV_ROT_ZERO; + wd3r = CFT_INV_ROT_ONE; + wd3i = CFT_INV_ROT_ZERO; + k = 0; + for (j = 2; j < mh - 2; j += 4) + { + k += 4; + wk1r = csc1 * (wd1r + w[k]); + wk1i = csc1 * (wd1i + w[k + 1]); + wk3r = csc3 * (wd3r + w[k + 2]); + wk3i = csc3 * (wd3i - w[k + 3]); + wd1r = w[k]; + wd1i = w[k + 1]; + wd3r = w[k + 2]; + wd3i = -w[k + 3]; + j1 = j + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j] + a[j2]; + x0i = -a[j + 1] - a[j2 + 1]; + x1r = a[j] - a[j2]; + x1i = -a[j + 1] + a[j2 + 1]; + y0r = a[j + 2] + a[j2 + 2]; + y0i = -a[j + 3] - a[j2 + 3]; + y1r = a[j + 2] - a[j2 + 2]; + y1i = -a[j + 3] + a[j2 + 3]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + y2r = a[j1 + 2] + a[j3 + 2]; + y2i = a[j1 + 3] + a[j3 + 3]; + y3r = a[j1 + 2] - a[j3 + 2]; + y3i = a[j1 + 3] - a[j3 + 3]; + a[j] = x0r + x2r; + a[j + 1] = x0i - x2i; + a[j + 2] = y0r + y2r; + a[j + 3] = y0i - y2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i + x2i; + a[j1 + 2] = y0r - y2r; + a[j1 + 3] = y0i + y2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2] = wk1r * x0r - wk1i * x0i; + a[j2 + 1] = wk1r * x0i + wk1i * x0r; + x0r = y1r + y3i; + x0i = y1i + 
y3r; + a[j2 + 2] = wd1r * x0r - wd1i * x0i; + a[j2 + 3] = wd1r * x0i + wd1i * x0r; + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r + wk3i * x0i; + a[j3 + 1] = wk3r * x0i - wk3i * x0r; + x0r = y1r - y3i; + x0i = y1i - y3r; + a[j3 + 2] = wd3r * x0r + wd3i * x0i; + a[j3 + 3] = wd3r * x0i - wd3i * x0r; + j0 = m - j; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] + a[j2]; + x0i = -a[j0 + 1] - a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = -a[j0 + 1] + a[j2 + 1]; + y0r = a[j0 - 2] + a[j2 - 2]; + y0i = -a[j0 - 1] - a[j2 - 1]; + y1r = a[j0 - 2] - a[j2 - 2]; + y1i = -a[j0 - 1] + a[j2 - 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + y2r = a[j1 - 2] + a[j3 - 2]; + y2i = a[j1 - 1] + a[j3 - 1]; + y3r = a[j1 - 2] - a[j3 - 2]; + y3i = a[j1 - 1] - a[j3 - 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i - x2i; + a[j0 - 2] = y0r + y2r; + a[j0 - 1] = y0i - y2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i + x2i; + a[j1 - 2] = y0r - y2r; + a[j1 - 1] = y0i + y2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2] = wk1i * x0r - wk1r * x0i; + a[j2 + 1] = wk1i * x0i + wk1r * x0r; + x0r = y1r + y3i; + x0i = y1i + y3r; + a[j2 - 2] = wd1i * x0r - wd1r * x0i; + a[j2 - 1] = wd1i * x0i + wd1r * x0r; + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3] = wk3i * x0r + wk3r * x0i; + a[j3 + 1] = wk3i * x0i - wk3r * x0r; + x0r = y1r - y3i; + x0i = y1i - y3r; + a[j3 - 2] = wd3i * x0r + wd3r * x0i; + a[j3 - 1] = wd3i * x0i - wd3r * x0r; + } + wk1r = csc1 * (wd1r + wn4r); + wk1i = csc1 * (wd1i + wn4r); + wk3r = csc3 * (wd3r - wn4r); + wk3i = csc3 * (wd3i - wn4r); + j0 = mh; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0 - 2] + a[j2 - 2]; + x0i = -a[j0 - 1] - a[j2 - 1]; + x1r = a[j0 - 2] - a[j2 - 2]; + x1i = -a[j0 - 1] + a[j2 - 1]; + x2r = a[j1 - 2] + a[j3 - 2]; + x2i = a[j1 - 1] + a[j3 - 1]; + x3r = a[j1 - 2] - a[j3 - 2]; + x3i = a[j1 - 1] - a[j3 - 1]; + a[j0 - 2] = x0r + x2r; + a[j0 - 1] = x0i - x2i; + a[j1 - 2] = x0r - x2r; + 
a[j1 - 1] = x0i + x2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2 - 2] = wk1r * x0r - wk1i * x0i; + a[j2 - 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3 - 2] = wk3r * x0r + wk3i * x0i; + a[j3 - 1] = wk3r * x0i - wk3i * x0r; + x0r = a[j0] + a[j2]; + x0i = -a[j0 + 1] - a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = -a[j0 + 1] + a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i - x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i + x2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2] = wn4r * (x0r - x0i); + a[j2 + 1] = wn4r * (x0i + x0r); + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3] = -wn4r * (x0r + x0i); + a[j3 + 1] = -wn4r * (x0i - x0r); + x0r = a[j0 + 2] + a[j2 + 2]; + x0i = -a[j0 + 3] - a[j2 + 3]; + x1r = a[j0 + 2] - a[j2 + 2]; + x1i = -a[j0 + 3] + a[j2 + 3]; + x2r = a[j1 + 2] + a[j3 + 2]; + x2i = a[j1 + 3] + a[j3 + 3]; + x3r = a[j1 + 2] - a[j3 + 2]; + x3i = a[j1 + 3] - a[j3 + 3]; + a[j0 + 2] = x0r + x2r; + a[j0 + 3] = x0i - x2i; + a[j1 + 2] = x0r - x2r; + a[j1 + 3] = x0i + x2i; + x0r = x1r + x3i; + x0i = x1i + x3r; + a[j2 + 2] = wk1i * x0r - wk1r * x0i; + a[j2 + 3] = wk1i * x0i + wk1r * x0r; + x0r = x1r - x3i; + x0i = x1i - x3r; + a[j3 + 2] = wk3i * x0r + wk3r * x0i; + a[j3 + 3] = wk3i * x0i - wk3r * x0r; +} + +void cftrec1(s32 n, f32 PTR4* a, s32 nw, f32 PTR4* w) +{ + int quarter; + + quarter = n >> 2; + cftmdl1(n, a, &w[nw - (quarter + quarter)]); + if (n > FFT_CFT_RECURSION_LIMIT) { + cftrec1(quarter, a, nw, w); + cftrec2(quarter, a + quarter, nw, w); + cftrec1(quarter, a + quarter * 2, nw, w); + cftrec1(quarter, a + quarter * 3, nw, w); + } else { + cftexp1(n, a, nw, w); + } +} + +void cftrec2(s32 n, f32 PTR4* a, s32 nw, f32 PTR4* w) +{ + int quarter; + + quarter = n >> 2; + cftmdl2(n, a, &w[nw - n]); + if (n > FFT_CFT_RECURSION_LIMIT) { + cftrec1(quarter, a, nw, w); + cftrec2(quarter, a + quarter, nw, w); + cftrec1(quarter, a + 
quarter * 2, nw, w); + cftrec2(quarter, a + quarter * 3, nw, w); /* fourth quarter is second-kind here, unlike cftrec1 */ + } else { + cftexp2(n, a, nw, w); + } +} +static void cftexp1(s32 n, f32 PTR4* a, s32 nw, f32 PTR4* w) /* Iterative finisher reached from cftrec1. Starting at m = n / 4 and dividing m by 4 while m > 128, each sweep applies cftmdl1, cftmdl2, cftmdl1 to groups of length-m sub-blocks plus one trailing cftmdl1 at a[n - m]. The last sweep (m <= 128) does the same but follows every pass with the matching cftfx41/cftfx42 leaf call. */ +{ + s32 j; + s32 k; + s32 m; + s32 mh; + + m = n >> 2; + while (m > 128) { + k = m; + mh = m >> 1; + if (k < n) { + do { + for (j = k - m; j < n; j += k << 2) { /* visit sub-blocks j, j + k, j + 2k in every group of 4k */ + cftmdl1(m, &a[j], &w[nw - mh]); + cftmdl2(m, &a[j + k], &w[nw - m]); + cftmdl1(m, &a[j + 2 * k], &w[nw - mh]); + } + k <<= 2; /* stride grows by 4 each inner round */ + } while (k < n); + } + cftmdl1(m, &a[n - m], &w[nw - mh]); /* last sub-block is not reached by the loop above */ + m >>= 2; + } + + k = m; + mh = m >> 1; + if (k < n) { + do { + for (j = k - m; j < n; j += k << 2) { + cftmdl1(m, &a[j], &w[nw - mh]); + cftfx41(m, &a[j], nw, w); /* leaf transform immediately after its pass */ + cftmdl2(m, &a[j + k], &w[nw - m]); + cftfx42(m, &a[j + k], nw, w); + cftmdl1(m, &a[j + 2 * k], &w[nw - mh]); + cftfx41(m, &a[j + 2 * k], nw, w); + } + k <<= 2; + } while (k < n); + } + cftmdl1(m, &a[n - m], &w[nw - mh]); + cftfx41(m, &a[n - m], nw, w); +} + +static void cftexp2(s32 n, f32 PTR4* a, s32 nw, f32 PTR4* w) /* Iterative finisher reached from cftrec2. Processes the two halves of a (offset 0 and offset h = n / 2) in lockstep: cftmdl1 on sub-blocks stepped by 2k, cftmdl2 on sub-blocks stepped by 4k starting at 2k - m. The last sweep (m <= 128) adds the cftfx41/cftfx42 leaf calls. */ +{ + s32 j; + s32 k; + s32 m; + s32 mh; + s32 h; + s32 next; + + h = n >> 1; + m = n >> 2; + while (m > 128) { + k = m; + next = m >> 2; /* computed up front; m itself is still needed inside the loops */ + if (k < h) { + mh = k >> 1; /* taken from the initial k (== m), not recomputed as k grows */ + do { + for (j = k - m; j < h; j += k << 1) { + cftmdl1(m, &a[j], &w[nw - mh]); + cftmdl1(m, &a[j + h], &w[nw - mh]); /* same position in the upper half */ + } + for (j = k * 2 - m; j < h; j += k << 2) { + cftmdl2(m, &a[j], &w[nw - m]); + cftmdl2(m, &a[j + h], &w[nw - m]); + } + k <<= 2; + } while (k < h); + } + m = next; + } + + k = m; + if (k < h) { + mh = k >> 1; + do { + for (j = k - m; j < h; j += k << 1) { + cftmdl1(m, &a[j], &w[nw - mh]); + cftfx41(m, &a[j], nw, w); + cftmdl1(m, &a[j + h], &w[nw - mh]); + cftfx41(m, &a[j + h], nw, w); + } + for (j = k * 2 - m; j < h; j += k << 2) { + cftmdl2(m, &a[j], &w[nw - m]); + cftfx42(m, &a[j], nw, w); + cftmdl2(m, &a[j + h], &w[nw - m]); + cftfx42(m, &a[j + h], nw, w); + } + k <<= 2; + } while (k < h); + } +} +static void cftmdl1(s32 n, f32 PTR4* a, f32 PTR4* w) /* First-kind pass: combines four blocks of a at offsets 0, m, 2m, 3m (m = 2 * FFT_EIGHTH_SIZE(n)) in place; reads wn4r from w[1] and four coefficients per loop step starting at w[4]. */ +{ + int j; + int j0; + int j1; + int j2;
+ int j3; + int k; + int m; + int mh; + f32 wn4r; + f32 wk1r; + f32 wk1i; + f32 wk3r; + f32 wk3i; + f32 x0r; + f32 x0i; + f32 x1r; + f32 x1i; + f32 x2r; + f32 x2i; + f32 x3r; + f32 x3i; + + mh = FFT_EIGHTH_SIZE(n); + m = 2 * mh; + j1 = m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[0] + a[j2]; + x0i = a[1] + a[j2 + 1]; + x1r = a[0] - a[j2]; + x1i = a[1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + a[j2] = x1r - x3i; + a[j2 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + wn4r = w[1]; + k = 0; + for (j = 2; j < mh; j += 2) { + k += 4; + wk1r = w[k]; + wk1i = w[k + 1]; + wk3r = w[k + 2]; + wk3i = -w[k + 3]; + j1 = j + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j] + a[j2]; + x0i = a[j + 1] + a[j2 + 1]; + x1r = a[j] - a[j2]; + x1i = a[j + 1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wk1r * x0r - wk1i * x0i; + a[j2 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r + wk3i * x0i; + a[j3 + 1] = wk3r * x0i - wk3i * x0r; + j0 = m - j; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] + a[j2]; + x0i = a[j0 + 1] + a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = a[j0 + 1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wk1i * x0r - wk1r * x0i; + a[j2 + 1] = wk1i * x0i + wk1r * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3i * x0r + wk3r * x0i; + a[j3 + 1] = wk3i * x0i - wk3r * x0r; + } + j0 = mh; + j1 = j0 + m; + j2 = j1 + m; + 
j3 = j2 + m; + x0r = a[j0] + a[j2]; + x0i = a[j0 + 1] + a[j2 + 1]; + x1r = a[j0] - a[j2]; + x1i = a[j0 + 1] - a[j2 + 1]; + x2r = a[j1] + a[j3]; + x2i = a[j1 + 1] + a[j3 + 1]; + x3r = a[j1] - a[j3]; + x3i = a[j1 + 1] - a[j3 + 1]; + a[j0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j1] = x0r - x2r; + a[j1 + 1] = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j2] = wn4r * (x0r - x0i); + a[j2 + 1] = wn4r * (x0i + x0r); + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = -wn4r * (x0r + x0i); + a[j3 + 1] = -wn4r * (x0i - x0r); +} +static void cftmdl2(s32 n, f32 PTR4* a, f32 PTR4* w) +{ + int j; + int j0; + int j1; + int j2; + int j3; + int k; + int kr; + int m; + int mh; + f32 wn4r; + f32 wk1r; + f32 wk1i; + f32 wk3r; + f32 wk3i; + f32 wd1r; + f32 wd1i; + f32 wd3r; + f32 wd3i; + f32 x0r; + f32 x0i; + f32 x1r; + f32 x1i; + f32 x2r; + f32 x2i; + f32 x3r; + f32 x3i; + f32 y0r; + f32 y0i; + f32 y2r; + f32 y2i; + + mh = FFT_EIGHTH_SIZE(n); + m = 2 * mh; + wn4r = w[1]; + j1 = m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[0] - a[j2 + 1]; + x0i = a[1] + a[j2]; + x1r = a[0] + a[j2 + 1]; + x1i = a[1] - a[j2]; + x2r = a[j1] - a[j3 + 1]; + x2i = a[j1 + 1] + a[j3]; + x3r = a[j1] + a[j3 + 1]; + x3i = a[j1 + 1] - a[j3]; + y0r = wn4r * (x2r - x2i); + y0i = wn4r * (x2i + x2r); + a[0] = x0r + y0r; + a[1] = x0i + y0i; + a[j1] = x0r - y0r; + a[j1 + 1] = x0i - y0i; + y0r = wn4r * (x3r - x3i); + y0i = wn4r * (x3i + x3r); + a[j2] = x1r - y0i; + a[j2 + 1] = x1i + y0r; + a[j3] = x1r + y0i; + a[j3 + 1] = x1i - y0r; + k = 0; + kr = 2 * m; + for (j = 2; j < mh; j += 2) { + k += 4; + wk1r = w[k]; + wk1i = w[k + 1]; + wk3r = w[k + 2]; + wk3i = -w[k + 3]; + kr -= 4; + wd1i = w[kr]; + wd1r = w[kr + 1]; + wd3i = w[kr + 2]; + wd3r = -w[kr + 3]; + j1 = j + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j] - a[j2 + 1]; + x0i = a[j + 1] + a[j2]; + x1r = a[j] + a[j2 + 1]; + x1i = a[j + 1] - a[j2]; + x2r = a[j1] - a[j3 + 1]; + x2i = a[j1 + 1] + a[j3]; + x3r = a[j1] + a[j3 + 1]; + x3i = a[j1 + 1] - a[j3]; + y0r = 
wk1r * x0r - wk1i * x0i; + y0i = wk1r * x0i + wk1i * x0r; + y2r = wd1r * x2r - wd1i * x2i; + y2i = wd1r * x2i + wd1i * x2r; + a[j] = y0r + y2r; + a[j + 1] = y0i + y2i; + a[j1] = y0r - y2r; + a[j1 + 1] = y0i - y2i; + y0r = wk3r * x1r + wk3i * x1i; + y0i = wk3r * x1i - wk3i * x1r; + y2r = wd3r * x3r + wd3i * x3i; + y2i = wd3r * x3i - wd3i * x3r; + a[j2] = y0r + y2r; + a[j2 + 1] = y0i + y2i; + a[j3] = y0r - y2r; + a[j3 + 1] = y0i - y2i; + j0 = m - j; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] - a[j2 + 1]; + x0i = a[j0 + 1] + a[j2]; + x1r = a[j0] + a[j2 + 1]; + x1i = a[j0 + 1] - a[j2]; + x2r = a[j1] - a[j3 + 1]; + x2i = a[j1 + 1] + a[j3]; + x3r = a[j1] + a[j3 + 1]; + x3i = a[j1 + 1] - a[j3]; + y0r = wd1i * x0r - wd1r * x0i; + y0i = wd1i * x0i + wd1r * x0r; + y2r = wk1i * x2r - wk1r * x2i; + y2i = wk1i * x2i + wk1r * x2r; + a[j0] = y0r + y2r; + a[j0 + 1] = y0i + y2i; + a[j1] = y0r - y2r; + a[j1 + 1] = y0i - y2i; + y0r = wd3i * x1r + wd3r * x1i; + y0i = wd3i * x1i - wd3r * x1r; + y2r = wk3i * x3r + wk3r * x3i; + y2i = wk3i * x3i - wk3r * x3r; + a[j2] = y0r + y2r; + a[j2 + 1] = y0i + y2i; + a[j3] = y0r - y2r; + a[j3 + 1] = y0i - y2i; + } + wk1r = w[m]; + wk1i = w[m + 1]; + j0 = mh; + j1 = j0 + m; + j2 = j1 + m; + j3 = j2 + m; + x0r = a[j0] - a[j2 + 1]; + x0i = a[j0 + 1] + a[j2]; + x1r = a[j0] + a[j2 + 1]; + x1i = a[j0 + 1] - a[j2]; + x2r = a[j1] - a[j3 + 1]; + x2i = a[j1 + 1] + a[j3]; + x3r = a[j1] + a[j3 + 1]; + x3i = a[j1 + 1] - a[j3]; + y0r = wk1r * x0r - wk1i * x0i; + y0i = wk1r * x0i + wk1i * x0r; + y2r = wk1i * x2r - wk1r * x2i; + y2i = wk1i * x2i + wk1r * x2r; + a[j0] = y0r + y2r; + a[j0 + 1] = y0i + y2i; + a[j1] = y0r - y2r; + a[j1 + 1] = y0i - y2i; + y0r = wk1i * x1r - wk1r * x1i; + y0i = wk1i * x1i + wk1r * x1r; + y2r = wk1r * x3r - wk1i * x3i; + y2i = wk1r * x3i + wk1i * x3r; + a[j2] = y0r - y2r; + a[j2 + 1] = y0i - y2i; + a[j3] = y0r + y2r; + a[j3 + 1] = y0i + y2i; +} +static void cftfx41(s32 n, f32 PTR4* a, s32 nw, f32 PTR4* w) +{ + if (n == 
0x80) { /* n == 0x80: four 0x20-float sub-blocks handled by the 32-float kernels */ + cftf161(a, &w[nw - 8]); + cftf162(a + 0x20, &w[nw - 32]); + cftf161(a + 0x40, &w[nw - 8]); + cftf161(a + 0x60, &w[nw - 8]); + } else { /* any other n: four 0x10-float sub-blocks handled by the 16-float kernels */ + cftf081(a, &w[nw - 16]); + cftf082(a + 0x10, &w[nw - 16]); + cftf081(a + 0x20, &w[nw - 16]); + cftf081(a + 0x30, &w[nw - 16]); + } +} + +static void cftfx42(s32 n, f32 PTR4* a, s32 nw, f32 PTR4* w) /* Leaf dispatcher, second kind. Identical to cftfx41 except that the fourth sub-block also uses the "2" kernel (cftf162 / cftf082), giving the order 1, 2, 1, 2 instead of 1, 2, 1, 1. The else branch is taken for every n other than 0x80; callers in this file reach it with m <= 128 (presumably 0x40 -- TODO confirm). */ +{ + if (n == 0x80) { + cftf161(a, &w[nw - 8]); + cftf162(a + 0x20, &w[nw - 32]); + cftf161(a + 0x40, &w[nw - 8]); + cftf162(a + 0x60, &w[nw - 32]); + } else { + cftf081(a, &w[nw - 16]); + cftf082(a + 0x10, &w[nw - 16]); + cftf081(a + 0x20, &w[nw - 16]); + cftf082(a + 0x30, &w[nw - 16]); + } +} +static void cftf161(f32 PTR4* a, f32 PTR4* w) /* Fixed-size in-place kernel over a[0..31]; uses only w[1], w[2] and w[3]. All inputs are read into the y* temporaries before any element of a is overwritten. */ +{ + f32 wn4r, wk1r, wk1i; + f32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + f32 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; + f32 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; + f32 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i; + f32 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; + + wn4r = w[1]; + wk1r = w[2]; + wk1i = w[3]; + x0r = a[0] + a[16]; /* first stage pairs element i with i + 16 and i + 8 with i + 24 */ + x0i = a[1] + a[17]; + x1r = a[0] - a[16]; + x1i = a[1] - a[17]; + x2r = a[8] + a[24]; + x2i = a[9] + a[25]; + x3r = a[8] - a[24]; + x3i = a[9] - a[25]; + y0r = x0r + x2r; + y0i = x0i + x2i; + y4r = x0r - x2r; + y4i = x0i - x2i; + y8r = x1r - x3i; + y8i = x1i + x3r; + y12r = x1r + x3i; + y12i = x1i - x3r; + x0r = a[2] + a[18]; + x0i = a[3] + a[19]; + x1r = a[2] - a[18]; + x1i = a[3] - a[19]; + x2r = a[10] + a[26]; + x2i = a[11] + a[27]; + x3r = a[10] - a[26]; + x3i = a[11] - a[27]; + y1r = x0r + x2r; + y1i = x0i + x2i; + y5r = x0r - x2r; + y5i = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + y9r = wk1r * x0r - wk1i * x0i; + y9i = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + y13r = wk1i * x0r - wk1r * x0i; + y13i = wk1i * x0i + wk1r * x0r; + x0r = a[4] + a[20]; + x0i = a[5] + a[21]; + x1r = a[4] - a[20]; + x1i = a[5] - a[21]; + x2r = a[12] + a[28]; + x2i = a[13] + a[29]; + x3r = a[12] - a[28]; + x3i = a[13] - a[29]; + y2r = x0r + x2r;
+ y2i = x0i + x2i; + y6r = x0r - x2r; + y6i = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + y10r = wn4r * (x0r - x0i); + y10i = wn4r * (x0i + x0r); + x0r = x1r + x3i; + x0i = x1i - x3r; + y14r = wn4r * (x0r + x0i); + y14i = wn4r * (x0i - x0r); + x0r = a[6] + a[22]; + x0i = a[7] + a[23]; + x1r = a[6] - a[22]; + x1i = a[7] - a[23]; + x2r = a[14] + a[30]; + x2i = a[15] + a[31]; + x3r = a[14] - a[30]; + x3i = a[15] - a[31]; + y3r = x0r + x2r; + y3i = x0i + x2i; + y7r = x0r - x2r; + y7i = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + y11r = wk1i * x0r - wk1r * x0i; + y11i = wk1i * x0i + wk1r * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + y15r = wk1r * x0r - wk1i * x0i; + y15i = wk1r * x0i + wk1i * x0r; + x0r = y12r - y14r; + x0i = y12i - y14i; + x1r = y12r + y14r; + x1i = y12i + y14i; + x2r = y13r - y15r; + x2i = y13i - y15i; + x3r = y13r + y15r; + x3i = y13i + y15i; + a[24] = x0r + x2r; + a[25] = x0i + x2i; + a[26] = x0r - x2r; + a[27] = x0i - x2i; + a[28] = x1r - x3i; + a[29] = x1i + x3r; + a[30] = x1r + x3i; + a[31] = x1i - x3r; + x0r = y8r + y10r; + x0i = y8i + y10i; + x1r = y8r - y10r; + x1i = y8i - y10i; + x2r = y9r + y11r; + x2i = y9i + y11i; + x3r = y9r - y11r; + x3i = y9i - y11i; + a[16] = x0r + x2r; + a[17] = x0i + x2i; + a[18] = x0r - x2r; + a[19] = x0i - x2i; + a[20] = x1r - x3i; + a[21] = x1i + x3r; + a[22] = x1r + x3i; + a[23] = x1i - x3r; + x0r = y5r - y7i; + x0i = y5i + y7r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + x0r = y5r + y7i; + x0i = y5i - y7r; + x3r = wn4r * (x0r - x0i); + x3i = wn4r * (x0i + x0r); + x0r = y4r - y6i; + x0i = y4i + y6r; + x1r = y4r + y6i; + x1i = y4i - y6r; + a[8] = x0r + x2r; + a[9] = x0i + x2i; + a[10] = x0r - x2r; + a[11] = x0i - x2i; + a[12] = x1r - x3i; + a[13] = x1i + x3r; + a[14] = x1r + x3i; + a[15] = x1i - x3r; + x0r = y0r + y2r; + x0i = y0i + y2i; + x1r = y0r - y2r; + x1i = y0i - y2i; + x2r = y1r + y3r; + x2i = y1i + y3i; + x3r = y1r - y3r; + x3i = y1i - y3i; + a[0] = x0r + x2r; + a[1] = x0i + 
x2i; + a[2] = x0r - x2r; + a[3] = x0i - x2i; + a[4] = x1r - x3i; + a[5] = x1i + x3r; + a[6] = x1r + x3i; + a[7] = x1i - x3r; +} +static void cftf162(f32 PTR4* a, f32 PTR4* w) +{ + f32 wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, + x0r, x0i, x1r, x1i, x2r, x2i, + y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, + y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, + y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, + y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; + + wn4r = w[1]; + wk1r = w[4]; + wk1i = w[5]; + wk3r = w[6]; + wk3i = -w[7]; + wk2r = w[8]; + wk2i = w[9]; + x1r = a[0] - a[17]; + x1i = a[1] + a[16]; + x0r = a[8] - a[25]; + x0i = a[9] + a[24]; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + y0r = x1r + x2r; + y0i = x1i + x2i; + y4r = x1r - x2r; + y4i = x1i - x2i; + x1r = a[0] + a[17]; + x1i = a[1] - a[16]; + x0r = a[8] + a[25]; + x0i = a[9] - a[24]; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + y8r = x1r - x2i; + y8i = x1i + x2r; + y12r = x1r + x2i; + y12i = x1i - x2r; + x0r = a[2] - a[19]; + x0i = a[3] + a[18]; + x1r = wk1r * x0r - wk1i * x0i; + x1i = wk1r * x0i + wk1i * x0r; + x0r = a[10] - a[27]; + x0i = a[11] + a[26]; + x2r = wk3i * x0r - wk3r * x0i; + x2i = wk3i * x0i + wk3r * x0r; + y1r = x1r + x2r; + y1i = x1i + x2i; + y5r = x1r - x2r; + y5i = x1i - x2i; + x0r = a[2] + a[19]; + x0i = a[3] - a[18]; + x1r = wk3r * x0r - wk3i * x0i; + x1i = wk3r * x0i + wk3i * x0r; + x0r = a[10] + a[27]; + x0i = a[11] - a[26]; + x2r = wk1r * x0r + wk1i * x0i; + x2i = wk1r * x0i - wk1i * x0r; + y9r = x1r - x2r; + y9i = x1i - x2i; + y13r = x1r + x2r; + y13i = x1i + x2i; + x0r = a[4] - a[21]; + x0i = a[5] + a[20]; + x1r = wk2r * x0r - wk2i * x0i; + x1i = wk2r * x0i + wk2i * x0r; + x0r = a[12] - a[29]; + x0i = a[13] + a[28]; + x2r = wk2i * x0r - wk2r * x0i; + x2i = wk2i * x0i + wk2r * x0r; + y2r = x1r + x2r; + y2i = x1i + x2i; + y6r = x1r - x2r; + y6i = x1i - x2i; + x0r = a[4] + a[21]; + x0i = a[5] - a[20]; + x1r = wk2i * x0r - wk2r * x0i; + x1i = wk2i * x0i + wk2r * x0r; + x0r 
= a[12] + a[29]; + x0i = a[13] - a[28]; + x2r = wk2r * x0r - wk2i * x0i; + x2i = wk2r * x0i + wk2i * x0r; + y10r = x1r - x2r; + y10i = x1i - x2i; + y14r = x1r + x2r; + y14i = x1i + x2i; + x0r = a[6] - a[23]; + x0i = a[7] + a[22]; + x1r = wk3r * x0r - wk3i * x0i; + x1i = wk3r * x0i + wk3i * x0r; + x0r = a[14] - a[31]; + x0i = a[15] + a[30]; + x2r = wk1i * x0r - wk1r * x0i; + x2i = wk1i * x0i + wk1r * x0r; + y3r = x1r + x2r; + y3i = x1i + x2i; + y7r = x1r - x2r; + y7i = x1i - x2i; + x0r = a[6] + a[23]; + x0i = a[7] - a[22]; + x1r = wk1i * x0r + wk1r * x0i; + x1i = wk1i * x0i - wk1r * x0r; + x0r = a[14] + a[31]; + x0i = a[15] - a[30]; + x2r = wk3i * x0r - wk3r * x0i; + x2i = wk3i * x0i + wk3r * x0r; + y11r = x1r + x2r; + y11i = x1i + x2i; + y15r = x1r - x2r; + y15i = x1i - x2i; + x1r = y0r + y2r; + x1i = y0i + y2i; + x2r = y1r + y3r; + x2i = y1i + y3i; + a[0] = x1r + x2r; + a[1] = x1i + x2i; + a[2] = x1r - x2r; + a[3] = x1i - x2i; + x1r = y0r - y2r; + x1i = y0i - y2i; + x2r = y1r - y3r; + x2i = y1i - y3i; + a[4] = x1r - x2i; + a[5] = x1i + x2r; + a[6] = x1r + x2i; + a[7] = x1i - x2r; + x1r = y4r - y6i; + x1i = y4i + y6r; + x0r = y5r - y7i; + x0i = y5i + y7r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + a[8] = x1r + x2r; + a[9] = x1i + x2i; + a[10] = x1r - x2r; + a[11] = x1i - x2i; + x1r = y4r + y6i; + x1i = y4i - y6r; + x0r = y5r + y7i; + x0i = y5i - y7r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + a[12] = x1r - x2i; + a[13] = x1i + x2r; + a[14] = x1r + x2i; + a[15] = x1i - x2r; + x1r = y8r + y10r; + x1i = y8i + y10i; + x2r = y9r - y11r; + x2i = y9i - y11i; + a[16] = x1r + x2r; + a[17] = x1i + x2i; + a[18] = x1r - x2r; + a[19] = x1i - x2i; + x1r = y8r - y10r; + x1i = y8i - y10i; + x2r = y9r + y11r; + x2i = y9i + y11i; + a[20] = x1r - x2i; + a[21] = x1i + x2r; + a[22] = x1r + x2i; + a[23] = x1i - x2r; + x1r = y12r - y14i; + x1i = y12i + y14r; + x0r = y13r + y15i; + x0i = y13i - y15r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + 
a[24] = x1r + x2r; + a[25] = x1i + x2i; + a[26] = x1r - x2r; + a[27] = x1i - x2i; + x1r = y12r + y14i; + x1i = y12i - y14r; + x0r = y13r - y15i; + x0i = y13i + y15r; + x2r = wn4r * (x0r - x0i); + x2i = wn4r * (x0i + x0r); + a[28] = x1r - x2i; + a[29] = x1i + x2r; + a[30] = x1r + x2i; + a[31] = x1i - x2r; +} + +static void cftf081(f32 PTR4* a, f32 PTR4* w) +{ + f32 wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + f32 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; + f32 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; + + wn4r = w[1]; + x0r = a[0] + a[8]; + x0i = a[1] + a[9]; + x1r = a[0] - a[8]; + x1i = a[1] - a[9]; + x2r = a[4] + a[12]; + x2i = a[5] + a[13]; + x3r = a[4] - a[12]; + x3i = a[5] - a[13]; + y0r = x0r + x2r; + y0i = x0i + x2i; + y2r = x0r - x2r; + y2i = x0i - x2i; + y1r = x1r - x3i; + y1i = x1i + x3r; + y3r = x1r + x3i; + y3i = x1i - x3r; + x0r = a[2] + a[10]; + x0i = a[3] + a[11]; + x1r = a[2] - a[10]; + x1i = a[3] - a[11]; + x2r = a[6] + a[14]; + x2i = a[7] + a[15]; + x3r = a[6] - a[14]; + x3i = a[7] - a[15]; + y4r = x0r + x2r; + y4i = x0i + x2i; + y6r = x0r - x2r; + y6i = x0i - x2i; + x0r = x1r - x3i; + x0i = x1i + x3r; + x2r = x1r + x3i; + x2i = x1i - x3r; + y5r = wn4r * (x0r - x0i); + y5i = wn4r * (x0r + x0i); + y7r = wn4r * (x2r - x2i); + y7i = wn4r * (x2r + x2i); + a[8] = y1r + y5r; + a[9] = y1i + y5i; + a[10] = y1r - y5r; + a[11] = y1i - y5i; + a[12] = y3r - y7i; + a[13] = y3i + y7r; + a[14] = y3r + y7i; + a[15] = y3i - y7r; + a[0] = y0r + y4r; + a[1] = y0i + y4i; + a[2] = y0r - y4r; + a[3] = y0i - y4i; + a[4] = y2r - y6i; + a[5] = y2i + y6r; + a[6] = y2r + y6i; + a[7] = y2i - y6r; +} + +static void cftf082(f32 PTR4* a, f32 PTR4* w) +{ + f32 wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i; + f32 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; + f32 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; + + wn4r = w[1]; + wk1r = w[4]; + wk1i = w[5]; + y0r = a[0] - a[9]; + y0i = a[1] + a[8]; + y1r = a[0] + a[9]; + y1i = a[1] - a[8]; + x0r = a[4] - a[13]; + x0i = a[5] + a[12]; + y2r = wn4r * (x0r - 
x0i); + y2i = wn4r * (x0i + x0r); + x0r = a[4] + a[13]; + x0i = a[5] - a[12]; + y3r = wn4r * (x0r - x0i); + y3i = wn4r * (x0i + x0r); + x0r = a[2] - a[11]; + x0i = a[3] + a[10]; + y4r = wk1r * x0r - wk1i * x0i; + y4i = wk1r * x0i + wk1i * x0r; + x0r = a[2] + a[11]; + x0i = a[3] - a[10]; + y5r = wk1i * x0r - wk1r * x0i; + y5i = wk1i * x0i + wk1r * x0r; + x0r = a[6] - a[15]; + x0i = a[7] + a[14]; + y6r = wk1i * x0r - wk1r * x0i; + y6i = wk1i * x0i + wk1r * x0r; + x0r = a[6] + a[15]; + x0i = a[7] - a[14]; + y7r = wk1r * x0r - wk1i * x0i; + y7i = wk1r * x0i + wk1i * x0r; + x0r = y0r + y2r; + x0i = y0i + y2i; + x1r = y4r + y6r; + x1i = y4i + y6i; + a[0] = x0r + x1r; + a[1] = x0i + x1i; + a[2] = x0r - x1r; + a[3] = x0i - x1i; + x0r = y0r - y2r; + x0i = y0i - y2i; + x1r = y4r - y6r; + x1i = y4i - y6i; + a[4] = x0r - x1i; + a[5] = x0i + x1r; + a[6] = x0r + x1i; + a[7] = x0i - x1r; + x0r = y1r - y3i; + x0i = y1i + y3r; + x1r = y5r - y7r; + x1i = y5i - y7i; + a[8] = x0r + x1r; + a[9] = x0i + x1i; + a[10] = x0r - x1r; + a[11] = x0i - x1i; + x0r = y1r + y3i; + x0i = y1i - y3r; + x1r = y5r + y7r; + x1i = y5i + y7i; + a[12] = x0r - x1i; + a[13] = x0i + x1r; + a[14] = x0r + x1i; + a[15] = x0i - x1r; +} +static void cftf040(f32 PTR4* a) +{ + f32 x0r; + f32 x0i; + f32 x1r; + f32 x1i; + f32 x2r; + f32 x2i; + f32 x3r; + f32 x3i; + + x0r = a[0] + a[4]; + x0i = a[1] + a[5]; + x1r = a[0] - a[4]; + x1i = a[1] - a[5]; + x2r = a[2] + a[6]; + x2i = a[3] + a[7]; + x3r = a[2] - a[6]; + x3i = a[3] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[2] = x1r - x3i; + a[3] = x1i + x3r; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[6] = x1r + x3i; + a[7] = x1i - x3r; +} + +static void cftb040(f32 PTR4* a) +{ + f32 x0r; + f32 x0i; + f32 x1r; + f32 x1i; + f32 x2r; + f32 x2i; + f32 x3r; + f32 x3i; + + x0r = a[0] + a[4]; + x0i = a[1] + a[5]; + x1r = a[0] - a[4]; + x1i = a[1] - a[5]; + x2r = a[2] + a[6]; + x2i = a[3] + a[7]; + x3r = a[2] - a[6]; + x3i = a[3] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + 
x2i; + a[2] = x1r + x3i; + a[3] = x1i - x3r; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[6] = x1r - x3i; + a[7] = x1i + x3r; +} + +static void cftx020(f32 PTR4* a) /* In-place two-point step on a[0..3]: (a[0], a[1]) receives the sum of the two pairs, (a[2], a[3]) the difference. */ +{ + f32 x0r; + f32 x0i; + + x0r = a[0] - a[2]; /* differences are saved first because a[0]/a[1] are overwritten next */ + x0i = a[1] - a[3]; + a[0] += a[2]; + a[1] += a[3]; + a[2] = x0r; + a[3] = x0i; +} +static void rftfsub(s32 n, f32 PTR4* a, s32 nc, f32 PTR4* c) /* In-place correction over mirrored index pairs (j, n - j) for j = 2, 4, ... < n / 2, using coefficients c[kk] and c[nc - kk] with kk stepped by ks = 2 * nc / (n / 2). Presumably the forward real-FFT post-processing step of Ooura's package -- TODO confirm against callers. NOTE(review): ks divides by m = n >> 1, so n < 2 would divide by zero; no guard is visible here. */ +{ + f32 xr; + f32 xi; + f32 yr; + f32 yi; + f32 wkr; + f32 wki; + f32 half = RFT_HALF_SCALE; + s32 m; + s32 ks; + s32 kk; + s32 j; + s32 k; + + m = n >> 1; + ks = (nc + nc) / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; /* mirror partner of j */ + kk += ks; + wkr = half - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr - wki * xi; + yi = wkr * xi + wki * xr; + a[j] -= yr; + a[j + 1] -= yi; + a[k] += yr; + a[k + 1] -= yi; /* both second components are decremented; only the first components get opposite signs */ + } +} + +static void rftbsub(s32 n, f32 PTR4* a, s32 nc, f32 PTR4* c) /* Same loop as rftfsub with two differences: half comes from RFT_INV_HALF_SCALE, and the wki terms in yr/yi have the opposite sign. Presumably the inverse-direction counterpart -- TODO confirm. The same n < 2 division caveat applies. */ +{ + f32 xr; + f32 xi; + f32 yr; + f32 yi; + f32 wkr; + f32 wki; + f32 half = RFT_INV_HALF_SCALE; + s32 m; + s32 ks; + s32 kk; + s32 j; + s32 k; + + m = n >> 1; + ks = (nc + nc) / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = half - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr + wki * xi; /* sign of the wki term flipped relative to rftfsub */ + yi = wkr * xi - wki * xr; + a[j] -= yr; + a[j + 1] -= yi; + a[k] += yr; + a[k + 1] -= yi; + } +} + +static void dctsub(s32 n, f32 PTR4* a, s32 nc, f32 PTR4* c) /* In-place step over mirrored elements (a[j], a[n - j]) for j = 1 .. n / 2 - 1: each pair is recombined with wkr = c[kk] + c[nc - kk] and wki = c[kk] - c[nc - kk], kk stepped by ks = nc / n; finally the middle element a[n / 2] is scaled by c[0]. NOTE(review): ks divides by n, so n == 0 would divide by zero. */ +{ + f32 xr; + f32 yr; + f32 wkr; + f32 wki; + s32 j; + s32 k; + s32 kk; + s32 m; + s32 ks; + + m = n >> 1; + ks = nc / n; + kk = 0; + for (j = 1; j < m; ++j) { + k = n - j; + kk += ks; + xr = a[k]; /* both inputs are read before either output is written */ + yr = a[j]; + wkr = c[kk] + c[nc - kk]; + wki = c[kk] - c[nc - kk]; + a[j] = wki * yr + wkr * xr; + a[k] = wkr * yr - wki * xr; + } + a[m] *= c[0]; +} + +const u32 BINK_RDFT_INVERSE_SCALE_BITS[] = { + 0x3f000000, /* IEEE-754 single-precision bit pattern of 0.5f */ +}; +const u32 BINK_FFT_TRIG_ONE_BITS[] = { + 0x3f800000, /* bit pattern of 1.0f */ +}; +const u32 BINK_FFT_INT_TO_FLOAT_BIAS[] = { + 0x43300000, 0x80000000, /* read high word first as one 64-bit double this is 2^52 + 2^31; presumably the usual PowerPC signed-int-to-float conversion constant -- TODO confirm */ +}; +const u32
BINK_FFT_HALF_SECANT_SCALE_BITS[] = { + 0x3fe00000, 0x00000000, /* read high word first as one 64-bit value, 0x3fe0000000000000 is the IEEE-754 double 0.5 */ +}; +const u32 BINK_FFT_SIX_BITS[] = { + 0x40c00000, /* bit pattern of 6.0f */ +}; +const u32 BINK_FFT_THREE_BITS[] = { + 0x40400000, /* bit pattern of 3.0f */ +}; +const u32 BINK_FFT_HALF_RECIP_SCALE_BITS[] = { + 0x3f000000, /* bit pattern of 0.5f */ +}; +const u32 BINK_DCT_TRIG_ONE_BITS[] = { + 0x3f800000, /* bit pattern of 1.0f */ +}; +const u32 BINK_DCT_INT_TO_FLOAT_BIAS[] = { + 0x43300000, 0x80000000, /* same two words as BINK_FFT_INT_TO_FLOAT_BIAS: double 2^52 + 2^31 when read high word first */ +}; +const u32 BINK_DCT_CENTER_SCALE_BITS[] = { + 0x3f000000, 0x00000000, /* 0.5f followed by a zero word; the second word is presumably layout padding -- TODO confirm */ +}; +const u32 BINK_DCT_HALF_SCALE_BITS[] = { + 0x3fe00000, 0x00000000, /* double 0.5 when read high word first */ +}; +const u32 BINK_CFT_ROT_ONE_BITS[] = { + 0x3f800000, /* bit pattern of 1.0f */ +}; +const u32 BINK_CFT_ROT_ZERO_BITS[] = { + 0x00000000, /* bit pattern of +0.0f */ +}; +const u32 BINK_CFT_INV_ROT_ONE_BITS[] = { + 0x3f800000, /* bit pattern of 1.0f */ +}; +const u32 BINK_CFT_INV_ROT_ZERO_BITS[] = { + 0x00000000, /* bit pattern of +0.0f */ +}; +const u32 BINK_RFT_HALF_SCALE_BITS[] = { + 0x3f000000, /* bit pattern of 0.5f */ +}; +const u32 BINK_RFT_INV_HALF_SCALE_BITS[] = { + 0x3f000000, 0x00000000, 0x00000000, /* 0.5f followed by two zero words; presumably trailing section padding -- TODO confirm */ +}; +#pragma dont_inline reset diff --git a/src/bink/src/sdk/fft.h b/src/bink/src/sdk/fft.h new file mode 100644 index 000000000..dff5c4c39 --- /dev/null +++ b/src/bink/src/sdk/fft.h @@ -0,0 +1,9 @@ +#ifndef BINK_SDK_FFT_H +#define BINK_SDK_FFT_H + +#include "bink.h" + +void rdft(u32 n, s32 isgn, f32 PTR4* a, s32 PTR4* ip, f32 PTR4* w); /* public transform entry point; a is the f32 data buffer, ip and w are caller-supplied s32 and f32 arrays. Parameter roles presumably follow Ooura's rdft (isgn = direction, ip = work area, w = coefficient table) -- TODO confirm against fft.c */ +void ddct(u32 n, s32 isgn, f32 PTR4* a, s32 PTR4* ip, f32 PTR4* w); /* same parameter list as rdft; presumably Ooura's ddct -- TODO confirm */ + +#endif diff --git a/src/bink/src/sdk/popmal.c b/src/bink/src/sdk/popmal.c index e380fc7b0..2b3724304 100644 --- a/src/bink/src/sdk/popmal.c +++ b/src/bink/src/sdk/popmal.c @@ -1,22 +1,86 @@ #include "popmal.h" +#include "binkngc.h" -u32 pushtot = NULL; -u32 pushcur = NULL; -u32 pushptr = NULL; -u32 pushamt = NULL; -u32 cursize = 32; +#define PUSHMALLOC_STATIC_SLOTS 32 /* capacity of the built-in ptrs/amt tables */ +#define PUSHMALLOC_GROW_RECORD_BYTES 16 /* bytes requested per current slot when the tables grow (radmalloc(cursize * 16)) */ +#define PUSHMALLOC_PTR_TABLE_BYTES 8 /* per-slot byte offset of the size table inside the grown block (newmem + cursize * 8) */ +#define PUSHMALLOC_ALIGN_BITS 5 /* log2 of the 32-byte unit used in the size adjustment */ +#define PUSHMALLOC_ALIGNMENT (1 << PUSHMALLOC_ALIGN_BITS) +#define PUSHMALLOC_ALIGN_MASK (PUSHMALLOC_ALIGNMENT - 1) +#define PUSHMALLOC_ALIGN_BIAS (PUSHMALLOC_ALIGNMENT + 1) /* 33; added before masking in pushmalloc's padding expression */ -// TODO: -// figure out
what arg 1 should be. pretty confident arg 2 is correct -void pushmalloc(BINKIO* io, HBINK bnk) +static void PTR4* PTR4* ptrs[PUSHMALLOC_STATIC_SLOTS]; /* built-in table of caller pointer slots, used until the first growth */ +static u32 amt[PUSHMALLOC_STATIC_SLOTS]; /* built-in table of padded byte counts, parallel to ptrs */ + +static u32 pushtot = 0; /* running sum of padded byte counts queued since the last popmalloc */ +static u32 pushcur = 0; /* number of queued entries */ +/* Pending suballocations: each entry stores the caller's pointer slot and byte count. */ +static void PTR4* PTR4* PTR4* pushptr = ptrs; +static u32 PTR4* pushamt = amt; +static u32 cursize = PUSHMALLOC_STATIC_SLOTS; /* current capacity of pushptr/pushamt, in entries */ + +void pushmalloc(void PTR4* PTR4* ptr, u32 amount) /* Queue a deferred sub-allocation: remember ptr (the location popmalloc will later fill with an address) together with the padded size, and add that size to pushtot. Nothing is allocated for the caller here; the tables themselves are doubled when full. */ { + if (cursize == pushcur) { /* tables are full: move both into one new block with twice the capacity */ + void PTR4* newmem = radmalloc(cursize * PUSHMALLOC_GROW_RECORD_BYTES); /* NOTE(review): the result is used by memcpy below without a null check */ + void PTR4* PTR4* PTR4* newptrs = (void PTR4* PTR4* PTR4*)newmem; /* pointer table sits at the start of the block */ + u32 PTR4* newamt = + (u32 PTR4*)((u8 PTR4*)newmem + (cursize * PUSHMALLOC_PTR_TABLE_BYTES)); /* size table starts cursize * 8 bytes in */ + + memcpy(newptrs, pushptr, cursize * sizeof(*pushptr)); + memcpy(newamt, pushamt, cursize * sizeof(*pushamt)); + + cursize += cursize; + if (cursize != PUSHMALLOC_STATIC_SLOTS * 2) { /* on the first growth pushptr still points at the static ptrs array, which must not be freed */ + radfree(pushptr); /* later growths: pushptr is the start of the previous radmalloc block, which also holds the old size table */ + } + + pushptr = newptrs; + pushamt = newamt; + } + + amount = Round32(amount); /* Round32 is defined elsewhere; presumably rounds up to a multiple of 32 -- TODO confirm */ + amount += ((((pushtot >> PUSHMALLOC_ALIGN_BITS) & PUSHMALLOC_ALIGN_MASK) - + ((amount >> PUSHMALLOC_ALIGN_BITS) & PUSHMALLOC_ALIGN_MASK) + + PUSHMALLOC_ALIGN_BIAS) & + PUSHMALLOC_ALIGN_MASK) + << PUSHMALLOC_ALIGN_BITS; /* adds between 0 and 31 extra 32-byte units, derived from bits 5..9 of pushtot and of amount */ + + pushtot += amount; + pushamt[pushcur] = amount; + { + volatile u32 PTR4* cur = &pushcur; /* pushcur is read and written back through a volatile pointer; presumably needed to reproduce the original code generation -- TODO confirm */ + u32 slot = *cur; + + ((void PTR4* PTR4* volatile PTR4*)pushptr)[slot] = ptr; /* record where the address must be written later */ + ++slot; + *cur = slot; + } } -u32 popmalloctotal() +u32 popmalloctotal(void) /* Returns pushtot: the total padded size queued by pushmalloc since the last popmalloc. */ { return pushtot; } -u32 popmalloc(HBINK bnk) +void PTR4* popmalloc(u32 amount) /* Performs one radmalloc of Round32(amount) + pushtot bytes, then hands every queued entry its address inside that block. Resets pushtot and pushcur whether or not the allocation succeeded; returns the block, or the null result of radmalloc. */ { + u32 i; + u32 base = Round32(amount); + void PTR4* mem = radmalloc(pushtot + base); + + pushtot = 0; + + if (mem != 0) { + /* Lay out all queued allocations immediately after the base object.
*/ + u8 PTR4* cur = (u8 PTR4*)mem + base; + + for (i = 0; i < pushcur; ++i) { + *pushptr[i] = cur; /* store this entry's address into the caller's pointer slot */ + cur += pushamt[i]; /* advance by the padded size recorded in pushmalloc */ + } + } + + pushcur = 0; /* the queue is emptied even when mem is null, so queued slots are then left unwritten */ + return mem; } diff --git a/src/bink/src/sdk/varbits.c b/src/bink/src/sdk/varbits.c index e69de29bb..ca9307c56 100644 --- a/src/bink/src/sdk/varbits.c +++ b/src/bink/src/sdk/varbits.c @@ -0,0 +1,58 @@ +#include "varbits.h" + +#define VARBITS_MAX_BITS 32 +#define VARBITS_LENS_COUNT (VARBITS_MAX_BITS + 1) /* one entry per width 0..32 */ +#define VARBITS_MAX_BITLEVEL 128 +#define VARBITS_BITLEVEL_COUNT (VARBITS_MAX_BITLEVEL + 1) /* one entry per value 0..128 */ + +const u32 VarBitsLens[VARBITS_LENS_COUNT] RAD_ATTRIBUTE_ALIGN(32) = { /* VarBitsLens[n] is the mask with the low n bits set, for n = 0..32 */ + 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, + 0x0000007F, 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF, 0x00001FFF, + 0x00003FFF, 0x00007FFF, 0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF, + 0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, + 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, +}; +const double VarBitsLensalign = 0.0; /* not referenced in varbits.c; presumably padding to reproduce the original data layout -- TODO confirm */ + +const u8 _bitlevels[VARBITS_BITLEVEL_COUNT] RAD_ATTRIBUTE_ALIGN(64) = { /* _bitlevels[v] is the number of bits needed to represent v: 0 -> 0, 1 -> 1, 2..3 -> 2, 4..7 -> 3, ..., 64..127 -> 7, 128 -> 8 */ + 0, 1, + 2, 2, + 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, +}; +const double _bitlevelsalign = 0.0; /* not referenced in varbits.c; presumably layout padding like VarBitsLensalign -- TODO confirm */ + +void VarBitsCopy(VARBITS PTR4* dest, VARBITS PTR4* src, u32 size) /* Copies size bits from the reader state *src to the writer state *dest: eight bits at a time while at least eight remain, then the remaining 1..7 bits in a single get/put. Both states are advanced. */ +{ + u32 value; + + while (size >= 8) { + if (src->bitlen > 7) { /* a whole byte is already in the reservoir */ + value = src->bits & GetBitsLen(8); + src->bits >>= 8; + src->bitlen -= 8; + } else { /* fewer than 8 bits buffered: pull the next 32-bit word and splice */ + register VARBITSTEMP word = *src->cur; + + value = (src->bits | (word << src->bitlen)) & GetBitsLen(8); + src->bits = word >> (8 - src->bitlen); /* drop the bits of word that were just consumed */ + src->bitlen += BITSTYPELEN - 8; +
VARBITS_ADVANCE_CUR(src->cur); + } + VarBitsPut(*dest, value, 8); + size -= 8; + } + + if (size) { + VarBitsGet(value, u32, *src, size); + VarBitsPut(*dest, value, size); + } +} diff --git a/src/bink/src/sdk/varbits.h b/src/bink/src/sdk/varbits.h new file mode 100644 index 000000000..3d75df08e --- /dev/null +++ b/src/bink/src/sdk/varbits.h @@ -0,0 +1,367 @@ +#ifndef BINK_SDK_VARBITS_H +#define BINK_SDK_VARBITS_H + +#include "bink.h" + +typedef u32 BITSTYPE; + +#define BITSTYPELEN 32 +#define BITSTYPEBYTES 4 +#define BITSTOPMASK (1UL << (BITSTYPELEN - 1)) +typedef BITSTYPE VARBITSTEMP; + +/* + * VARBITS can either live in a struct, or in local variables set up by + * VARBITSLOCAL(name). The local macros build variable names with ##. + */ +#define VARBITSLOCAL(name) \ + u32 PTR4* name##cur; \ + BITSTYPE name##bits; \ + u32 name##bitlen + +typedef struct VARBITS +{ + u32 PTR4* cur; /* Next source/dest word. */ + u32 PTR4* init; /* Original stream base for size/position calculations. */ + BITSTYPE bits; /* Low-order bit reservoir. */ + u32 bitlen; /* Valid bits currently in the reservoir. */ +} VARBITS; + +typedef struct VARBITSEND +{ + u32 PTR4* cur; + u32 PTR4* init; + BITSTYPE bits; + u32 bitlen; + /* The macros do not update end; decoders carry it for bounds checks. */ + u32 PTR4* end; +} VARBITSEND; + +extern const u32 VarBitsLens[BITSTYPELEN + 1]; + +#define VARBITS_ADVANCE_CUR(cur) ((cur) = (u32 PTR4*)((u8 PTR4*)(cur) + BITSTYPEBYTES)) + +/* + * GetBitsLen(bits) = (1 << bits) - 1, except that bits == 32 is supported + * and bits == 0 is not safe. + */ +#define GetBitsLen(val) (((u32)0xffffffff) >> (u32)(BITSTYPELEN - (val))) + +/* + * getbitlevel(n) is the number of bits that n uses for its on bits. + * NOTE: getbitlevel(0) = 0. 
+ */ +static RADINLINE u32 getbitlevelvar(register u32 value) /* Returns 32 minus the count of leading zero bits in value, i.e. the position of the highest set bit plus one. */ +{ + u32 leading; + + __asm__ volatile("cntlzw %0,%1" : "=r"(leading) : "r"(value)); /* PowerPC count-leading-zeros-word; yields 32 for value == 0, so the function returns 0 */ + return BITSTYPELEN - leading; +} + +#define VarBitsOpen(vb, pointer) /* start a VARBITS struct at pointer with an empty reservoir; also records pointer in init */ \ + do { \ + (vb).cur = (vb).init = (pointer); \ + (vb).bits = 0; \ + (vb).bitlen = 0; \ + } while (0) + +#define VarBitsLocalOpen(vb, pointer) /* same for the VARBITSLOCAL variable triple; there is no init variable to set */ \ + do { \ + vb##cur = (pointer); \ + vb##bits = 0; \ + vb##bitlen = 0; \ + } while (0) + +#define VarBitsPut(vb, val, size) /* append the low size bits of val above the bits already held; when the reservoir reaches 32 bits it is stored to *cur, cur advances one word, and the bits of val that did not fit become the new reservoir. Expands to a bare { } block rather than do/while(0), so it cannot be used as the unbraced body of an if that is followed by else. size is used to index VarBitsLens, so it must be 0..32. */ \ + { \ + u32 __s = size; \ + u32 __v = (val) & VarBitsLens[__s]; \ + (vb).bits |= __v << ((vb).bitlen); \ + (vb).bitlen += __s; \ + if ((vb).bitlen >= BITSTYPELEN) { \ + *((vb).cur) = (vb).bits; \ + VARBITS_ADVANCE_CUR((vb).cur); \ + (vb).bitlen -= BITSTYPELEN; \ + if ((vb).bitlen) { \ + (vb).bits = __v >> (__s - (vb).bitlen); \ + } else { \ + (vb).bits = 0; \ + } \ + } \ + } + +#define VarBitsPut1(vb, boolean) /* append a single bit: 1 if boolean is non-zero, otherwise 0; stores the word and resets the reservoir once 32 bits are held */ \ + do { \ + if (boolean) { \ + (vb).bits |= (1 << (vb).bitlen); \ + } \ + if (++(vb).bitlen == BITSTYPELEN) { \ + *((vb).cur) = (vb).bits; \ + VARBITS_ADVANCE_CUR((vb).cur); \ + (vb).bits = 0; \ + (vb).bitlen = 0; \ + } \ + } while (0) + +#define VarBitsPuta1(vb) /* append a constant 1 bit (VarBitsPut1 without the test) */ \ + do { \ + (vb).bits |= (1 << (vb).bitlen); \ + if (++(vb).bitlen == BITSTYPELEN) { \ + *((vb).cur) = (vb).bits; \ + VARBITS_ADVANCE_CUR((vb).cur); \ + (vb).bits = 0; \ + (vb).bitlen = 0; \ + } \ + } while (0) + +#define VarBitsPuta0(vb) /* append a constant 0 bit: only the bit count moves */ \ + do { \ + if (++(vb).bitlen == BITSTYPELEN) { \ + *((vb).cur) = (vb).bits; \ + VARBITS_ADVANCE_CUR((vb).cur); \ + (vb).bits = 0; \ + (vb).bitlen = 0; \ + } \ + } while (0) + +#define VarBitsPutAlign(vb) /* pad with zero bits up to the next 32-bit word boundary; does nothing when the reservoir is already empty */ \ + do { \ + u32 __s2 = (BITSTYPELEN - (vb).bitlen) & (BITSTYPELEN - 1); \ + if (__s2) { \ + VarBitsPut((vb), 0, __s2); \ + } \ + } while (0) + +#define VarBitsFlushtoMemOnly(vb) /* write a partially filled reservoir to *cur without advancing cur or clearing the state */ \ + do { \ + if ((vb).bitlen) { \ + *((vb).cur) = (vb).bits; \ + } \ + } while (0) + +#define VarBitsConvertPutToGet(gvb, pvb) /* build a reader state gvb positioned at the writer pvb's current bit offset; when pvb holds a partial word that word is read back from *cur, so it must already be in memory (see VarBitsFlushtoMemOnly) */ \ + do { \ + if ((pvb).bitlen) { \ + (gvb).bits = *((pvb).cur) >> (pvb).bitlen;
\ + (gvb).bitlen = BITSTYPELEN - (pvb).bitlen; \ + (gvb).cur = (u32 PTR4*)((u8 PTR4*)(pvb).cur + BITSTYPEBYTES); \ + } else { \ + (gvb).bits = 0; \ + (gvb).bitlen = 0; \ + (gvb).cur = (pvb).cur; \ + } \ + (gvb).init = (pvb).init; \ + } while (0) + +#define VarBitsFlush(vb) VarBitsPutAlign(vb) +/* Total bits written from init through the current reservoir. */ +#define VarBitsSize(vb) ((u32)((((u8 PTR4*)(vb).cur) - ((u8 PTR4*)(vb).init)) * 8 + (vb).bitlen)) + +#define VarBitsGet(val, type, vb, size) \ + { \ + u32 __s = (size); \ + u32 __count = (vb).bitlen; \ + if (__count >= __s) { \ + (val) = (type)(((vb).bits) & GetBitsLen(__s)); \ + ((vb).bits) >>= __s; \ + ((vb).bitlen) = __count - __s; \ + } else { \ + register VARBITSTEMP __word = *((vb).cur); \ + (val) = (type)((((vb).bits) | (__word << __count)) & GetBitsLen(__s)); \ + ((vb).bits) = __word >> (__s - __count); \ + ((vb).bitlen) = __count + BITSTYPELEN - __s; \ + VARBITS_ADVANCE_CUR((vb).cur); \ + } \ + } + +#define VarBitsGetWithCheck(val, type, vb, size, endp, dowhat) \ + do { \ + u32 __s = (size); \ + u32 __count = (vb).bitlen; \ + if (__count < __s) { \ + VARBITSTEMP __word; \ + if ((u8 PTR4*)(vb).cur >= (u8 PTR4*)(endp)) { \ + dowhat \ + } \ + __word = *((vb).cur); \ + (val) = (type)(((vb).bits | (__word << __count)) & GetBitsLen(__s)); \ + (vb).bits = __word >> (__s - __count); \ + (vb).bitlen = __count + BITSTYPELEN - __s; \ + VARBITS_ADVANCE_CUR((vb).cur); \ + } else { \ + (val) = (type)((vb).bits & GetBitsLen(__s)); \ + (vb).bits >>= __s; \ + (vb).bitlen = __count - __s; \ + } \ + } while (0) + +/* + * Get one bit. The temp receives unmasked bits for scratch use; the expression + * result is the actual bit. + */ +#define VarBitsGet1(vb, temp) \ + (((vb).bitlen == 0) \ + ? 
/*
 * Consume `size` bits from the reader state `vb` without producing a value.
 *
 * The state update is the same one VarBitsGet performs: if fewer than `size`
 * bits are buffered, the next word is loaded from (vb).cur, the bits that
 * complete the request are dropped from it, the rest becomes the reservoir,
 * and (vb).cur is stepped with VARBITS_ADVANCE_CUR; otherwise the reservoir
 * is simply shifted down by `size`.
 *
 * Presumably meant to follow a VarBitsPeek of the same size -- verify
 * against callers.
 */
#define VarBitsUse(vb, size) \
    do { \
        u32 __s = (size); \
        u32 __count = (vb).bitlen; \
        if (__count < __s) { \
            VARBITSTEMP __word = *((vb).cur); \
            (vb).bits = __word >> (__s - __count); \
            (vb).bitlen = __count + BITSTYPELEN - __s; \
            VARBITS_ADVANCE_CUR((vb).cur); \
        } else { \
            (vb).bits >>= __s; \
            (vb).bitlen = __count - __s; \
        } \
    } while (0)
/*
 * Shared body for the checked "local" bit readers.
 *
 * `vb` is a name prefix, not an object: the macro token-pastes it to reach
 * the caller's vb##cur, vb##bits and vb##bitlen variables.
 *
 *   val, type  destination lvalue and the cast applied to the result
 *   size       number of bits to read
 *   mask       mask applied to the extracted value (supplied by the caller
 *              rather than derived from size here)
 *   endp       end pointer compared against vb##cur before a refill
 *   dowhat     statement(s) pasted verbatim when a refill is needed and
 *              vb##cur >= endp
 *   load       macro/function used to fetch the next word from vb##cur
 *
 * NOTE: unless `dowhat` transfers control (return/goto/break), execution
 * falls through and the word at vb##cur is still loaded.
 */
#define VarBitsLocalGetWithCheckBase(val, type, vb, size, mask, endp, dowhat, load) \
    do { \
        u32 __s = (size); \
        u32 __count = vb##bitlen; \
        if (__count < __s) { \
            VARBITSTEMP __word; \
            if ((u8 PTR4*)vb##cur >= (u8 PTR4*)(endp)) { \
                dowhat \
            } \
            __word = load(vb##cur); \
            (val) = (type)((vb##bits | (__word << __count)) & (mask)); \
            vb##bits = __word >> (__s - __count); \
            vb##bitlen = __count + BITSTYPELEN - __s; \
            VARBITS_ADVANCE_CUR(vb##cur); \
        } else { \
            (val) = (type)(vb##bits & (mask)); \
            vb##bits >>= __s; \
            vb##bitlen = __count - __s; \
        } \
    } while (0)
/*
 * Branch-free sign classification of a value converted to s32:
 *   0 when val == 0, 1 when val < 0, 2 when val > 0.
 *
 * The first term is the sign bit of val. The second term keeps bit 1 of the
 * top two bits of -val, which is set only when -val is itself negative.
 * (Assumes u32/s32 are 32 bits wide, as the shift counts imply.)
 *
 * NOTE(review): for the most negative s32 the negation overflows; the result
 * for that input is not established here -- confirm if it can occur.
 */
#define CLASSIFY_SIGN(val) \
    ((((u32)((s32)(val))) >> 31) + ((((u32)(-(s32)(val))) >> 30) & 2))
diff --git a/src/dolphin/include/dolphin/dvd/dvd.h b/src/dolphin/include/dolphin/dvd/dvd.h index 2a0adc52e..5a64851ec 100644 --- a/src/dolphin/include/dolphin/dvd/dvd.h +++ b/src/dolphin/include/dolphin/dvd/dvd.h @@ -1,7 +1,7 @@ #ifndef _DOLPHIN_DVD_H #define _DOLPHIN_DVD_H -#include "types.h" +#include #include #define DVD_MIN_TRANSFER_SIZE 32 @@ -166,4 +166,4 @@ void __DVDPrintFatalMessage(); } #endif -#endif // _DOLPHIN_DVD \ No newline at end of file +#endif // _DOLPHIN_DVD diff --git a/src/dolphin/include/dolphin/dvd/dvdfs.h b/src/dolphin/include/dolphin/dvd/dvdfs.h index 9a18215b4..96182afe2 100644 --- a/src/dolphin/include/dolphin/dvd/dvdfs.h +++ b/src/dolphin/include/dolphin/dvd/dvdfs.h @@ -1,7 +1,8 @@ #ifndef _DOLPHIN_DVDFS_H #define _DOLPHIN_DVDFS_H -#include +#include +#include #ifdef __cplusplus extern "C" @@ -46,4 +47,4 @@ extern OSThreadQueue __DVDThreadQueue; // clang-format on #endif -#endif \ No newline at end of file +#endif diff --git a/src/dolphin/include/dolphin/os/OSInterrupt.h b/src/dolphin/include/dolphin/os/OSInterrupt.h index 8baa37d0b..eaedc3070 100644 --- a/src/dolphin/include/dolphin/os/OSInterrupt.h +++ b/src/dolphin/include/dolphin/os/OSInterrupt.h @@ -107,6 +107,9 @@ OSInterruptMask OSGetInterruptMask(void); OSInterruptMask OSSetInterruptMask(OSInterruptMask mask); OSInterruptMask __OSMaskInterrupts(OSInterruptMask mask); OSInterruptMask __OSUnmaskInterrupts(OSInterruptMask mask); +BOOL OSDisableInterrupts(void); +BOOL OSEnableInterrupts(void); +BOOL OSRestoreInterrupts(BOOL level); #ifdef __cplusplus } diff --git a/tools/asm_shape_search.py b/tools/asm_shape_search.py new file mode 100644 index 000000000..6278a1ff2 --- /dev/null +++ b/tools/asm_shape_search.py @@ -0,0 +1,325 @@ +#!/usr/bin/env python3 +"""Search for similar PowerPC assembly shapes. + +The query normally comes from an objdiff JSON symbol. Candidates can come from +objdiff JSON files or from PowerPC objdump text. 
@dataclass(frozen=True)
class Hit:
    """Best-scoring instruction window found inside one candidate function."""

    # Similarity of the window to the query; higher means a closer match.
    score: float
    # Copied from the FunctionAsm that was searched: where it was loaded from,
    # which side/source it belongs to, and the symbol name.
    origin: str
    side: str
    name: str
    # Half-open [start, end) instruction indices of the window within the
    # candidate's normalized instruction sequence.
    start: int
    end: int
    # The normalized instructions covered by that range.
    window: tuple[str, ...]
def load_json(path: Path) -> dict | None:
    """Read *path* as UTF-8 JSON and return its top-level object.

    The candidate scan feeds arbitrary ``.json`` files through this helper, so
    unusable input is reported as ``None`` instead of raising.

    Args:
        path: File to read.

    Returns:
        The decoded JSON object, or ``None`` when the file cannot be opened,
        is not valid UTF-8, is not valid JSON, or its top-level value is not
        a JSON object.
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # RecursionError is what the decoder raises on absurdly deep nesting.
        return None
    # Callers index the result with .get(); a list or scalar is not usable.
    return data if isinstance(data, dict) else None
def ngrams(tokens: tuple[str, ...], n: int) -> set[tuple[str, ...]]:
    """Return the set of contiguous length-*n* slices of *tokens*.

    A non-empty sequence shorter than *n* counts as a single gram made of the
    whole sequence; an empty one yields the empty set.
    """
    total = len(tokens)
    if total >= n:
        grams: set[tuple[str, ...]] = set()
        for offset in range(total - n + 1):
            grams.add(tokens[offset : offset + n])
        return grams
    if not tokens:
        return set()
    return {tokens}
def iter_candidate_paths(roots: Iterable[Path]) -> Iterator[Path]:
    """Yield every searchable file named by, or found beneath, *roots*.

    A root that is itself a file is yielded when its suffix is searchable.
    Directory roots are walked recursively; inside them ``report.json``,
    ``*.ctx`` names, files larger than 8 MiB and files whose size cannot be
    read are skipped.
    """
    wanted_suffixes = {".json", ".dump", ".txt", ".s", ".asm", ".lst", ".o", ".a"}
    size_cap = 8 * 1024 * 1024

    def is_candidate(entry: Path) -> bool:
        return entry.is_file() and entry.suffix.lower() in wanted_suffixes

    for root in roots:
        if is_candidate(root):
            yield root
            continue
        if not root.is_dir():
            continue
        for entry in root.rglob("*"):
            if not is_candidate(entry):
                continue
            if entry.name in {"report.json"} or entry.name.endswith(".ctx"):
                continue
            try:
                oversized = entry.stat().st_size > size_cap
            except OSError:
                continue
            if not oversized:
                yield entry
def main(argv: list[str] | None = None) -> int:
    """Run the shape search from the command line and print ranked hits.

    The query is the normalized instruction range of ``--symbol`` taken from
    an objdiff JSON (``--json``, or the one generated/reused for ``--unit``).
    Candidates come from the ``--root`` paths plus the optional build outputs;
    when none are given, ``reference_projects`` under the repository root is
    searched.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` after printing; invalid input exits through ``SystemExit``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--unit", choices=sorted(UNITS), help="generate/read current objdiff JSON for this unit")
    parser.add_argument("--json", type=Path, help="objdiff JSON to query instead of --unit")
    parser.add_argument("--symbol", required=True)
    parser.add_argument("--side", choices=["left", "right"], default="right")
    parser.add_argument("--start", type=int)
    parser.add_argument("--count", type=int)
    parser.add_argument("--root", action="append", type=Path, default=[])
    parser.add_argument("--include-build", action="store_true")
    parser.add_argument("--include-bink-json", action="store_true")
    parser.add_argument("--limit", type=int, default=20)
    parser.add_argument("--min-score", type=float, default=0.45)
    parser.add_argument("--ngram", type=int, default=4)
    parser.add_argument("--slack", type=int, default=6)
    parser.add_argument("--slow", action="store_true", help="use SequenceMatcher plus n-grams")
    parser.add_argument("--refresh", action="store_true")
    parser.add_argument("--show", type=int, default=8)
    args = parser.parse_args(argv)

    if args.json is None and args.unit is None:
        raise SystemExit("provide --unit or --json")
    # --json wins; the per-unit JSON is only produced when no --json is given.
    query_json = args.json or current_json_for_unit(args.unit, args.refresh)
    query = query_from_objdiff(query_json, args.symbol, args.side, args.start, args.count)

    # Relative --root values are resolved against the repository root.
    roots = [p if p.is_absolute() else ROOT / p for p in args.root]
    if args.include_build:
        roots.append(BUILD)
    if args.include_bink_json:
        roots.extend(sorted(BUILD.glob("bink*_current_fresh.json")))
        roots.extend(sorted(BUILD.glob("ngc*_current_fresh.json")))
        roots.extend(sorted(BUILD.glob("*_asm_shape_query.json")))
        roots.extend(sorted((BUILD / "flag_matrix").glob("*.json")))
    if not roots:
        roots = [ROOT / "reference_projects"]

    # Candidates are taken from both sides of every objdiff JSON; --side only
    # selects where the query itself is read from.
    sides = ("left", "right")
    hits: list[Hit] = []
    for func in iter_functions(iter_candidate_paths(roots), sides):
        # The n-gram-only score is the default; --slow switches best_hit to
        # its SequenceMatcher-based score.
        hit = best_hit(query, func, ngram=args.ngram, slack=args.slack, fast=not args.slow)
        if hit and hit.score >= args.min_score:
            hits.append(hit)

    # Best score first; ties are ordered by origin, name and window start.
    hits.sort(key=lambda h: (-h.score, h.origin, h.name, h.start))
    for hit in hits[: args.limit]:
        print(f"{hit.score:7.4f} {hit.origin} [{hit.side}] {hit.name} @{hit.start}:{hit.end}")
        # Only the first --show instructions of each window are printed.
        for row in hit.window[: args.show]:
            print(f" {row}")
        if len(hit.window) > args.show:
            print(" ...")
    return 0
def instructions(sym: dict) -> list[str]:
    """Return the formatted text of every instruction entry in *sym*.

    Entries that lack an ``instruction`` object or ``formatted`` text
    contribute an empty string, so the result keeps one element per entry of
    the symbol's ``instructions`` list.
    """
    return [
        (entry.get("instruction") or {}).get("formatted") or ""
        for entry in sym.get("instructions", [])
    ]
args.refresh).read_text(encoding="utf-8")) + left = {s.get("name"): s for s in data.get("left", {}).get("symbols", []) if s.get("kind") == "SYMBOL_FUNCTION"} + right = {s.get("name"): s for s in data.get("right", {}).get("symbols", []) if s.get("kind") == "SYMBOL_FUNCTION"} + for name, rsym in right.items(): + lsym = left.get(name) + if not lsym: + continue + pct = score(rsym) + if pct < args.min_score or pct > args.max_score: + continue + lrows = instructions(lsym) + rrows = instructions(rsym) + for idx in range(max(len(lrows), len(rrows))): + lraw = lrows[idx] if idx < len(lrows) else "" + rraw = rrows[idx] if idx < len(rrows) else "" + key = (norm(lraw), norm(rraw)) + if key[0] != key[1]: + clusters[key].append((unit, pct, idx, name, lraw, rraw)) + break + + ranked = sorted(clusters.items(), key=lambda item: (-len(item[1]), item[0])) + for (lnorm, rnorm), hits in ranked[: args.limit]: + print(f"\n## {len(hits)} hits: {lnorm} | {rnorm}") + for unit, pct, idx, name, lraw, rraw in sorted(hits, key=lambda h: (-h[1], h[0], h[3]))[:8]: + print(f"{unit:8} {pct:7.3f} @{idx:03d} {name}") + print(f" {lraw} | {rraw}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tools/bink_flag_matrix.py b/tools/bink_flag_matrix.py new file mode 100644 index 000000000..34fda51de --- /dev/null +++ b/tools/bink_flag_matrix.py @@ -0,0 +1,444 @@ +#!/usr/bin/env python3 +"""Probe Bink compiler flags against objdiff scores. + +The script compiles a configured Bink unit to a temporary object with candidate +compiler/flag changes, swaps that object into the normal build path long enough +to run objdiff, then restores the original object. 
+""" + +from __future__ import annotations + +import argparse +import itertools +import json +import os +import shlex +import shutil +import subprocess +import sys +import time +from contextlib import contextmanager +from pathlib import Path +from typing import Iterable + +ROOT = Path(__file__).resolve().parents[1] +BUILD = ROOT / "build" / "GQPE78" +OBJDIFF = ROOT / "build" / "tools" / "objdiff-cli.exe" +PRODG_ROOT = ROOT / "build" / "compilers" / "ProDG" +MWCC_ROOT = ROOT / "build" / "compilers" / "GC" +LOCKS = BUILD / ".bink_flag_matrix_locks" +LOCK_TIMEOUT_SECONDS = 600.0 + +BINK_FLAGS = [ + "-O2", + "-mcpu=750", + "-fno-exceptions", + "-Wno-inline", + "-nostdinc", + "-I", + "src/dolphin/src", + "-I", + "include", + "-I", + "src/dolphin/include", + "-D__GEKKO__", + "-I", + "src/bink/include", + "-I", + "src/PowerPC_EABI_Support/include", + "-G8", +] + +CW_COMMON_FLAGS = [ + "-nodefaults", + "-proc", + "gekko", + "-align", + "powerpc", + "-enum", + "int", + "-fp", + "hardware", + "-Cpp_exceptions", + "off", + "-W", + "off", + "-O4,p", + "-inline", + "auto", + "-pragma", + "cats off", + "-pragma", + "warn_notinlined off", + "-maxerrors", + "1", + "-nosyspath", + "-RTTI", + "off", + "-fp_contract", + "on", + "-str", + "reuse", + "-multibyte", + "-i", + "include", + "-i", + "src/PowerPC_EABI_Support/include", + "-i", + "src/dolphin/include", + "-i", + "src/dolphin/src", + "-i", + "src/bink/include", + "-i", + "src/bink/src", + "-i", + "src", + "-i", + "build/GQPE78/include", + "-DNDEBUG=1", + "-DBUILD_VERSION=1", + "-DVERSION_GQPE78", +] + +CW_PRESETS = { + "base": [], + "runtime": [ + "-use_lmw_stmw", + "on", + "-str", + "reuse,pool,readonly", + "-gccinc", + "-common", + "off", + "-inline", + "auto", + ], + "dolphin": [ + "-fp", + "fmadd", + "-fp_contract", + "off", + "-char", + "signed", + "-str", + "reuse", + "-common", + "off", + "-O4,p", + ], + "bfbb": [ + "-common", + "on", + "-char", + "unsigned", + "-str", + "reuse,pool,readonly", + "-use_lmw_stmw", + "on", 
+ "-inline", + "off", + "-gccinc", + "-i", + "include/inline", + "-i", + "include/rwsdk", + "-i", + "src/SB/Core/gc", + "-i", + "src/SB/Core/x", + "-i", + "src/SB/Game", + "-DGAMECUBE", + ], +} + +UNITS = { + "binkread": ("main/bink/src/sdk/decode/binkread", "src/bink/src/sdk/decode/binkread.c", "build/GQPE78/src/bink/src/sdk/decode/binkread.o", "c"), + "binkacd": ("main/bink/src/sdk/decode/binkacd", "src/bink/src/sdk/decode/binkacd.c", "build/GQPE78/src/bink/src/sdk/decode/binkacd.o", "c"), + "expand": ("main/bink/src/sdk/decode/expand", "src/bink/src/sdk/decode/expand.c", "build/GQPE78/src/bink/src/sdk/decode/expand.o", "c"), + "yuv": ("main/bink/src/sdk/decode/yuv", "src/bink/src/sdk/decode/yuv.cpp", "build/GQPE78/src/bink/src/sdk/decode/yuv.o", "c++"), + "binkngc": ("main/bink/src/sdk/decode/ngc/binkngc", "src/bink/src/sdk/decode/ngc/binkngc.c", "build/GQPE78/src/bink/src/sdk/decode/ngc/binkngc.o", "c"), + "ngcsnd": ("main/bink/src/sdk/decode/ngc/ngcsnd", "src/bink/src/sdk/decode/ngc/ngcsnd.c", "build/GQPE78/src/bink/src/sdk/decode/ngc/ngcsnd.o", "c"), + "ngcfile": ("main/bink/src/sdk/decode/ngc/ngcfile", "src/bink/src/sdk/decode/ngc/ngcfile.c", "build/GQPE78/src/bink/src/sdk/decode/ngc/ngcfile.o", "c"), + "ngcrgb": ("main/bink/src/sdk/decode/ngc/ngcrgb", "src/bink/src/sdk/decode/ngc/ngcrgb.c", "build/GQPE78/src/bink/src/sdk/decode/ngc/ngcrgb.o", "c"), + "ngcyuy2": ("main/bink/src/sdk/decode/ngc/ngcyuy2", "src/bink/src/sdk/decode/ngc/ngcyuy2.c", "build/GQPE78/src/bink/src/sdk/decode/ngc/ngcyuy2.o", "c"), + "varbits": ("main/bink/src/sdk/varbits", "src/bink/src/sdk/varbits.c", "build/GQPE78/src/bink/src/sdk/varbits.o", "c"), + "fft": ("main/bink/src/sdk/fft", "src/bink/src/sdk/fft.c", "build/GQPE78/src/bink/src/sdk/fft.o", "c"), + "dct": ("main/bink/src/sdk/dct", "src/bink/src/sdk/dct.c", "build/GQPE78/src/bink/src/sdk/dct.o", "c"), + "bitplane": ("main/bink/src/sdk/bitplane", "src/bink/src/sdk/bitplane.c", "build/GQPE78/src/bink/src/sdk/bitplane.o", "c"), +} + 
def replace_flag(flags: list[str], prefix: str, value: str) -> list[str]:
    """Return a copy of *flags* with every *prefix* flag swapped for *value*.

    Each flag starting with *prefix* is replaced in place; when none matches,
    *value* is appended instead. The input list is left untouched.
    """
    updated = [value if flag.startswith(prefix) else flag for flag in flags]
    if not any(flag.startswith(prefix) for flag in flags):
        updated.append(value)
    return updated
def score_candidate(unit_name: str, unit_path: str, object_path: Path, candidate: Path, report: Path) -> float | None:
    """Score *candidate* by temporarily installing it at *object_path*.

    The build object is backed up, the candidate is copied over it, objdiff is
    run for *unit_path* with JSON output written to *report*, and the original
    object is put back before returning.

    Args:
        unit_name: Short unit name; unused here, kept for the call signature.
        unit_path: objdiff unit identifier passed to ``-u``.
        object_path: Build object that objdiff reads for this unit.
        candidate: Freshly compiled object to evaluate.
        report: Destination of the objdiff JSON report.

    Returns:
        The section match percentage from the report, or ``None`` when objdiff
        fails or the report carries no score.
    """
    backup = object_path.with_suffix(object_path.suffix + ".flagmatrix.bak")
    if backup.exists():
        # A leftover backup means an earlier run was interrupted while a
        # candidate was installed: the backup is the real build object and
        # object_path may still hold that candidate. Restore it instead of
        # discarding it, otherwise the candidate would be backed up as if it
        # were the original.
        shutil.copy2(backup, object_path)
    else:
        # Copy under a temporary name and rename into place so that a backup
        # visible under its final name is always a complete copy.
        staging = backup.with_suffix(backup.suffix + ".tmp")
        shutil.copy2(object_path, staging)
        staging.replace(backup)
    try:
        shutil.copy2(candidate, object_path)
        result = run(
            [
                str(OBJDIFF),
                "diff",
                "-p",
                ".",
                "-u",
                unit_path,
                "-o",
                str(report),
                "--format",
                "json",
            ],
            quiet=True,
        )
        if result.returncode != 0:
            print(result.stdout.strip())
            return None
        return section_score(report)
    finally:
        # Always put the original object back, even if objdiff raised.
        if backup.exists():
            shutil.copy2(backup, object_path)
            backup.unlink(missing_ok=True)
def main(argv: list[str] | None = None) -> int:
    """Compile selected units under a matrix of compiler flags and rank the results.

    For each unit the current object is scored with ``score_existing`` to get
    a baseline.  ProDG and/or MWCC candidates are then compiled for every
    requested flag combination, scored with ``score_candidate``, and printed
    best score first.

    Args:
        argv: Argument list passed to ``argparse``; ``None`` lets argparse
            fall back to ``sys.argv``.

    Returns:
        Always ``0``.  Compile and scoring failures are reported per
        candidate and do not change the return value.

    Raises:
        subprocess.CalledProcessError: If the ``ninja`` build of a missing
            object fails (it is run with ``check=True``).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("units", nargs="*", choices=sorted(UNITS))
    parser.add_argument("--compiler", choices=("prodg", "mwcc", "both"), default="prodg")
    parser.add_argument("--prodg-versions", nargs="+", default=["3.5"])
    parser.add_argument("--opts", nargs="+", default=["-O1", "-O2", "-O3"])
    parser.add_argument("--small-data", nargs="+", default=["-G0", "-G4", "-G8"])
    parser.add_argument("--mw-versions", nargs="+", default=["1.2.5n", "1.3.2", "2.0p1", "2.6"])
    parser.add_argument("--mw-presets", nargs="+", choices=sorted(CW_PRESETS), default=["base", "runtime", "dolphin", "bfbb"])
    parser.add_argument("--extra", action="append", default=[])
    parser.add_argument("--limit", type=int, default=12, help="rows to print per unit; use 0 for all")
    parser.add_argument("--only-improvements", action="store_true", help="only print variants that beat the current object")
    args = parser.parse_args(argv)
    # Each --extra value is shell-split, so a single option may carry several flags.
    extras = list(itertools.chain.from_iterable(shlex.split(extra) for extra in args.extra))

    # Default unit selection when none is named on the command line.
    units = args.units or ["varbits", "binkngc", "ngcsnd", "binkacd"]
    # Candidate objects and their objdiff reports are written under this directory.
    temp = BUILD / "flag_matrix"
    temp.mkdir(parents=True, exist_ok=True)

    for unit_name in units:
        unit_path, source, object_rel, lang = UNITS[unit_name]
        object_path = ROOT / object_rel
        if not object_path.exists():
            # The current object is missing: ask ninja for it, using a
            # backslash-separated target name.
            subprocess.run(["ninja", object_rel.replace("/", "\\")], cwd=ROOT, check=True)

        with unit_lock(unit_name):
            baseline_report = temp / f"{unit_name}_baseline.json"
            baseline = score_existing(unit_path, baseline_report)
            # Each row is (score, human-readable label, report path relative to ROOT).
            rows: list[tuple[float, str, str]] = []
            if args.compiler in ("prodg", "both"):
                for version, opt, small_data in itertools.product(args.prodg_versions, args.opts, args.small_data):
                    flags = build_prodg_flags(lang, opt, small_data, extras)
                    label = f"ProDG/{version} {opt} {small_data}" + (f" {' '.join(extras)}" if extras else "")
                    # opt[1:] / small_data[2:] drop the "-" / "-G" prefixes for the file stem.
                    stem = f"{unit_name}_ProDG_{version.replace('.', '_')}_{opt[1:]}_{small_data[2:]}"
                    candidate = temp / f"{stem}.o"
                    report = temp / f"{stem}.json"
                    if not compile_prodg_candidate(source, candidate, flags, version):
                        print(f"{unit_name:<10} {label:<34} compile failed")
                        continue
                    score = score_candidate(unit_name, unit_path, object_path, candidate, report)
                    if score is None:
                        print(f"{unit_name:<10} {label:<34} score failed")
                        continue
                    rows.append((score, label, str(report.relative_to(ROOT))))

            if args.compiler in ("mwcc", "both"):
                for version, preset in itertools.product(args.mw_versions, args.mw_presets):
                    flags = build_mwcc_flags(lang, preset, source, extras)
                    label = f"GC/{version} {preset}" + (f" {' '.join(extras)}" if extras else "")
                    stem = f"{unit_name}_GC_{version.replace('.', '_')}_{preset}"
                    # MWCC candidates get their own directory, named after the source stem.
                    candidate = temp / f"{stem}" / f"{Path(source).stem}.o"
                    report = temp / f"{stem}.json"
                    if not compile_mwcc_candidate(source, candidate, flags, version):
                        print(f"{unit_name:<10} {label:<34} compile failed")
                        continue
                    score = score_candidate(unit_name, unit_path, object_path, candidate, report)
                    if score is None:
                        print(f"{unit_name:<10} {label:<34} score failed")
                        continue
                    rows.append((score, label, str(report.relative_to(ROOT))))

        print(f"\n## {unit_name}")
        if baseline is None:
            print("baseline=")
        else:
            print(f"baseline={baseline:.6g} {baseline_report.relative_to(ROOT)}")

        printed = 0
        # Tuples sort by score first, so reverse=True lists the best candidates first.
        for score, label, report in sorted(rows, reverse=True):
            delta = "" if baseline is None else f" {score - baseline:+9.6g}"
            if args.only_improvements and baseline is not None and score <= baseline:
                continue
            # A limit of 0 is falsy, which disables the cap.
            if args.limit and printed >= args.limit:
                break
            print(f"{score:9.6g}{delta} {label:<34} {report}")
            printed += 1

    return 0
def load_json(path: Path) -> Optional[Dict[str, Any]]:
    """Read an objdiff JSON report, returning ``None`` when it is unusable.

    Loading is deliberately best-effort: probe artifacts may be missing,
    truncated or not JSON at all, and callers skip anything that comes back
    as ``None``.

    Args:
        path: Location of the JSON file to read (decoded as UTF-8).

    Returns:
        The decoded top-level JSON object, or ``None`` if the file cannot be
        read, cannot be decoded, or does not contain a JSON object.
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError, RecursionError):
        # OSError: missing/unreadable file.  ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.  RecursionError is
        # what the decoder raises for absurdly deep nesting.
        return None
    # Every consumer calls ``.get`` / indexes by key, so a top-level list,
    # string or number is as useless as a parse failure.
    return data if isinstance(data, dict) else None
def side_symbols(data: Dict[str, Any], side: str) -> Dict[str, Dict[str, Any]]:
    """Index one side's symbol entries of a report by symbol name.

    Args:
        data: Decoded objdiff report.
        side: Key of the side to read (the callers in this file pass
            ``"left"`` or ``"right"``).

    Returns:
        Mapping from symbol name to its entry.  Entries without a truthy
        ``name`` are dropped; when a name repeats, the later entry wins.
    """
    entries = data.get(side, {}).get("symbols", [])
    return {name: entry for entry in entries if (name := entry.get("name"))}
def source_anchor(unit: str, symbol: str) -> Optional[str]:
    """Locate the first source line that looks like a definition of ``symbol``.

    A line qualifies when it contains ``<name>(`` as a whole word and does
    not end with ``;`` (which filters out prototypes and plain call
    statements).  Two names are tried per line, in this order: the symbol
    itself and, if it contains ``__``, the text before the first ``__``
    (presumably the unmangled function name).

    Args:
        unit: Key used to look up the unit's candidate source files.
        symbol: Symbol name as it appears in the objdiff report.

    Returns:
        ``"<path relative to ROOT>:<1-based line> <stripped line>"`` for the
        first hit, or ``None`` when no candidate source contains one.
    """
    names = [symbol]
    if "__" in symbol:
        names.append(symbol.split("__", 1)[0])

    # A symbol that *starts* with "__" produces an empty prefix.  An empty
    # name would compile to r"\b\s*\(", which matches almost any line with a
    # call in it and yields a bogus anchor, so empty names are skipped.
    # Compiling once here also keeps regex construction out of the line loop.
    matchers = [re.compile(rf"\b{re.escape(name)}\s*\(") for name in names if name]
    if not matchers:
        return None

    for src in unit_source_candidates(unit):
        if not src.exists():
            continue
        try:
            text = src.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8: retry leniently rather than giving up.
            text = src.read_text(errors="ignore")
        for index, line in enumerate(text.splitlines(), 1):
            if line.rstrip().endswith(";"):
                continue
            if any(matcher.search(line) for matcher in matchers):
                rel = src.relative_to(ROOT)
                return f"{rel}:{index} {line.strip()}"
    return None
def reference_terms(symbol: str) -> List[str]:
    """Build the ordered, de-duplicated search terms for a symbol.

    Candidates, in order: the symbol itself; the text before its first
    ``__`` when it contains one; and the symbol without its leading ``_``
    when it starts with one and is longer than a single character.  Empty
    candidates and candidates starting with ``[`` are discarded.

    Args:
        symbol: Symbol name to derive search terms from.

    Returns:
        The surviving candidates, first occurrence order preserved.
    """
    candidates = [symbol]
    prefix, separator, _ = symbol.partition("__")
    if separator:
        candidates.append(prefix)
    if len(symbol) > 1 and symbol[0] == "_":
        candidates.append(symbol[1:])

    usable = (term for term in candidates if term and not term.startswith("["))
    # dict.fromkeys keeps first-seen order while dropping repeats.
    return list(dict.fromkeys(usable))
def json_arg(unit: str, value: str) -> Path:
    """Resolve a command-line JSON argument to a path.

    Args:
        unit: Unit name, used as the file-name prefix for bare labels.
        value: Either a path or a bare label.

    Returns:
        ``value`` unchanged (as a ``Path``) when it ends in ``.json`` or has a
        directory component; otherwise ``BUILD / "<unit>_<value>.json"``.
    """
    candidate = Path(value)
    is_bare_label = candidate.suffix != ".json" and candidate.parent == Path(".")
    if is_bare_label:
        return BUILD / f"{unit}_{value}.json"
    return candidate
def delta(args: argparse.Namespace) -> int:
    """Print how per-symbol match percentages changed between two reports.

    The "before" report always comes from ``args.before``.  The "after"
    report comes from ``args.after`` when given, otherwise from the unit's
    current diff.  Only symbols present in both reports, with a score on both
    sides and a change larger than ``args.epsilon``, are listed; the largest
    absolute change comes first, with ties broken by larger size.

    Args:
        args: Parsed options; reads ``unit``, ``before``, ``after``,
            ``no_rebuild``, ``cached``, ``include_data``, ``epsilon`` and
            ``limit``.

    Returns:
        Always ``0``.

    Raises:
        SystemExit: If either report cannot be loaded.
    """
    before_path = json_arg(args.unit, args.before)
    after_path = (
        json_arg(args.unit, args.after)
        if args.after is not None
        else current_json(args.unit, not args.no_rebuild, not args.cached)
    )

    reports = [load_json(before_path), load_json(after_path)]
    if any(report is None for report in reports):
        raise SystemExit("could not load delta JSON")
    before, after = reports

    unit_before = section_score(before)
    unit_after = section_score(after)
    if not (unit_before is None or unit_after is None):
        print(f"## {args.unit} {unit_before:.6g}->{unit_after:.6g} ({unit_after - unit_before:+.6g})")

    previous = symbols(before)

    def changed_rows():
        # Yields (change, new, old, size, name) for each symbol that moved.
        for name, current in symbols(after).items():
            if not args.include_data and not is_function(current):
                continue
            earlier = previous.get(name)
            if earlier is None:
                continue
            old = earlier.get("match_percent")
            new = current.get("match_percent")
            if old is None or new is None:
                continue
            change = new - old
            if abs(change) > args.epsilon:
                yield (change, new, old, int(current.get("size") or 0), name)

    ranked = sorted(changed_rows(), key=lambda row: (abs(row[0]), row[3]), reverse=True)
    for change, new, old, size, name in ranked[: args.limit]:
        print(f"{change:+10.6g} {old:9.6g}->{new:<9.6g} size={size:<5} {name}")
    return 0
def right_symbol(data: Dict[str, Any], left: Dict[str, Any]) -> Dict[str, Any]:
    """Return the right-side symbol entry that a left-side symbol points at.

    Args:
        data: Decoded objdiff report.
        left: A left-side symbol entry; its ``target_symbol`` value is used as
            an index into the right side's symbol list.

    Returns:
        The right-side symbol entry at that index.

    Raises:
        SystemExit: If ``left`` has no ``target_symbol``.
        IndexError: If the index is outside the right side's symbol list.
    """
    index = left.get("target_symbol")
    if index is not None:
        candidates = data.get("right", {}).get("symbols", [])
        return candidates[index]
    raise SystemExit("left symbol has no target_symbol")
def instrs(sym: Dict[str, Any], include_diff: bool, ignore_relocs: bool) -> List[str]:
    """Render every instruction row of a symbol as one line of text.

    Args:
        sym: Symbol entry whose ``instructions`` list is rendered.
        include_diff: When true, decorate each line with the row's diff kind
            (prefix), its argument diff and its relocation (suffixes).
        ignore_relocs: Passed through to ``instr_text``.

    Returns:
        One string per row.  Rows without an instruction render as
        ``<diff_kind>``, or ``<gap>`` when the row has no ``diff_kind`` key.
    """

    def render(row: Dict[str, Any]) -> str:
        kind = row.get("diff_kind")
        prefix = f"{kind}:" if include_diff and kind else ""

        instruction = row.get("instruction")
        if instruction is None:
            return f"{prefix}<{row.get('diff_kind', 'gap')}>"

        suffix = ""
        if include_diff:
            if row.get("arg_diff"):
                suffix += f":{row.get('arg_diff')}"
            if instruction.get("relocation"):
                relocation = instruction["relocation"]
                suffix += f":reloc={relocation.get('target_symbol')}+{relocation.get('addend', '')}"
        return f"{prefix}{instr_text(instruction, ignore_relocs)}{suffix}"

    return [render(row) for row in sym.get("instructions", [])]
def data_item_bytes(item: Dict[str, Any]) -> bytes:
    """Decode the base64 payload of one ``data_diff`` entry.

    Args:
        item: A ``data_diff`` entry; its ``data`` value, when present, is
            expected to be base64 text.

    Returns:
        The decoded bytes, or ``b""`` when ``data`` is missing, empty or
        cannot be decoded.
    """
    encoded = item.get("data")
    if not encoded:
        return b""
    try:
        return base64.b64decode(encoded)
    except (ValueError, TypeError):
        # ValueError covers binascii.Error (bad padding/alphabet) and
        # non-ASCII text; TypeError covers payloads that are not str/bytes.
        # Anything else would be a programming error and should surface.
        return b""
def first_diff_index(sym: Dict[str, Any]) -> Optional[int]:
    """Return the position of the first instruction row flagged as different.

    Args:
        sym: Symbol entry whose ``instructions`` list is scanned.

    Returns:
        Zero-based index of the first row with a truthy ``diff_kind``, or
        ``None`` when no row has one (including when there are no rows).
    """
    rows = sym.get("instructions", [])
    flagged = (position for position, row in enumerate(rows) if row.get("diff_kind"))
    return next(flagged, None)
= sym.get("match_percent") + if score is None: + continue + if not args.include_data and not is_function(sym): + continue + if not args.include_matched and score >= 100.0 - args.epsilon: + continue + if args.min_score is not None and score < args.min_score: + continue + if args.max_score is not None and score > args.max_score: + continue + target_first: Optional[int] = None + left_text = "" + right_text = "" + if args.target: + try: + target_first, left_text, right_text = first_formatted_diff( + sym, right_symbol(data, sym), args.ignore_relocs + ) + except Exception: + pass + if args.reloc_only and target_first is not None: + continue + if args.shape_only and target_first is None: + continue + rows.append( + ( + score, + int(sym.get("size") or 0), + diff_count(sym), + first_diff_index(sym), + target_first, + left_text, + right_text, + name, + ) + ) + + if args.closest: + rows.sort(key=lambda row: (100.0 - row[0], -row[1])) + elif args.target_sort: + rows.sort(key=lambda row: (row[4] is None, row[4] or 0, -row[1])) + else: + rows.sort(key=lambda row: (row[0], -row[1])) + + for score, size, diffs, first, target_first, left_text, right_text, name in rows[: args.limit]: + first_text = "-" if first is None else str(first) + if args.target: + target_text = "-" if target_first is None else str(target_first) + print( + f"{score:9.6g} size={size:<5} diffs={diffs:<4} " + f"first={first_text:<4} tfirst={target_text:<4} {name}" + ) + if args.show_diff and target_first is not None: + print(f" {left_text} | {right_text}") + else: + print(f"{score:9.6g} size={size:<5} diffs={diffs:<4} first={first_text:<4} {name}") + return 0 + + +def diff_tag(target_first: Optional[int], left_text: str, right_text: str) -> str: + if target_first is None: + return "reloc" + if left_text.startswith("<") or right_text.startswith("<"): + return "insert/delete" + left_op = left_text.split(" ", 1)[0] + right_op = right_text.split(" ", 1)[0] + if left_op != right_op: + return "opcode" + if 
def is_branch_opcode(opcode: str) -> bool:
    """Tell whether a mnemonic is a branch other than a direct call.

    ``diff_tag`` uses this to label a same-opcode mismatch as
    ``"branch-target"``.  Direct calls (``bl`` and its absolute form ``bla``)
    are excluded.

    The previous test rejected every mnemonic *starting with* ``bl``, which
    also threw out the conditional branches ``blt`` and ``ble`` (and their
    hinted or linked spellings).  Only the call mnemonics themselves are
    excluded now, matching the ``mnemonic != "bl"`` check in ``instr_text``.

    Args:
        opcode: Instruction mnemonic, e.g. ``"beq"`` or ``"bl"``.

    Returns:
        ``True`` for any mnemonic starting with ``b`` except ``bl``/``bla``.
    """
    # NOTE(review): assumes every mnemonic starting with "b" is a branch,
    # as the original test did.
    return opcode.startswith("b") and opcode not in ("bl", "bla")
def next_candidates(args: argparse.Namespace) -> int:
    """Rank unmatched symbols across units and print the most promising ones.

    Each symbol that survives the score and tag filters is paired with the
    best improvement seen in earlier probe artifacts (via
    ``best_artifact_symbols``).  Symbols with a known artifact gain come
    first, larger gains ahead of smaller ones.  The remaining order is: fewer
    differing instructions, higher current score, earlier first divergence,
    larger size.

    Args:
        args: Parsed options; reads ``units``, ``only_tags``, ``skip_tags``,
            ``no_rebuild``, ``cached``, ``wide``, ``include_data``,
            ``include_stale_target``, ``epsilon``, ``min_artifact_unit_gain``,
            ``include_matched``, ``min_score``, ``max_score``,
            ``ignore_relocs``, ``limit``, ``show_diff``, ``show_refs`` and
            ``ref_limit``.

    Returns:
        Always ``0``.
    """
    # Sort key stand-in for "no formatted divergence", so such rows sort last.
    no_target_first = 1000000

    units = args.units or list(UNITS)
    only_tags = tag_set(args.only_tags)
    skip_tags = tag_set(args.skip_tags)
    rows = []

    for unit in units:
        base_path = current_json(unit, not args.no_rebuild, not args.cached)
        data = load_json(base_path)
        if data is None:
            continue
        base_unit_score = section_score(data)
        base_syms = symbols(data)
        best_artifacts = best_artifact_symbols(
            unit,
            base_path,
            base_syms,
            target_fingerprint(data),
            args.wide,
            args.include_data,
            args.include_stale_target,
            args.epsilon,
            base_unit_score,
            args.min_artifact_unit_gain,
        )

        for name, sym in base_syms.items():
            score = sym.get("match_percent")
            if score is None:
                continue
            if not args.include_data and not is_function(sym):
                continue
            if not args.include_matched and score >= 100.0 - args.epsilon:
                continue
            if args.min_score is not None and score < args.min_score:
                continue
            if args.max_score is not None and score > args.max_score:
                continue

            try:
                target_first, left_text, right_text = first_formatted_diff(
                    sym, right_symbol(data, sym), args.ignore_relocs
                )
            except (Exception, SystemExit):
                # right_symbol raises SystemExit for a symbol without a
                # target, and SystemExit is not an Exception subclass.
                # Without naming it here, one such symbol would end the whole
                # command instead of taking this fallback.
                target_first, left_text, right_text = None, "", ""
            tag = diff_tag(target_first, left_text, right_text)
            if only_tags and tag not in only_tags:
                continue
            if skip_tags and tag in skip_tags:
                continue

            old, new, artifact = best_artifacts.get(name, (score, score, ""))
            artifact_gain = new - old
            rows.append(
                (
                    artifact_gain,
                    score,
                    diff_count(sym),
                    target_first if target_first is not None else no_target_first,
                    int(sym.get("size") or 0),
                    unit,
                    name,
                    tag,
                    left_text,
                    right_text,
                    new,
                    artifact,
                )
            )

    rows.sort(key=lambda row: (row[0] <= args.epsilon, -row[0], row[2], 100.0 - row[1], row[3], -row[4]))
    for (
        artifact_gain,
        score,
        diffs,
        target_first,
        size,
        unit,
        name,
        tag,
        left_text,
        right_text,
        artifact_score,
        artifact,
    ) in rows[: args.limit]:
        target_text = "-" if target_first == no_target_first else str(target_first)
        artifact_text = ""
        if artifact_gain > args.epsilon:
            artifact_text = f" artifact={artifact_score:.6g} +{artifact_gain:.6g} {artifact}"
        print(
            f"{unit:<8} {score:9.6g} size={size:<5} diffs={diffs:<4} "
            f"tfirst={target_text:<4} {tag:<13} {name}{artifact_text}"
        )
        if args.show_diff and target_first != no_target_first:
            print(f" {left_text} | {right_text}")
        # Anchors are resolved only for rows that are actually printed: each
        # lookup rescans files, so doing it for every candidate is wasted work.
        source = source_anchor(unit, name)
        if source:
            print(f" src: {source}")
        ghidra = ghidra_anchor(name)
        if ghidra:
            print(f" ghidra: {ghidra}")
        if args.show_refs:
            for hit in reference_hits(name, args.ref_limit):
                print(f" ref: {hit}")
    return 0
in args.symbols: + hits = reference_hits(symbol, args.limit) + print(symbol) + if not hits: + print(" no reference hits") + continue + for hit in hits: + print(f" {hit}") + return 0 + + +def patterns(args: argparse.Namespace) -> int: + units = args.units or list(UNITS) + only_tags = tag_set(args.only_tags) + skip_tags = tag_set(args.skip_tags) + groups: Dict[Tuple[str, str, str], List[Tuple[str, str, float, int, Optional[int]]]] = {} + + for unit in units: + base_path = current_json(unit, not args.no_rebuild, not args.cached) + data = load_json(base_path) + if data is None: + continue + + for name, sym in symbols(data).items(): + score = sym.get("match_percent") + if score is None: + continue + if not args.include_data and not is_function(sym): + continue + if not args.include_matched and score >= 100.0 - args.epsilon: + continue + if args.min_score is not None and score < args.min_score: + continue + if args.max_score is not None and score > args.max_score: + continue + + try: + target_first, left_text, right_text = first_formatted_diff( + sym, right_symbol(data, sym), args.ignore_relocs + ) + except Exception: + target_first, left_text, right_text = None, "", "" + if args.shape_only and target_first is None: + continue + if args.reloc_only and target_first is not None: + continue + + tag = diff_tag(target_first, left_text, right_text) + if only_tags and tag not in only_tags: + continue + if skip_tags and tag in skip_tags: + continue + + key = (tag, abstract_text(left_text, args.abstract_regs), abstract_text(right_text, args.abstract_regs)) + groups.setdefault(key, []).append( + (unit, name, score, int(sym.get("size") or 0), target_first) + ) + + rows = [] + for key, entries in groups.items(): + if len(entries) < args.min_count: + continue + avg_score = sum(entry[2] for entry in entries) / len(entries) + total_size = sum(entry[3] for entry in entries) + firsts = [entry[4] for entry in entries if entry[4] is not None] + first = min(firsts) if firsts else None + 
rows.append((len(entries), total_size, avg_score, first, key, entries)) + + rows.sort(key=lambda row: (-row[0], -row[1], row[2])) + for count, total_size, avg_score, first, key, entries in rows[: args.limit]: + tag, left_text, right_text = key + first_text = "-" if first is None else str(first) + print( + f"{count:3} funcs size={total_size:<5} avg={avg_score:9.6g} " + f"first={first_text:<4} {tag:<13} {left_text} | {right_text}" + ) + for unit, name, score, size, _ in entries[: args.examples]: + print(f" {unit:<8} {score:9.6g} size={size:<5} {name}") + return 0 + + +def shape(args: argparse.Namespace) -> int: + units = args.units or list(UNITS) + for unit in units: + base_path = current_json(unit, not args.no_rebuild, not args.cached) + base = load_json(base_path) + if base is None: + continue + base_syms = symbols(base) + base_target_syms = target_symbols(base) + base_target_fingerprint = target_fingerprint(base) + rows = [] + for path in artifact_paths(unit, args.wide): + if path.resolve() == base_path.resolve(): + continue + artifact = load_json(path) + if artifact is None: + continue + if not args.include_stale_target and target_fingerprint(artifact) != base_target_fingerprint: + continue + for name, artifact_sym in symbols(artifact).items(): + if args.symbol and name != args.symbol: + continue + if not args.include_data and not is_function(artifact_sym): + continue + base_sym = base_syms.get(name) + if base_sym is None: + continue + old_score = base_sym.get("match_percent") + new_score = artifact_sym.get("match_percent") + if old_score is None or new_score is None: + continue + score_gain = new_score - old_score + if score_gain < args.min_score_gain: + continue + base_target = base_target_syms.get(name) + if base_target is None: + try: + base_target = right_symbol(base, base_sym) + except Exception: + continue + artifact_target = base_target_syms.get(name) + if artifact_target is None: + continue + base_dist, base_first, _, _ = formatted_distance(base_sym, 
base_target, args.ignore_relocs) + artifact_dist, artifact_first, left_text, right_text = formatted_distance( + artifact_sym, artifact_target, args.ignore_relocs + ) + shape_gain = base_dist - artifact_dist + if shape_gain < args.min_shape_gain: + continue + rows.append( + ( + shape_gain, + score_gain, + old_score, + new_score, + base_dist, + artifact_dist, + base_first, + artifact_first, + int(artifact_sym.get("size") or 0), + path.name, + name, + left_text, + right_text, + ) + ) + + rows.sort(key=lambda row: (-row[0], -row[1], row[5], -row[8])) + if not args.all_artifacts: + unique_rows = [] + seen_symbols = set() + for row in rows: + name = row[10] + if name in seen_symbols: + continue + seen_symbols.add(name) + unique_rows.append(row) + rows = unique_rows + print(f"\n## {unit} current {section_score(base):.6g} ({base_path.name})") + for ( + shape_gain, + score_gain, + old_score, + new_score, + base_dist, + artifact_dist, + base_first, + artifact_first, + size, + artifact_name, + name, + left_text, + right_text, + ) in rows[: args.limit]: + base_first_text = "-" if base_first is None else str(base_first) + artifact_first_text = "-" if artifact_first is None else str(artifact_first) + print( + f"{shape_gain:+5} shape {base_dist}->{artifact_dist} " + f"score {old_score:.6g}->{new_score:.6g} ({score_gain:+.6g}) " + f"first {base_first_text}->{artifact_first_text} size={size:<5} {name} {artifact_name}" + ) + if args.show_diff and artifact_first is not None: + print(f" {left_text} | {right_text}") + return 0 + + +def datagaps(args: argparse.Namespace) -> int: + units = args.units or list(UNITS) + for unit in units: + cur_path = current_json(unit, not args.no_rebuild, not args.cached) + cur = load_json(cur_path) + if cur is None: + continue + + rows = [] + for name, sym in symbols(cur).items(): + if not sym.get("data_diff"): + continue + if is_function(sym): + continue + if sym.get("kind") == "SYMBOL_SECTION" and not args.sections: + continue + if 
name.startswith("[.") and not args.sections: + continue + if args.symbol and name != args.symbol: + continue + score = sym.get("match_percent") + if not args.include_matched and score is not None and score >= 100.0 - args.epsilon: + continue + if args.min_score is not None and (score is None or score < args.min_score): + continue + if args.max_score is not None and (score is None or score > args.max_score): + continue + + target_index = sym.get("target_symbol") + if target_index is None: + right = None + else: + try: + right = cur.get("right", {}).get("symbols", [])[target_index] + except Exception: + right = None + equal_bytes, current_only, target_only, changed, first, first_kind = data_gap_stats(sym, right) + if current_only == 0 and target_only == 0 and changed == 0 and not args.include_matched: + continue + rows.append( + ( + target_only, + current_only, + changed, + score is None, + 100.0 if score is None else score, + int(sym.get("size") or 0), + equal_bytes, + first, + first_kind, + name, + ) + ) + + rows.sort(key=lambda row: (-row[0], -row[1], -row[2], row[3], row[4], -row[5])) + print(f"\n## {unit} current {section_score(cur):.6g} ({cur_path.name})") + for target_only, current_only, changed, score_missing, score, size, equal_bytes, first, first_kind, name in rows[: args.limit]: + score_text = "-" if score_missing else f"{score:.6g}" + first_text = "-" if first is None else f"{first}:{first_kind}" + print( + f"{score_text:>9} size={size:<6} equal={equal_bytes:<6} " + f"target+={target_only:<5} current+={current_only:<5} changed={changed:<5} " + f"first={first_text:<18} {name}" + ) + return 0 + + +def compare(args: argparse.Namespace) -> int: + cur_path = current_json(args.unit, not args.no_rebuild, not args.cached) + cur = load_json(cur_path) + other = None if args.target and not args.artifact_target else load_json(Path(args.artifact)) + if cur is None or (other is None and not args.target): + raise SystemExit("could not load JSON") + + cur_left = 
left_symbol(cur, args.symbol) + if args.artifact_target: + assert other is not None + other_left = left_symbol(other, args.symbol) + current_name = args.artifact + other_sym = right_symbol(other, other_left) + artifact_name = "target" + cur_i = instrs(other_left, args.diff_metadata, args.ignore_relocs) + elif args.target: + other_sym = right_symbol(cur, cur_left) + current_name = str(cur_path) + artifact_name = "target" + cur_i = instrs(cur_left, args.diff_metadata, args.ignore_relocs) + else: + assert other is not None + other_sym = left_symbol(other, args.symbol) + current_name = str(cur_path) + artifact_name = args.artifact + cur_i = instrs(cur_left, args.diff_metadata, args.ignore_relocs) + + other_i = instrs(other_sym, args.diff_metadata, args.ignore_relocs) + first = None + for i, (left, right) in enumerate(zip(cur_i, other_i)): + if left != right: + first = i + break + if first is None and len(cur_i) != len(other_i): + first = min(len(cur_i), len(other_i)) + if first is None: + print("no formatted instruction differences") + if args.data_diff: + print_data_diff(args.symbol, cur_left, other_sym, args.data_limit) + return 0 + + start = max(0, first - args.context) + end = min(max(len(cur_i), len(other_i)), first + args.context + 1) + print(f"{args.symbol}: first formatted diff at instruction {first}") + print(f"current: {current_name}") + print(f"artifact: {artifact_name}") + for i in range(start, end): + left = cur_i[i] if i < len(cur_i) else "" + right = other_i[i] if i < len(other_i) else "" + mark = "==" if left == right else "!=" + print(f"{i:04d} {mark} {left:<42} | {right}") + if args.data_diff: + print_data_diff(args.symbol, cur_left, other_sym, args.data_limit) + return 0 + + +def print_data_diff(symbol: str, left: Dict[str, Any], right: Dict[str, Any], limit: int) -> None: + left_rows = data_diff_rows(left) + right_rows = data_diff_rows(right) + if not left_rows and not right_rows: + print(f"{symbol}: no data_diff chunks") + return + + count = 
min(max(len(left_rows), len(right_rows)), limit) + print(f"{symbol}: data_diff chunks (showing {count}/{max(len(left_rows), len(right_rows))})") + for i in range(count): + left_text = left_rows[i] if i < len(left_rows) else "" + right_text = right_rows[i] if i < len(right_rows) else "" + mark = "==" if left_text == right_text else "!=" + print(f"data {i:02d} {mark} {left_text:<80} | {right_text}") + + +def score(args: argparse.Namespace) -> int: + units = args.units or list(UNITS) + names = set(args.symbols) + for unit in units: + cur_path = current_json(unit, not args.no_rebuild, not args.cached) + cur = load_json(cur_path) + if cur is None: + continue + + unit_score = section_score(cur) + unit_size = section_size(cur) + if unit_score is None: + print(f"{unit:<10} score= ({cur_path.name})") + elif unit_size is None: + print(f"{unit:<10} score={unit_score:.6g} ({cur_path.name})") + else: + print(f"{unit:<10} score={unit_score:.6g} size={unit_size} ({cur_path.name})") + + if names: + for name, sym in symbols(cur).items(): + if name in names or (args.demangle_base and "__" in name and name.split("__", 1)[0] in names): + sym_score = sym.get("match_percent") + sym_size = sym.get("size") + if sym_score is None: + print(f" {name:<40} score= size={sym_size}") + else: + print(f" {name:<40} score={sym_score:.6g} size={sym_size}") + return 0 + + +def dump(args: argparse.Namespace) -> int: + cur_path = current_json(args.unit, not args.no_rebuild, not args.cached) + cur = load_json(cur_path) + if cur is None: + print(f"could not read current JSON: {cur_path}", file=sys.stderr) + return 1 + + sides = ("left", "right") if args.side == "both" else (args.side,) + missing = False + for requested_name in args.symbols: + for side in sides: + sym = side_symbols(cur, side).get(requested_name) + if sym is None and args.demangle_base: + for name, candidate in side_symbols(cur, side).items(): + if "__" in name and name.split("__", 1)[0] == requested_name: + sym = candidate + break + + if 
sym is None: + print(f"{args.unit} {side}: missing symbol {requested_name}", file=sys.stderr) + missing = True + continue + + sym_name = sym.get("name", requested_name) + sym_score = sym.get("match_percent") + sym_size = sym.get("size") + score_text = "" if sym_score is None else f"{sym_score:.6g}" + print(f"## {args.unit} {side} {sym_name} score={score_text} size={sym_size}") + + instructions = sym.get("instructions", []) + start = max(args.start, 0) + end = len(instructions) if args.count is None else min(len(instructions), start + args.count) + for index in range(start, end): + formatted = instructions[index].get("instruction", {}).get("formatted") + if formatted is None and not args.show_gaps: + continue + print(f"{index:04d}: {formatted}") + print() + + return 1 if missing else 0 + + +def main(argv: Optional[List[str]] = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + sub = parser.add_subparsers(dest="cmd", required=True) + + p_mine = sub.add_parser("mine", help="find old artifacts that improve current scores") + p_mine.add_argument("units", nargs="*") + p_mine.add_argument("--symbol") + p_mine.add_argument("--limit", type=int, default=12) + p_mine.add_argument("--symbol-limit", type=int, default=8) + p_mine.add_argument("--epsilon", type=float, default=0.00001) + p_mine.add_argument("--no-rebuild", action="store_true") + p_mine.add_argument("--cached", action="store_true", help="reuse the last current JSON") + p_mine.add_argument("--wide", action="store_true", help="search *unit* JSON names instead of strict prefixes") + p_mine.add_argument("--per-symbol", action="store_true") + p_mine.add_argument("--include-data", action="store_true", help="include non-function symbols") + p_mine.add_argument( + "--include-partial-section", + action="store_true", + help="allow section-score rows from artifacts that only cover part of the unit", + ) + p_mine.add_argument("--include-stale-target", action="store_true", help="include artifacts with old 
target JSON") + p_mine.add_argument("--include-section-only", action="store_true", help="show unit-score rows with no symbol wins") + p_mine.set_defaults(func=mine) + + p_snapshot = sub.add_parser("snapshot", help="save a current unit JSON for probe deltas") + p_snapshot.add_argument("unit", choices=sorted(UNITS)) + p_snapshot.add_argument("label", help="label or JSON path") + p_snapshot.add_argument("--no-rebuild", action="store_true") + p_snapshot.set_defaults(func=snapshot) + + p_delta = sub.add_parser("delta", help="compare a probe JSON against a saved baseline") + p_delta.add_argument("unit", choices=sorted(UNITS)) + p_delta.add_argument("before", help="label or JSON path") + p_delta.add_argument("after", nargs="?", help="label or JSON path; defaults to current fresh") + p_delta.add_argument("--limit", type=int, default=20) + p_delta.add_argument("--epsilon", type=float, default=0.00001) + p_delta.add_argument("--include-data", action="store_true", help="include non-function symbols") + p_delta.add_argument("--no-rebuild", action="store_true") + p_delta.add_argument("--cached", action="store_true", help="reuse the last current JSON") + p_delta.set_defaults(func=delta) + + p_gaps = sub.add_parser("gaps", help="show current low/close matching symbols") + p_gaps.add_argument("units", nargs="*") + p_gaps.add_argument("--limit", type=int, default=20) + p_gaps.add_argument("--min-score", type=float) + p_gaps.add_argument("--max-score", type=float) + p_gaps.add_argument("--closest", action="store_true", help="rank closest-to-100 first") + p_gaps.add_argument("--target-sort", action="store_true", help="rank by first target-shape diff") + p_gaps.add_argument("--target", action="store_true", help="show first formatted diff against target") + p_gaps.add_argument("--show-diff", action="store_true", help="print the first target diff text") + p_gaps.add_argument("--ignore-relocs", action="store_true", help="normalize relocation operands") + 
p_gaps.add_argument("--reloc-only", action="store_true", help="only show symbols with no normalized target diff") + p_gaps.add_argument("--shape-only", action="store_true", help="only show symbols with a normalized target diff") + p_gaps.add_argument("--include-matched", action="store_true", help="include 100%% symbols") + p_gaps.add_argument("--include-data", action="store_true", help="include non-function symbols") + p_gaps.add_argument("--epsilon", type=float, default=0.00001) + p_gaps.add_argument("--no-rebuild", action="store_true") + p_gaps.add_argument("--cached", action="store_true", help="reuse the last current JSON") + p_gaps.set_defaults(func=gaps) + + p_triage = sub.add_parser("triage", help="rank Bink symbols by artifact wins and target shape") + p_triage.add_argument("units", nargs="*") + p_triage.add_argument("--limit", type=int, default=12) + p_triage.add_argument("--min-score", type=float) + p_triage.add_argument("--max-score", type=float) + p_triage.add_argument("--epsilon", type=float, default=0.00001) + p_triage.add_argument("--wide", action="store_true", help="search *unit* JSON names instead of strict prefixes") + p_triage.add_argument("--ignore-relocs", action="store_true", help="normalize relocation operands") + p_triage.add_argument("--show-diff", action="store_true", help="print the first target diff text") + p_triage.add_argument("--include-matched", action="store_true", help="include 100%% symbols") + p_triage.add_argument("--include-data", action="store_true", help="include non-function symbols") + p_triage.add_argument("--reloc-only", action="store_true", help="only show symbols with no normalized target diff") + p_triage.add_argument("--shape-only", action="store_true", help="only show symbols with a normalized target diff") + p_triage.add_argument("--artifacts-only", action="store_true", help="only show symbols improved by artifacts") + p_triage.add_argument("--include-stale-target", action="store_true", help="include artifacts with 
old target JSON") + p_triage.add_argument("--only-tags", help="comma-separated diff tags to include") + p_triage.add_argument("--skip-tags", help="comma-separated diff tags to exclude") + p_triage.add_argument("--no-rebuild", action="store_true") + p_triage.add_argument("--cached", action="store_true", help="reuse the last current JSON") + p_triage.set_defaults(func=triage) + + p_next = sub.add_parser("next", help="rank actionable symbols with source/decomp anchors") + p_next.add_argument("units", nargs="*") + p_next.add_argument("--limit", type=int, default=16) + p_next.add_argument("--min-score", type=float, default=80.0) + p_next.add_argument("--max-score", type=float) + p_next.add_argument("--epsilon", type=float, default=0.00001) + p_next.add_argument("--wide", action="store_true", help="search *unit* JSON names instead of strict prefixes") + p_next.add_argument("--ignore-relocs", action="store_true", help="normalize relocation operands") + p_next.add_argument("--show-diff", action="store_true", help="print the first target diff text") + p_next.add_argument("--show-refs", action="store_true", help="print reference-project hits for each symbol") + p_next.add_argument("--ref-limit", type=int, default=3, help="maximum reference hits to print per symbol") + p_next.add_argument("--include-matched", action="store_true", help="include 100%% symbols") + p_next.add_argument("--include-data", action="store_true", help="include non-function symbols") + p_next.add_argument("--include-stale-target", action="store_true", help="include artifacts with old target JSON") + p_next.add_argument( + "--min-artifact-unit-gain", + type=float, + default=0.0, + help="minimum unit-score gain required before using an artifact as a per-symbol hint", + ) + p_next.add_argument("--only-tags", help="comma-separated diff tags to include") + p_next.add_argument( + "--skip-tags", + default="operand,reloc,branch-target", + help="comma-separated diff tags to skip; defaults to 
operand,reloc,branch-target", + ) + p_next.add_argument("--no-rebuild", action="store_true") + p_next.add_argument("--cached", action="store_true", help="reuse the last current JSON") + p_next.set_defaults(func=next_candidates) + + p_refs = sub.add_parser("refs", help="search bink reference projects for symbols") + p_refs.add_argument("symbols", nargs="+") + p_refs.add_argument("--limit", type=int, default=8) + p_refs.set_defaults(func=refs) + + p_patterns = sub.add_parser("patterns", help="group repeated current target-diff shapes") + p_patterns.add_argument("units", nargs="*") + p_patterns.add_argument("--limit", type=int, default=20) + p_patterns.add_argument("--examples", type=int, default=4) + p_patterns.add_argument("--min-count", type=int, default=2) + p_patterns.add_argument("--min-score", type=float) + p_patterns.add_argument("--max-score", type=float) + p_patterns.add_argument("--epsilon", type=float, default=0.00001) + p_patterns.add_argument("--ignore-relocs", action="store_true", help="normalize relocation operands") + p_patterns.add_argument("--abstract-regs", action="store_true", help="group patterns with different register numbers") + p_patterns.add_argument("--include-matched", action="store_true", help="include 100%% symbols") + p_patterns.add_argument("--include-data", action="store_true", help="include non-function symbols") + p_patterns.add_argument("--reloc-only", action="store_true", help="only show symbols with no normalized target diff") + p_patterns.add_argument("--shape-only", action="store_true", help="only show symbols with a normalized target diff") + p_patterns.add_argument("--only-tags", help="comma-separated diff tags to include") + p_patterns.add_argument("--skip-tags", help="comma-separated diff tags to exclude") + p_patterns.add_argument("--no-rebuild", action="store_true") + p_patterns.add_argument("--cached", action="store_true", help="reuse the last current JSON") + p_patterns.set_defaults(func=patterns) + + p_shape = 
sub.add_parser("shape", help="rank artifacts by normalized target-shape improvement") + p_shape.add_argument("units", nargs="*") + p_shape.add_argument("--symbol") + p_shape.add_argument("--limit", type=int, default=12) + p_shape.add_argument("--min-score-gain", type=float, default=0.00001) + p_shape.add_argument("--min-shape-gain", type=int, default=1) + p_shape.add_argument("--ignore-relocs", action="store_true", help="normalize relocation operands") + p_shape.add_argument("--show-diff", action="store_true", help="print artifact's first target diff") + p_shape.add_argument("--wide", action="store_true", help="search *unit* JSON names instead of strict prefixes") + p_shape.add_argument("--all-artifacts", action="store_true", help="show repeated artifact snapshots per symbol") + p_shape.add_argument("--include-stale-target", action="store_true", help="include artifacts with old target JSON") + p_shape.add_argument("--include-data", action="store_true", help="include non-function symbols") + p_shape.add_argument("--no-rebuild", action="store_true") + p_shape.add_argument("--cached", action="store_true", help="reuse the last current JSON") + p_shape.set_defaults(func=shape) + + p_datagaps = sub.add_parser("datagaps", help="rank current Bink data/object byte gaps") + p_datagaps.add_argument("units", nargs="*") + p_datagaps.add_argument("--symbol") + p_datagaps.add_argument("--limit", type=int, default=20) + p_datagaps.add_argument("--min-score", type=float) + p_datagaps.add_argument("--max-score", type=float) + p_datagaps.add_argument("--epsilon", type=float, default=0.00001) + p_datagaps.add_argument("--sections", action="store_true", help="include aggregate section symbols") + p_datagaps.add_argument("--include-matched", action="store_true", help="include 100%% data symbols") + p_datagaps.add_argument("--no-rebuild", action="store_true") + p_datagaps.add_argument("--cached", action="store_true", help="reuse the last current JSON") + 
p_datagaps.set_defaults(func=datagaps) + + p_compare = sub.add_parser("compare", help="compare current symbol shape with an artifact") + p_compare.add_argument("unit", choices=sorted(UNITS)) + p_compare.add_argument("artifact", help="artifact JSON, or '-' with --target") + p_compare.add_argument("symbol") + p_compare.add_argument("--context", type=int, default=25) + p_compare.add_argument("--target", action="store_true") + p_compare.add_argument("--artifact-target", action="store_true") + p_compare.add_argument("--diff-metadata", action="store_true") + p_compare.add_argument("--data-diff", action="store_true", help="also print data_diff chunks for object/section symbols") + p_compare.add_argument("--data-limit", type=int, default=16, help="maximum data_diff chunks to print") + p_compare.add_argument("--ignore-relocs", action="store_true", help="normalize relocation operands") + p_compare.add_argument("--no-rebuild", action="store_true") + p_compare.add_argument("--cached", action="store_true", help="reuse the last current JSON") + p_compare.set_defaults(func=compare) + + p_score = sub.add_parser("score", help="print current unit and selected symbol scores") + p_score.add_argument("units", nargs="*", choices=sorted(UNITS)) + p_score.add_argument("--symbol", dest="symbols", action="append", default=[]) + p_score.add_argument( + "--demangle-base", + action="store_true", + help="also match C++ names before the first double underscore", + ) + p_score.add_argument("--no-rebuild", action="store_true") + p_score.add_argument("--cached", action="store_true", help="reuse the last current JSON") + p_score.set_defaults(func=score) + + p_dump = sub.add_parser("dump", help="dump target/current instructions for current symbols") + p_dump.add_argument("unit", choices=sorted(UNITS)) + p_dump.add_argument("symbols", nargs="+") + p_dump.add_argument("--side", choices=("left", "right", "both"), default="both") + p_dump.add_argument("--start", type=int, default=0) + 
p_dump.add_argument("--count", type=int) + p_dump.add_argument("--show-gaps", action="store_true", help="show alignment gaps as None rows") + p_dump.add_argument( + "--demangle-base", + action="store_true", + help="also match C++ names before the first double underscore", + ) + p_dump.add_argument("--no-rebuild", action="store_true") + p_dump.add_argument("--cached", action="store_true", help="reuse the last current JSON") + p_dump.set_defaults(func=dump) + + args = parser.parse_args(argv) + return args.func(args) + + +if __name__ == "__main__": + sys.exit(main()) From 369ddc28365b0b00744efbdb7be722457fc76096 Mon Sep 17 00:00:00 2001 From: Zachary Canann Date: Mon, 18 May 2026 12:10:32 -0700 Subject: [PATCH 2/3] Prune non-Bink PR changes --- src/dolphin/include/dolphin/ax.h | 11 - src/dolphin/include/dolphin/dvd/dvd.h | 4 +- src/dolphin/include/dolphin/dvd/dvdfs.h | 5 +- src/dolphin/include/dolphin/os/OSInterrupt.h | 3 - tools/asm_shape_search.py | 325 ---- tools/bink_firstdiff_clusters.py | 126 -- tools/bink_flag_matrix.py | 444 ----- tools/bink_match.py | 1727 ------------------ 8 files changed, 4 insertions(+), 2641 deletions(-) delete mode 100644 tools/asm_shape_search.py delete mode 100644 tools/bink_firstdiff_clusters.py delete mode 100644 tools/bink_flag_matrix.py delete mode 100644 tools/bink_match.py diff --git a/src/dolphin/include/dolphin/ax.h b/src/dolphin/include/dolphin/ax.h index cb7989759..3ec66e257 100644 --- a/src/dolphin/include/dolphin/ax.h +++ b/src/dolphin/include/dolphin/ax.h @@ -240,17 +240,6 @@ typedef void (*AXCallback)(); #define AX_SRC_TYPE_4TAP_12K 3 #define AX_SRC_TYPE_4TAP_16K 4 -#define AX_SAMPLE_RATE 32000 - -#define AX_PB_STATE_STOP 0 -#define AX_PB_STATE_RUN 1 - -#define AX_PB_FORMAT_PCM16 10 -#define AX_PB_FORMAT_PCM8 25 - -#define AX_MIX_MODE_DEFAULT 3 -#define AX_ADDR_HIGH_SHIFT 16 - // sync flags #define AX_SYNC_FLAG_COPYALL (1 << 31) #define AX_SYNC_FLAG_UNK1 (1 << 30) // reserved, unused? 
diff --git a/src/dolphin/include/dolphin/dvd/dvd.h b/src/dolphin/include/dolphin/dvd/dvd.h index 5a64851ec..2a0adc52e 100644 --- a/src/dolphin/include/dolphin/dvd/dvd.h +++ b/src/dolphin/include/dolphin/dvd/dvd.h @@ -1,7 +1,7 @@ #ifndef _DOLPHIN_DVD_H #define _DOLPHIN_DVD_H -#include +#include "types.h" #include #define DVD_MIN_TRANSFER_SIZE 32 @@ -166,4 +166,4 @@ void __DVDPrintFatalMessage(); } #endif -#endif // _DOLPHIN_DVD +#endif // _DOLPHIN_DVD \ No newline at end of file diff --git a/src/dolphin/include/dolphin/dvd/dvdfs.h b/src/dolphin/include/dolphin/dvd/dvdfs.h index 96182afe2..9a18215b4 100644 --- a/src/dolphin/include/dolphin/dvd/dvdfs.h +++ b/src/dolphin/include/dolphin/dvd/dvdfs.h @@ -1,8 +1,7 @@ #ifndef _DOLPHIN_DVDFS_H #define _DOLPHIN_DVDFS_H -#include -#include +#include #ifdef __cplusplus extern "C" @@ -47,4 +46,4 @@ extern OSThreadQueue __DVDThreadQueue; // clang-format on #endif -#endif +#endif \ No newline at end of file diff --git a/src/dolphin/include/dolphin/os/OSInterrupt.h b/src/dolphin/include/dolphin/os/OSInterrupt.h index eaedc3070..8baa37d0b 100644 --- a/src/dolphin/include/dolphin/os/OSInterrupt.h +++ b/src/dolphin/include/dolphin/os/OSInterrupt.h @@ -107,9 +107,6 @@ OSInterruptMask OSGetInterruptMask(void); OSInterruptMask OSSetInterruptMask(OSInterruptMask mask); OSInterruptMask __OSMaskInterrupts(OSInterruptMask mask); OSInterruptMask __OSUnmaskInterrupts(OSInterruptMask mask); -BOOL OSDisableInterrupts(void); -BOOL OSEnableInterrupts(void); -BOOL OSRestoreInterrupts(BOOL level); #ifdef __cplusplus } diff --git a/tools/asm_shape_search.py b/tools/asm_shape_search.py deleted file mode 100644 index 6278a1ff2..000000000 --- a/tools/asm_shape_search.py +++ /dev/null @@ -1,325 +0,0 @@ -#!/usr/bin/env python3 -"""Search for similar PowerPC assembly shapes. - -The query normally comes from an objdiff JSON symbol. Candidates can come from -objdiff JSON files or from PowerPC objdump text. 
The matcher deliberately -normalizes registers, immediates, branch targets, and relocations so it can find -compiler idioms instead of exact byte matches. -""" - -from __future__ import annotations - -import argparse -import difflib -import json -import re -import subprocess -import sys -from dataclasses import dataclass -from pathlib import Path -from typing import Iterable, Iterator - - -ROOT = Path(__file__).resolve().parents[1] -BUILD = ROOT / "build" / "GQPE78" -OBJDUMP = ROOT / "build" / "binutils" / "powerpc-eabi-objdump.exe" -OBJDIFF = ROOT / "build" / "tools" / "objdiff-cli.exe" - -UNITS = { - "binkread": "main/bink/src/sdk/decode/binkread", - "binkacd": "main/bink/src/sdk/decode/binkacd", - "expand": "main/bink/src/sdk/decode/expand", - "yuv": "main/bink/src/sdk/decode/yuv", - "binkngc": "main/bink/src/sdk/decode/ngc/binkngc", - "ngcsnd": "main/bink/src/sdk/decode/ngc/ngcsnd", - "ngcfile": "main/bink/src/sdk/decode/ngc/ngcfile", - "ngcrgb": "main/bink/src/sdk/decode/ngc/ngcrgb", - "ngcyuy2": "main/bink/src/sdk/decode/ngc/ngcyuy2", - "varbits": "main/bink/src/sdk/varbits", - "fft": "main/bink/src/sdk/fft", - "dct": "main/bink/src/sdk/dct", - "bitplane": "main/bink/src/sdk/bitplane", -} - - -@dataclass(frozen=True) -class FunctionAsm: - origin: str - side: str - name: str - instructions: tuple[str, ...] - - -@dataclass(frozen=True) -class Hit: - score: float - origin: str - side: str - name: str - start: int - end: int - window: tuple[str, ...] 
- - -def load_json(path: Path) -> dict | None: - try: - with path.open("r", encoding="utf-8") as f: - return json.load(f) - except Exception: - return None - - -def formatted_instructions(sym: dict) -> tuple[str, ...]: - rows: list[str] = [] - for item in sym.get("instructions", []): - inst = item.get("instruction") or {} - text = inst.get("formatted") - if text: - rows.append(text) - return tuple(rows) - - -def functions_from_objdiff(path: Path, sides: Iterable[str]) -> Iterator[FunctionAsm]: - data = load_json(path) - if data is None or not isinstance(data, dict): - return - for side in sides: - for sym in data.get(side, {}).get("symbols", []): - if sym.get("kind") != "SYMBOL_FUNCTION": - continue - inst = formatted_instructions(sym) - if inst: - yield FunctionAsm(str(path.relative_to(ROOT)), side, sym.get("name") or "", inst) - - -OBJ_LABEL_RE = re.compile(r"^[0-9a-fA-F]+ <([^>]+)>:$") -OBJ_INST_RE = re.compile(r"^\s*[0-9a-fA-F]+:\s+(?:[0-9a-fA-F]{2}\s+){4}\s*(.+?)\s*$") - - -def functions_from_objdump_text(path: Path) -> Iterator[FunctionAsm]: - cur_name: str | None = None - cur_rows: list[str] = [] - - def flush() -> Iterator[FunctionAsm]: - nonlocal cur_name, cur_rows - if cur_name and cur_rows: - yield FunctionAsm(str(path.relative_to(ROOT)), "objdump", cur_name, tuple(cur_rows)) - cur_name = None - cur_rows = [] - - try: - lines = path.read_text(encoding="utf-8", errors="ignore").splitlines() - except OSError: - return - - for line in lines: - label = OBJ_LABEL_RE.match(line.strip()) - if label: - yield from flush() - cur_name = label.group(1) - continue - inst = OBJ_INST_RE.match(line) - if inst and cur_name: - text = inst.group(1).split("\t", 1)[-1].strip() - if text: - cur_rows.append(text) - yield from flush() - - -def functions_from_object(path: Path) -> Iterator[FunctionAsm]: - if not OBJDUMP.exists(): - return - result = subprocess.run( - [str(OBJDUMP), "-dr", str(path)], - cwd=ROOT, - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL, 
- ) - if result.returncode != 0: - return - dump = BUILD / "asm_shape_search" / (path.name + ".dump") - dump.parent.mkdir(parents=True, exist_ok=True) - dump.write_text(result.stdout, encoding="utf-8") - yield from functions_from_objdump_text(dump) - - -REG_RE = re.compile(r"\b([rf])(?:[0-9]|[12][0-9]|3[01])\b") -CR_RE = re.compile(r"\bcr[0-7]\b") -HEX_RE = re.compile(r"-?0x[0-9a-fA-F]+") -INT_RE = re.compile(r"(? str: - text = text.strip() - if not text: - return "" - mnemonic, _, rest = text.partition(" ") - mnemonic = mnemonic.lower() - if not operands: - return mnemonic - - rest = rest.lower() - if mnemonic.startswith("b"): - return f"{mnemonic} TARGET" - rest = REG_RE.sub(lambda m: m.group(1) + "N", rest) - rest = CR_RE.sub("crN", rest) - rest = HEX_RE.sub("IMM", rest) - rest = INT_RE.sub("IMM", rest) - rest = re.sub(r"\s+", "", rest) - return f"{mnemonic} {rest}" if rest else mnemonic - - -def normalize_many(rows: Iterable[str], *, operands: bool) -> tuple[str, ...]: - return tuple(row for row in (normalize_instruction(row, operands=operands) for row in rows) if row) - - -def ngrams(tokens: tuple[str, ...], n: int) -> set[tuple[str, ...]]: - if len(tokens) < n: - return {tokens} if tokens else set() - return {tokens[i : i + n] for i in range(len(tokens) - n + 1)} - - -def window_score(query: tuple[str, ...], window: tuple[str, ...], ngram: int) -> float: - ratio = difflib.SequenceMatcher(a=query, b=window, autojunk=False).ratio() - qn = ngrams(query, ngram) - wn = ngrams(window, ngram) - jaccard = len(qn & wn) / len(qn | wn) if qn and wn else 0.0 - return (0.65 * ratio) + (0.35 * jaccard) - - -def best_hit(query: tuple[str, ...], func: FunctionAsm, *, ngram: int, slack: int, fast: bool) -> Hit | None: - cand = normalize_many(func.instructions, operands=True) - if not cand: - return None - qlen = len(query) - min_len = max(1, qlen - slack) - max_len = min(len(cand), qlen + slack) - best: Hit | None = None - qn = ngrams(query, ngram) - for size in 
range(min_len, max_len + 1): - for start in range(0, len(cand) - size + 1): - window = cand[start : start + size] - if fast: - wn = ngrams(window, ngram) - score = len(qn & wn) / len(qn | wn) if qn and wn else 0.0 - else: - score = window_score(query, window, ngram) - if best is None or score > best.score: - best = Hit(score, func.origin, func.side, func.name, start, start + size, window) - return best - - -def current_json_for_unit(unit: str, refresh: bool) -> Path: - out = BUILD / f"{unit}_asm_shape_query.json" - if not refresh and out.exists(): - return out - subprocess.run( - [str(OBJDIFF), "diff", "-p", ".", "-u", UNITS[unit], "-o", str(out), "--format", "json"], - cwd=ROOT, - check=True, - ) - return out - - -def query_from_objdiff(path: Path, symbol: str, side: str, start: int | None, count: int | None) -> tuple[str, ...]: - data = load_json(path) - if data is None: - raise SystemExit(f"could not read objdiff JSON: {path}") - for sym in data.get(side, {}).get("symbols", []): - if sym.get("name") == symbol: - rows = formatted_instructions(sym) - lo = start or 0 - hi = None if count is None else lo + count - query = normalize_many(rows[lo:hi], operands=True) - if not query: - raise SystemExit(f"symbol has no instructions: {symbol}") - return query - raise SystemExit(f"symbol not found on {side}: {symbol}") - - -def iter_candidate_paths(roots: Iterable[Path]) -> Iterator[Path]: - exts = {".json", ".dump", ".txt", ".s", ".asm", ".lst", ".o", ".a"} - for root in roots: - if root.is_file() and root.suffix.lower() in exts: - yield root - elif root.is_dir(): - for path in root.rglob("*"): - if path.is_file() and path.suffix.lower() in exts: - if path.name in {"report.json"} or path.name.endswith(".ctx"): - continue - try: - if path.stat().st_size > 8 * 1024 * 1024: - continue - except OSError: - continue - yield path - - -def iter_functions(paths: Iterable[Path], sides: Iterable[str]) -> Iterator[FunctionAsm]: - for path in paths: - suffix = path.suffix.lower() - if 
suffix == ".json": - yield from functions_from_objdiff(path, sides) - elif suffix in {".dump", ".txt", ".s", ".asm", ".lst"}: - yield from functions_from_objdump_text(path) - elif suffix in {".o", ".a"}: - yield from functions_from_object(path) - - -def main(argv: list[str] | None = None) -> int: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--unit", choices=sorted(UNITS), help="generate/read current objdiff JSON for this unit") - parser.add_argument("--json", type=Path, help="objdiff JSON to query instead of --unit") - parser.add_argument("--symbol", required=True) - parser.add_argument("--side", choices=["left", "right"], default="right") - parser.add_argument("--start", type=int) - parser.add_argument("--count", type=int) - parser.add_argument("--root", action="append", type=Path, default=[]) - parser.add_argument("--include-build", action="store_true") - parser.add_argument("--include-bink-json", action="store_true") - parser.add_argument("--limit", type=int, default=20) - parser.add_argument("--min-score", type=float, default=0.45) - parser.add_argument("--ngram", type=int, default=4) - parser.add_argument("--slack", type=int, default=6) - parser.add_argument("--slow", action="store_true", help="use SequenceMatcher plus n-grams") - parser.add_argument("--refresh", action="store_true") - parser.add_argument("--show", type=int, default=8) - args = parser.parse_args(argv) - - if args.json is None and args.unit is None: - raise SystemExit("provide --unit or --json") - query_json = args.json or current_json_for_unit(args.unit, args.refresh) - query = query_from_objdiff(query_json, args.symbol, args.side, args.start, args.count) - - roots = [p if p.is_absolute() else ROOT / p for p in args.root] - if args.include_build: - roots.append(BUILD) - if args.include_bink_json: - roots.extend(sorted(BUILD.glob("bink*_current_fresh.json"))) - roots.extend(sorted(BUILD.glob("ngc*_current_fresh.json"))) - 
roots.extend(sorted(BUILD.glob("*_asm_shape_query.json"))) - roots.extend(sorted((BUILD / "flag_matrix").glob("*.json"))) - if not roots: - roots = [ROOT / "reference_projects"] - - sides = ("left", "right") - hits: list[Hit] = [] - for func in iter_functions(iter_candidate_paths(roots), sides): - hit = best_hit(query, func, ngram=args.ngram, slack=args.slack, fast=not args.slow) - if hit and hit.score >= args.min_score: - hits.append(hit) - - hits.sort(key=lambda h: (-h.score, h.origin, h.name, h.start)) - for hit in hits[: args.limit]: - print(f"{hit.score:7.4f} {hit.origin} [{hit.side}] {hit.name} @{hit.start}:{hit.end}") - for row in hit.window[: args.show]: - print(f" {row}") - if len(hit.window) > args.show: - print(" ...") - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/tools/bink_firstdiff_clusters.py b/tools/bink_firstdiff_clusters.py deleted file mode 100644 index 8431b6c03..000000000 --- a/tools/bink_firstdiff_clusters.py +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env python3 -"""Cluster Bink objdiff symbols by their first normalized asm mismatch.""" - -from __future__ import annotations - -import argparse -import json -import re -import subprocess -from collections import defaultdict -from pathlib import Path - - -ROOT = Path(__file__).resolve().parents[1] -BUILD = ROOT / "build" / "GQPE78" -OBJDIFF = ROOT / "build" / "tools" / "objdiff-cli.exe" - -UNITS = { - "binkread": "main/bink/src/sdk/decode/binkread", - "binkacd": "main/bink/src/sdk/decode/binkacd", - "expand": "main/bink/src/sdk/decode/expand", - "yuv": "main/bink/src/sdk/decode/yuv", - "binkngc": "main/bink/src/sdk/decode/ngc/binkngc", - "ngcsnd": "main/bink/src/sdk/decode/ngc/ngcsnd", - "ngcfile": "main/bink/src/sdk/decode/ngc/ngcfile", - "ngcrgb": "main/bink/src/sdk/decode/ngc/ngcrgb", - "ngcyuy2": "main/bink/src/sdk/decode/ngc/ngcyuy2", - "varbits": "main/bink/src/sdk/varbits", - "fft": "main/bink/src/sdk/fft", - "dct": "main/bink/src/sdk/dct", - "bitplane": 
"main/bink/src/sdk/bitplane", -} - -REG_RE = re.compile(r"\b([rf])(?:[0-9]|[12][0-9]|3[01])\b") -CR_RE = re.compile(r"\bcr[0-7]\b") -HEX_RE = re.compile(r"-?0x[0-9a-fA-F]+") -INT_RE = re.compile(r"(? Path: - path = BUILD / f"{unit}_firstdiff.json" - if not refresh and path.exists(): - return path - subprocess.run( - [str(OBJDIFF), "diff", "-p", ".", "-u", UNITS[unit], "-o", str(path), "--format", "json"], - cwd=ROOT, - check=True, - ) - return path - - -def norm(text: str | None) -> str: - if not text: - return "" - mnemonic, _, rest = text.strip().lower().partition(" ") - if mnemonic.startswith("b"): - return f"{mnemonic} target" - rest = REG_RE.sub(lambda m: m.group(1) + "N", rest) - rest = CR_RE.sub("crN", rest) - rest = HEX_RE.sub("IMM", rest) - rest = INT_RE.sub("IMM", rest) - rest = re.sub(r"\s+", "", rest) - return f"{mnemonic} {rest}" if rest else mnemonic - - -def instructions(sym: dict) -> list[str]: - rows: list[str] = [] - for item in sym.get("instructions", []): - inst = item.get("instruction") or {} - rows.append(inst.get("formatted") or "") - return rows - - -def score(sym: dict) -> float: - return float(sym.get("match_percent") or sym.get("score") or 0.0) - - -def source_ref(sym: dict) -> str: - src = sym.get("source") or {} - path = src.get("path") or src.get("file") or "" - line = src.get("line") - return f"{path}:{line}" if path and line else path - - -def main() -> int: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("units", nargs="+", choices=sorted(UNITS)) - parser.add_argument("--refresh", action="store_true") - parser.add_argument("--min-score", type=float, default=0.0) - parser.add_argument("--max-score", type=float, default=99.99) - parser.add_argument("--limit", type=int, default=30) - args = parser.parse_args() - - clusters: dict[tuple[str, str], list[tuple[str, float, int, str, str, str]]] = defaultdict(list) - for unit in args.units: - data = json.loads(current_json(unit, 
args.refresh).read_text(encoding="utf-8")) - left = {s.get("name"): s for s in data.get("left", {}).get("symbols", []) if s.get("kind") == "SYMBOL_FUNCTION"} - right = {s.get("name"): s for s in data.get("right", {}).get("symbols", []) if s.get("kind") == "SYMBOL_FUNCTION"} - for name, rsym in right.items(): - lsym = left.get(name) - if not lsym: - continue - pct = score(rsym) - if pct < args.min_score or pct > args.max_score: - continue - lrows = instructions(lsym) - rrows = instructions(rsym) - for idx in range(max(len(lrows), len(rrows))): - lraw = lrows[idx] if idx < len(lrows) else "" - rraw = rrows[idx] if idx < len(rrows) else "" - key = (norm(lraw), norm(rraw)) - if key[0] != key[1]: - clusters[key].append((unit, pct, idx, name, lraw, rraw)) - break - - ranked = sorted(clusters.items(), key=lambda item: (-len(item[1]), item[0])) - for (lnorm, rnorm), hits in ranked[: args.limit]: - print(f"\n## {len(hits)} hits: {lnorm} | {rnorm}") - for unit, pct, idx, name, lraw, rraw in sorted(hits, key=lambda h: (-h[1], h[0], h[3]))[:8]: - print(f"{unit:8} {pct:7.3f} @{idx:03d} {name}") - print(f" {lraw} | {rraw}") - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/tools/bink_flag_matrix.py b/tools/bink_flag_matrix.py deleted file mode 100644 index 34fda51de..000000000 --- a/tools/bink_flag_matrix.py +++ /dev/null @@ -1,444 +0,0 @@ -#!/usr/bin/env python3 -"""Probe Bink compiler flags against objdiff scores. - -The script compiles a configured Bink unit to a temporary object with candidate -compiler/flag changes, swaps that object into the normal build path long enough -to run objdiff, then restores the original object. 
-""" - -from __future__ import annotations - -import argparse -import itertools -import json -import os -import shlex -import shutil -import subprocess -import sys -import time -from contextlib import contextmanager -from pathlib import Path -from typing import Iterable - -ROOT = Path(__file__).resolve().parents[1] -BUILD = ROOT / "build" / "GQPE78" -OBJDIFF = ROOT / "build" / "tools" / "objdiff-cli.exe" -PRODG_ROOT = ROOT / "build" / "compilers" / "ProDG" -MWCC_ROOT = ROOT / "build" / "compilers" / "GC" -LOCKS = BUILD / ".bink_flag_matrix_locks" -LOCK_TIMEOUT_SECONDS = 600.0 - -BINK_FLAGS = [ - "-O2", - "-mcpu=750", - "-fno-exceptions", - "-Wno-inline", - "-nostdinc", - "-I", - "src/dolphin/src", - "-I", - "include", - "-I", - "src/dolphin/include", - "-D__GEKKO__", - "-I", - "src/bink/include", - "-I", - "src/PowerPC_EABI_Support/include", - "-G8", -] - -CW_COMMON_FLAGS = [ - "-nodefaults", - "-proc", - "gekko", - "-align", - "powerpc", - "-enum", - "int", - "-fp", - "hardware", - "-Cpp_exceptions", - "off", - "-W", - "off", - "-O4,p", - "-inline", - "auto", - "-pragma", - "cats off", - "-pragma", - "warn_notinlined off", - "-maxerrors", - "1", - "-nosyspath", - "-RTTI", - "off", - "-fp_contract", - "on", - "-str", - "reuse", - "-multibyte", - "-i", - "include", - "-i", - "src/PowerPC_EABI_Support/include", - "-i", - "src/dolphin/include", - "-i", - "src/dolphin/src", - "-i", - "src/bink/include", - "-i", - "src/bink/src", - "-i", - "src", - "-i", - "build/GQPE78/include", - "-DNDEBUG=1", - "-DBUILD_VERSION=1", - "-DVERSION_GQPE78", -] - -CW_PRESETS = { - "base": [], - "runtime": [ - "-use_lmw_stmw", - "on", - "-str", - "reuse,pool,readonly", - "-gccinc", - "-common", - "off", - "-inline", - "auto", - ], - "dolphin": [ - "-fp", - "fmadd", - "-fp_contract", - "off", - "-char", - "signed", - "-str", - "reuse", - "-common", - "off", - "-O4,p", - ], - "bfbb": [ - "-common", - "on", - "-char", - "unsigned", - "-str", - "reuse,pool,readonly", - "-use_lmw_stmw", - "on", 
- "-inline", - "off", - "-gccinc", - "-i", - "include/inline", - "-i", - "include/rwsdk", - "-i", - "src/SB/Core/gc", - "-i", - "src/SB/Core/x", - "-i", - "src/SB/Game", - "-DGAMECUBE", - ], -} - -UNITS = { - "binkread": ("main/bink/src/sdk/decode/binkread", "src/bink/src/sdk/decode/binkread.c", "build/GQPE78/src/bink/src/sdk/decode/binkread.o", "c"), - "binkacd": ("main/bink/src/sdk/decode/binkacd", "src/bink/src/sdk/decode/binkacd.c", "build/GQPE78/src/bink/src/sdk/decode/binkacd.o", "c"), - "expand": ("main/bink/src/sdk/decode/expand", "src/bink/src/sdk/decode/expand.c", "build/GQPE78/src/bink/src/sdk/decode/expand.o", "c"), - "yuv": ("main/bink/src/sdk/decode/yuv", "src/bink/src/sdk/decode/yuv.cpp", "build/GQPE78/src/bink/src/sdk/decode/yuv.o", "c++"), - "binkngc": ("main/bink/src/sdk/decode/ngc/binkngc", "src/bink/src/sdk/decode/ngc/binkngc.c", "build/GQPE78/src/bink/src/sdk/decode/ngc/binkngc.o", "c"), - "ngcsnd": ("main/bink/src/sdk/decode/ngc/ngcsnd", "src/bink/src/sdk/decode/ngc/ngcsnd.c", "build/GQPE78/src/bink/src/sdk/decode/ngc/ngcsnd.o", "c"), - "ngcfile": ("main/bink/src/sdk/decode/ngc/ngcfile", "src/bink/src/sdk/decode/ngc/ngcfile.c", "build/GQPE78/src/bink/src/sdk/decode/ngc/ngcfile.o", "c"), - "ngcrgb": ("main/bink/src/sdk/decode/ngc/ngcrgb", "src/bink/src/sdk/decode/ngc/ngcrgb.c", "build/GQPE78/src/bink/src/sdk/decode/ngc/ngcrgb.o", "c"), - "ngcyuy2": ("main/bink/src/sdk/decode/ngc/ngcyuy2", "src/bink/src/sdk/decode/ngc/ngcyuy2.c", "build/GQPE78/src/bink/src/sdk/decode/ngc/ngcyuy2.o", "c"), - "varbits": ("main/bink/src/sdk/varbits", "src/bink/src/sdk/varbits.c", "build/GQPE78/src/bink/src/sdk/varbits.o", "c"), - "fft": ("main/bink/src/sdk/fft", "src/bink/src/sdk/fft.c", "build/GQPE78/src/bink/src/sdk/fft.o", "c"), - "dct": ("main/bink/src/sdk/dct", "src/bink/src/sdk/dct.c", "build/GQPE78/src/bink/src/sdk/dct.o", "c"), - "bitplane": ("main/bink/src/sdk/bitplane", "src/bink/src/sdk/bitplane.c", "build/GQPE78/src/bink/src/sdk/bitplane.o", "c"), -} - 
- -def replace_flag(flags: list[str], prefix: str, value: str) -> list[str]: - out = [] - replaced = False - for flag in flags: - if flag.startswith(prefix): - out.append(value) - replaced = True - else: - out.append(flag) - if not replaced: - out.append(value) - return out - - -def build_prodg_flags(lang: str, opt: str, small_data: str, extras: Iterable[str]) -> list[str]: - flags = replace_flag(BINK_FLAGS, "-O", opt) - flags = replace_flag(flags, "-G", small_data) - flags.append(f"-lang={lang}") - flags.extend(extras) - return flags - - -def build_mwcc_flags(lang: str, preset: str, source: str, extras: Iterable[str]) -> list[str]: - flags = list(CW_COMMON_FLAGS) - flags.extend(["-i", str(Path(source).parent).replace("\\", "/")]) - flags.extend(CW_PRESETS[preset]) - flags.append(f"-lang={lang}") - flags.extend(extras) - return flags - - -def section_score(path: Path) -> float | None: - with path.open("r", encoding="utf-8") as f: - data = json.load(f) - try: - score = data["left"]["sections"][0].get("match_percent") - except Exception: - return None - return None if score is None else float(score) - - -def run(cmd: list[str], *, quiet: bool = False) -> subprocess.CompletedProcess[str]: - return subprocess.run( - cmd, - cwd=ROOT, - text=True, - stdout=subprocess.PIPE if quiet else None, - stderr=subprocess.STDOUT if quiet else None, - ) - - -@contextmanager -def unit_lock(unit: str) -> Iterable[None]: - LOCKS.mkdir(parents=True, exist_ok=True) - lock = LOCKS / unit - start = time.monotonic() - while True: - try: - lock.mkdir() - (lock / "pid").write_text(str(os.getpid()), encoding="utf-8") - break - except FileExistsError: - if time.monotonic() - start > LOCK_TIMEOUT_SECONDS: - raise TimeoutError(f"timed out waiting for Bink flag-matrix lock: {lock}") - time.sleep(0.1) - - try: - yield - finally: - try: - (lock / "pid").unlink(missing_ok=True) - lock.rmdir() - except OSError: - pass - - -def compile_prodg_candidate(source: str, out: Path, flags: list[str], version: 
str) -> bool: - compiler_dir = PRODG_ROOT / version - compiler = compiler_dir / "ngccc.exe" - if not compiler.exists(): - print(f"missing compiler: {compiler}") - return False - - out.parent.mkdir(parents=True, exist_ok=True) - command = [str(compiler), *flags, "-c", "-o", str(out), source] - cmdline = " ".join(command) - result = run( - [ - "cmd", - "/c", - f"set SN_NGC_PATH={compiler_dir}&& {cmdline}", - ], - quiet=True, - ) - if result.returncode != 0: - print(result.stdout.strip()) - return False - return True - - -def compile_mwcc_candidate(source: str, out: Path, flags: list[str], version: str) -> bool: - compiler = MWCC_ROOT / version / "mwcceppc.exe" - if not compiler.exists(): - print(f"missing compiler: {compiler}") - return False - - out_dir = out.parent - out_dir.mkdir(parents=True, exist_ok=True) - for old in out_dir.glob(f"{Path(source).stem}.*"): - old.unlink() - - result = run( - [ - str(compiler), - *flags, - "-MMD", - "-c", - source, - "-o", - str(out_dir), - ], - quiet=True, - ) - if result.returncode != 0: - print(result.stdout.strip()) - return False - - produced = out_dir / f"{Path(source).stem}.o" - if not produced.exists(): - print(f"compile succeeded but did not produce {produced}") - return False - if produced != out: - shutil.copy2(produced, out) - return True - - -def score_candidate(unit_name: str, unit_path: str, object_path: Path, candidate: Path, report: Path) -> float | None: - backup = object_path.with_suffix(object_path.suffix + ".flagmatrix.bak") - if backup.exists(): - backup.unlink() - shutil.copy2(object_path, backup) - try: - shutil.copy2(candidate, object_path) - result = run( - [ - str(OBJDIFF), - "diff", - "-p", - ".", - "-u", - unit_path, - "-o", - str(report), - "--format", - "json", - ], - quiet=True, - ) - if result.returncode != 0: - print(result.stdout.strip()) - return None - return section_score(report) - finally: - if backup.exists(): - shutil.copy2(backup, object_path) - backup.unlink(missing_ok=True) - - -def 
score_existing(unit_path: str, report: Path) -> float | None: - result = run( - [ - str(OBJDIFF), - "diff", - "-p", - ".", - "-u", - unit_path, - "-o", - str(report), - "--format", - "json", - ], - quiet=True, - ) - if result.returncode != 0: - print(result.stdout.strip()) - return None - return section_score(report) - - -def main(argv: list[str] | None = None) -> int: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("units", nargs="*", choices=sorted(UNITS)) - parser.add_argument("--compiler", choices=("prodg", "mwcc", "both"), default="prodg") - parser.add_argument("--prodg-versions", nargs="+", default=["3.5"]) - parser.add_argument("--opts", nargs="+", default=["-O1", "-O2", "-O3"]) - parser.add_argument("--small-data", nargs="+", default=["-G0", "-G4", "-G8"]) - parser.add_argument("--mw-versions", nargs="+", default=["1.2.5n", "1.3.2", "2.0p1", "2.6"]) - parser.add_argument("--mw-presets", nargs="+", choices=sorted(CW_PRESETS), default=["base", "runtime", "dolphin", "bfbb"]) - parser.add_argument("--extra", action="append", default=[]) - parser.add_argument("--limit", type=int, default=12, help="rows to print per unit; use 0 for all") - parser.add_argument("--only-improvements", action="store_true", help="only print variants that beat the current object") - args = parser.parse_args(argv) - extras = list(itertools.chain.from_iterable(shlex.split(extra) for extra in args.extra)) - - units = args.units or ["varbits", "binkngc", "ngcsnd", "binkacd"] - temp = BUILD / "flag_matrix" - temp.mkdir(parents=True, exist_ok=True) - - for unit_name in units: - unit_path, source, object_rel, lang = UNITS[unit_name] - object_path = ROOT / object_rel - if not object_path.exists(): - subprocess.run(["ninja", object_rel.replace("/", "\\")], cwd=ROOT, check=True) - - with unit_lock(unit_name): - baseline_report = temp / f"{unit_name}_baseline.json" - baseline = score_existing(unit_path, baseline_report) - rows: list[tuple[float, str, str]] = [] - if 
args.compiler in ("prodg", "both"): - for version, opt, small_data in itertools.product(args.prodg_versions, args.opts, args.small_data): - flags = build_prodg_flags(lang, opt, small_data, extras) - label = f"ProDG/{version} {opt} {small_data}" + (f" {' '.join(extras)}" if extras else "") - stem = f"{unit_name}_ProDG_{version.replace('.', '_')}_{opt[1:]}_{small_data[2:]}" - candidate = temp / f"{stem}.o" - report = temp / f"{stem}.json" - if not compile_prodg_candidate(source, candidate, flags, version): - print(f"{unit_name:<10} {label:<34} compile failed") - continue - score = score_candidate(unit_name, unit_path, object_path, candidate, report) - if score is None: - print(f"{unit_name:<10} {label:<34} score failed") - continue - rows.append((score, label, str(report.relative_to(ROOT)))) - - if args.compiler in ("mwcc", "both"): - for version, preset in itertools.product(args.mw_versions, args.mw_presets): - flags = build_mwcc_flags(lang, preset, source, extras) - label = f"GC/{version} {preset}" + (f" {' '.join(extras)}" if extras else "") - stem = f"{unit_name}_GC_{version.replace('.', '_')}_{preset}" - candidate = temp / f"{stem}" / f"{Path(source).stem}.o" - report = temp / f"{stem}.json" - if not compile_mwcc_candidate(source, candidate, flags, version): - print(f"{unit_name:<10} {label:<34} compile failed") - continue - score = score_candidate(unit_name, unit_path, object_path, candidate, report) - if score is None: - print(f"{unit_name:<10} {label:<34} score failed") - continue - rows.append((score, label, str(report.relative_to(ROOT)))) - - print(f"\n## {unit_name}") - if baseline is None: - print("baseline=") - else: - print(f"baseline={baseline:.6g} {baseline_report.relative_to(ROOT)}") - - printed = 0 - for score, label, report in sorted(rows, reverse=True): - delta = "" if baseline is None else f" {score - baseline:+9.6g}" - if args.only_improvements and baseline is not None and score <= baseline: - continue - if args.limit and printed >= args.limit: 
- break - print(f"{score:9.6g}{delta} {label:<34} {report}") - printed += 1 - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/tools/bink_match.py b/tools/bink_match.py deleted file mode 100644 index 26f7c8cc5..000000000 --- a/tools/bink_match.py +++ /dev/null @@ -1,1727 +0,0 @@ -#!/usr/bin/env python3 -"""Small helpers for mining Bink objdiff artifacts. - -This is intentionally narrow: it compares existing objdiff JSON probes against a -fresh current diff so we can find source-real changes that were previously -measured, then inspect the first assembly-shape divergence. -""" - -from __future__ import annotations - -import argparse -import base64 -import glob -import json -import os -import re -import shutil -import subprocess -import sys -import time -from contextlib import contextmanager -from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Tuple - - -ROOT = Path(__file__).resolve().parents[1] -BUILD = ROOT / "build" / "GQPE78" -OBJDIFF = ROOT / "build" / "tools" / "objdiff-cli.exe" -SYMBOLS = ROOT / "config" / "GQPE78" / "symbols.txt" -GHIDRA = ROOT / "reference_projects" / "bfbb_ghidra" -BINK_REFERENCE_ROOTS = [ - ROOT / "reference_projects" / "bink_references", - ROOT / "reference_projects" / "RatDecomp" / "src" / "3rdParty" / "bink", -] -BINK_HEADER_ROOTS = [ - ROOT / "src" / "bink" / "src" / "sdk", - ROOT / "src" / "bink" / "include", -] -LOCKS = BUILD / ".bink_match_locks" -LOCK_TIMEOUT_SECONDS = 600.0 - -UNITS = { - "binkread": { - "unit": "main/bink/src/sdk/decode/binkread", - "object": r"build\GQPE78\src\bink\src\sdk\decode\binkread.o", - }, - "binkacd": { - "unit": "main/bink/src/sdk/decode/binkacd", - "object": r"build\GQPE78\src\bink\src\sdk\decode\binkacd.o", - }, - "expand": { - "unit": "main/bink/src/sdk/decode/expand", - "object": r"build\GQPE78\src\bink\src\sdk\decode\expand.o", - }, - "yuv": { - "unit": "main/bink/src/sdk/decode/yuv", - "object": 
r"build\GQPE78\src\bink\src\sdk\decode\yuv.o", - }, - "binkngc": { - "unit": "main/bink/src/sdk/decode/ngc/binkngc", - "object": r"build\GQPE78\src\bink\src\sdk\decode\ngc\binkngc.o", - }, - "ngcsnd": { - "unit": "main/bink/src/sdk/decode/ngc/ngcsnd", - "object": r"build\GQPE78\src\bink\src\sdk\decode\ngc\ngcsnd.o", - }, - "ngcfile": { - "unit": "main/bink/src/sdk/decode/ngc/ngcfile", - "object": r"build\GQPE78\src\bink\src\sdk\decode\ngc\ngcfile.o", - }, - "ngcrgb": { - "unit": "main/bink/src/sdk/decode/ngc/ngcrgb", - "object": r"build\GQPE78\src\bink\src\sdk\decode\ngc\ngcrgb.o", - }, - "ngcyuy2": { - "unit": "main/bink/src/sdk/decode/ngc/ngcyuy2", - "object": r"build\GQPE78\src\bink\src\sdk\decode\ngc\ngcyuy2.o", - }, - "varbits": { - "unit": "main/bink/src/sdk/varbits", - "object": r"build\GQPE78\src\bink\src\sdk\varbits.o", - }, - "fft": { - "unit": "main/bink/src/sdk/fft", - "object": r"build\GQPE78\src\bink\src\sdk\fft.o", - }, - "dct": { - "unit": "main/bink/src/sdk/dct", - "object": r"build\GQPE78\src\bink\src\sdk\dct.o", - }, - "bitplane": { - "unit": "main/bink/src/sdk/bitplane", - "object": r"build\GQPE78\src\bink\src\sdk\bitplane.o", - }, -} - -TargetFingerprint = Tuple[Tuple[Tuple[str, str, str], ...], Tuple[Tuple[Any, ...], ...]] - - -def load_json(path: Path) -> Optional[Dict[str, Any]]: - try: - with path.open("r", encoding="utf-8") as f: - return json.load(f) - except Exception: - return None - - -def section_score(data: Dict[str, Any]) -> Optional[float]: - try: - return data["left"]["sections"][0].get("match_percent") - except Exception: - return None - - -def section_size(data: Dict[str, Any]) -> Optional[int]: - try: - size = data["left"]["sections"][0].get("size") - return None if size is None else int(size) - except Exception: - return None - - -def target_fingerprint(data: Dict[str, Any]) -> Optional[TargetFingerprint]: - try: - right = data["right"] - sections = tuple( - (str(section.get("name")), str(section.get("size")), 
str(section.get("kind"))) - for section in right.get("sections", []) - ) - syms = tuple( - ( - str(sym.get("name")), - str(sym.get("size")), - str(sym.get("kind")), - tuple( - instr.get("instruction", {}).get("formatted", "") - for instr in sym.get("instructions", []) - ), - tuple( - (chunk.get("size"), chunk.get("data")) - for chunk in sym.get("data_diff", []) - ), - ) - for sym in right.get("symbols", []) - if sym.get("name") - ) - return sections, syms - except Exception: - return None - - -def side_symbols(data: Dict[str, Any], side: str) -> Dict[str, Dict[str, Any]]: - out: Dict[str, Dict[str, Any]] = {} - for sym in data.get(side, {}).get("symbols", []): - name = sym.get("name") - if name: - out[name] = sym - return out - - -def symbols(data: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: - return side_symbols(data, "left") - - -def target_symbols(data: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: - return side_symbols(data, "right") - - -def is_function(sym: Dict[str, Any]) -> bool: - return sym.get("kind") == "SYMBOL_FUNCTION" - - -def symbol_score(data: Dict[str, Any], name: str) -> Optional[float]: - sym = symbols(data).get(name) - return None if sym is None else sym.get("match_percent") - - -@contextmanager -def unit_lock(unit: str) -> Iterable[None]: - LOCKS.mkdir(parents=True, exist_ok=True) - lock = LOCKS / unit - start = time.monotonic() - while True: - try: - lock.mkdir() - (lock / "pid").write_text(str(os.getpid()), encoding="utf-8") - break - except FileExistsError: - if time.monotonic() - start > LOCK_TIMEOUT_SECONDS: - raise TimeoutError(f"timed out waiting for Bink match lock: {lock}") - time.sleep(0.1) - - try: - yield - finally: - try: - (lock / "pid").unlink(missing_ok=True) - lock.rmdir() - except OSError: - pass - - -def current_json(unit: str, rebuild: bool, refresh: bool) -> Path: - info = UNITS[unit] - out = BUILD / f"{unit}_current_fresh.json" - if not refresh: - warn_if_object_stale(unit) - warn_if_current_json_stale(unit, out) - 
return out - with unit_lock(unit): - if rebuild: - if object_stale_inputs(unit): - unit_object(unit).unlink(missing_ok=True) - subprocess.run(["ninja", info["object"]], cwd=ROOT, check=True) - else: - warn_if_object_stale(unit) - subprocess.run( - [ - str(OBJDIFF), - "diff", - "-p", - ".", - "-u", - info["unit"], - "-o", - str(out), - "--format", - "json", - ], - cwd=ROOT, - check=True, - ) - return out - - -def unit_object(unit: str) -> Path: - return ROOT / UNITS[unit]["object"] - - -def unit_source_candidates(unit: str) -> List[Path]: - obj = unit_object(unit) - try: - rel = obj.relative_to(BUILD) - except ValueError: - return [] - src_base = (ROOT / rel).with_suffix("") - return [src_base.with_suffix(ext) for ext in (".c", ".cpp")] - - -def unit_header_candidates() -> Iterable[Path]: - for root in BINK_HEADER_ROOTS: - if not root.exists(): - continue - yield from root.rglob("*.h") - - -def unit_input_candidates(unit: str) -> List[Path]: - return unit_source_candidates(unit) + list(unit_header_candidates()) - - -def source_anchor(unit: str, symbol: str) -> Optional[str]: - names = [symbol] - if "__" in symbol: - names.append(symbol.split("__", 1)[0]) - - for src in unit_source_candidates(unit): - if not src.exists(): - continue - try: - lines = src.read_text(encoding="utf-8").splitlines() - except UnicodeDecodeError: - lines = src.read_text(errors="ignore").splitlines() - for index, line in enumerate(lines, 1): - for name in names: - if re.search(rf"\b{re.escape(name)}\s*\(", line) and not line.rstrip().endswith(";"): - rel = src.relative_to(ROOT) - return f"{rel}:{index} {line.strip()}" - return None - - -def symbol_address(symbol: str) -> Optional[str]: - if not SYMBOLS.exists(): - return None - pattern = re.compile(rf"^{re.escape(symbol)} = \.text:0x([0-9A-Fa-f]+);") - with SYMBOLS.open("r", encoding="utf-8", errors="ignore") as f: - for line in f: - match = pattern.match(line) - if match: - return match.group(1).lower() - return None - - -def 
ghidra_anchor(symbol: str) -> Optional[str]: - address = symbol_address(symbol) - if address is None: - return None - path = GHIDRA / f"{address}_FUN_{address}.c" - if not path.exists(): - return None - return str(path.relative_to(ROOT)) - - -def reference_terms(symbol: str) -> List[str]: - terms = [symbol] - if "__" in symbol: - terms.append(symbol.split("__", 1)[0]) - if symbol.startswith("_") and len(symbol) > 1: - terms.append(symbol[1:]) - - out = [] - seen = set() - for term in terms: - if not term or term.startswith("["): - continue - if term not in seen: - out.append(term) - seen.add(term) - return out - - -def reference_hits(symbol: str, limit: int) -> List[str]: - roots = [root for root in BINK_REFERENCE_ROOTS if root.exists()] - if limit <= 0 or not roots: - return [] - - hits: List[str] = [] - seen = set() - for term in reference_terms(symbol): - pattern = rf"\b{re.escape(term)}\b" - for root in roots: - try: - result = subprocess.run( - [ - "rg", - "--line-number", - "--no-heading", - "--color", - "never", - "--field-match-separator", - "\t", - pattern, - str(root.relative_to(ROOT)), - ], - cwd=ROOT, - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL, - check=False, - ) - except FileNotFoundError: - return hits - - for line in result.stdout.splitlines(): - parts = line.split("\t", 2) - if len(parts) != 3: - continue - path_text, line_no, text = parts - try: - rel = (ROOT / path_text).resolve().relative_to(ROOT) - except ValueError: - rel = Path(path_text) - rendered = f"{rel}:{line_no} {text.strip()}" - if rendered in seen: - continue - hits.append(rendered) - seen.add(rendered) - if len(hits) >= limit: - return hits - return hits - - -def warn_if_object_stale(unit: str) -> None: - stale_sources = object_stale_inputs(unit) - if stale_sources: - names = ", ".join(str(src.relative_to(ROOT)) for src in stale_sources[:8]) - if len(stale_sources) > 8: - names += f", ... 
(+{len(stale_sources) - 8} more)" - print( - f"warning: {UNITS[unit]['object']} is older than {names}; " - "rerun without --no-rebuild before trusting this diff", - file=sys.stderr, - ) - - -def object_stale_inputs(unit: str) -> List[Path]: - obj = unit_object(unit) - if not obj.exists(): - return [] - obj_mtime = obj.stat().st_mtime - return [src for src in unit_input_candidates(unit) if src.exists() and src.stat().st_mtime > obj_mtime] - - -def warn_if_current_json_stale(unit: str, current: Path) -> None: - obj = unit_object(unit) - if not obj.exists() or not current.exists(): - return - if current.stat().st_mtime >= obj.stat().st_mtime: - return - print( - f"warning: {current.relative_to(ROOT)} is older than {UNITS[unit]['object']}; " - "rerun without --cached before trusting this diff", - file=sys.stderr, - ) - - -def artifact_paths(unit: str, wide: bool) -> Iterable[Path]: - if wide: - patterns = [f"*{unit}*.json"] - else: - patterns = [f"{unit}*.json", f"tmp_{unit}*.json", f"report_{unit}*.json"] - - seen = set() - for pattern in patterns: - for path in glob.glob(str(BUILD / pattern)): - resolved = Path(path).resolve() - if resolved not in seen: - seen.add(resolved) - yield Path(path) - - -def json_arg(unit: str, value: str) -> Path: - path = Path(value) - if path.suffix == ".json" or path.parent != Path("."): - return path - return BUILD / f"{unit}_{value}.json" - - -def mine(args: argparse.Namespace) -> int: - units = args.units or list(UNITS) - for unit in units: - base_path = current_json(unit, not args.no_rebuild, not args.cached) - base = load_json(base_path) - if base is None: - continue - base_unit = section_score(base) - base_size = section_size(base) - base_target_fingerprint = target_fingerprint(base) - base_syms = symbols(base) - rows: List[Tuple[float, str, List[str]]] = [] - best_by_symbol: Dict[str, Tuple[float, float, str]] = {} - - for path in artifact_paths(unit, args.wide): - if path.resolve() == base_path.resolve(): - continue - data = 
load_json(path) - if data is None: - continue - if not args.include_stale_target and target_fingerprint(data) != base_target_fingerprint: - continue - score = section_score(data) - if score is None or base_unit is None: - continue - comparable_section = args.include_partial_section or section_size(data) == base_size - - improvements: List[str] = [] - for name, sym in symbols(data).items(): - if not args.include_data and not is_function(sym): - continue - if args.symbol and name != args.symbol: - continue - old = base_syms.get(name, {}).get("match_percent") - new = sym.get("match_percent") - if old is not None and new is not None and new > old + args.epsilon: - improvements.append(f"{name}:{old:.6g}->{new:.6g}") - best = best_by_symbol.get(name) - if best is None or new > best[1] + args.epsilon: - best_by_symbol[name] = (old, new, path.name) - - section_improved = comparable_section and score > base_unit + args.epsilon - if improvements or (section_improved and args.include_section_only): - rows.append((score, path.name, improvements[: args.symbol_limit])) - - rows.sort(reverse=True, key=lambda row: row[0]) - print(f"\n## {unit} current {base_unit:.6g} ({base_path.name})") - for score, name, improvements in rows[: args.limit]: - suffix = "" - if improvements: - suffix = " " + "; ".join(improvements) - print(f"{score:9.6g} {name}{suffix}") - - if args.per_symbol and best_by_symbol: - print(" per-symbol:") - ranked = sorted( - best_by_symbol.items(), - key=lambda item: (item[1][1] - item[1][0], item[1][1]), - reverse=True, - ) - for name, (old, new, path) in ranked[: args.symbol_limit]: - print(f" {name}: {old:.6g}->{new:.6g} {path}") - return 0 - - -def snapshot(args: argparse.Namespace) -> int: - src = current_json(args.unit, not args.no_rebuild, True) - dest = json_arg(args.unit, args.label) - shutil.copyfile(src, dest) - print(dest) - return 0 - - -def delta(args: argparse.Namespace) -> int: - before_path = json_arg(args.unit, args.before) - if args.after is None: 
- after_path = current_json(args.unit, not args.no_rebuild, not args.cached) - else: - after_path = json_arg(args.unit, args.after) - - before = load_json(before_path) - after = load_json(after_path) - if before is None or after is None: - raise SystemExit("could not load delta JSON") - - before_score = section_score(before) - after_score = section_score(after) - if before_score is not None and after_score is not None: - print(f"## {args.unit} {before_score:.6g}->{after_score:.6g} ({after_score - before_score:+.6g})") - - before_syms = symbols(before) - after_syms = symbols(after) - rows = [] - for name, after_sym in after_syms.items(): - if not args.include_data and not is_function(after_sym): - continue - before_sym = before_syms.get(name) - if before_sym is None: - continue - old = before_sym.get("match_percent") - new = after_sym.get("match_percent") - if old is None or new is None: - continue - change = new - old - if abs(change) <= args.epsilon: - continue - rows.append((change, new, old, int(after_sym.get("size") or 0), name)) - - rows.sort(key=lambda row: (abs(row[0]), row[3]), reverse=True) - for change, new, old, size, name in rows[: args.limit]: - print(f"{change:+10.6g} {old:9.6g}->{new:<9.6g} size={size:<5} {name}") - return 0 - - -def best_artifact_symbols( - unit: str, - base_path: Path, - base_syms: Dict[str, Dict[str, Any]], - base_target_fingerprint: Optional[TargetFingerprint], - wide: bool, - include_data: bool, - include_stale_target: bool, - epsilon: float, - base_unit_score: Optional[float] = None, - min_unit_gain: Optional[float] = None, -) -> Dict[str, Tuple[float, float, str]]: - best_by_symbol: Dict[str, Tuple[float, float, str]] = {} - for path in artifact_paths(unit, wide): - if path.resolve() == base_path.resolve(): - continue - data = load_json(path) - if data is None: - continue - if not include_stale_target and target_fingerprint(data) != base_target_fingerprint: - continue - if min_unit_gain is not None: - artifact_unit_score = 
section_score(data) - if ( - base_unit_score is None - or artifact_unit_score is None - or artifact_unit_score < base_unit_score + min_unit_gain - ): - continue - for name, sym in symbols(data).items(): - if not include_data and not is_function(sym): - continue - old = base_syms.get(name, {}).get("match_percent") - new = sym.get("match_percent") - if old is None or new is None or new <= old + epsilon: - continue - best = best_by_symbol.get(name) - if best is None or new > best[1] + epsilon: - best_by_symbol[name] = (old, new, path.name) - return best_by_symbol - - -def left_symbol(data: Dict[str, Any], symbol: str) -> Dict[str, Any]: - sym = symbols(data).get(symbol) - if sym is None: - raise SystemExit(f"symbol not found: {symbol}") - return sym - - -def right_symbol(data: Dict[str, Any], left: Dict[str, Any]) -> Dict[str, Any]: - target = left.get("target_symbol") - if target is None: - raise SystemExit("left symbol has no target_symbol") - return data.get("right", {}).get("symbols", [])[target] - - -def instr_text(ins: Dict[str, Any], ignore_relocs: bool) -> str: - if not ignore_relocs: - return ins.get("formatted", "") - - parts = ins.get("parts") - if not parts: - return ins.get("formatted", "") - - out = [] - first_arg = False - mnemonic = "" - for part in parts: - opcode = part.get("opcode") - if opcode: - mnemonic = opcode.get("mnemonic", "") - out.append(mnemonic) - first_arg = True - continue - - if part.get("separator"): - out.append(", ") - continue - - basic = part.get("basic") - if basic is not None: - out.append(str(basic)) - continue - - arg = part.get("arg") - if arg: - if first_arg: - out.append(" ") - first_arg = False - if arg.get("reloc"): - out.append("" if mnemonic.startswith("b") and mnemonic != "bl" else "") - elif "opaque" in arg: - out.append(str(arg["opaque"])) - elif "signed" in arg: - out.append(str(arg["signed"])) - elif "unsigned" in arg: - out.append(str(arg["unsigned"])) - elif "branch_dest" in arg: - out.append("") - else: - 
out.append(str(arg)) - return "".join(out) - - -def instrs(sym: Dict[str, Any], include_diff: bool, ignore_relocs: bool) -> List[str]: - out: List[str] = [] - for item in sym.get("instructions", []): - ins = item.get("instruction") - prefix = "" - if include_diff and item.get("diff_kind"): - prefix = f"{item.get('diff_kind')}:" - if ins is None: - out.append(f"{prefix}<{item.get('diff_kind', 'gap')}>") - else: - arg = "" - if include_diff and item.get("arg_diff"): - arg = f":{item.get('arg_diff')}" - reloc = "" - if include_diff and ins.get("relocation"): - rel = ins["relocation"] - reloc = f":reloc={rel.get('target_symbol')}+{rel.get('addend', '')}" - out.append(f"{prefix}{instr_text(ins, ignore_relocs)}{arg}{reloc}") - return out - - -def data_diff_rows(sym: Dict[str, Any]) -> List[str]: - rows: List[str] = [] - for item in sym.get("data_diff", []): - kind = item.get("kind") or "DATA" - size = item.get("size", "?") - raw = b"" - if item.get("data"): - try: - raw = base64.b64decode(item["data"]) - except Exception: - raw = b"" - hex_text = raw.hex().upper() - if len(hex_text) > 48: - hex_text = hex_text[:48] + "..." - ascii_text = "".join(chr(ch) if 32 <= ch < 127 else "." for ch in raw) - if len(ascii_text) > 24: - ascii_text = ascii_text[:24] + "..." 
- rows.append(f"{kind}:size={size}:hex={hex_text}:ascii={ascii_text}") - return rows - - -def data_item_bytes(item: Dict[str, Any]) -> bytes: - if not item.get("data"): - return b"" - try: - return base64.b64decode(item["data"]) - except Exception: - return b"" - - -def data_item_size(item: Dict[str, Any]) -> int: - try: - return int(item.get("size") or len(data_item_bytes(item))) - except Exception: - return len(data_item_bytes(item)) - - -def data_gap_stats(left: Dict[str, Any], right: Optional[Dict[str, Any]]) -> Tuple[int, int, int, int, Optional[int], str]: - left_rows = left.get("data_diff", []) - right_rows = [] if right is None else right.get("data_diff", []) - if right is None: - current_only = sum(data_item_size(item) for item in left_rows) - first = 0 if left_rows else None - return 0, current_only, 0, 0, first, "unmatched-current" - - equal_bytes = 0 - current_only = 0 - target_only = 0 - changed = 0 - first: Optional[int] = None - first_kind = "" - - for index in range(max(len(left_rows), len(right_rows))): - left_item = left_rows[index] if index < len(left_rows) else {} - right_item = right_rows[index] if index < len(right_rows) else {} - left_kind = left_item.get("kind") or "DATA" - right_kind = right_item.get("kind") or "DATA" - left_data = data_item_bytes(left_item) - right_data = data_item_bytes(right_item) - size = max(data_item_size(left_item), data_item_size(right_item)) - - if left_kind == "DATA" and right_kind == "DATA" and left_data == right_data: - equal_bytes += size - continue - - if left_kind == "DIFF_DELETE" and not right_data: - current_only += size - kind = "current-only" - elif right_kind == "DIFF_INSERT" and not left_data: - target_only += size - kind = "target-only" - elif left_data or right_data: - changed += size - kind = "changed" - else: - changed += size - kind = f"{left_kind.lower()}/{right_kind.lower()}" - - if first is None: - first = index - first_kind = kind - - return equal_bytes, current_only, target_only, changed, 
first, first_kind - - -def diff_count(sym: Dict[str, Any]) -> int: - return sum(1 for item in sym.get("instructions", []) if item.get("diff_kind")) - - -def first_diff_index(sym: Dict[str, Any]) -> Optional[int]: - for index, item in enumerate(sym.get("instructions", [])): - if item.get("diff_kind"): - return index - return None - - -def first_formatted_diff(left: Dict[str, Any], right: Dict[str, Any], ignore_relocs: bool) -> Tuple[Optional[int], str, str]: - left_i = instrs(left, False, ignore_relocs) - right_i = instrs(right, False, ignore_relocs) - for index, (left_text, right_text) in enumerate(zip(left_i, right_i)): - if left_text != right_text: - return index, left_text, right_text - if len(left_i) != len(right_i): - index = min(len(left_i), len(right_i)) - left_text = left_i[index] if index < len(left_i) else "" - right_text = right_i[index] if index < len(right_i) else "" - return index, left_text, right_text - return None, "", "" - - -def formatted_distance(left: Dict[str, Any], right: Dict[str, Any], ignore_relocs: bool) -> Tuple[int, Optional[int], str, str]: - left_i = instrs(left, False, ignore_relocs) - right_i = instrs(right, False, ignore_relocs) - diffs = 0 - first = None - first_left = "" - first_right = "" - limit = max(len(left_i), len(right_i)) - for index in range(limit): - left_text = left_i[index] if index < len(left_i) else "" - right_text = right_i[index] if index < len(right_i) else "" - if left_text == right_text: - continue - diffs += 1 - if first is None: - first = index - first_left = left_text - first_right = right_text - return diffs, first, first_left, first_right - - -def gaps(args: argparse.Namespace) -> int: - units = args.units or list(UNITS) - for unit in units: - base_path = current_json(unit, not args.no_rebuild, not args.cached) - data = load_json(base_path) - if data is None: - continue - print(f"\n## {unit} current {section_score(data):.6g} ({base_path.name})") - rows = [] - for name, sym in symbols(data).items(): - score 
= sym.get("match_percent") - if score is None: - continue - if not args.include_data and not is_function(sym): - continue - if not args.include_matched and score >= 100.0 - args.epsilon: - continue - if args.min_score is not None and score < args.min_score: - continue - if args.max_score is not None and score > args.max_score: - continue - target_first: Optional[int] = None - left_text = "" - right_text = "" - if args.target: - try: - target_first, left_text, right_text = first_formatted_diff( - sym, right_symbol(data, sym), args.ignore_relocs - ) - except Exception: - pass - if args.reloc_only and target_first is not None: - continue - if args.shape_only and target_first is None: - continue - rows.append( - ( - score, - int(sym.get("size") or 0), - diff_count(sym), - first_diff_index(sym), - target_first, - left_text, - right_text, - name, - ) - ) - - if args.closest: - rows.sort(key=lambda row: (100.0 - row[0], -row[1])) - elif args.target_sort: - rows.sort(key=lambda row: (row[4] is None, row[4] or 0, -row[1])) - else: - rows.sort(key=lambda row: (row[0], -row[1])) - - for score, size, diffs, first, target_first, left_text, right_text, name in rows[: args.limit]: - first_text = "-" if first is None else str(first) - if args.target: - target_text = "-" if target_first is None else str(target_first) - print( - f"{score:9.6g} size={size:<5} diffs={diffs:<4} " - f"first={first_text:<4} tfirst={target_text:<4} {name}" - ) - if args.show_diff and target_first is not None: - print(f" {left_text} | {right_text}") - else: - print(f"{score:9.6g} size={size:<5} diffs={diffs:<4} first={first_text:<4} {name}") - return 0 - - -def diff_tag(target_first: Optional[int], left_text: str, right_text: str) -> str: - if target_first is None: - return "reloc" - if left_text.startswith("<") or right_text.startswith("<"): - return "insert/delete" - left_op = left_text.split(" ", 1)[0] - right_op = right_text.split(" ", 1)[0] - if left_op != right_op: - return "opcode" - if 
is_branch_opcode(left_op): - return "branch-target" - if target_first == 0: - return "prologue" - return "operand" - - -def is_branch_opcode(opcode: str) -> bool: - return opcode == "b" or opcode.startswith("b") and not opcode.startswith("bl") - - -def tag_set(value: Optional[str]) -> set[str]: - if not value: - return set() - return {item.strip().lower() for item in value.split(",") if item.strip()} - - -def abstract_text(text: str, abstract_regs: bool) -> str: - if not abstract_regs: - return text - return re.sub(r"\b[rf]\d+\b", lambda match: match.group(0)[0] + "N", text) - - -def triage(args: argparse.Namespace) -> int: - units = args.units or list(UNITS) - only_tags = tag_set(args.only_tags) - skip_tags = tag_set(args.skip_tags) - for unit in units: - base_path = current_json(unit, not args.no_rebuild, not args.cached) - data = load_json(base_path) - if data is None: - continue - base_syms = symbols(data) - best_artifacts = best_artifact_symbols( - unit, - base_path, - base_syms, - target_fingerprint(data), - args.wide, - args.include_data, - args.include_stale_target, - args.epsilon, - ) - rows = [] - - for name, sym in base_syms.items(): - score = sym.get("match_percent") - if score is None: - continue - if not args.include_data and not is_function(sym): - continue - if not args.include_matched and score >= 100.0 - args.epsilon: - continue - if args.min_score is not None and score < args.min_score: - continue - if args.max_score is not None and score > args.max_score: - continue - - try: - target_first, left_text, right_text = first_formatted_diff( - sym, right_symbol(data, sym), args.ignore_relocs - ) - except Exception: - target_first, left_text, right_text = None, "", "" - if args.shape_only and target_first is None: - continue - if args.reloc_only and target_first is not None: - continue - - old, new, artifact = best_artifacts.get(name, (score, score, "")) - artifact_gain = new - old - if args.artifacts_only and artifact_gain <= args.epsilon: - continue 
- tag = diff_tag(target_first, left_text, right_text) - if only_tags and tag not in only_tags: - continue - if skip_tags and tag in skip_tags: - continue - - rows.append( - ( - artifact_gain, - score, - int(sym.get("size") or 0), - diff_count(sym), - first_diff_index(sym), - target_first, - tag, - left_text, - right_text, - new, - artifact, - name, - ) - ) - - rows.sort(key=lambda row: (row[0] <= args.epsilon, -row[0], 100.0 - row[1], -row[2])) - print(f"\n## {unit} current {section_score(data):.6g} ({base_path.name})") - for ( - artifact_gain, - score, - size, - diffs, - first, - target_first, - tag, - left_text, - right_text, - artifact_score, - artifact, - name, - ) in rows[: args.limit]: - first_text = "-" if first is None else str(first) - target_text = "-" if target_first is None else str(target_first) - artifact_text = "" - if artifact_gain > args.epsilon: - artifact_text = f" artifact={artifact_score:.6g} +{artifact_gain:.6g} {artifact}" - print( - f"{score:9.6g} size={size:<5} diffs={diffs:<4} first={first_text:<4} " - f"tfirst={target_text:<4} {tag:<13} {name}{artifact_text}" - ) - if args.show_diff and target_first is not None: - print(f" {left_text} | {right_text}") - return 0 - - -def next_candidates(args: argparse.Namespace) -> int: - units = args.units or list(UNITS) - only_tags = tag_set(args.only_tags) - skip_tags = tag_set(args.skip_tags) - rows = [] - - for unit in units: - base_path = current_json(unit, not args.no_rebuild, not args.cached) - data = load_json(base_path) - if data is None: - continue - base_unit_score = section_score(data) - base_syms = symbols(data) - best_artifacts = best_artifact_symbols( - unit, - base_path, - base_syms, - target_fingerprint(data), - args.wide, - args.include_data, - args.include_stale_target, - args.epsilon, - base_unit_score, - args.min_artifact_unit_gain, - ) - - for name, sym in base_syms.items(): - score = sym.get("match_percent") - if score is None: - continue - if not args.include_data and not 
is_function(sym): - continue - if not args.include_matched and score >= 100.0 - args.epsilon: - continue - if args.min_score is not None and score < args.min_score: - continue - if args.max_score is not None and score > args.max_score: - continue - - try: - target_first, left_text, right_text = first_formatted_diff( - sym, right_symbol(data, sym), args.ignore_relocs - ) - except Exception: - target_first, left_text, right_text = None, "", "" - tag = diff_tag(target_first, left_text, right_text) - if only_tags and tag not in only_tags: - continue - if skip_tags and tag in skip_tags: - continue - - old, new, artifact = best_artifacts.get(name, (score, score, "")) - artifact_gain = new - old - source = source_anchor(unit, name) - ghidra = ghidra_anchor(name) - rows.append( - ( - artifact_gain, - score, - diff_count(sym), - target_first if target_first is not None else 1000000, - int(sym.get("size") or 0), - unit, - name, - tag, - left_text, - right_text, - new, - artifact, - source, - ghidra, - ) - ) - - rows.sort(key=lambda row: (row[0] <= args.epsilon, -row[0], row[2], 100.0 - row[1], row[3], -row[4])) - for ( - artifact_gain, - score, - diffs, - target_first, - size, - unit, - name, - tag, - left_text, - right_text, - artifact_score, - artifact, - source, - ghidra, - ) in rows[: args.limit]: - target_text = "-" if target_first == 1000000 else str(target_first) - artifact_text = "" - if artifact_gain > args.epsilon: - artifact_text = f" artifact={artifact_score:.6g} +{artifact_gain:.6g} {artifact}" - print( - f"{unit:<8} {score:9.6g} size={size:<5} diffs={diffs:<4} " - f"tfirst={target_text:<4} {tag:<13} {name}{artifact_text}" - ) - if args.show_diff and target_first != 1000000: - print(f" {left_text} | {right_text}") - if source: - print(f" src: {source}") - if ghidra: - print(f" ghidra: {ghidra}") - if args.show_refs: - for hit in reference_hits(name, args.ref_limit): - print(f" ref: {hit}") - return 0 - - -def refs(args: argparse.Namespace) -> int: - for symbol 
in args.symbols: - hits = reference_hits(symbol, args.limit) - print(symbol) - if not hits: - print(" no reference hits") - continue - for hit in hits: - print(f" {hit}") - return 0 - - -def patterns(args: argparse.Namespace) -> int: - units = args.units or list(UNITS) - only_tags = tag_set(args.only_tags) - skip_tags = tag_set(args.skip_tags) - groups: Dict[Tuple[str, str, str], List[Tuple[str, str, float, int, Optional[int]]]] = {} - - for unit in units: - base_path = current_json(unit, not args.no_rebuild, not args.cached) - data = load_json(base_path) - if data is None: - continue - - for name, sym in symbols(data).items(): - score = sym.get("match_percent") - if score is None: - continue - if not args.include_data and not is_function(sym): - continue - if not args.include_matched and score >= 100.0 - args.epsilon: - continue - if args.min_score is not None and score < args.min_score: - continue - if args.max_score is not None and score > args.max_score: - continue - - try: - target_first, left_text, right_text = first_formatted_diff( - sym, right_symbol(data, sym), args.ignore_relocs - ) - except Exception: - target_first, left_text, right_text = None, "", "" - if args.shape_only and target_first is None: - continue - if args.reloc_only and target_first is not None: - continue - - tag = diff_tag(target_first, left_text, right_text) - if only_tags and tag not in only_tags: - continue - if skip_tags and tag in skip_tags: - continue - - key = (tag, abstract_text(left_text, args.abstract_regs), abstract_text(right_text, args.abstract_regs)) - groups.setdefault(key, []).append( - (unit, name, score, int(sym.get("size") or 0), target_first) - ) - - rows = [] - for key, entries in groups.items(): - if len(entries) < args.min_count: - continue - avg_score = sum(entry[2] for entry in entries) / len(entries) - total_size = sum(entry[3] for entry in entries) - firsts = [entry[4] for entry in entries if entry[4] is not None] - first = min(firsts) if firsts else None - 
rows.append((len(entries), total_size, avg_score, first, key, entries)) - - rows.sort(key=lambda row: (-row[0], -row[1], row[2])) - for count, total_size, avg_score, first, key, entries in rows[: args.limit]: - tag, left_text, right_text = key - first_text = "-" if first is None else str(first) - print( - f"{count:3} funcs size={total_size:<5} avg={avg_score:9.6g} " - f"first={first_text:<4} {tag:<13} {left_text} | {right_text}" - ) - for unit, name, score, size, _ in entries[: args.examples]: - print(f" {unit:<8} {score:9.6g} size={size:<5} {name}") - return 0 - - -def shape(args: argparse.Namespace) -> int: - units = args.units or list(UNITS) - for unit in units: - base_path = current_json(unit, not args.no_rebuild, not args.cached) - base = load_json(base_path) - if base is None: - continue - base_syms = symbols(base) - base_target_syms = target_symbols(base) - base_target_fingerprint = target_fingerprint(base) - rows = [] - for path in artifact_paths(unit, args.wide): - if path.resolve() == base_path.resolve(): - continue - artifact = load_json(path) - if artifact is None: - continue - if not args.include_stale_target and target_fingerprint(artifact) != base_target_fingerprint: - continue - for name, artifact_sym in symbols(artifact).items(): - if args.symbol and name != args.symbol: - continue - if not args.include_data and not is_function(artifact_sym): - continue - base_sym = base_syms.get(name) - if base_sym is None: - continue - old_score = base_sym.get("match_percent") - new_score = artifact_sym.get("match_percent") - if old_score is None or new_score is None: - continue - score_gain = new_score - old_score - if score_gain < args.min_score_gain: - continue - base_target = base_target_syms.get(name) - if base_target is None: - try: - base_target = right_symbol(base, base_sym) - except Exception: - continue - artifact_target = base_target_syms.get(name) - if artifact_target is None: - continue - base_dist, base_first, _, _ = formatted_distance(base_sym, 
base_target, args.ignore_relocs) - artifact_dist, artifact_first, left_text, right_text = formatted_distance( - artifact_sym, artifact_target, args.ignore_relocs - ) - shape_gain = base_dist - artifact_dist - if shape_gain < args.min_shape_gain: - continue - rows.append( - ( - shape_gain, - score_gain, - old_score, - new_score, - base_dist, - artifact_dist, - base_first, - artifact_first, - int(artifact_sym.get("size") or 0), - path.name, - name, - left_text, - right_text, - ) - ) - - rows.sort(key=lambda row: (-row[0], -row[1], row[5], -row[8])) - if not args.all_artifacts: - unique_rows = [] - seen_symbols = set() - for row in rows: - name = row[10] - if name in seen_symbols: - continue - seen_symbols.add(name) - unique_rows.append(row) - rows = unique_rows - print(f"\n## {unit} current {section_score(base):.6g} ({base_path.name})") - for ( - shape_gain, - score_gain, - old_score, - new_score, - base_dist, - artifact_dist, - base_first, - artifact_first, - size, - artifact_name, - name, - left_text, - right_text, - ) in rows[: args.limit]: - base_first_text = "-" if base_first is None else str(base_first) - artifact_first_text = "-" if artifact_first is None else str(artifact_first) - print( - f"{shape_gain:+5} shape {base_dist}->{artifact_dist} " - f"score {old_score:.6g}->{new_score:.6g} ({score_gain:+.6g}) " - f"first {base_first_text}->{artifact_first_text} size={size:<5} {name} {artifact_name}" - ) - if args.show_diff and artifact_first is not None: - print(f" {left_text} | {right_text}") - return 0 - - -def datagaps(args: argparse.Namespace) -> int: - units = args.units or list(UNITS) - for unit in units: - cur_path = current_json(unit, not args.no_rebuild, not args.cached) - cur = load_json(cur_path) - if cur is None: - continue - - rows = [] - for name, sym in symbols(cur).items(): - if not sym.get("data_diff"): - continue - if is_function(sym): - continue - if sym.get("kind") == "SYMBOL_SECTION" and not args.sections: - continue - if 
name.startswith("[.") and not args.sections: - continue - if args.symbol and name != args.symbol: - continue - score = sym.get("match_percent") - if not args.include_matched and score is not None and score >= 100.0 - args.epsilon: - continue - if args.min_score is not None and (score is None or score < args.min_score): - continue - if args.max_score is not None and (score is None or score > args.max_score): - continue - - target_index = sym.get("target_symbol") - if target_index is None: - right = None - else: - try: - right = cur.get("right", {}).get("symbols", [])[target_index] - except Exception: - right = None - equal_bytes, current_only, target_only, changed, first, first_kind = data_gap_stats(sym, right) - if current_only == 0 and target_only == 0 and changed == 0 and not args.include_matched: - continue - rows.append( - ( - target_only, - current_only, - changed, - score is None, - 100.0 if score is None else score, - int(sym.get("size") or 0), - equal_bytes, - first, - first_kind, - name, - ) - ) - - rows.sort(key=lambda row: (-row[0], -row[1], -row[2], row[3], row[4], -row[5])) - print(f"\n## {unit} current {section_score(cur):.6g} ({cur_path.name})") - for target_only, current_only, changed, score_missing, score, size, equal_bytes, first, first_kind, name in rows[: args.limit]: - score_text = "-" if score_missing else f"{score:.6g}" - first_text = "-" if first is None else f"{first}:{first_kind}" - print( - f"{score_text:>9} size={size:<6} equal={equal_bytes:<6} " - f"target+={target_only:<5} current+={current_only:<5} changed={changed:<5} " - f"first={first_text:<18} {name}" - ) - return 0 - - -def compare(args: argparse.Namespace) -> int: - cur_path = current_json(args.unit, not args.no_rebuild, not args.cached) - cur = load_json(cur_path) - other = None if args.target and not args.artifact_target else load_json(Path(args.artifact)) - if cur is None or (other is None and not args.target): - raise SystemExit("could not load JSON") - - cur_left = 
left_symbol(cur, args.symbol) - if args.artifact_target: - assert other is not None - other_left = left_symbol(other, args.symbol) - current_name = args.artifact - other_sym = right_symbol(other, other_left) - artifact_name = "target" - cur_i = instrs(other_left, args.diff_metadata, args.ignore_relocs) - elif args.target: - other_sym = right_symbol(cur, cur_left) - current_name = str(cur_path) - artifact_name = "target" - cur_i = instrs(cur_left, args.diff_metadata, args.ignore_relocs) - else: - assert other is not None - other_sym = left_symbol(other, args.symbol) - current_name = str(cur_path) - artifact_name = args.artifact - cur_i = instrs(cur_left, args.diff_metadata, args.ignore_relocs) - - other_i = instrs(other_sym, args.diff_metadata, args.ignore_relocs) - first = None - for i, (left, right) in enumerate(zip(cur_i, other_i)): - if left != right: - first = i - break - if first is None and len(cur_i) != len(other_i): - first = min(len(cur_i), len(other_i)) - if first is None: - print("no formatted instruction differences") - if args.data_diff: - print_data_diff(args.symbol, cur_left, other_sym, args.data_limit) - return 0 - - start = max(0, first - args.context) - end = min(max(len(cur_i), len(other_i)), first + args.context + 1) - print(f"{args.symbol}: first formatted diff at instruction {first}") - print(f"current: {current_name}") - print(f"artifact: {artifact_name}") - for i in range(start, end): - left = cur_i[i] if i < len(cur_i) else "" - right = other_i[i] if i < len(other_i) else "" - mark = "==" if left == right else "!=" - print(f"{i:04d} {mark} {left:<42} | {right}") - if args.data_diff: - print_data_diff(args.symbol, cur_left, other_sym, args.data_limit) - return 0 - - -def print_data_diff(symbol: str, left: Dict[str, Any], right: Dict[str, Any], limit: int) -> None: - left_rows = data_diff_rows(left) - right_rows = data_diff_rows(right) - if not left_rows and not right_rows: - print(f"{symbol}: no data_diff chunks") - return - - count = 
min(max(len(left_rows), len(right_rows)), limit) - print(f"{symbol}: data_diff chunks (showing {count}/{max(len(left_rows), len(right_rows))})") - for i in range(count): - left_text = left_rows[i] if i < len(left_rows) else "" - right_text = right_rows[i] if i < len(right_rows) else "" - mark = "==" if left_text == right_text else "!=" - print(f"data {i:02d} {mark} {left_text:<80} | {right_text}") - - -def score(args: argparse.Namespace) -> int: - units = args.units or list(UNITS) - names = set(args.symbols) - for unit in units: - cur_path = current_json(unit, not args.no_rebuild, not args.cached) - cur = load_json(cur_path) - if cur is None: - continue - - unit_score = section_score(cur) - unit_size = section_size(cur) - if unit_score is None: - print(f"{unit:<10} score= ({cur_path.name})") - elif unit_size is None: - print(f"{unit:<10} score={unit_score:.6g} ({cur_path.name})") - else: - print(f"{unit:<10} score={unit_score:.6g} size={unit_size} ({cur_path.name})") - - if names: - for name, sym in symbols(cur).items(): - if name in names or (args.demangle_base and "__" in name and name.split("__", 1)[0] in names): - sym_score = sym.get("match_percent") - sym_size = sym.get("size") - if sym_score is None: - print(f" {name:<40} score= size={sym_size}") - else: - print(f" {name:<40} score={sym_score:.6g} size={sym_size}") - return 0 - - -def dump(args: argparse.Namespace) -> int: - cur_path = current_json(args.unit, not args.no_rebuild, not args.cached) - cur = load_json(cur_path) - if cur is None: - print(f"could not read current JSON: {cur_path}", file=sys.stderr) - return 1 - - sides = ("left", "right") if args.side == "both" else (args.side,) - missing = False - for requested_name in args.symbols: - for side in sides: - sym = side_symbols(cur, side).get(requested_name) - if sym is None and args.demangle_base: - for name, candidate in side_symbols(cur, side).items(): - if "__" in name and name.split("__", 1)[0] == requested_name: - sym = candidate - break - - if 
sym is None: - print(f"{args.unit} {side}: missing symbol {requested_name}", file=sys.stderr) - missing = True - continue - - sym_name = sym.get("name", requested_name) - sym_score = sym.get("match_percent") - sym_size = sym.get("size") - score_text = "" if sym_score is None else f"{sym_score:.6g}" - print(f"## {args.unit} {side} {sym_name} score={score_text} size={sym_size}") - - instructions = sym.get("instructions", []) - start = max(args.start, 0) - end = len(instructions) if args.count is None else min(len(instructions), start + args.count) - for index in range(start, end): - formatted = instructions[index].get("instruction", {}).get("formatted") - if formatted is None and not args.show_gaps: - continue - print(f"{index:04d}: {formatted}") - print() - - return 1 if missing else 0 - - -def main(argv: Optional[List[str]] = None) -> int: - parser = argparse.ArgumentParser(description=__doc__) - sub = parser.add_subparsers(dest="cmd", required=True) - - p_mine = sub.add_parser("mine", help="find old artifacts that improve current scores") - p_mine.add_argument("units", nargs="*") - p_mine.add_argument("--symbol") - p_mine.add_argument("--limit", type=int, default=12) - p_mine.add_argument("--symbol-limit", type=int, default=8) - p_mine.add_argument("--epsilon", type=float, default=0.00001) - p_mine.add_argument("--no-rebuild", action="store_true") - p_mine.add_argument("--cached", action="store_true", help="reuse the last current JSON") - p_mine.add_argument("--wide", action="store_true", help="search *unit* JSON names instead of strict prefixes") - p_mine.add_argument("--per-symbol", action="store_true") - p_mine.add_argument("--include-data", action="store_true", help="include non-function symbols") - p_mine.add_argument( - "--include-partial-section", - action="store_true", - help="allow section-score rows from artifacts that only cover part of the unit", - ) - p_mine.add_argument("--include-stale-target", action="store_true", help="include artifacts with old 
target JSON") - p_mine.add_argument("--include-section-only", action="store_true", help="show unit-score rows with no symbol wins") - p_mine.set_defaults(func=mine) - - p_snapshot = sub.add_parser("snapshot", help="save a current unit JSON for probe deltas") - p_snapshot.add_argument("unit", choices=sorted(UNITS)) - p_snapshot.add_argument("label", help="label or JSON path") - p_snapshot.add_argument("--no-rebuild", action="store_true") - p_snapshot.set_defaults(func=snapshot) - - p_delta = sub.add_parser("delta", help="compare a probe JSON against a saved baseline") - p_delta.add_argument("unit", choices=sorted(UNITS)) - p_delta.add_argument("before", help="label or JSON path") - p_delta.add_argument("after", nargs="?", help="label or JSON path; defaults to current fresh") - p_delta.add_argument("--limit", type=int, default=20) - p_delta.add_argument("--epsilon", type=float, default=0.00001) - p_delta.add_argument("--include-data", action="store_true", help="include non-function symbols") - p_delta.add_argument("--no-rebuild", action="store_true") - p_delta.add_argument("--cached", action="store_true", help="reuse the last current JSON") - p_delta.set_defaults(func=delta) - - p_gaps = sub.add_parser("gaps", help="show current low/close matching symbols") - p_gaps.add_argument("units", nargs="*") - p_gaps.add_argument("--limit", type=int, default=20) - p_gaps.add_argument("--min-score", type=float) - p_gaps.add_argument("--max-score", type=float) - p_gaps.add_argument("--closest", action="store_true", help="rank closest-to-100 first") - p_gaps.add_argument("--target-sort", action="store_true", help="rank by first target-shape diff") - p_gaps.add_argument("--target", action="store_true", help="show first formatted diff against target") - p_gaps.add_argument("--show-diff", action="store_true", help="print the first target diff text") - p_gaps.add_argument("--ignore-relocs", action="store_true", help="normalize relocation operands") - 
p_gaps.add_argument("--reloc-only", action="store_true", help="only show symbols with no normalized target diff") - p_gaps.add_argument("--shape-only", action="store_true", help="only show symbols with a normalized target diff") - p_gaps.add_argument("--include-matched", action="store_true", help="include 100%% symbols") - p_gaps.add_argument("--include-data", action="store_true", help="include non-function symbols") - p_gaps.add_argument("--epsilon", type=float, default=0.00001) - p_gaps.add_argument("--no-rebuild", action="store_true") - p_gaps.add_argument("--cached", action="store_true", help="reuse the last current JSON") - p_gaps.set_defaults(func=gaps) - - p_triage = sub.add_parser("triage", help="rank Bink symbols by artifact wins and target shape") - p_triage.add_argument("units", nargs="*") - p_triage.add_argument("--limit", type=int, default=12) - p_triage.add_argument("--min-score", type=float) - p_triage.add_argument("--max-score", type=float) - p_triage.add_argument("--epsilon", type=float, default=0.00001) - p_triage.add_argument("--wide", action="store_true", help="search *unit* JSON names instead of strict prefixes") - p_triage.add_argument("--ignore-relocs", action="store_true", help="normalize relocation operands") - p_triage.add_argument("--show-diff", action="store_true", help="print the first target diff text") - p_triage.add_argument("--include-matched", action="store_true", help="include 100%% symbols") - p_triage.add_argument("--include-data", action="store_true", help="include non-function symbols") - p_triage.add_argument("--reloc-only", action="store_true", help="only show symbols with no normalized target diff") - p_triage.add_argument("--shape-only", action="store_true", help="only show symbols with a normalized target diff") - p_triage.add_argument("--artifacts-only", action="store_true", help="only show symbols improved by artifacts") - p_triage.add_argument("--include-stale-target", action="store_true", help="include artifacts with 
old target JSON") - p_triage.add_argument("--only-tags", help="comma-separated diff tags to include") - p_triage.add_argument("--skip-tags", help="comma-separated diff tags to exclude") - p_triage.add_argument("--no-rebuild", action="store_true") - p_triage.add_argument("--cached", action="store_true", help="reuse the last current JSON") - p_triage.set_defaults(func=triage) - - p_next = sub.add_parser("next", help="rank actionable symbols with source/decomp anchors") - p_next.add_argument("units", nargs="*") - p_next.add_argument("--limit", type=int, default=16) - p_next.add_argument("--min-score", type=float, default=80.0) - p_next.add_argument("--max-score", type=float) - p_next.add_argument("--epsilon", type=float, default=0.00001) - p_next.add_argument("--wide", action="store_true", help="search *unit* JSON names instead of strict prefixes") - p_next.add_argument("--ignore-relocs", action="store_true", help="normalize relocation operands") - p_next.add_argument("--show-diff", action="store_true", help="print the first target diff text") - p_next.add_argument("--show-refs", action="store_true", help="print reference-project hits for each symbol") - p_next.add_argument("--ref-limit", type=int, default=3, help="maximum reference hits to print per symbol") - p_next.add_argument("--include-matched", action="store_true", help="include 100%% symbols") - p_next.add_argument("--include-data", action="store_true", help="include non-function symbols") - p_next.add_argument("--include-stale-target", action="store_true", help="include artifacts with old target JSON") - p_next.add_argument( - "--min-artifact-unit-gain", - type=float, - default=0.0, - help="minimum unit-score gain required before using an artifact as a per-symbol hint", - ) - p_next.add_argument("--only-tags", help="comma-separated diff tags to include") - p_next.add_argument( - "--skip-tags", - default="operand,reloc,branch-target", - help="comma-separated diff tags to skip; defaults to 
operand,reloc,branch-target", - ) - p_next.add_argument("--no-rebuild", action="store_true") - p_next.add_argument("--cached", action="store_true", help="reuse the last current JSON") - p_next.set_defaults(func=next_candidates) - - p_refs = sub.add_parser("refs", help="search bink reference projects for symbols") - p_refs.add_argument("symbols", nargs="+") - p_refs.add_argument("--limit", type=int, default=8) - p_refs.set_defaults(func=refs) - - p_patterns = sub.add_parser("patterns", help="group repeated current target-diff shapes") - p_patterns.add_argument("units", nargs="*") - p_patterns.add_argument("--limit", type=int, default=20) - p_patterns.add_argument("--examples", type=int, default=4) - p_patterns.add_argument("--min-count", type=int, default=2) - p_patterns.add_argument("--min-score", type=float) - p_patterns.add_argument("--max-score", type=float) - p_patterns.add_argument("--epsilon", type=float, default=0.00001) - p_patterns.add_argument("--ignore-relocs", action="store_true", help="normalize relocation operands") - p_patterns.add_argument("--abstract-regs", action="store_true", help="group patterns with different register numbers") - p_patterns.add_argument("--include-matched", action="store_true", help="include 100%% symbols") - p_patterns.add_argument("--include-data", action="store_true", help="include non-function symbols") - p_patterns.add_argument("--reloc-only", action="store_true", help="only show symbols with no normalized target diff") - p_patterns.add_argument("--shape-only", action="store_true", help="only show symbols with a normalized target diff") - p_patterns.add_argument("--only-tags", help="comma-separated diff tags to include") - p_patterns.add_argument("--skip-tags", help="comma-separated diff tags to exclude") - p_patterns.add_argument("--no-rebuild", action="store_true") - p_patterns.add_argument("--cached", action="store_true", help="reuse the last current JSON") - p_patterns.set_defaults(func=patterns) - - p_shape = 
sub.add_parser("shape", help="rank artifacts by normalized target-shape improvement") - p_shape.add_argument("units", nargs="*") - p_shape.add_argument("--symbol") - p_shape.add_argument("--limit", type=int, default=12) - p_shape.add_argument("--min-score-gain", type=float, default=0.00001) - p_shape.add_argument("--min-shape-gain", type=int, default=1) - p_shape.add_argument("--ignore-relocs", action="store_true", help="normalize relocation operands") - p_shape.add_argument("--show-diff", action="store_true", help="print artifact's first target diff") - p_shape.add_argument("--wide", action="store_true", help="search *unit* JSON names instead of strict prefixes") - p_shape.add_argument("--all-artifacts", action="store_true", help="show repeated artifact snapshots per symbol") - p_shape.add_argument("--include-stale-target", action="store_true", help="include artifacts with old target JSON") - p_shape.add_argument("--include-data", action="store_true", help="include non-function symbols") - p_shape.add_argument("--no-rebuild", action="store_true") - p_shape.add_argument("--cached", action="store_true", help="reuse the last current JSON") - p_shape.set_defaults(func=shape) - - p_datagaps = sub.add_parser("datagaps", help="rank current Bink data/object byte gaps") - p_datagaps.add_argument("units", nargs="*") - p_datagaps.add_argument("--symbol") - p_datagaps.add_argument("--limit", type=int, default=20) - p_datagaps.add_argument("--min-score", type=float) - p_datagaps.add_argument("--max-score", type=float) - p_datagaps.add_argument("--epsilon", type=float, default=0.00001) - p_datagaps.add_argument("--sections", action="store_true", help="include aggregate section symbols") - p_datagaps.add_argument("--include-matched", action="store_true", help="include 100%% data symbols") - p_datagaps.add_argument("--no-rebuild", action="store_true") - p_datagaps.add_argument("--cached", action="store_true", help="reuse the last current JSON") - 
p_datagaps.set_defaults(func=datagaps) - - p_compare = sub.add_parser("compare", help="compare current symbol shape with an artifact") - p_compare.add_argument("unit", choices=sorted(UNITS)) - p_compare.add_argument("artifact", help="artifact JSON, or '-' with --target") - p_compare.add_argument("symbol") - p_compare.add_argument("--context", type=int, default=25) - p_compare.add_argument("--target", action="store_true") - p_compare.add_argument("--artifact-target", action="store_true") - p_compare.add_argument("--diff-metadata", action="store_true") - p_compare.add_argument("--data-diff", action="store_true", help="also print data_diff chunks for object/section symbols") - p_compare.add_argument("--data-limit", type=int, default=16, help="maximum data_diff chunks to print") - p_compare.add_argument("--ignore-relocs", action="store_true", help="normalize relocation operands") - p_compare.add_argument("--no-rebuild", action="store_true") - p_compare.add_argument("--cached", action="store_true", help="reuse the last current JSON") - p_compare.set_defaults(func=compare) - - p_score = sub.add_parser("score", help="print current unit and selected symbol scores") - p_score.add_argument("units", nargs="*", choices=sorted(UNITS)) - p_score.add_argument("--symbol", dest="symbols", action="append", default=[]) - p_score.add_argument( - "--demangle-base", - action="store_true", - help="also match C++ names before the first double underscore", - ) - p_score.add_argument("--no-rebuild", action="store_true") - p_score.add_argument("--cached", action="store_true", help="reuse the last current JSON") - p_score.set_defaults(func=score) - - p_dump = sub.add_parser("dump", help="dump target/current instructions for current symbols") - p_dump.add_argument("unit", choices=sorted(UNITS)) - p_dump.add_argument("symbols", nargs="+") - p_dump.add_argument("--side", choices=("left", "right", "both"), default="both") - p_dump.add_argument("--start", type=int, default=0) - 
p_dump.add_argument("--count", type=int) - p_dump.add_argument("--show-gaps", action="store_true", help="show alignment gaps as None rows") - p_dump.add_argument( - "--demangle-base", - action="store_true", - help="also match C++ names before the first double underscore", - ) - p_dump.add_argument("--no-rebuild", action="store_true") - p_dump.add_argument("--cached", action="store_true", help="reuse the last current JSON") - p_dump.set_defaults(func=dump) - - args = parser.parse_args(argv) - return args.func(args) - - -if __name__ == "__main__": - sys.exit(main()) From 6f8a7c06346d221c09b6def9e106592242eea3d1 Mon Sep 17 00:00:00 2001 From: Zachary Canann Date: Mon, 18 May 2026 12:31:21 -0700 Subject: [PATCH 3/3] Fix Bink build after pruning SDK header changes --- src/bink/src/sdk/decode/ngc/ngcfile.c | 11 ++++++++++- src/bink/src/sdk/decode/ngc/ngcsnd.c | 7 +++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/bink/src/sdk/decode/ngc/ngcfile.c b/src/bink/src/sdk/decode/ngc/ngcfile.c index 492ffd28f..a5dda4eae 100644 --- a/src/bink/src/sdk/decode/ngc/ngcfile.c +++ b/src/bink/src/sdk/decode/ngc/ngcfile.c @@ -3,11 +3,20 @@ #include "ngcfile.h" #include "ngcrgb.h" +/* + * The unmodified BFBB Dolphin DVD header includes "types.h", which resolves to + * the game's type header under the Bink compiler include order. radbase.h has + * already pulled in dolphin/types.h, so skip that duplicate typedef block. 
+ */ +#ifndef BFBB_TYPES_H +#define BFBB_TYPES_H +#endif + #include -#include #include void PTR4* memmove(void PTR4* dest, const void PTR4* src, u32 len); +s32 DVDConvertPathToEntrynum(const char PTR4* path); typedef enum NGCBinkFileOwnership { diff --git a/src/bink/src/sdk/decode/ngc/ngcsnd.c b/src/bink/src/sdk/decode/ngc/ngcsnd.c index ec9c11f9b..ec60e67bb 100644 --- a/src/bink/src/sdk/decode/ngc/ngcsnd.c +++ b/src/bink/src/sdk/decode/ngc/ngcsnd.c @@ -35,6 +35,13 @@ f32 powf(f32 x, f32 y); #define BINK_NGC_PAN_MAX 0x10000 #define AX_VOICE_PRIORITY_BINK 0x1f +#define AX_SAMPLE_RATE 32000 +#define AX_PB_STATE_STOP 0 +#define AX_PB_STATE_RUN 1 +#define AX_PB_FORMAT_PCM16 10 +#define AX_PB_FORMAT_PCM8 25 +#define AX_MIX_MODE_DEFAULT 3 +#define AX_ADDR_HIGH_SHIFT 16 #define AX_SYNC_VOLUME_MIX (AX_SYNC_FLAG_COPYVOL | AX_SYNC_FLAG_COPYAXPBMIX | AX_SYNC_FLAG_COPYMXRCTRL) #define NGC_SAMPLE_HALF_SHIFT 16