| 1 | .align 2\r |
| 2 | \r |
| 3 | .global expand_blend\r |
| 4 | .global expand_normal\r |
| 5 | \r |
@ ---------------------------------------------------------------------------
@ expand_blend
@
@ Converts a run of 32-bit source entries into 16-bit colors looked up in
@ palette_ram_converted. Each source word carries two palette indices:
@   bits  0-8   index of the top pixel,    bit  9 = top combine flag
@   bits 16-24  index of the bottom pixel, bit 26 = bottom combine flag
@ When both combine flags are set the two colors are alpha blended with the
@ BLDALPHA coefficients, otherwise the top color is written unchanged.
@
@ Input:
@   r0 = screen_src_ptr   (one 32-bit word per pixel)
@   r1 = screen_dest_ptr  (one 16-bit halfword per pixel)
@   r2 = start            (index of the first pixel)
@   r3 = end              (index one past the last pixel)
@
@ Nothing is written when end <= start.
@
@ Register use inside the loops:
@   r2  = pixels remaining        r3  = blend_a        r4  = blend_b
@   r5  = pixel pair / result     r6  = scratch        r9  = saturation mask
@   r10 = palette index mask      r11 = dilation mask  r12 = combine mask
@   r14 = palette_ram_converted
@
@ Clobbers r0-r3, r12 and the flags; r4-r6 and r9-r11 are saved and restored.
@ ---------------------------------------------------------------------------

.Lexpand_blend_pool:
  .word io_registers                      @ +0
  .word palette_ram_converted             @ +4
  .word 0x04000200                        @ +8   combine test mask
  .word 0x07E0F81F                        @ +12  dilation / clamp mask
  .word 0x000003FE                        @ +16  palette index mask (index * 2)
  .word 0x08010020                        @ +20  per-field overflow bits

expand_blend:
  stmdb sp!, { r4, r5, r6, r9, r10, r11, r14 }

  add r0, r0, r2, lsl #2                  @ screen_src_ptr += start (words)
  add r1, r1, r2, lsl #1                  @ screen_dest_ptr += start (halfwords)
  subs r2, r3, r2                         @ r2 = end - start
  ble .Lexpand_blend_done                 @ empty run: the loops below would wrap

  ldr r3, .Lexpand_blend_pool             @ r3 = io_registers
  ldrh r3, [r3, #0x52]                    @ r3 = bldalpha (16 bits; 0x52 is not
                                          @ word aligned, so use a halfword load)
  mov r4, r3, lsr #8                      @ r4 = bldalpha >> 8
  and r3, r3, #0x1F                       @ r3 = blend_a
  and r4, r4, #0x1F                       @ r4 = blend_b
  cmp r3, #16                             @ if(blend_a > 16)
  movgt r3, #16                           @   blend_a = 16
  cmp r4, #16                             @ if(blend_b > 16)
  movgt r4, #16                           @   blend_b = 16

  ldr r14, .Lexpand_blend_pool + 4        @ r14 = palette_ram_converted
  ldr r12, .Lexpand_blend_pool + 8        @ r12 = 0x04000200
  ldr r11, .Lexpand_blend_pool + 12       @ r11 = 0x07E0F81F
  ldr r10, .Lexpand_blend_pool + 16       @ r10 = 0x000003FE

  @ With both coefficients clamped to 16 the sum is at most 32. A sum of at
  @ most 16 can never overflow a color field, so the cheaper loop is used.
  add r5, r3, r4                          @ r5 = blend_a + blend_b
  cmp r5, #16
  bgt .Lexpand_blend_sat_setup            @ sum > 16: fields may overflow


  @ ---- loop without saturation ---------------------------------------------
.Lexpand_blend_loop:
  ldr r5, [r0], #4                        @ r5 = pixel_pair, screen_src_ptr++
  and r6, r5, r12                         @ isolate both combine flags
  cmp r6, r12                             @ both set?
  bne .Lexpand_blend_copy                 @ no: plain palette lookup

  and r6, r10, r5, lsl #1                 @ r6 = (pixel_pair & 0x1FF) << 1
  ldrh r6, [r14, r6]                      @ r6 = pixel_top
  orr r6, r6, r6, lsl #16                 @ duplicate the color into both halves
  and r6, r6, r11                         @ keep the middle field in the upper
                                          @ half and the outer fields in the
                                          @ lower half, leaving gaps for carries

  and r5, r10, r5, lsr #15                @ r5 = ((pixel_pair >> 16) & 0x1FF) << 1
  ldrh r5, [r14, r5]                      @ r5 = pixel_bottom
  orr r5, r5, r5, lsl #16
  and r5, r5, r11                         @ r5 = pixel_bottom_dilated

  mul r5, r4, r5                          @ r5 = pixel_bottom * blend_b
  mla r5, r3, r6, r5                      @ r5 += pixel_top * blend_a

  and r5, r11, r5, lsr #4                 @ divide by 16, drop fraction bits
  orr r5, r5, r5, lsr #16                 @ fold the upper half back down

  strh r5, [r1], #2                       @ *screen_dest_ptr++ = color
  subs r2, r2, #1                         @ counter--
  bne .Lexpand_blend_loop

  ldmia sp!, { r4, r5, r6, r9, r10, r11, pc }

.Lexpand_blend_copy:
  and r5, r10, r5, lsl #1                 @ r5 = (pixel_pair & 0x1FF) << 1
  ldrh r5, [r14, r5]                      @ r5 = pixel_top
  strh r5, [r1], #2                       @ *screen_dest_ptr++ = pixel_top

  subs r2, r2, #1                         @ counter--
  bne .Lexpand_blend_loop

.Lexpand_blend_done:
  ldmia sp!, { r4, r5, r6, r9, r10, r11, pc }


  @ ---- loop with saturation ------------------------------------------------
.Lexpand_blend_sat_setup:
  ldr r9, .Lexpand_blend_pool + 20        @ r9 = 0x08010020

.Lexpand_blend_sat_loop:
  ldr r5, [r0], #4                        @ r5 = pixel_pair, screen_src_ptr++
  and r6, r5, r12                         @ isolate both combine flags
  cmp r6, r12                             @ both set?
  bne .Lexpand_blend_sat_copy             @ no: plain palette lookup

  and r6, r10, r5, lsl #1                 @ r6 = (pixel_pair & 0x1FF) << 1
  ldrh r6, [r14, r6]                      @ r6 = pixel_top
  orr r6, r6, r6, lsl #16
  and r6, r6, r11                         @ r6 = pixel_top_dilated

  and r5, r10, r5, lsr #15                @ r5 = ((pixel_pair >> 16) & 0x1FF) << 1
  ldrh r5, [r14, r5]                      @ r5 = pixel_bottom
  orr r5, r5, r5, lsl #16
  and r5, r5, r11                         @ r5 = pixel_bottom_dilated

  mul r5, r4, r5                          @ r5 = pixel_bottom * blend_b
  mla r5, r3, r6, r5                      @ r5 += pixel_top * blend_a

  @ After the divide by 16 each field may have spilled one bit into the gap
  @ directly above it. Smear that bit down over the whole field so that an
  @ overflowed field reads as all ones.
  and r6, r9, r5, lsr #4                  @ r6 = overflow bit of each field
  orr r6, r6, r6, lsr #1                  @ 2 bits set per overflowed field
  orr r6, r6, r6, lsr #2                  @ 4 bits
  orr r6, r6, r6, lsr #3                  @ 7 bits: covers the widest field
  orr r5, r6, r5, lsr #4                  @ merge with the scaled result

  and r5, r11, r5                         @ drop gap bits (including the smear)
  orr r5, r5, r5, lsr #16                 @ fold the upper half back down
  strh r5, [r1], #2                       @ *screen_dest_ptr++ = color

  subs r2, r2, #1                         @ counter--
  bne .Lexpand_blend_sat_loop

  ldmia sp!, { r4, r5, r6, r9, r10, r11, pc }

.Lexpand_blend_sat_copy:
  and r5, r10, r5, lsl #1                 @ r5 = (pixel_pair & 0x1FF) << 1
  ldrh r5, [r14, r5]                      @ r5 = pixel_top
  strh r5, [r1], #2                       @ *screen_dest_ptr++ = pixel_top

  subs r2, r2, #1                         @ counter--
  bne .Lexpand_blend_sat_loop

  ldmia sp!, { r4, r5, r6, r9, r10, r11, pc }
| 133 | \r |
| 134 | \r |
| 135 | \r |
@ ---------------------------------------------------------------------------
@ expand_normal
@
@ Replaces, in place, a run of 16-bit palette indices with the 16-bit colors
@ stored in palette_ram_converted. Only the low 9 bits of each entry are used
@ as the index.
@
@ Pixels are processed eight at a time (four words per ldmia/stmia); any
@ remaining 1-7 pixels are handled one halfword at a time, so the run length
@ does not have to be a multiple of 8. Nothing is touched when end <= start.
@
@ NOTE(review): the eight-pixel path uses ldmia/stmia on r0, so
@ screen_ptr + start * 2 is assumed to be word aligned - confirm at call sites.
@
@ Input:
@   r0 = screen_ptr
@   r1 = start  (index of the first pixel)
@   r2 = end    (index one past the last pixel)
@
@ Clobbers r0-r3 and the flags; r4-r7 are saved and restored.
@ ---------------------------------------------------------------------------

@ expand_pixel_pair(reg, temp)
@   reg holds two packed entries; on exit each halfword of reg holds the
@   color for the index that was stored in that halfword. temp is scratch.
@   Expects r2 = palette_ram_converted and r3 = 0x3FE.
@   This is a C preprocessor macro and expands onto a single line, with the
@   instructions separated by semicolons, so no comments may appear inside it.
#define expand_pixel_pair(reg, temp)                                         ;\
  and temp, r3, reg, lsr #15                                                  ;\
  ldrh temp, [r2, temp]                                                       ;\
  and reg, r3, reg, lsl #1                                                    ;\
  ldrh reg, [r2, reg]                                                         ;\
  orr reg, reg, temp, lsl #16


  .align 2
.Lexpand_normal_pool:
  .word palette_ram_converted             @ +0
  .word 0x3FE                             @ +4  palette index mask (index * 2)

expand_normal:
  stmdb sp!, { r4, r5, r6, r7, r14 }

  add r0, r0, r1, lsl #1                  @ screen_ptr += start (halfwords)
  subs r1, r2, r1                         @ r1 = end - start
  ble .Lexpand_normal_done                @ empty run: nothing to do

  ldr r2, .Lexpand_normal_pool            @ r2 = palette_ram_converted
  ldr r3, .Lexpand_normal_pool + 4        @ r3 = 0x3FE

  subs r1, r1, #8                         @ at least one full group of 8?
  blt .Lexpand_normal_tail

.Lexpand_normal_block:
  ldmia r0, { r4, r5, r6, r7 }            @ fetch 8 entries

  expand_pixel_pair(r4, r14)
  expand_pixel_pair(r5, r14)
  expand_pixel_pair(r6, r14)
  expand_pixel_pair(r7, r14)

  stmia r0!, { r4, r5, r6, r7 }           @ write 8 colors back, advance

  subs r1, r1, #8
  bge .Lexpand_normal_block               @ another full group remains

.Lexpand_normal_tail:
  adds r1, r1, #8                         @ r1 = leftover pixels (0..7)
  beq .Lexpand_normal_done

.Lexpand_normal_single:
  ldrh r4, [r0]                           @ r4 = entry
  and r4, r3, r4, lsl #1                  @ r4 = (entry & 0x1FF) << 1
  ldrh r4, [r2, r4]                       @ r4 = color
  strh r4, [r0], #2                       @ store in place, advance

  subs r1, r1, #1
  bne .Lexpand_normal_single

.Lexpand_normal_done:
  ldmia sp!, { r4, r5, r6, r7, pc }
| 181 | \r |