integrate M-HT's neon scalers
[gpsp.git] / video.c
1 /* gameplaySP
2  *
3  * Copyright (C) 2006 Exophase <exophase@gmail.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of
8  * the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
19
20 #include "common.h"
21 #define WANT_FONT_BITS
22 #include "font.h"
23
24 #ifdef PSP_BUILD
25
26 #include <pspctrl.h>
27
28 #include <pspkernel.h>
29 #include <pspdebug.h>
30 #include <pspdisplay.h>
31
32 #include <pspgu.h>
33 #include <psppower.h>
34 #include <psprtc.h>
35
36 static float *screen_vertex = (float *)0x441FC100;
37 static u32 *ge_cmd = (u32 *)0x441FC000;
38 static u16 *psp_gu_vram_base = (u16 *)(0x44000000);
39 static u32 *ge_cmd_ptr = (u32 *)0x441FC000;
40 static u32 gecbid;
41 static u32 video_direct = 0;
42
43 static u32 __attribute__((aligned(16))) display_list[32];
44
// GBA frame size in pixels.
#define GBA_SCREEN_HEIGHT 160
#define GBA_SCREEN_WIDTH 240

// PSP screen size in pixels.
// NOTE(review): PSP_LINE_SIZE is presumably the frame buffer stride in
// pixels - confirm against its users.
#define PSP_SCREEN_HEIGHT 272
#define PSP_SCREEN_WIDTH 480
#define PSP_LINE_SIZE 512

#define PSP_ALL_BUTTON_MASK 0xFFFF

// Opcodes pasted into GE_CMD(), which places them in the top byte of a
// command word. Listed in ascending numeric order.
#define GE_CMD_NOP    0x00
#define GE_CMD_VADDR  0x01
#define GE_CMD_IADDR  0x02
#define GE_CMD_PRIM   0x04
#define GE_CMD_SIGNAL 0x0C
#define GE_CMD_FINISH 0x0F
#define GE_CMD_BASE   0x10
#define GE_CMD_VTYPE  0x12
#define GE_CMD_FBP    0x9C
#define GE_CMD_FBW    0x9D
#define GE_CMD_TBP0   0xA0
#define GE_CMD_TBW0   0xA8
#define GE_CMD_TSIZE0 0xB8
#define GE_CMD_TFLUSH 0xCB
#define GE_CMD_CLEAR  0xD3
69
// Appends one command word at ge_cmd_ptr and advances the cursor by one
// entry. The word is GE_CMD_<cmd> in bits 24-31 OR'd with `operand`.
//
// The expansion is a single parenthesised expression, so the macro acts as
// one statement even as the unbraced body of an if/else. The opcode is
// widened to u32 before the shift so that opcodes >= 0x80 are not shifted
// into the sign bit of an int.
#define GE_CMD(cmd, operand)                                                  \
  (*ge_cmd_ptr++ = (((u32)(GE_CMD_##cmd) << 24) | (operand)))
73
74 static u16 *screen_texture = (u16 *)(0x4000000 + (512 * 272 * 2));
75 static u16 *current_screen_texture = (u16 *)(0x4000000 + (512 * 272 * 2));
76 static u16 *screen_pixels = (u16 *)(0x4000000 + (512 * 272 * 2));
77 static u32 screen_pitch = 240;
78
// Callback with an empty body; both arguments are ignored.
// NOTE(review): presumably registered as the GE "finish" handler (see
// gecbid) - confirm at the registration site.
static void Ge_Finish_Callback(int id, void *arg)
{
  (void)id;
  (void)arg;
}
82
// Accessors for the PSP build: the current target pixel pointer and the
// pitch value stored in screen_pitch.
#define get_screen_pixels() (screen_pixels)

#define get_screen_pitch() (screen_pitch)
88
89 #elif defined(WIZ_BUILD)
90
91 static u16 rot_buffer[240*4];
92 static u32 rot_lines_total = 4;
93 static u32 rot_line_count = 0;
94 static char rot_msg_buff[64];
95
96 static u32 screen_offset = 0;
97 static u16 *screen_pixels = NULL;
98 const u32 screen_pitch = 320;
99
// Accessors for the Wiz build: the current target pixel pointer and the
// constant pitch stored in screen_pitch.
#define get_screen_pixels() (screen_pixels)

#define get_screen_pitch() (screen_pitch)
105
106 #elif defined(PND_BUILD)
107
108 static u16 *screen_pixels = NULL;
109
110 #define get_screen_pixels()                                                   \
111   screen_pixels                                                               \
112
113 #define get_screen_pitch()                                                    \
114   resolution_width                                                            \
115
116 #else
117
118 #ifdef GP2X_BUILD
119 #include "SDL_gp2x.h"
120 SDL_Surface *hw_screen;
121 #endif
122 SDL_Surface *screen;
123 const u32 video_scale = 1;
124
// Accessors for the SDL build: the surface's pixel memory viewed as u16, and
// the surface pitch divided by two.
#define get_screen_pixels() ((u16 *)(screen->pixels))

#define get_screen_pitch() ((screen->pitch) / 2)
130
131 #endif
132
133 static void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline,
134  u32 enable_flags, u32 dispcnt, u32 bldcnt, const tile_layer_render_struct
135  *layer_renderers);
136 static void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline,
137  u32 enable_flags, u32 dispcnt, u32 bldcnt, const bitmap_layer_render_struct
138  *layer_renderers);
139
// Expands to nothing.
#define no_op
141
// Palette lookup followed by convert_palette() on the fetched value. This
// older form is unnecessary when the palette is converted transparently or
// when ABGR 1555 is used natively; the direct lookup below skips the
// conversion and is much faster.
#define tile_lookup_palette_full(palette, source) \
  current_pixel = (palette)[source]; \
  convert_palette(current_pixel)

// Direct palette lookup into current_pixel. The expansion already ends with
// a semicolon.
#define tile_lookup_palette(palette, source) \
  current_pixel = (palette)[source];
152
153
// Base write for the normal renderer. By default the pixel is replaced by
// its palette entry and stored at dest_ptr[index]; with
// RENDER_COLOR16_NORMAL defined the color16 write is used instead.
#ifndef RENDER_COLOR16_NORMAL

#define tile_expand_base_normal(index) \
  tile_lookup_palette(palette, current_pixel); \
  dest_ptr[index] = current_pixel

#else

#define tile_expand_base_normal(index) tile_expand_base_color16(index)

#endif
166
// The normal renderer writes layered pixels exactly like base pixels.
#define tile_expand_transparent_normal(index) tile_expand_base_normal(index)

// Copy renderer: takes the pixel at the same index from copy_ptr instead of
// using current_pixel.
#define tile_expand_copy(index) dest_ptr[(index)] = copy_ptr[(index)]
172
173
// Move the output cursor forward by delta pixels. The base and transparent
// renderers only move dest_ptr; the copy renderer keeps copy_ptr in step.
#define advance_dest_ptr_base(delta) dest_ptr += (delta)

#define advance_dest_ptr_transparent(delta) advance_dest_ptr_base(delta)

#define advance_dest_ptr_copy(delta) \
  advance_dest_ptr_base(delta); \
  copy_ptr += (delta)
183
184
// 1 when bit `layer` of BLDCNT is set (the layer is blend target A),
// otherwise 0.
#define color_combine_mask_a(layer)                                           \
  ((io_registers[REG_BLDCNT] >> (layer)) & 0x01)

// Blend target mask for a layer: bit 9 is set when the layer is target A
// (BLDCNT bit `layer`) and bit 10 when it is target B (BLDCNT bit
// `layer + 8`).
//
// Both the argument and the whole expansion are parenthesised so the macro
// can be embedded in a larger expression, and be given a non-trivial layer
// expression, without operator precedence changing the result.
#define color_combine_mask(layer)                                             \
  ((color_combine_mask_a(layer) |                                             \
   ((io_registers[REG_BLDCNT] >> ((layer) + 7)) & 0x02)) << 9)
194
// Alpha blending renderers store the palette index (9bpp) together with the
// layer bits instead of raw RGB. The base write overwrites the whole 32bit
// destination entry.
#define tile_expand_base_alpha(index) \
  dest_ptr[(index)] = current_pixel | pixel_combine

// Base write for a background pixel: stores bg_combine alone.
#define tile_expand_base_bg(index) dest_ptr[(index)] = bg_combine
204
205
// Layered (transparent) alpha write: the existing entry is shifted up by 16
// bits and the new pixel is placed in the low half, so the entry holds the
// most recent pixel and the one drawn before it.
#define tile_expand_transparent_alpha(index) \
  dest_ptr[(index)] = \
   (dest_ptr[(index)] << 16) | current_pixel | pixel_combine
212
213
// Layered alpha write for OBJ pixels. The entry is only shifted up when bit
// 8 of the current entry is clear; when it is set (the top is already OBJ)
// the low half is replaced in place and the high half is kept.
#define tile_expand_transparent_alpha_obj(index) \
  dest = dest_ptr[index]; \
  if(!(dest & 0x00000100)) \
  { \
    dest_ptr[index] = (dest << 16) | current_pixel | pixel_combine; \
  } \
  else \
  { \
    dest_ptr[index] = (dest & 0xFFFF0000) | current_pixel | pixel_combine; \
  }
225
226
// Writes for color effects that do not keep the previous layer: the entry is
// simply replaced by the pixel plus its combine bits. The color32 names are
// meant for a 32bit wide dest_ptr so that alpha combine can be applied on
// top; all four variants perform the same store.
#define tile_expand_base_color16(index) \
  dest_ptr[(index)] = current_pixel | pixel_combine

#define tile_expand_transparent_color16(index) tile_expand_base_color16(index)

#define tile_expand_base_color32(index) tile_expand_base_color16(index)

#define tile_expand_transparent_color32(index) tile_expand_base_color16(index)
242
243
// Operations for isolating one 8bpp pixel within a 32bit block of four.
// mask takes the lowest byte, shift_mask a byte in the middle, shift the
// remaining high bits (no mask applied) and none leaves current_pixel as it
// is.
#define tile_8bpp_pixel_op_mask(op_param) \
  current_pixel = (current_pixels & 0xFF)

#define tile_8bpp_pixel_op_shift_mask(shift) \
  current_pixel = ((current_pixels >> (shift)) & 0xFF)

#define tile_8bpp_pixel_op_shift(shift) \
  current_pixel = (current_pixels >> (shift))

#define tile_8bpp_pixel_op_none(shift)
256
// The normal base renderer always draws the raw pixel in 8bpp mode, so
// color 0 is drawn wherever the tile holds color 0.
#define tile_8bpp_draw_base_normal(index) tile_expand_base_normal(index)

// Alpha base renderer: a zero pixel is written as background, anything else
// as a regular alpha base pixel. color16 and color32 behave the same way.
#define tile_8bpp_draw_base_alpha(index) \
  if(!current_pixel) \
  { \
    tile_expand_base_bg(index); \
  } \
  else \
  { \
    tile_expand_base_alpha(index); \
  }

#define tile_8bpp_draw_base_color16(index) tile_8bpp_draw_base_alpha(index)

#define tile_8bpp_draw_base_color32(index) tile_8bpp_draw_base_alpha(index)
279
280
// Draw one 8bpp pixel. `op`/`op_param` select how the pixel is isolated from
// current_pixels and `alpha_op` selects the renderer flavour.

// Base: always writes.
#define tile_8bpp_draw_base(idx, op, op_param, alpha_op) \
  tile_8bpp_pixel_op_##op(op_param); \
  tile_8bpp_draw_base_##alpha_op(idx)

// Transparent (layered): only replaces what is there when the pixel is not
// transparent (zero).
#define tile_8bpp_draw_transparent(idx, op, op_param, alpha_op) \
  tile_8bpp_pixel_op_##op(op_param); \
  if(current_pixel) { \
    tile_expand_transparent_##alpha_op(idx); \
  }

// Copy: for a non-zero pixel, takes the destination value from copy_ptr.
#define tile_8bpp_draw_copy(idx, op, op_param, alpha_op) \
  tile_8bpp_pixel_op_##op(op_param); \
  if(current_pixel) { \
    tile_expand_copy(idx); \
  }
301
// Fetch the current map entry and point tile_ptr at its 8bpp tile: the low
// 10 bits of the entry select the tile, with 64 units of tile_base per tile.
#define get_tile_8bpp() \
  current_tile = map_ptr[0]; \
  tile_ptr = tile_base + ((current_tile & 0x3FF) * 64)
307
308
// Draw half of an 8bpp tile (the four pixels held in current_pixels) using
// the per-pixel macro chosen by combine_op. The lowest byte is the first
// pixel.
#define tile_8bpp_draw_four_noflip(idx, combine_op, alpha_op) \
  tile_8bpp_draw_##combine_op(idx + 0, mask, 0, alpha_op); \
  tile_8bpp_draw_##combine_op(idx + 1, shift_mask, 8, alpha_op); \
  tile_8bpp_draw_##combine_op(idx + 2, shift_mask, 16, alpha_op); \
  tile_8bpp_draw_##combine_op(idx + 3, shift, 24, alpha_op)

// Same four pixels, mirrored horizontally: the lowest byte lands on the last
// of the four destination slots.
#define tile_8bpp_draw_four_flip(idx, combine_op, alpha_op) \
  tile_8bpp_draw_##combine_op(idx + 3, mask, 0, alpha_op); \
  tile_8bpp_draw_##combine_op(idx + 2, shift_mask, 8, alpha_op); \
  tile_8bpp_draw_##combine_op(idx + 1, shift_mask, 16, alpha_op); \
  tile_8bpp_draw_##combine_op(idx + 0, shift, 24, alpha_op)

// Base renderer entry point: no transparency test on the whole word.
#define tile_8bpp_draw_four_base(idx, alpha_op, flip_op) \
  tile_8bpp_draw_four_##flip_op(idx, base, alpha_op)
328
329
// Draw half of an 8bpp tile for the transparent renderer. As an
// optimization the whole word is tested first: in transparent-capable
// renders the pixels are more likely to be transparent than opaque, so an
// all-zero word skips the four per-pixel draws.
#define tile_8bpp_draw_four_transparent(idx, alpha_op, flip_op) \
  if(current_pixels) { \
    tile_8bpp_draw_four_##flip_op(idx, transparent, alpha_op); \
  }

// Copy renderer variant with the same all-zero shortcut.
#define tile_8bpp_draw_four_copy(idx, alpha_op, flip_op) \
  if(current_pixels) { \
    tile_8bpp_draw_four_##flip_op(idx, copy, alpha_op); \
  }
346
// Helper for 8bpp tiles clipped against the edge of the screen: draws
// partial_tile_run pixels one at a time, always taking the lowest byte of
// current_pixels and then shifting the word down by one byte.
#define partial_tile_8bpp(combine_op, alpha_op) \
  for(i = 0; i < partial_tile_run; i++) \
  { \
    tile_8bpp_draw_##combine_op(0, mask, 0, alpha_op); \
    advance_dest_ptr_##combine_op(1); \
    current_pixels >>= 8; \
  }
356
357
// Draws an 8bpp tile clipped against the left side of the screen.
// partial_tile_offset is how far into the tile drawing starts and
// partial_tile_run how many pixels are drawn. When the start lies in the
// first word, the remainder of that word is drawn pixel by pixel and the
// second word as a full group of four.
#define partial_tile_right_noflip_8bpp(combine_op, alpha_op) \
  if(partial_tile_offset < 4) \
  { \
    partial_tile_run -= 4; \
    current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 8); \
    partial_tile_8bpp(combine_op, alpha_op); \
    current_pixels = *((u32 *)(tile_ptr + 4)); \
    tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip); \
    advance_dest_ptr_##combine_op(4); \
  } \
  else \
  { \
    current_pixels = *((u32 *)(tile_ptr + 4)) >> \
     ((partial_tile_offset - 4) * 8); \
    partial_tile_8bpp(combine_op, alpha_op); \
  }
378
379
// Draws an 8bpp tile clipped against both the left and the right side of
// the screen, i.e. a run shorter than 8 - partial_tile_offset. A run that
// starts in the first word and crosses into the second is split into two
// per-pixel runs, one per word.
#define partial_tile_mid_noflip_8bpp(combine_op, alpha_op) \
  if(partial_tile_offset < 4) \
  { \
    current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 8); \
    if((partial_tile_offset + partial_tile_run) <= 4) \
    { \
      partial_tile_8bpp(combine_op, alpha_op); \
    } \
    else \
    { \
      u32 total_run = partial_tile_run; \
      partial_tile_run = 4 - partial_tile_offset; \
      partial_tile_8bpp(combine_op, alpha_op); \
      partial_tile_run = total_run - partial_tile_run; \
      current_pixels = *((u32 *)(tile_ptr + 4)); \
      partial_tile_8bpp(combine_op, alpha_op); \
    } \
  } \
  else \
  { \
    current_pixels = *((u32 *)(tile_ptr + 4)) >> \
     ((partial_tile_offset - 4) * 8); \
    partial_tile_8bpp(combine_op, alpha_op); \
  }
407
408
// Draws an 8bpp tile clipped against the right side of the screen;
// partial_tile_run is how many pixels there are to draw. A run of four or
// more draws the first word as a full group (moving tile_ptr on to the
// second word) and the rest pixel by pixel.
#define partial_tile_left_noflip_8bpp(combine_op, alpha_op) \
  if(partial_tile_run >= 4) \
  { \
    current_pixels = *((u32 *)tile_ptr); \
    tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip); \
    partial_tile_run -= 4; \
    tile_ptr += 4; \
    advance_dest_ptr_##combine_op(4); \
  } \
  \
  current_pixels = *((u32 *)(tile_ptr)); \
  partial_tile_8bpp(combine_op, alpha_op)
424
425
// Draws a complete (non-clipped) 8bpp tile row: the word at tile_ptr fills
// destination slots 0-3 and the word at tile_ptr + 4 fills slots 4-7.
#define tile_noflip_8bpp(combine_op, alpha_op) \
  current_pixels = *((u32 *)(tile_ptr)); \
  tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip); \
  current_pixels = *((u32 *)(tile_ptr + 4)); \
  tile_8bpp_draw_four_##combine_op(4, alpha_op, noflip)
433
434
// Horizontally flipped counterpart of partial_tile_8bpp: draws
// partial_tile_run pixels, each taken from the top byte of current_pixels
// (shift by 24), and moves the word up by one byte after every pixel.
#define partial_tile_flip_8bpp(combine_op, alpha_op) \
  for(i = 0; i < partial_tile_run; i++) \
  { \
    tile_8bpp_draw_##combine_op(0, shift, 24, alpha_op); \
    advance_dest_ptr_##combine_op(1); \
    current_pixels <<= 8; \
  }
444
// Flipped counterpart of partial_tile_right_noflip_8bpp: draws a
// horizontally flipped 8bpp tile clipped against the left side of the
// screen. Flipped pixels are consumed from the top byte of the word at
// tile_ptr + 4 down to the lowest byte of the word at tile_ptr.
//
// When drawing starts inside the first consumed word (partial_tile_offset
// < 4) that word is shifted up by partial_tile_offset bytes to skip the
// clipped pixels, mirroring the noflip version. Shifting by
// (partial_tile_offset - 4) * 8 in that branch would use a negative (or
// wrapped) shift count, which is undefined behaviour.
#define partial_tile_right_flip_8bpp(combine_op, alpha_op)                    \
  if(partial_tile_offset >= 4)                                                \
  {                                                                           \
    current_pixels = *((u32 *)tile_ptr) << ((partial_tile_offset - 4) * 8);   \
    partial_tile_flip_8bpp(combine_op, alpha_op);                             \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    partial_tile_run -= 4;                                                    \
    current_pixels = *((u32 *)(tile_ptr + 4)) << (partial_tile_offset * 8);   \
    partial_tile_flip_8bpp(combine_op, alpha_op);                             \
    current_pixels = *((u32 *)tile_ptr);                                      \
    tile_8bpp_draw_four_##combine_op(0, alpha_op, flip);                      \
    advance_dest_ptr_##combine_op(4);                                         \
  }
461
// Flipped counterpart of partial_tile_mid_noflip_8bpp: draws a horizontally
// flipped 8bpp tile clipped against both sides of the screen. Flipped
// pixels are consumed from the top byte of the word at tile_ptr + 4 down to
// the lowest byte of the word at tile_ptr.
//
// When drawing starts inside the first consumed word (partial_tile_offset
// < 4) that word is shifted up by partial_tile_offset bytes to skip the
// clipped pixels, mirroring the noflip version. Shifting by
// (partial_tile_offset - 4) * 8 in that branch would use a negative (or
// wrapped) shift count, which is undefined behaviour.
#define partial_tile_mid_flip_8bpp(combine_op, alpha_op)                      \
  if(partial_tile_offset >= 4)                                                \
  {                                                                           \
    current_pixels = *((u32 *)tile_ptr) << ((partial_tile_offset - 4) * 8);   \
    partial_tile_flip_8bpp(combine_op, alpha_op);                             \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    current_pixels = *((u32 *)(tile_ptr + 4)) << (partial_tile_offset * 8);   \
                                                                              \
    if((partial_tile_offset + partial_tile_run) > 4)                          \
    {                                                                         \
      u32 old_run = partial_tile_run;                                         \
      partial_tile_run = 4 - partial_tile_offset;                             \
      partial_tile_flip_8bpp(combine_op, alpha_op);                           \
      partial_tile_run = old_run - partial_tile_run;                          \
      current_pixels = *((u32 *)(tile_ptr));                                  \
      partial_tile_flip_8bpp(combine_op, alpha_op);                           \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      partial_tile_flip_8bpp(combine_op, alpha_op);                           \
    }                                                                         \
  }
487
// Flipped counterpart of partial_tile_left_noflip_8bpp: draws a horizontally
// flipped 8bpp tile clipped against the right side of the screen. A run of
// four or more draws the word at tile_ptr + 4 as a full flipped group and
// steps tile_ptr back by 4, so the final read at tile_ptr + 4 then picks up
// the tile's first word.
#define partial_tile_left_flip_8bpp(combine_op, alpha_op) \
  if(partial_tile_run >= 4) \
  { \
    current_pixels = *((u32 *)(tile_ptr + 4)); \
    tile_8bpp_draw_four_##combine_op(0, alpha_op, flip); \
    partial_tile_run -= 4; \
    tile_ptr -= 4; \
    advance_dest_ptr_##combine_op(4); \
  } \
  \
  current_pixels = *((u32 *)(tile_ptr + 4)); \
  partial_tile_flip_8bpp(combine_op, alpha_op)

// Draws a complete, horizontally flipped 8bpp tile row: the word at
// tile_ptr + 4 fills destination slots 0-3 and the word at tile_ptr fills
// slots 4-7.
#define tile_flip_8bpp(combine_op, alpha_op) \
  current_pixels = *((u32 *)(tile_ptr + 4)); \
  tile_8bpp_draw_four_##combine_op(0, alpha_op, flip); \
  current_pixels = *((u32 *)(tile_ptr)); \
  tile_8bpp_draw_four_##combine_op(4, alpha_op, flip)
506
507
// Operations for isolating one 4bpp pixel within a 32bit block. mask takes
// the lowest nibble, shift_mask a nibble further up, shift the remaining
// high bits (no mask applied) and none leaves current_pixel as it is.
#define tile_4bpp_pixel_op_mask(op_param) \
  current_pixel = (current_pixels & 0x0F)

#define tile_4bpp_pixel_op_shift_mask(shift) \
  current_pixel = ((current_pixels >> (shift)) & 0x0F)

#define tile_4bpp_pixel_op_shift(shift) \
  current_pixel = (current_pixels >> (shift))

#define tile_4bpp_pixel_op_none(op_param)
520
// Draws a single 4bpp pixel as base with the normal renderer. A zero pixel
// is written without the current palette applied; any other pixel has
// current_palette OR'd in first. The branch could be replaced by a 0-255
// lookup table (identity except for multiples of 16, which map to 0);
// whether that is faster should be benchmarked.
#define tile_4bpp_draw_base_normal(index) \
  if(!current_pixel) \
  { \
    tile_expand_base_normal(index); \
  } \
  else \
  { \
    current_pixel = current_pixel | current_palette; \
    tile_expand_base_normal(index); \
  }
537
538
// Draws a single 4bpp pixel as base for the alpha renderer: a zero pixel is
// written as background, any other pixel gets current_palette OR'd in and is
// written as an alpha base pixel. color16 and color32 behave the same way.
#define tile_4bpp_draw_base_alpha(index) \
  if(!current_pixel) \
  { \
    tile_expand_base_bg(index); \
  } \
  else \
  { \
    current_pixel = current_pixel | current_palette; \
    tile_expand_base_alpha(index); \
  }

#define tile_4bpp_draw_base_color16(index) tile_4bpp_draw_base_alpha(index)

#define tile_4bpp_draw_base_color32(index) tile_4bpp_draw_base_alpha(index)
555
556
// Draws one 4bpp base pixel: isolate it from current_pixels with the chosen
// pixel op, then hand it to the base writer of the chosen renderer flavour.
#define tile_4bpp_draw_base(idx, op, op_param, alpha_op) \
  tile_4bpp_pixel_op_##op(op_param); \
  tile_4bpp_draw_base_##alpha_op(idx)
560
561
562 // Draws a single 4bpp pixel as layered, if not transparent.
563
564 #define tile_4bpp_draw_transparent(index, op, op_param, alpha_op)             \
565   tile_4bpp_pixel_op_##op(op_param);                                          \
566   if(current_pixel)                                                           \
567   {                                                                           \
568     current_pixel |= current_palette;                                         \
569     tile_expand_transparent_##alpha_op(index);                                \
570   }                                                                           \
571
// Draws a single 4bpp pixel in copy mode: same shape as the transparent draw
// (zero pixels are skipped, current_palette is ORed in), but the pixel is
// always handed to tile_expand_copy(index). alpha_op is accepted only so the
// signature matches the other draw macros; it is not used.
572 #define tile_4bpp_draw_copy(index, op, op_param, alpha_op)                    \
573   tile_4bpp_pixel_op_##op(op_param);                                          \
574   if(current_pixel)                                                           \
575   {                                                                           \
576     current_pixel |= current_palette;                                         \
577     tile_expand_copy(index);                                                  \
578   }                                                                           \
579
580
581 // Draws eight background pixels in transparent mode, for alpha or normal
582 // renderers.
// Stores the same value into dest_ptr[0] .. dest_ptr[7]. The argument is
// expanded eight times, so it should be a side-effect-free expression.
583
584 #define tile_4bpp_draw_eight_base_zero(value)                                 \
585   dest_ptr[0] = value;                                                        \
586   dest_ptr[1] = value;                                                        \
587   dest_ptr[2] = value;                                                        \
588   dest_ptr[3] = value;                                                        \
589   dest_ptr[4] = value;                                                        \
590   dest_ptr[5] = value;                                                        \
591   dest_ptr[6] = value;                                                        \
592   dest_ptr[7] = value                                                         \
593
594
595 // Draws eight background pixels for the alpha renderer, basically color zero
596 // with the background flag high.
// Concretely: fills dest_ptr[0..7] with bg_combine, which must be in scope at
// the expansion site.
597
598 #define tile_4bpp_draw_eight_base_zero_alpha()                                \
599   tile_4bpp_draw_eight_base_zero(bg_combine)                                  \
600
// The color16 and color32 eight-pixel background fills are aliases of the
// alpha variant.
601 #define tile_4bpp_draw_eight_base_zero_color16()                              \
602   tile_4bpp_draw_eight_base_zero_alpha()                                      \
603
604 #define tile_4bpp_draw_eight_base_zero_color32()                              \
605   tile_4bpp_draw_eight_base_zero_alpha()                                      \
606
607
608 // Draws eight background pixels for the normal renderer, just a bunch of
609 // zeros.
// With RENDER_COLOR16_NORMAL defined the fill value is literally 0; otherwise
// it is palette[0]. Both variants also leave that value in current_pixel.
610
611 #ifdef RENDER_COLOR16_NORMAL
612
613 #define tile_4bpp_draw_eight_base_zero_normal()                               \
614   current_pixel = 0;                                                          \
615   tile_4bpp_draw_eight_base_zero(current_pixel)                               \
616
617 #else
618
619 #define tile_4bpp_draw_eight_base_zero_normal()                               \
620   current_pixel = palette[0];                                                 \
621   tile_4bpp_draw_eight_base_zero(current_pixel)                               \
622
623 #endif
624
625
626 // Draws eight 4bpp pixels.
// Invokes tile_4bpp_draw_<combine_op> once per destination index 0..7 with
// op parameters 0, 4, ..., 28. The first call uses the `mask` pixel op, the
// middle six use `shift_mask` and the last uses `shift`.
// NOTE(review): the parameters look like bit offsets into current_pixels
// (one nibble per pixel); the pixel ops are defined elsewhere - confirm.
627
628 #define tile_4bpp_draw_eight_noflip(combine_op, alpha_op)                     \
629   tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op);                          \
630   tile_4bpp_draw_##combine_op(1, shift_mask, 4, alpha_op);                    \
631   tile_4bpp_draw_##combine_op(2, shift_mask, 8, alpha_op);                    \
632   tile_4bpp_draw_##combine_op(3, shift_mask, 12, alpha_op);                   \
633   tile_4bpp_draw_##combine_op(4, shift_mask, 16, alpha_op);                   \
634   tile_4bpp_draw_##combine_op(5, shift_mask, 20, alpha_op);                   \
635   tile_4bpp_draw_##combine_op(6, shift_mask, 24, alpha_op);                   \
636   tile_4bpp_draw_##combine_op(7, shift, 28, alpha_op)                         \
637
638
639 // Draws eight 4bpp pixels in reverse order (for hflip).
// Same sequence of pixel ops and parameters as the noflip version (0, 4, ...,
// 28), but the destination indices run 7 down to 0, so the pixel taken with
// parameter 0 lands in slot 7 and the one taken with parameter 28 in slot 0.
640
641 #define tile_4bpp_draw_eight_flip(combine_op, alpha_op)                       \
642   tile_4bpp_draw_##combine_op(7, mask, 0, alpha_op);                          \
643   tile_4bpp_draw_##combine_op(6, shift_mask, 4, alpha_op);                    \
644   tile_4bpp_draw_##combine_op(5, shift_mask, 8, alpha_op);                    \
645   tile_4bpp_draw_##combine_op(4, shift_mask, 12, alpha_op);                   \
646   tile_4bpp_draw_##combine_op(3, shift_mask, 16, alpha_op);                   \
647   tile_4bpp_draw_##combine_op(2, shift_mask, 20, alpha_op);                   \
648   tile_4bpp_draw_##combine_op(1, shift_mask, 24, alpha_op);                   \
649   tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op)                         \
650
651
652 // Draws eight 4bpp pixels in base mode, checks if all are zero, if so draws
653 // the appropriate background pixels.
//   alpha_op - selects both the per-pixel draw variant and the
//              tile_4bpp_draw_eight_base_zero_<alpha_op> fill
//   flip_op  - flip or noflip, selects tile_4bpp_draw_eight_<flip_op>
// The all-zero test is made on the whole 32-bit current_pixels word.
654
655 #define tile_4bpp_draw_eight_base(alpha_op, flip_op)                          \
656   if(current_pixels != 0)                                                     \
657   {                                                                           \
658     tile_4bpp_draw_eight_##flip_op(base, alpha_op);                           \
659   }                                                                           \
660   else                                                                        \
661   {                                                                           \
662     tile_4bpp_draw_eight_base_zero_##alpha_op();                              \
663   }                                                                           \
664
665
666 // Draws eight 4bpp pixels in transparent (layered) mode, checks if all are
667 // zero and if so draws nothing.
// flip_op (flip or noflip) selects tile_4bpp_draw_eight_<flip_op>; the
// destination is left untouched when current_pixels is 0.
668
669 #define tile_4bpp_draw_eight_transparent(alpha_op, flip_op)                   \
670   if(current_pixels != 0)                                                     \
671   {                                                                           \
672     tile_4bpp_draw_eight_##flip_op(transparent, alpha_op);                    \
673   }                                                                           \
674
675
// Draws eight 4bpp pixels in copy mode; like the transparent version, nothing
// is drawn when current_pixels is 0.
676 #define tile_4bpp_draw_eight_copy(alpha_op, flip_op)                          \
677   if(current_pixels != 0)                                                     \
678   {                                                                           \
679     tile_4bpp_draw_eight_##flip_op(copy, alpha_op);                           \
680   }                                                                           \
681
682 // Gets the current tile in 4bpp mode, also getting the current palette and
683 // the pixel block.
// Reads the map entry at map_ptr into current_tile, then derives:
//   current_palette - bits 12-15 of the entry multiplied by 16
//   tile_ptr        - tile_base plus (bits 0-9 of the entry) * 32 bytes
684
685 #define get_tile_4bpp()                                                       \
686   current_tile = *map_ptr;                                                    \
687   current_palette = (current_tile >> 12) << 4;                                \
688   tile_ptr = tile_base + ((current_tile & 0x3FF) * 32);                       \
689
690
691 // Helper macro for drawing clipped 4bpp tiles.
// Draws partial_tile_run pixels one at a time: each iteration draws the pixel
// selected by the `mask` op at destination index 0, shifts current_pixels
// right by four bits and advances the destination pointer by one pixel.
// Uses the loop counter i of the enclosing scope.
692
693 #define partial_tile_4bpp(combine_op, alpha_op)                               \
694   for(i = 0; i < partial_tile_run; i++)                                       \
695   {                                                                           \
696     tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op);                        \
697     current_pixels >>= 4;                                                     \
698     advance_dest_ptr_##combine_op(1);                                         \
699   }                                                                           \
700
701
702 // Draws a 4bpp tile clipped against the left edge of the screen.
703 // partial_tile_offset is how far in it's clipped, partial_tile_run is
704 // how many to draw.
// The row is loaded as one 32-bit word and shifted right by four bits per
// skipped pixel before the per-pixel loop runs.
705
706 #define partial_tile_right_noflip_4bpp(combine_op, alpha_op)                  \
707   current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 4);           \
708   partial_tile_4bpp(combine_op, alpha_op)                                     \
709
710
711 // Draws a 4bpp tile clipped against both edges of the screen, same as right.
// Only partial_tile_run differs, and that is set by the caller.
712
713 #define partial_tile_mid_noflip_4bpp(combine_op, alpha_op)                    \
714   partial_tile_right_noflip_4bpp(combine_op, alpha_op)                        \
715
716
717 // Draws a 4bpp tile clipped against the right edge of the screen.
718 // partial_tile_run is how many to draw.
// The row is loaded unshifted, so drawing starts at the tile's first pixel;
// partial_tile_offset is not used here.
719
720 #define partial_tile_left_noflip_4bpp(combine_op, alpha_op)                   \
721   current_pixels = *((u32 *)tile_ptr);                                        \
722   partial_tile_4bpp(combine_op, alpha_op)                                     \
723
724
725 // Draws a complete 4bpp tile row (not clipped)
// Loads the 32-bit row at tile_ptr into current_pixels and draws all eight
// pixels through tile_4bpp_draw_eight_<combine_op> in noflip order.
726 #define tile_noflip_4bpp(combine_op, alpha_op)                                \
727   current_pixels = *((u32 *)tile_ptr);                                        \
728   tile_4bpp_draw_eight_##combine_op(alpha_op, noflip)                         \
729
730
731 // Like the above, but draws flipped tiles.
// Per-pixel loop for clipped, flipped tiles: each iteration draws the pixel
// selected by the `shift` op with parameter 28 at destination index 0, then
// shifts current_pixels left by four bits and advances the destination by
// one pixel. Runs partial_tile_run times using the enclosing scope's i.
// NOTE(review): `shift` with 28 presumably picks the top nibble - confirm
// against the pixel op definitions.
732
733 #define partial_tile_flip_4bpp(combine_op, alpha_op)                          \
734   for(i = 0; i < partial_tile_run; i++)                                       \
735   {                                                                           \
736     tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op);                      \
737     current_pixels <<= 4;                                                     \
738     advance_dest_ptr_##combine_op(1);                                         \
739   }                                                                           \
740
// Flipped counterpart of partial_tile_right_noflip_4bpp: the row word is
// shifted left by four bits per skipped pixel (partial_tile_offset) before
// the flipped per-pixel loop draws partial_tile_run pixels.
741 #define partial_tile_right_flip_4bpp(combine_op, alpha_op)                    \
742   current_pixels = *((u32 *)tile_ptr) << (partial_tile_offset * 4);           \
743   partial_tile_flip_4bpp(combine_op, alpha_op)                                \
744
// Flipped tile clipped on both sides: identical to the right-clipped flipped
// case; the caller limits partial_tile_run.
745 #define partial_tile_mid_flip_4bpp(combine_op, alpha_op)                      \
746   partial_tile_right_flip_4bpp(combine_op, alpha_op)                          \
747
// Flipped counterpart of partial_tile_left_noflip_4bpp: loads the row word
// unshifted and draws partial_tile_run pixels with the flipped loop.
748 #define partial_tile_left_flip_4bpp(combine_op, alpha_op)                     \
749   current_pixels = *((u32 *)tile_ptr);                                        \
750   partial_tile_flip_4bpp(combine_op, alpha_op)                                \
751
// Draws a complete, unclipped 4bpp tile row in flipped order: loads the
// 32-bit row at tile_ptr and draws it via tile_4bpp_draw_eight_<combine_op>
// with the flip ordering.
752 #define tile_flip_4bpp(combine_op, alpha_op)                                  \
753   current_pixels = *((u32 *)tile_ptr);                                        \
754   tile_4bpp_draw_eight_##combine_op(alpha_op, flip)                           \
755
756
757 // Draws a single (partial or complete) tile from the tilemap, flipping
758 // as necessary.
//   tile_type   - prefix of the drawing macro family (tile,
//                 partial_tile_right, partial_tile_mid, partial_tile_left)
//   color_depth - 4bpp or 8bpp, selects get_tile_<depth> and the drawer
// After the map entry is fetched, bit 0x800 of current_tile adds
// vertical_pixel_flip to tile_ptr, and bit 0x400 chooses between the _flip_
// and _noflip_ drawing macros.
759
760 #define single_tile_map(tile_type, combine_op, color_depth, alpha_op)         \
761   get_tile_##color_depth();                                                   \
762   if(current_tile & 0x800)                                                    \
763     tile_ptr += vertical_pixel_flip;                                          \
764                                                                               \
765   if(current_tile & 0x400)                                                    \
766   {                                                                           \
767     tile_type##_flip_##color_depth(combine_op, alpha_op);                     \
768   }                                                                           \
769   else                                                                        \
770   {                                                                           \
771     tile_type##_noflip_##color_depth(combine_op, alpha_op);                   \
772   }                                                                           \
773
774
775 // Draws multiple sequential tiles from the tilemap, hflips and vflips as
776 // necessary.
// Draws tile_run whole tiles; after each one the destination advances by
// eight pixels and map_ptr by one entry. Uses the enclosing scope's i.
777
778 #define multiple_tile_map(combine_op, color_depth, alpha_op)                  \
779   for(i = 0; i < tile_run; i++)                                               \
780   {                                                                           \
781     single_tile_map(tile, combine_op, color_depth, alpha_op);                 \
782     advance_dest_ptr_##combine_op(8);                                         \
783     map_ptr++;                                                                \
784   }                                                                           \
785
786 // Draws a partial tile from a tilemap clipped against the left edge of the
787 // screen.
// map_ptr is advanced afterwards, so the following tiles start at the next
// map entry.
788
789 #define partial_tile_right_map(combine_op, color_depth, alpha_op)             \
790   single_tile_map(partial_tile_right, combine_op, color_depth, alpha_op);     \
791   map_ptr++                                                                   \
792
793 // Draws a partial tile from a tilemap clipped against both edges of the
794 // screen.
// map_ptr is left unchanged.
795
796 #define partial_tile_mid_map(combine_op, color_depth, alpha_op)               \
797   single_tile_map(partial_tile_mid, combine_op, color_depth, alpha_op)        \
798
799 // Draws a partial tile from a tilemap clipped against the right edge of the
800 // screen.
// map_ptr is left unchanged.
801
802 #define partial_tile_left_map(combine_op, color_depth, alpha_op)              \
803   single_tile_map(partial_tile_left, combine_op, color_depth, alpha_op)       \
804
805
806 // Advances a non-flipped 4bpp obj to the next tile.
// Moves tile_ptr forward by 32 bytes (tile_size_4bpp).
807
808 #define obj_advance_noflip_4bpp()                                             \
809   tile_ptr += 32                                                              \
810
811
812 // Advances a non-flipped 8bpp obj to the next tile.
// Moves tile_ptr forward by 64 bytes (tile_size_8bpp).
813
814 #define obj_advance_noflip_8bpp()                                             \
815   tile_ptr += 64                                                              \
816
817
818 // Advances a flipped 4bpp obj to the next tile.
// Flipped objs walk their tiles backwards: tile_ptr moves back by 32 bytes.
819
820 #define obj_advance_flip_4bpp()                                               \
821   tile_ptr -= 32                                                              \
822
823
824 // Advances a flipped 8bpp obj to the next tile.
// Flipped objs walk their tiles backwards: tile_ptr moves back by 64 bytes.
825
826 #define obj_advance_flip_8bpp()                                               \
827   tile_ptr -= 64                                                              \
828
829
830
831 // Draws multiple sequential tiles from an obj, flip_op determines if it should
832 // be flipped or not (set to flip or noflip)
// For each of the tile_run tiles: draw the row with
// tile_<flip_op>_<color_depth>, step tile_ptr with the matching obj_advance
// macro, then advance the destination by eight pixels.
833
834 #define multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op)         \
835   for(i = 0; i < tile_run; i++)                                               \
836   {                                                                           \
837     tile_##flip_op##_##color_depth(combine_op, alpha_op);                     \
838     obj_advance_##flip_op##_##color_depth();                                  \
839     advance_dest_ptr_##combine_op(8);                                         \
840   }                                                                           \
841
842
843 // Draws an obj's tile clipped against the left side of the screen
// tile_ptr is stepped to the obj's next tile afterwards.
844
845 #define partial_tile_right_obj(combine_op, color_depth, alpha_op, flip_op)    \
846   partial_tile_right_##flip_op##_##color_depth(combine_op, alpha_op);         \
847   obj_advance_##flip_op##_##color_depth()                                     \
848
849 // Draws an obj's tile clipped against both sides of the screen
// tile_ptr is not advanced.
850
851 #define partial_tile_mid_obj(combine_op, color_depth, alpha_op, flip_op)      \
852   partial_tile_mid_##flip_op##_##color_depth(combine_op, alpha_op)            \
853
854 // Draws an obj's tile clipped against the right side of the screen
// tile_ptr is not advanced.
855
856 #define partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op)     \
857   partial_tile_left_##flip_op##_##color_depth(combine_op, alpha_op)           \
858
859
860 // Extra variables specific for 8bpp/4bpp tile renderers.
// The 8bpp renderer needs none (empty expansion); the 4bpp renderer declares
// current_palette, which get_tile_4bpp fills in for every map entry.
861
862 #define tile_extra_variables_8bpp()                                           \
863
864 #define tile_extra_variables_4bpp()                                           \
865   u32 current_palette                                                         \
866
867
868 // Byte lengths of complete tiles and tile rows in 4bpp and 8bpp.
// tile_width_* is the size of one 8-pixel row, tile_size_* the size of a
// whole tile (eight rows).
869
870 #define tile_width_4bpp 4
871 #define tile_size_4bpp 32
872 #define tile_width_8bpp 8
873 #define tile_size_8bpp 64
874
875
876 // Render a single scanline of text tiles
// Expands to a block that draws `end` pixels of the current text scanline,
// starting at map_ptr. It relies on locals of the enclosing function:
// vertical_offset, horizontal_offset, bg_control, map_base, map_ptr,
// second_ptr, end, partial_tile_run, partial_tile_offset and tile_run.
//
//   vertical_pixel_offset - byte offset of the wanted row within a tile
//   vertical_pixel_flip   - byte distance from that row to the mirrored row
//                           (row 7 - r); single_tile_map adds it to tile_ptr
//                           for map entries with bit 0x800 set
//   tile_base             - VRAM + 16KB * (bits 2-3 of bg_control), already
//                           offset to the wanted row
//   pixel_run             - pixels left before horizontal_offset reaches the
//                           next multiple of 256
//
// If the span fits inside pixel_run it is drawn as an optional leading
// partial tile, a run of whole tiles and an optional trailing partial tile.
// Otherwise the tiles up to the 256-pixel boundary are drawn, map_ptr is
// switched to second_ptr and the remainder is drawn from there.
//
// The `return` in the narrow-span case leaves the enclosing function, not
// just this block.
//
// NOTE(review): map_base is adjusted below but is not read again by this
// macro or by the helpers visible here, and render_scanline_text_builder has
// already applied a vertical adjustment before computing map_ptr/second_ptr.
// Confirm whether this statement is still needed.
877
878 #define tile_render(color_depth, combine_op, alpha_op)                        \
879 {                                                                             \
880   u32 vertical_pixel_offset = (vertical_offset % 8) *                         \
881    tile_width_##color_depth;                                                  \
882   u32 vertical_pixel_flip =                                                   \
883    ((tile_size_##color_depth - tile_width_##color_depth) -                    \
884    vertical_pixel_offset) - vertical_pixel_offset;                            \
885   tile_extra_variables_##color_depth();                                       \
886   u8 *tile_base = vram + (((bg_control >> 2) & 0x03) * (1024 * 16)) +         \
887    vertical_pixel_offset;                                                     \
888   u32 pixel_run = 256 - (horizontal_offset % 256);                            \
889   u32 current_tile;                                                           \
890                                                                               \
891   map_base += ((vertical_offset % 256) / 8) * 32;                             \
892   partial_tile_offset = (horizontal_offset % 8);                              \
893                                                                               \
894   if(pixel_run >= end)                                                        \
895   {                                                                           \
896     if(partial_tile_offset)                                                   \
897     {                                                                         \
898       partial_tile_run = 8 - partial_tile_offset;                             \
899       if(end < partial_tile_run)                                              \
900       {                                                                       \
901         partial_tile_run = end;                                               \
902         partial_tile_mid_map(combine_op, color_depth, alpha_op);              \
903         return;                                                               \
904       }                                                                       \
905       else                                                                    \
906       {                                                                       \
907         end -= partial_tile_run;                                              \
908         partial_tile_right_map(combine_op, color_depth, alpha_op);            \
909       }                                                                       \
910     }                                                                         \
911                                                                               \
912     tile_run = end / 8;                                                       \
913     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
914                                                                               \
915     partial_tile_run = end % 8;                                               \
916                                                                               \
917     if(partial_tile_run)                                                      \
918     {                                                                         \
919       partial_tile_left_map(combine_op, color_depth, alpha_op);               \
920     }                                                                         \
921   }                                                                           \
922   else                                                                        \
923   {                                                                           \
924     if(partial_tile_offset)                                                   \
925     {                                                                         \
926       partial_tile_run = 8 - partial_tile_offset;                             \
927       partial_tile_right_map(combine_op, color_depth, alpha_op);              \
928     }                                                                         \
929                                                                               \
930     tile_run = (pixel_run - partial_tile_run) / 8;                            \
931     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
932     map_ptr = second_ptr;                                                     \
933     end -= pixel_run;                                                         \
934     tile_run = end / 8;                                                       \
935     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
936                                                                               \
937     partial_tile_run = end % 8;                                               \
938     if(partial_tile_run)                                                      \
939     {                                                                         \
940       partial_tile_left_map(combine_op, color_depth, alpha_op);               \
941     }                                                                         \
942   }                                                                           \
943 }                                                                             \
944
// Destination pixel type for each renderer kind. The scanline builders cast
// their `void *scanline` argument to a pointer to
// render_scanline_dest_<alpha_op>.
945 #define render_scanline_dest_normal         u16
946 #define render_scanline_dest_alpha          u32
947 #define render_scanline_dest_alpha_obj      u32
948 #define render_scanline_dest_color16        u16
949 #define render_scanline_dest_color32        u32
950 #define render_scanline_dest_partial_alpha  u32
951 #define render_scanline_dest_copy_tile      u16
952 #define render_scanline_dest_copy_bitmap    u16
953
954
955 // If rendering a scanline that is not a target A then there's no point in
956 // keeping what's underneath it because it can't blend with it.
// When bit 0x200 of pixel_combine is clear, the scanline is rendered by the
// color32 variant for the same bg_type/combine_op and the enclosing function
// returns immediately. Uses layer, start, end and scanline from that
// function.
957
958 #define render_scanline_skip_alpha(bg_type, combine_op)                       \
959   if((pixel_combine & 0x00000200) == 0)                                       \
960   {                                                                           \
961     render_scanline_##bg_type##_##combine_op##_color32(layer,                 \
962      start, end, scanline);                                                   \
963     return;                                                                   \
964   }                                                                           \
965
966
// Extra locals for the base/normal scanline renderers. With
// RENDER_COLOR16_NORMAL the renderer gets a constant pixel_combine of 0;
// otherwise it gets a `palette` pointer to palette_ram_converted.
967 #ifdef RENDER_COLOR16_NORMAL
968
969 #define render_scanline_extra_variables_base_normal(bg_type)                  \
970   const u32 pixel_combine = 0                                                 \
971
972 #else
973
974 #define render_scanline_extra_variables_base_normal(bg_type)                  \
975   u16 *palette = palette_ram_converted                                        \
976
977 #endif
978
979
// Extra locals for the base/alpha scanline renderers: bg_combine is the
// combine mask for index 5, and pixel_combine is the layer's own mask with
// bg_combine placed in the upper 16 bits. The skip check then hands off to
// the color32 renderer (and returns) when bit 0x200 of pixel_combine is
// clear.
// NOTE(review): index 5 presumably denotes the backdrop; color_combine_mask
// is defined elsewhere - confirm.
980 #define render_scanline_extra_variables_base_alpha(bg_type)                   \
981   u32 bg_combine = color_combine_mask(5);                                     \
982   u32 pixel_combine = color_combine_mask(layer) | (bg_combine << 16);         \
983   render_scanline_skip_alpha(bg_type, base)                                   \
984
// Extra locals shared by the base/color16 and base/color32 renderers:
// bg_combine (combine mask for index 5) and pixel_combine (combine mask for
// the layer). Unlike the alpha variant, bg_combine is not merged into
// pixel_combine and no skip check is made.
985 #define render_scanline_extra_variables_base_color()                          \
986   u32 bg_combine = color_combine_mask(5);                                     \
987   u32 pixel_combine = color_combine_mask(layer)                               \
988
989 #define render_scanline_extra_variables_base_color16(bg_type)                 \
990   render_scanline_extra_variables_base_color()                                \
991
992 #define render_scanline_extra_variables_base_color32(bg_type)                 \
993   render_scanline_extra_variables_base_color()                                \
994
995
// The transparent/normal renderers use the same extra locals as base/normal.
996 #define render_scanline_extra_variables_transparent_normal(bg_type)           \
997   render_scanline_extra_variables_base_normal(bg_type)                        \
998
// Extra locals for the transparent/alpha renderers: pixel_combine is the
// layer's combine mask, followed by the skip check that hands off to the
// transparent color32 renderer (and returns) when bit 0x200 is clear.
999 #define render_scanline_extra_variables_transparent_alpha(bg_type)            \
1000   u32 pixel_combine = color_combine_mask(layer);                              \
1001   render_scanline_skip_alpha(bg_type, transparent)                            \
1002
// Extra locals shared by the transparent/color16 and transparent/color32
// renderers: only pixel_combine, the layer's combine mask.
1003 #define render_scanline_extra_variables_transparent_color()                   \
1004   u32 pixel_combine = color_combine_mask(layer)                               \
1005
1006 #define render_scanline_extra_variables_transparent_color16(bg_type)          \
1007   render_scanline_extra_variables_transparent_color()                         \
1008
1009 #define render_scanline_extra_variables_transparent_color32(bg_type)          \
1010   render_scanline_extra_variables_transparent_color()                         \
1011
1012
1013
1014
// Text map width in pixels, indexed by the 2-bit map size field taken from
// bits 14-15 of the BG control register.
1015 static const u32 map_widths[] = { 256, 512, 256, 512 };
1016
1017 // Build text scanline rendering functions.
// render_scanline_text_builder(combine_op, alpha_op) defines
//   static void render_scanline_text_<combine_op>_<alpha_op>(u32 layer,
//    u32 start, u32 end, void *scanline)
// which draws pixels start .. end - 1 of text background `layer` into
// scanline, treated as an array of render_scanline_dest_<alpha_op>.
//
// Outline of the generated function:
//  - The extra-variables macro runs first; for the alpha variants it may
//    hand off to the color32 renderer and return.
//  - bg_control is read from the layer's BGxCNT register: bits 14-15 give
//    map_size, bits 8-12 the map base in 2KB steps, and bit 0x80 selects the
//    8bpp tile renderer over the 4bpp one.
//  - horizontal_offset is (BGxHOFS + start) % 512; vertical_offset is
//    (VCOUNT + BGxVOFS) % 512.
//  - map_base is moved to the map row holding vertical_offset (32 entries
//    per row). For map sizes with bit 1 set and vertical_offset >= 256 it
//    first skips (map_width / 8) * 32 entries.
//  - For map sizes with bit 0 set, map_ptr starts in whichever 32x32-entry
//    half holds horizontal_offset and second_ptr points at the other half.
//    Otherwise the offset wraps at 256 and second_ptr is the row start.
//  - `end` is converted from an end position into a pixel count before
//    tile_render is expanded.
1018
1019 #define render_scanline_text_builder(combine_op, alpha_op)                    \
1020 static void render_scanline_text_##combine_op##_##alpha_op(u32 layer,         \
1021  u32 start, u32 end, void *scanline)                                          \
1022 {                                                                             \
1023   render_scanline_extra_variables_##combine_op##_##alpha_op(text);            \
1024   u32 bg_control = io_registers[REG_BG0CNT + layer];                          \
1025   u32 map_size = (bg_control >> 14) & 0x03;                                   \
1026   u32 map_width = map_widths[map_size];                                       \
1027   u32 horizontal_offset =                                                     \
1028    (io_registers[REG_BG0HOFS + (layer * 2)] + start) % 512;                   \
1029   u32 vertical_offset = (io_registers[REG_VCOUNT] +                           \
1030    io_registers[REG_BG0VOFS + (layer * 2)]) % 512;                            \
1031   u32 current_pixel;                                                          \
1032   u32 current_pixels;                                                         \
1033   u32 partial_tile_run = 0;                                                   \
1034   u32 partial_tile_offset;                                                    \
1035   u32 tile_run;                                                               \
1036   u32 i;                                                                      \
1037   render_scanline_dest_##alpha_op *dest_ptr =                                 \
1038    ((render_scanline_dest_##alpha_op *)scanline) + start;                     \
1039                                                                               \
1040   u16 *map_base = (u16 *)(vram + ((bg_control >> 8) & 0x1F) * (1024 * 2));    \
1041   u16 *map_ptr, *second_ptr;                                                  \
1042   u8 *tile_ptr;                                                               \
1043                                                                               \
1044   end -= start;                                                               \
1045                                                                               \
1046   if((map_size & 0x02) && (vertical_offset >= 256))                           \
1047   {                                                                           \
1048     map_base += ((map_width / 8) * 32) +                                      \
1049      (((vertical_offset - 256) / 8) * 32);                                    \
1050   }                                                                           \
1051   else                                                                        \
1052   {                                                                           \
1053     map_base += (((vertical_offset % 256) / 8) * 32);                         \
1054   }                                                                           \
1055                                                                               \
1056   if(map_size & 0x01)                                                         \
1057   {                                                                           \
1058     if(horizontal_offset >= 256)                                              \
1059     {                                                                         \
1060       horizontal_offset -= 256;                                               \
1061       map_ptr = map_base + (32 * 32) + (horizontal_offset / 8);               \
1062       second_ptr = map_base;                                                  \
1063     }                                                                         \
1064     else                                                                      \
1065     {                                                                         \
1066       map_ptr = map_base + (horizontal_offset / 8);                           \
1067       second_ptr = map_base + (32 * 32);                                      \
1068     }                                                                         \
1069   }                                                                           \
1070   else                                                                        \
1071   {                                                                           \
1072     horizontal_offset %= 256;                                                 \
1073     map_ptr = map_base + (horizontal_offset / 8);                             \
1074     second_ptr = map_base;                                                    \
1075   }                                                                           \
1076                                                                               \
1077   if(bg_control & 0x80)                                                       \
1078   {                                                                           \
1079     tile_render(8bpp, combine_op, alpha_op);                                  \
1080   }                                                                           \
1081   else                                                                        \
1082   {                                                                           \
1083     tile_render(4bpp, combine_op, alpha_op);                                  \
1084   }                                                                           \
1085 }                                                                             \
1086
// Instantiate the text scanline renderers: base and transparent combine modes
// for each of the normal, color16, color32 and alpha pixel formats. The
// color32 variants are defined before the alpha ones, which call them from
// render_scanline_skip_alpha.
1087 render_scanline_text_builder(base, normal);
1088 render_scanline_text_builder(transparent, normal);
1089 render_scanline_text_builder(base, color16);
1090 render_scanline_text_builder(transparent, color16);
1091 render_scanline_text_builder(base, color32);
1092 render_scanline_text_builder(transparent, color32);
1093 render_scanline_text_builder(base, alpha);
1094 render_scanline_text_builder(transparent, alpha);
1095
1096
// NOTE(review): presumably the current X/Y reference points of the two
// affine-capable backgrounds; the code that reads and writes them is outside
// this section - confirm.
1097 s32 affine_reference_x[2];
1098 s32 affine_reference_y[2];
1099
// Loads the background fill pixel into current_pixel for the affine
// renderers: palette_ram_converted[0] for the normal renderer, bg_combine for
// the alpha renderer. color16 and color32 are aliases of the alpha variant.
1100 #define affine_render_bg_pixel_normal()                                       \
1101   current_pixel = palette_ram_converted[0]                                    \
1102
1103 #define affine_render_bg_pixel_alpha()                                        \
1104   current_pixel = bg_combine                                                  \
1105
1106 #define affine_render_bg_pixel_color16()                                      \
1107   affine_render_bg_pixel_alpha()                                              \
1108
1109 #define affine_render_bg_pixel_color32()                                      \
1110   affine_render_bg_pixel_alpha()                                              \
1111
1112 #define affine_render_bg_pixel_base(alpha_op)                                 \
1113   affine_render_bg_pixel_##alpha_op()                                         \
1114
1115 #define affine_render_bg_pixel_transparent(alpha_op)                          \
1116
1117 #define affine_render_bg_pixel_copy(alpha_op)                                 \
1118
// Store current_pixel (the fill value) at the current destination position.
// Only base layers write; transparent and copy layers expand to nothing.
#define affine_render_bg_base(alpha_op) \
  *dest_ptr = current_pixel

#define affine_render_bg_transparent(alpha_op)

#define affine_render_bg_copy(alpha_op)
1125
// Fill the rest of the span, from the current i up to end, with the fill
// pixel. current_pixel is reloaded first because the map-sampling loop that
// runs before this overwrites it. Transparent and copy layers expand to
// nothing.
#define affine_render_bg_remainder_base(alpha_op) \
  affine_render_bg_pixel_##alpha_op(); \
  while(i < end) \
  { \
    affine_render_bg_base(alpha_op); \
    advance_dest_ptr_base(1); \
    i++; \
  }

#define affine_render_bg_remainder_transparent(alpha_op)

#define affine_render_bg_remainder_copy(alpha_op)
1137
// Move on to the next destination pixel: advance the destination pointer by
// one and step the source coordinate by (dx, dy).
#define affine_render_next(combine_op) \
  source_y += dy; \
  source_x += dx; \
  advance_dest_ptr_##combine_op(1)
1142
// Scale-only rendering stays on one source row, so fold that row into the
// base pointers once: map_base moves to the row of map entries containing
// pixel_y and tile_base to the matching 8-byte pixel row inside each tile.
#define affine_render_scale_offset() \
  map_base += (pixel_y / 8) << map_pitch; \
  tile_base += ((pixel_y % 8) * 8)
1146
// Fetch and draw one pixel of a scale-only affine scanline, then step to the
// next pixel. Expects pixel_x to be a valid column and tile_base/map_base to
// have been moved to the current row by affine_render_scale_offset().
//
// The tile pointer is looked up only when pixel_x enters a different map
// column (tracked in last_map_offset). The original recomputed it again
// unconditionally on every pixel, which made that cache pointless.
#define affine_render_scale_pixel(combine_op, alpha_op) \
  map_offset = (pixel_x / 8); \
  if(map_offset != last_map_offset) \
  { \
    tile_ptr = tile_base + (map_base[map_offset] * 64); \
    last_map_offset = map_offset; \
  } \
  current_pixel = tile_ptr[(pixel_x % 8)]; \
  tile_8bpp_draw_##combine_op(0, none, 0, alpha_op); \
  affine_render_next(combine_op)
1158
// Render a scale-only (dy == 0) affine span without wraparound. The span is
// split into up to three runs: leading pixels whose source column is off the
// map, pixels that sample the map, and a trailing remainder. If the source
// row itself is off the map, the whole span is handled by the remainder.
// Off-map pixels get the fill pixel on base layers and are skipped on
// transparent/copy layers.
#define affine_render_scale(combine_op, alpha_op) \
{ \
  u32 i = 0; \
  pixel_y = source_y >> 8; \
  affine_render_bg_pixel_##combine_op(alpha_op); \
  if((u32)pixel_y < (u32)width_height) \
  { \
    affine_render_scale_offset(); \
    \
    /* Leading run: source column is outside the map. */ \
    for(; i < end; i++) \
    { \
      pixel_x = source_x >> 8; \
      if((u32)pixel_x < (u32)width_height) \
        break; \
      \
      affine_render_bg_##combine_op(alpha_op); \
      affine_render_next(combine_op); \
    } \
    \
    /* Map run: draw until the source column leaves the map again. */ \
    for(; i < end; i++) \
    { \
      pixel_x = source_x >> 8; \
      if((u32)pixel_x >= (u32)width_height) \
        break; \
      \
      affine_render_scale_pixel(combine_op, alpha_op); \
    } \
  } \
  \
  /* Trailing run: everything not consumed above. */ \
  affine_render_bg_remainder_##combine_op(alpha_op); \
}
1192
// Render a scale-only (dy == 0) affine span with wraparound: source
// coordinates are masked with (width_height - 1), so every pixel samples the
// map. This relies on width_height being a power of two, which holds for the
// value computed in render_scanline_affine_builder (1 << (7 + map_size)).
//
// The original guarded the loop with a pixel_y < width_height test. After
// masking that test can never fail, so it has been dropped.
#define affine_render_scale_wrap(combine_op, alpha_op) \
{ \
  u32 wrap_mask = width_height - 1; \
  pixel_y = (source_y >> 8) & wrap_mask; \
  affine_render_scale_offset(); \
  for(i = 0; i < end; i++) \
  { \
    pixel_x = (source_x >> 8) & wrap_mask; \
    affine_render_scale_pixel(combine_op, alpha_op); \
  } \
}
1207
1208
// Fetch and draw one pixel of an affine scanline whose source row can change
// from pixel to pixel, then step to the next pixel. Expects pixel_x and
// pixel_y to be valid map coordinates. The tile pointer is looked up again
// only when the map entry index differs from the previous pixel's.
#define affine_render_rotate_pixel(combine_op, alpha_op) \
  map_offset = ((pixel_y / 8) << map_pitch) + (pixel_x / 8); \
  if(last_map_offset != map_offset) \
  { \
    last_map_offset = map_offset; \
    tile_ptr = tile_base + (map_base[map_offset] * 64); \
  } \
  \
  current_pixel = tile_ptr[((pixel_y % 8) * 8) + (pixel_x % 8)]; \
  tile_8bpp_draw_##combine_op(0, none, 0, alpha_op); \
  affine_render_next(combine_op)
1220
// Render an affine span with dy != 0 and no wraparound. Pixels are treated
// as off the map until the source coordinate first lands inside it; map
// pixels are then drawn until the coordinate leaves, after which the rest of
// the span is handed to the remainder macro.
#define affine_render_rotate(combine_op, alpha_op) \
{ \
  affine_render_bg_pixel_##combine_op(alpha_op); \
  \
  /* Leading run: source coordinate is outside the map. */ \
  for(i = 0; i < end; i++) \
  { \
    pixel_x = source_x >> 8; \
    pixel_y = source_y >> 8; \
    \
    if(((u32)pixel_x < (u32)width_height) && \
     ((u32)pixel_y < (u32)width_height)) \
    { \
      break; \
    } \
    \
    affine_render_bg_##combine_op(alpha_op); \
    affine_render_next(combine_op); \
  } \
  \
  /* Map run: stops at the first coordinate outside the map. */ \
  for(; i < end; i++) \
  { \
    pixel_x = source_x >> 8; \
    pixel_y = source_y >> 8; \
    \
    if(((u32)pixel_x >= (u32)width_height) || \
     ((u32)pixel_y >= (u32)width_height)) \
    { \
      affine_render_bg_remainder_##combine_op(alpha_op); \
      break; \
    } \
    \
    affine_render_rotate_pixel(combine_op, alpha_op); \
  } \
}
1253
// Render an affine span with dy != 0 and wraparound: both source coordinates
// are masked with (width_height - 1) before the map lookup, so every pixel
// of the span samples the map.
#define affine_render_rotate_wrap(combine_op, alpha_op) \
{ \
  u32 wrap_mask = width_height - 1; \
  \
  for(i = 0; i < end; i++) \
  { \
    pixel_y = (source_y >> 8) & wrap_mask; \
    pixel_x = (source_x >> 8) & wrap_mask; \
    \
    affine_render_rotate_pixel(combine_op, alpha_op); \
  } \
}
1265
1266
// Build the affine background scanline renderers.
//
// Each instantiation defines
//   void render_scanline_affine_<combine_op>_<alpha_op>(u32 layer,
//    u32 start, u32 end, void *scanline)
// which draws destination pixels [start, end) of the given layer into
// scanline. layer is used as (layer - 2) to select the affine registers and
// reference point, so it must be 2 or 3.
//
// The map is square: width_height = 128 << map_size pixels, with map_size
// taken from bits 14-15 of the layer's control register. One of four inner
// loops is chosen from control bit 13 (wraparound) and from whether dy is
// zero (the source row is then constant across the scanline).
#define render_scanline_affine_builder(combine_op, alpha_op) \
void render_scanline_affine_##combine_op##_##alpha_op(u32 layer, \
 u32 start, u32 end, void *scanline) \
{ \
  render_scanline_extra_variables_##combine_op##_##alpha_op(affine); \
  u32 bg_control = io_registers[REG_BG0CNT + layer]; \
  u32 layer_offset = (layer - 2) * 8; \
  u32 map_size = (bg_control >> 14) & 0x03; \
  u32 width_height = 1 << (7 + map_size); \
  u32 map_pitch = map_size + 4; \
  u8 *map_base = vram + (((bg_control >> 8) & 0x1F) * (1024 * 2)); \
  u8 *tile_base = vram + (((bg_control >> 2) & 0x03) * (1024 * 16)); \
  u8 *tile_ptr = NULL; \
  u32 map_offset, last_map_offset = (u32)-1; \
  s32 dx = (s16)io_registers[REG_BG2PA + layer_offset]; \
  s32 dy = (s16)io_registers[REG_BG2PC + layer_offset]; \
  s32 source_x = affine_reference_x[layer - 2] + (start * dx); \
  s32 source_y = affine_reference_y[layer - 2] + (start * dy); \
  u32 pixel_x, pixel_y; \
  u32 current_pixel; \
  u32 i; \
  render_scanline_dest_##alpha_op *dest_ptr = \
   ((render_scanline_dest_##alpha_op *)scanline) + start; \
  \
  /* From here on end is a pixel count, not an end position. */ \
  end -= start; \
  \
  if(bg_control & 0x2000) \
  { \
    if(dy != 0) \
    { \
      affine_render_rotate_wrap(combine_op, alpha_op); \
    } \
    else \
    { \
      affine_render_scale_wrap(combine_op, alpha_op); \
    } \
  } \
  else \
  { \
    if(dy != 0) \
    { \
      affine_render_rotate(combine_op, alpha_op); \
    } \
    else \
    { \
      affine_render_scale(combine_op, alpha_op); \
    } \
  } \
}
1317
// Instantiate render_scanline_affine_<combine_op>_<alpha_op> for every
// combine_op/alpha_op pairing named by tile_layer_render_functions(affine).
render_scanline_affine_builder(base, normal);
render_scanline_affine_builder(base, color16);
render_scanline_affine_builder(base, color32);
render_scanline_affine_builder(base, alpha);
render_scanline_affine_builder(transparent, normal);
render_scanline_affine_builder(transparent, color16);
render_scanline_affine_builder(transparent, color32);
render_scanline_affine_builder(transparent, alpha);
1326
1327
// Write one bitmap-mode pixel at dest_ptr. Modes 3 and 5 pass current_pixel
// through convert_palette() and store the result; mode 4 defers to the tile
// expansion macro for the given alpha_op.
#define bitmap_render_pixel_mode3(alpha_op) \
  convert_palette(current_pixel); \
  dest_ptr[0] = current_pixel

#define bitmap_render_pixel_mode4(alpha_op) \
  tile_expand_base_##alpha_op(0)

#define bitmap_render_pixel_mode5(alpha_op) \
  convert_palette(current_pixel); \
  dest_ptr[0] = current_pixel
1337
1338
// Render one span of a bitmap background when dy == 0, i.e. the source row
// is the same for the whole scanline. width and height are the bitmap
// dimensions in pixels. Nothing is drawn if the source row is outside the
// bitmap, and destination pixels whose source column is outside the bitmap
// are left untouched.
//
// dx == 0x100 (one source pixel per destination pixel) takes a fast path
// that clips the span and then copies sequentially. Any other dx steps
// source_x per pixel.
//
// Fix: in the fast path end is unsigned, so when the span starts more than
// end pixels left of the bitmap, "end += pixel_x" wraps to a huge value. The
// old unsigned "(pixel_x + end) >= width" test then reset end to the full
// width, and width pixels were written at the already-advanced dest_ptr. The
// test is now signed, so a negative remaining count draws nothing. The
// second source_y range test in the general path repeated the outer one and
// has been removed.
#define bitmap_render_scale(type, alpha_op, width, height) \
  pixel_y = (source_y >> 8); \
  if((u32)pixel_y < (u32)height) \
  { \
    pixel_x = (source_x >> 8); \
    src_ptr += (pixel_y * width); \
    if(dx == 0x100) \
    { \
      if(pixel_x < 0) \
      { \
        /* Skip the destination pixels that fall left of the bitmap. */ \
        end += pixel_x; \
        dest_ptr -= pixel_x; \
        pixel_x = 0; \
      } \
      else if(pixel_x > 0) \
      { \
        src_ptr += pixel_x; \
      } \
      \
      /* Signed on purpose: end may be negative at this point. */ \
      if((s32)(pixel_x + end) >= (s32)(width)) \
        end = (width - pixel_x); \
      \
      for(i = 0; (s32)i < (s32)end; i++) \
      { \
        current_pixel = *src_ptr; \
        bitmap_render_pixel_##type(alpha_op); \
        src_ptr++; \
        dest_ptr++; \
      } \
    } \
    else \
    { \
      /* Skip destination pixels until the source column is in range. */ \
      for(i = 0; i < end; i++) \
      { \
        pixel_x = (source_x >> 8); \
        \
        if((u32)pixel_x < (u32)width) \
          break; \
        \
        source_x += dx; \
        dest_ptr++; \
      } \
      \
      /* Draw until the source column leaves the bitmap. */ \
      for(; i < end; i++) \
      { \
        pixel_x = (source_x >> 8); \
        \
        if((u32)pixel_x >= (u32)width) \
          break; \
        \
        current_pixel = src_ptr[pixel_x]; \
        bitmap_render_pixel_##type(alpha_op); \
        \
        source_x += dx; \
        dest_ptr++; \
      } \
    } \
  }
1402
// Render one span of a bitmap background when dy != 0, so both source
// coordinates change per pixel. Destination pixels are skipped until the
// source coordinate first falls inside the width x height bitmap; pixels are
// then drawn until it leaves, and the rest of the span is left untouched.
#define bitmap_render_rotate(type, alpha_op, width, height) \
  /* Skip run: source coordinate is outside the bitmap. */ \
  for(i = 0; i < end; i++) \
  { \
    pixel_x = source_x >> 8; \
    pixel_y = source_y >> 8; \
    \
    if(((u32)pixel_x < (u32)width) && ((u32)pixel_y < (u32)height)) \
    { \
      break; \
    } \
    \
    source_x += dx; \
    source_y += dy; \
    dest_ptr++; \
  } \
  \
  /* Draw run: stops at the first coordinate outside the bitmap. */ \
  for(; i < end; i++) \
  { \
    pixel_x = (source_x >> 8); \
    pixel_y = (source_y >> 8); \
    \
    if(((u32)pixel_x >= (u32)width) || ((u32)pixel_y >= (u32)height)) \
    { \
      break; \
    } \
    \
    current_pixel = src_ptr[(pixel_y * width) + pixel_x]; \
    bitmap_render_pixel_##type(alpha_op); \
    \
    source_x += dx; \
    source_y += dy; \
    dest_ptr++; \
  }
1432
1433
// Declare src_ptr, the start of the bitmap to read, for each bitmap mode.
// Mode 3 always reads from the start of vram as 16-bit pixels. Modes 4 and 5
// read from vram + 0xA000 when bit 0x10 of REG_DISPCNT is set and from the
// start of vram otherwise; mode 4 reads bytes, mode 5 reads 16-bit pixels.
// Mode 4 additionally declares the extra local its pixel macro needs
// (pixel_combine or palette, depending on RENDER_COLOR16_NORMAL).
#define render_scanline_vram_setup_mode3() \
  u16 *src_ptr = (u16 *)vram

#define render_scanline_vram_setup_mode5() \
  u16 *src_ptr = (io_registers[REG_DISPCNT] & 0x10) ? \
   (u16 *)(vram + 0xA000) : (u16 *)vram


#ifdef RENDER_COLOR16_NORMAL

#define render_scanline_vram_setup_mode4() \
  const u32 pixel_combine = 0; \
  u8 *src_ptr = (io_registers[REG_DISPCNT] & 0x10) ? \
   (vram + 0xA000) : vram


#else

#define render_scanline_vram_setup_mode4() \
  u16 *palette = palette_ram_converted; \
  u8 *src_ptr = (io_registers[REG_DISPCNT] & 0x10) ? \
   (vram + 0xA000) : vram

#endif
1467
1468
1469
// Build the bitmap scanline rendering functions.
//
// Each instantiation defines
//   static void render_scanline_bitmap_<type>_<alpha_op>(u32 start, u32 end,
//    void *scanline)
// which draws destination pixels [start, end) into scanline from a
// width x height bitmap. The source position comes from affine reference
// point 0 plus start steps of (dx, dy), where dx and dy are read from
// REG_BG2PA and REG_BG2PC. dy == 0 uses the constant-row renderer; any other
// dy uses the per-pixel one.
#define render_scanline_bitmap_builder(type, alpha_op, width, height) \
static void render_scanline_bitmap_##type##_##alpha_op(u32 start, u32 end, \
 void *scanline) \
{ \
  s32 dx = (s16)io_registers[REG_BG2PA]; \
  s32 dy = (s16)io_registers[REG_BG2PC]; \
  s32 source_x = affine_reference_x[0] + (start * dx); \
  s32 source_y = affine_reference_y[0] + (start * dy); \
  s32 pixel_x, pixel_y; \
  u32 current_pixel; \
  u32 i; \
  render_scanline_dest_##alpha_op *dest_ptr = \
   ((render_scanline_dest_##alpha_op *)scanline) + start; \
  render_scanline_vram_setup_##type(); \
  \
  /* From here on end is a pixel count, not an end position. */ \
  end -= start; \
  \
  if(dy != 0) \
  { \
    bitmap_render_rotate(type, alpha_op, width, height); \
  } \
  else \
  { \
    bitmap_render_scale(type, alpha_op, width, height); \
  } \
}
1503
1504 render_scanline_bitmap_builder(mode3, normal, 240, 160);
1505 render_scanline_bitmap_builder(mode4, normal, 240, 160);
1506 render_scanline_bitmap_builder(mode5, normal, 160, 128);
1507
1508
// Initializer listing the eight scanline renderers of one tile layer for the
// given layer type (text or affine): base and transparent variants for
// normal, alpha, color16 and color32, in that order.
#define tile_layer_render_functions(type) \
{ \
  render_scanline_##type##_base_normal, \
  render_scanline_##type##_transparent_normal, \
  \
  render_scanline_##type##_base_alpha, \
  render_scanline_##type##_transparent_alpha, \
  \
  render_scanline_##type##_base_color16, \
  render_scanline_##type##_transparent_color16, \
  \
  render_scanline_##type##_base_color32, \
  render_scanline_##type##_transparent_color32 \
}


// Initializer to use if a layer is unsupported for that mode.
#define tile_layer_render_null() \
{ \
  NULL, NULL, NULL, NULL \
}

// Initializer for a bitmap layer: the single "normal" renderer for the given
// bitmap mode.
#define bitmap_layer_render_functions(type) \
{ \
  render_scanline_bitmap_##type##_normal \
}
1535
1536 // Structs containing functions to render the layers for each mode, for
1537 // each render type.
// The first index is the value of (dispcnt & 0x07) used by
// render_scanline_layer_functions_tile(); the second index presumably selects
// the background layer (0-3) - TODO confirm against the callers.
// Row 0 uses the text renderers for all four entries, row 1 uses the affine
// renderers for entry 2 only, and row 2 uses them for entries 2 and 3.
// NOTE(review): only three rows exist, so this table must not be reached with
// (dispcnt & 0x07) greater than 2.
1538 static const tile_layer_render_struct tile_mode_renderers[3][4] =
1539 {
1540   {
1541     tile_layer_render_functions(text), tile_layer_render_functions(text),
1542     tile_layer_render_functions(text), tile_layer_render_functions(text)
1543   },
1544   {
1545     tile_layer_render_functions(text), tile_layer_render_functions(text),
1546     tile_layer_render_functions(affine), tile_layer_render_functions(text)
1547   },
1548   {
1549     tile_layer_render_functions(text), tile_layer_render_functions(text),
1550     tile_layer_render_functions(affine), tile_layer_render_functions(affine)
1551   }
1552 };
1553
// One bitmap renderer entry each for mode3, mode4 and mode5, in that order.
// render_scanline_layer_functions_bitmap() indexes this table with
// (dispcnt & 0x07) - 3, so entry 0 corresponds to a masked value of 3.
// NOTE(review): a masked value outside 3..5 would index out of bounds.
1554 static const bitmap_layer_render_struct bitmap_mode_renderers[3] =
1555 {
1556   bitmap_layer_render_functions(mode3),
1557   bitmap_layer_render_functions(mode4),
1558   bitmap_layer_render_functions(mode5)
1559 };
1560
1561
// Declares the local pointer `layer_renderers` and points it at the row of
// tile_mode_renderers selected by the low three bits of `dispcnt`, which must
// already be in scope at the expansion site. The expansion has no trailing
// semicolon; the user of the macro supplies it.
1562 #define render_scanline_layer_functions_tile()                                \
1563   const tile_layer_render_struct *layer_renderers =                           \
1564    tile_mode_renderers[dispcnt & 0x07]                                        \
1565
// Declares the local pointer `layer_renderers` and points it at the entry of
// bitmap_mode_renderers at index (dispcnt & 0x07) - 3. `dispcnt` must already
// be in scope at the expansion site. The expansion has no trailing semicolon;
// the user of the macro supplies it.
1566 #define render_scanline_layer_functions_bitmap()                              \
1567   const bitmap_layer_render_struct *layer_renderers =                         \
1568    bitmap_mode_renderers + ((dispcnt & 0x07) - 3)                             \
1569
1570
1571 // Adjust a flipped obj's starting position
// The noflip variant expands to nothing, so the tile_ptr expression built by
// obj_tile_offset_1D / obj_tile_offset_2D is left unchanged.
1572
1573 #define obj_tile_offset_noflip(color_depth)                                   \
1574
// Flip variant: appended to the tile_ptr expression built by
// obj_tile_offset_1D / obj_tile_offset_2D, it adds ((obj_width - 8) / 8) times
// tile_size_<color_depth>, presumably so that tile_ptr starts at the last tile
// of the row. The expansion begins with a binary '+' and is not a statement on
// its own.
1575 #define obj_tile_offset_flip(color_depth)                                     \
1576   + (tile_size_##color_depth * ((obj_width - 8) / 8))                         \
1577
1578
1579 // Adjust the obj's starting point if it goes too far off the left edge of
1580 // the screen.
// Non-flipped variant: moves tile_ptr forward by one tile_size_<color_depth>
// for every whole 8 pixels in partial_tile_offset. obj_render sets
// partial_tile_offset to (start - obj_x) before expanding this.
1581
1582 #define obj_tile_right_offset_noflip(color_depth)                             \
1583   tile_ptr += (partial_tile_offset / 8) * tile_size_##color_depth             \
1584
// Flipped variant of the left-edge adjustment: moves tile_ptr backward by one
// tile_size_<color_depth> for every whole 8 pixels in partial_tile_offset.
1585 #define obj_tile_right_offset_flip(color_depth)                               \
1586   tile_ptr -= (partial_tile_offset / 8) * tile_size_##color_depth             \
1587
1588 // Get the current row offset into an obj in 1D map space
// Sets tile_ptr to tile_base plus:
//  - 32 times the low 10 bits of obj_attribute_2,
//  - one full row of tiles ((obj_width / 8) * tile_size_<color_depth>) for
//    every 8 lines of vertical_offset,
//  - tile_width_<color_depth> for each remaining line inside the tile,
//  - whatever obj_tile_offset_<flip_op> appends (nothing for noflip).
1589
1590 #define obj_tile_offset_1D(color_depth, flip_op)                              \
1591   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1592    + ((vertical_offset / 8) * (obj_width / 8) * tile_size_##color_depth)      \
1593    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1594    obj_tile_offset_##flip_op(color_depth)                                     \
1595
1596 // Get the current row offset into an obj in 2D map space
// Same as the 1D variant except that every 8 lines of vertical_offset advance
// tile_ptr by a fixed 1024, independent of obj_width and color depth.
1597
1598 #define obj_tile_offset_2D(color_depth, flip_op)                              \
1599   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1600    + ((vertical_offset / 8) * 1024)                                           \
1601    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1602    obj_tile_offset_##flip_op(color_depth)                                     \
1603
1604
1605 // Get the palette for 4bpp obj.
// Stores ((obj_attribute_2 >> 8) & 0xF0) in current_palette, i.e. bits 12-15
// of obj_attribute_2 moved down to bits 4-7.
1606
1607 #define obj_get_palette_4bpp()                                                \
1608   current_palette = (obj_attribute_2 >> 8) & 0xF0                             \
1609
// The 8bpp variant expands to nothing: obj_render and obj_render_affine pick
// it by token pasting and current_palette is not assigned in that case.
1610 #define obj_get_palette_8bpp()                                                \
1611
1612
1613 // Render the current row of an obj.
// The parameters are pasted into helper macro names: combine_op and alpha_op
// are forwarded to the partial_tile_*_obj / multiple_tile_obj helpers,
// color_depth picks the 4bpp or 8bpp variants, map_space picks
// obj_tile_offset_1D or obj_tile_offset_2D, and flip_op picks the noflip or
// flip variants.
// The row is clipped against start and end and falls into one of three cases:
//  - obj_x < start: drawing begins at scanline + start. tile_ptr is moved past
//    the whole tiles hidden left of start, then a leading partial tile, the
//    whole tiles and (only when the obj also reaches end) a trailing partial
//    tile are drawn. Nothing is drawn if no pixel of the obj is right of start.
//  - otherwise, obj_x + obj_width >= end: whole tiles followed by a trailing
//    partial tile are drawn from scanline + obj_x up to end.
//  - otherwise obj_width / 8 whole tiles are drawn from scanline + obj_x.
// The expansion contains a `continue`, so it must be used inside a loop; it is
// reached when the visible run is shorter than the leading partial tile.
// Relies on obj_x, obj_width, obj_attribute_2, vertical_offset, tile_base,
// start, end and scanline being in scope, and on tile_ptr, dest_ptr,
// pixel_run, tile_run, partial_tile_run and partial_tile_offset being declared
// at the expansion site.
1614
1615 #define obj_render(combine_op, color_depth, alpha_op, map_space, flip_op)     \
1616 {                                                                             \
1617   obj_get_palette_##color_depth();                                            \
1618   obj_tile_offset_##map_space(color_depth, flip_op);                          \
1619                                                                               \
1620   if(obj_x < (s32)start)                                                      \
1621   {                                                                           \
1622     dest_ptr = scanline + start;                                              \
1623     pixel_run = obj_width - (start - obj_x);                                  \
1624     if((s32)pixel_run > 0)                                                    \
1625     {                                                                         \
1626       if((obj_x + obj_width) >= end)                                          \
1627       {                                                                       \
1628         pixel_run = end - start;                                              \
1629         partial_tile_offset = start - obj_x;                                  \
1630         obj_tile_right_offset_##flip_op(color_depth);                         \
1631         partial_tile_offset %= 8;                                             \
1632                                                                               \
1633         if(partial_tile_offset)                                               \
1634         {                                                                     \
1635           partial_tile_run = 8 - partial_tile_offset;                         \
1636           if((s32)pixel_run < (s32)partial_tile_run)                          \
1637           {                                                                   \
1638             if((s32)pixel_run > 0)                                            \
1639             {                                                                 \
1640               partial_tile_run = pixel_run;                                   \
1641               partial_tile_mid_obj(combine_op, color_depth, alpha_op,         \
1642                flip_op);                                                      \
1643             }                                                                 \
1644             continue;                                                         \
1645           }                                                                   \
1646           else                                                                \
1647           {                                                                   \
1648             pixel_run -= partial_tile_run;                                    \
1649             partial_tile_right_obj(combine_op, color_depth, alpha_op,         \
1650              flip_op);                                                        \
1651           }                                                                   \
1652         }                                                                     \
1653         tile_run = pixel_run / 8;                                             \
1654         multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);        \
1655         partial_tile_run = pixel_run % 8;                                     \
1656         if(partial_tile_run)                                                  \
1657         {                                                                     \
1658           partial_tile_left_obj(combine_op, color_depth, alpha_op,            \
1659            flip_op);                                                          \
1660         }                                                                     \
1661       }                                                                       \
1662       else                                                                    \
1663       {                                                                       \
1664         partial_tile_offset = start - obj_x;                                  \
1665         obj_tile_right_offset_##flip_op(color_depth);                         \
1666         partial_tile_offset %= 8;                                             \
1667         if(partial_tile_offset)                                               \
1668         {                                                                     \
1669           partial_tile_run = 8 - partial_tile_offset;                         \
1670           partial_tile_right_obj(combine_op, color_depth, alpha_op,           \
1671            flip_op);                                                          \
1672         }                                                                     \
1673         tile_run = pixel_run / 8;                                             \
1674         multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);        \
1675       }                                                                       \
1676     }                                                                         \
1677   }                                                                           \
1678   else                                                                        \
1679                                                                               \
1680   if((obj_x + obj_width) >= end)                                              \
1681   {                                                                           \
1682     pixel_run = end - obj_x;                                                  \
1683     if((s32)pixel_run > 0)                                                    \
1684     {                                                                         \
1685       dest_ptr = scanline + obj_x;                                            \
1686       tile_run = pixel_run / 8;                                               \
1687       multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);          \
1688       partial_tile_run = pixel_run % 8;                                       \
1689       if(partial_tile_run)                                                    \
1690       {                                                                       \
1691         partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op);    \
1692       }                                                                       \
1693     }                                                                         \
1694   }                                                                           \
1695   else                                                                        \
1696   {                                                                           \
1697     dest_ptr = scanline + obj_x;                                              \
1698     tile_run = obj_width / 8;                                                 \
1699     multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);            \
1700   }                                                                           \
1701 }                                                                             \
1702
// Row offset for the scaled-obj path in 1D map space: sets tile_ptr to
// tile_base plus 32 times the low 10 bits of obj_attribute_2, plus
// (max_x / 8) * tile_size_<color_depth> for every 8 lines of vertical_offset,
// plus tile_width_<color_depth> for each remaining line inside the tile.
// Uses max_x (not obj_width) for the row width and has no flip term.
1703 #define obj_scale_offset_1D(color_depth)                                      \
1704   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1705    + ((vertical_offset / 8) * (max_x / 8) * tile_size_##color_depth)          \
1706    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1707
1708 // Get the current row offset into an obj in 2D map space
// Scaled-obj variant: every 8 lines of vertical_offset advance tile_ptr by a
// fixed 1024, and each remaining line by tile_width_<color_depth>. There is no
// flip term.
1709
1710 #define obj_scale_offset_2D(color_depth)                                      \
1711   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1712    + ((vertical_offset / 8) * 1024)                                           \
1713    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1714
// Fetches one 4bpp pixel for the scaled-obj path. The byte at
// tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)] holds two pixels: an odd
// tile_x takes the high nibble, an even tile_x the low nibble. The value is
// left in current_pixel and control then passes to
// tile_4bpp_draw_<combine_op> (defined elsewhere). The expansion has no
// trailing semicolon.
1715 #define obj_render_scale_pixel_4bpp(combine_op, alpha_op)                     \
1716   if(tile_x & 0x01)                                                           \
1717   {                                                                           \
1718     current_pixel = tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)] >> 4;  \
1719   }                                                                           \
1720   else                                                                        \
1721   {                                                                           \
1722     current_pixel =                                                           \
1723      tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)] & 0x0F;               \
1724   }                                                                           \
1725                                                                               \
1726   tile_4bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1727
1728
// Fetches one 8bpp pixel for the scaled-obj path: current_pixel is the byte at
// tile_ptr[tile_map_offset + (tile_x & 0x07)], which is then passed on to
// tile_8bpp_draw_<combine_op> (defined elsewhere). Unlike the 4bpp variant,
// this expansion already ends with a semicolon.
1729 #define obj_render_scale_pixel_8bpp(combine_op, alpha_op)                     \
1730   current_pixel = tile_ptr[tile_map_offset + (tile_x & 0x07)];                \
1731   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op);                          \
1732
// Renders one row of an affine obj when dy == 0 (the case obj_render_affine
// routes here), so a single source row serves every pixel of the output row.
// source_y is advanced by y_delta * dmy and its integer part (source_y >> 8,
// i.e. 8 fractional bits) becomes vertical_offset; nothing is drawn unless
// vertical_offset lies in [0, max_y), which the single unsigned compare checks.
// source_x is then moved to the left end of the row. The first loop skips
// destination pixels whose source column is outside [0, max_x); the second
// loop draws pixels until the source column leaves that range or obj_width
// pixels have been visited in total.
// Relies on i, source_x, source_y, tile_x, tile_map_offset, dx, dmx, dmy,
// y_delta, middle_x, max_x, max_y and obj_width from the expansion site.
1733 #define obj_render_scale(combine_op, color_depth, alpha_op, map_space)        \
1734 {                                                                             \
1735   u32 vertical_offset;                                                        \
1736   source_y += (y_delta * dmy);                                                \
1737   vertical_offset = (source_y >> 8);                                          \
1738   if((u32)vertical_offset < (u32)max_y)                                       \
1739   {                                                                           \
1740     obj_scale_offset_##map_space(color_depth);                                \
1741     source_x += (y_delta * dmx) - (middle_x * dx);                            \
1742                                                                               \
1743     for(i = 0; i < obj_width; i++)                                            \
1744     {                                                                         \
1745       tile_x = (source_x >> 8);                                               \
1746                                                                               \
1747       if((u32)tile_x < (u32)max_x)                                            \
1748         break;                                                                \
1749                                                                               \
1750       source_x += dx;                                                         \
1751       advance_dest_ptr_##combine_op(1);                                       \
1752     }                                                                         \
1753                                                                               \
1754     for(; i < obj_width; i++)                                                 \
1755     {                                                                         \
1756       tile_x = (source_x >> 8);                                               \
1757                                                                               \
1758       if((u32)tile_x >= (u32)max_x)                                           \
1759         break;                                                                \
1760                                                                               \
1761       tile_map_offset = (tile_x >> 3) * tile_size_##color_depth;              \
1762       obj_render_scale_pixel_##color_depth(combine_op, alpha_op);             \
1763                                                                               \
1764       source_x += dx;                                                         \
1765       advance_dest_ptr_##combine_op(1);                                       \
1766     }                                                                         \
1767   }                                                                           \
1768 }                                                                             \
1769
1770
// 1D map space: sets obj_tile_pitch, the offset obj_render_rotate applies per
// row of tiles, to one row of tiles ((max_x / 8) * tile_size_<color_depth>).
1771 #define obj_rotate_offset_1D(color_depth)                                     \
1772   obj_tile_pitch = (max_x / 8) * tile_size_##color_depth                      \
1773
// 2D map space: sets obj_tile_pitch, the offset obj_render_rotate applies per
// row of tiles, to a fixed 1024; color_depth is unused.
1774 #define obj_rotate_offset_2D(color_depth)                                     \
1775   obj_tile_pitch = 1024                                                       \
1776
// Fetches one 4bpp pixel for the rotated-obj path. The byte index inside the
// tile is ((tile_x >> 1) & 0x03) + ((tile_y & 0x07) * obj_pitch), added to
// tile_map_offset; an odd tile_x takes the high nibble, an even tile_x the low
// nibble. The value is left in current_pixel and control then passes to
// tile_4bpp_draw_<combine_op> (defined elsewhere). No trailing semicolon.
1777 #define obj_render_rotate_pixel_4bpp(combine_op, alpha_op)                    \
1778   if(tile_x & 0x01)                                                           \
1779   {                                                                           \
1780     current_pixel = tile_ptr[tile_map_offset +                                \
1781      ((tile_x >> 1) & 0x03) + ((tile_y & 0x07) * obj_pitch)] >> 4;            \
1782   }                                                                           \
1783   else                                                                        \
1784   {                                                                           \
1785     current_pixel = tile_ptr[tile_map_offset +                                \
1786      ((tile_x >> 1) & 0x03) + ((tile_y & 0x07) * obj_pitch)] & 0x0F;          \
1787   }                                                                           \
1788                                                                               \
1789   tile_4bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1790
// Fetches one 8bpp pixel for the rotated-obj path: current_pixel is the byte
// at tile_map_offset + (tile_x & 0x07) + ((tile_y & 0x07) * obj_pitch) from
// tile_ptr, which is then passed on to tile_8bpp_draw_<combine_op> (defined
// elsewhere). No trailing semicolon.
1791 #define obj_render_rotate_pixel_8bpp(combine_op, alpha_op)                    \
1792   current_pixel = tile_ptr[tile_map_offset +                                  \
1793    (tile_x & 0x07) + ((tile_y & 0x07) * obj_pitch)];                          \
1794                                                                               \
1795   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1796
// Renders one row of an affine obj in the general case (obj_render_affine
// routes here when dy != 0): source_x and source_y both change per pixel.
// tile_ptr is set to tile_base plus 32 times the low 10 bits of
// obj_attribute_2, and obj_tile_pitch (the offset applied per row of tiles) is
// set by obj_rotate_offset_<map_space>.
// source_x/source_y are moved to the left end of the row. The first loop skips
// destination pixels while the source point is outside
// [0, max_x) x [0, max_y); the second loop draws pixels until the source point
// leaves that rectangle or obj_width pixels have been visited in total.
// Coordinates carry 8 fractional bits (integer part taken with >> 8).
1797 #define obj_render_rotate(combine_op, color_depth, alpha_op, map_space)       \
1798 {                                                                             \
1799   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32);                    \
1800   obj_rotate_offset_##map_space(color_depth);                                 \
1801                                                                               \
1802   source_x += (y_delta * dmx) - (middle_x * dx);                              \
1803   source_y += (y_delta * dmy) - (middle_x * dy);                              \
1804                                                                               \
1805   for(i = 0; i < obj_width; i++)                                              \
1806   {                                                                           \
1807     tile_x = (source_x >> 8);                                                 \
1808     tile_y = (source_y >> 8);                                                 \
1809                                                                               \
1810     if(((u32)tile_x < (u32)max_x) && ((u32)tile_y < (u32)max_y))              \
1811       break;                                                                  \
1812                                                                               \
1813     source_x += dx;                                                           \
1814     source_y += dy;                                                           \
1815     advance_dest_ptr_##combine_op(1);                                         \
1816   }                                                                           \
1817                                                                               \
1818   for(; i < obj_width; i++)                                                   \
1819   {                                                                           \
1820     tile_x = (source_x >> 8);                                                 \
1821     tile_y = (source_y >> 8);                                                 \
1822                                                                               \
1823     if(((u32)tile_x >= (u32)max_x) || ((u32)tile_y >= (u32)max_y))            \
1824       break;                                                                  \
1825                                                                               \
1826     tile_map_offset = ((tile_x >> 3) * tile_size_##color_depth) +             \
1827     ((tile_y >> 3) * obj_tile_pitch);                                         \
1828     obj_render_rotate_pixel_##color_depth(combine_op, alpha_op);              \
1829                                                                               \
1830     source_x += dx;                                                           \
1831     source_y += dy;                                                           \
1832     advance_dest_ptr_##combine_op(1);                                         \
1833   }                                                                           \
1834 }                                                                             \
1835
1836 // Render the current row of an affine transformed OBJ.
// The parameter group is selected by bits 9-13 of obj_attribute_1; each group
// spans 16 s16 entries of oam_ram, and dx, dmx, dy and dmy are read from
// entries 3, 7, 11 and 15 of the group.
// max_x/max_y keep the original obj size as the source bounds. When bit 9 of
// obj_attribute_0 (0x200) is set, obj_width/obj_height and middle_x/middle_y
// are doubled, while source_x/source_y still start at the centre of the
// undoubled obj (with 8 fractional bits).
// The row is then clipped against start and end (adjusting obj_x, obj_width
// and middle_x), dest_ptr and y_delta are set up, and the row is drawn by
// obj_render_scale when dy == 0 or by obj_render_rotate otherwise.
// The expansion contains `continue`, so it must be used inside a loop, and it
// modifies obj_x, obj_width and obj_height of the enclosing scope.
1837
1838 #define obj_render_affine(combine_op, color_depth, alpha_op, map_space)       \
1839 {                                                                             \
1840   s16 *params = (s16 *)oam_ram + (((obj_attribute_1 >> 9) & 0x1F) * 16);      \
1841   s32 dx = params[3];                                                         \
1842   s32 dmx = params[7];                                                        \
1843   s32 dy = params[11];                                                        \
1844   s32 dmy = params[15];                                                       \
1845   s32 source_x, source_y;                                                     \
1846   s32 tile_x, tile_y;                                                         \
1847   u32 tile_map_offset;                                                        \
1848   s32 middle_x;                                                               \
1849   s32 middle_y;                                                               \
1850   s32 max_x = obj_width;                                                      \
1851   s32 max_y = obj_height;                                                     \
1852   s32 y_delta;                                                                \
1853   u32 obj_pitch = tile_width_##color_depth;                                   \
1854   u32 obj_tile_pitch;                                                         \
1855                                                                               \
1856   middle_x = (obj_width / 2);                                                 \
1857   middle_y = (obj_height / 2);                                                \
1858                                                                               \
1859   source_x = (middle_x << 8);                                                 \
1860   source_y = (middle_y << 8);                                                 \
1861                                                                               \
1862                                                                               \
1863   if(obj_attribute_0 & 0x200)                                                 \
1864   {                                                                           \
1865     obj_width *= 2;                                                           \
1866     obj_height *= 2;                                                          \
1867     middle_x *= 2;                                                            \
1868     middle_y *= 2;                                                            \
1869   }                                                                           \
1870                                                                               \
1871   if((s32)obj_x < (s32)start)                                                 \
1872   {                                                                           \
1873     u32 x_delta = start - obj_x;                                              \
1874     middle_x -= x_delta;                                                      \
1875     obj_width -= x_delta;                                                     \
1876     obj_x = start;                                                            \
1877                                                                               \
1878     if((s32)obj_width <= 0)                                                   \
1879       continue;                                                               \
1880   }                                                                           \
1881                                                                               \
1882   if((s32)(obj_x + obj_width) >= (s32)end)                                    \
1883   {                                                                           \
1884     obj_width = end - obj_x;                                                  \
1885                                                                               \
1886     if((s32)obj_width <= 0)                                                   \
1887       continue;                                                               \
1888   }                                                                           \
1889   dest_ptr = scanline + obj_x;                                                \
1890                                                                               \
1891   y_delta = vcount - (obj_y + middle_y);                                      \
1892                                                                               \
1893   obj_get_palette_##color_depth();                                            \
1894                                                                               \
1895   if(dy == 0)                                                                 \
1896   {                                                                           \
1897     obj_render_scale(combine_op, color_depth, alpha_op, map_space);           \
1898   }                                                                           \
1899   else                                                                        \
1900   {                                                                           \
1901     obj_render_rotate(combine_op, color_depth, alpha_op, map_space);          \
1902   }                                                                           \
1903 }                                                                             \
1904
// Obj widths: 12 entries laid out as three groups of four. Compared with
// obj_height_table, group 0 has equal width and height, group 1 is wider than
// tall and group 2 is taller than wide.
// NOTE(review): presumably in pixels and indexed by (shape * 4 + size) from
// the OAM attributes - the lookup is outside this chunk, confirm there.
1905 static const u32 obj_width_table[] =
1906   { 8, 16, 32, 64, 16, 32, 32, 64, 8, 8, 16, 32 };
// Obj heights: 12 entries matching obj_width_table position by position.
// NOTE(review): presumably in pixels and indexed the same way as
// obj_width_table - the lookup is outside this chunk, confirm there.
1907 static const u32 obj_height_table[] =
1908   { 8, 16, 32, 64, 8, 8, 16, 32, 16, 32, 32, 64 };
1909
// File-scope bookkeeping for obj rendering; the code that fills and reads
// these arrays is outside this chunk.
// NOTE(review): presumably 160 is the number of scanlines (GBA_SCREEN_HEIGHT),
// 128 the maximum number of obj entries per list and 5 the number of priority
// buckets, with obj_priority_count holding the used length of each list and
// obj_alpha_count a per-scanline count - confirm against the code that
// populates them.
1910 static u8 obj_priority_list[5][160][128];
1911 static u32 obj_priority_count[5][160];
1912 static u32 obj_alpha_count[160];
1913
1914
1915 // Build obj rendering functions
1916
// Extra local declaration for the "normal" obj renderer; bg_type is unused in
// both variants and neither expansion ends with a semicolon.
//  - RENDER_COLOR16_NORMAL defined: declares the constant pixel_combine with
//    only bit 8 set.
//  - otherwise: declares `palette` pointing 256 entries into
//    palette_ram_converted.
1917 #ifdef RENDER_COLOR16_NORMAL
1918
1919 #define render_scanline_obj_extra_variables_normal(bg_type)                   \
1920   const u32 pixel_combine = (1 << 8)                                          \
1921
1922 #else
1923
1924 #define render_scanline_obj_extra_variables_normal(bg_type)                   \
1925   u16 *palette = palette_ram_converted + 256                                  \
1926
1927 #endif
1928
1929
// Declares the non-const local pixel_combine as color_combine_mask(4) with
// bit 8 also set. color_combine_mask is defined elsewhere. No trailing
// semicolon.
1930 #define render_scanline_obj_extra_variables_color()                           \
1931   u32 pixel_combine = color_combine_mask(4) | (1 << 8)                        \
1932
// Declares pixel_combine (via the _color variant) and `dest`. If bit 9
// (0x200) of pixel_combine is clear, the whole job is handed to
// render_scanline_obj_color32_<map_space>(priority, start, end, scanline) and
// the enclosing function returns immediately. Because of the bare `return;`
// this can only be expanded inside a function returning void, with priority,
// start, end and scanline in scope.
1933 #define render_scanline_obj_extra_variables_alpha_obj(map_space)              \
1934   render_scanline_obj_extra_variables_color();                                \
1935   u32 dest;                                                                   \
1936   if((pixel_combine & 0x00000200) == 0)                                       \
1937   {                                                                           \
1938     render_scanline_obj_color32_##map_space(priority, start, end, scanline);  \
1939     return;                                                                   \
1940   }                                                                           \
1941
// color16 variant: only declares pixel_combine through the _color variant;
// map_space is unused.
1942 #define render_scanline_obj_extra_variables_color16(map_space)                \
1943   render_scanline_obj_extra_variables_color()                                 \
1944
// color32 variant: only declares pixel_combine through the _color variant;
// map_space is unused.
1945 #define render_scanline_obj_extra_variables_color32(map_space)                \
1946   render_scanline_obj_extra_variables_color()                                 \
1947
// partial_alpha variant: declares pixel_combine through the _color variant,
// keeps a copy of its initial value in base_pixel_combine, and declares
// `dest`. map_space is unused. No trailing semicolon.
1948 #define render_scanline_obj_extra_variables_partial_alpha(map_space)          \
1949   render_scanline_obj_extra_variables_color();                                \
1950   u32 base_pixel_combine = pixel_combine;                                     \
1951   u32 dest                                                                    \
1952
// Locals for the "copy" obj renderers. Reads REG_BLDCNT and REG_DISPCNT from
// io_registers into bldcnt and dispcnt, sets obj_enable to REG_WINOUT shifted
// right by 8, declares layer_renderers through
// render_scanline_layer_functions_<type> (tile or bitmap), and declares
// copy_start, copy_end, a 240-entry u16 copy_buffer and copy_ptr. The last
// declaration has no trailing semicolon.
1953 #define render_scanline_obj_extra_variables_copy(type)                        \
1954   u32 bldcnt = io_registers[REG_BLDCNT];                                      \
1955   u32 dispcnt = io_registers[REG_DISPCNT];                                    \
1956   u32 obj_enable = io_registers[REG_WINOUT] >> 8;                             \
1957   render_scanline_layer_functions_##type();                                   \
1958   u32 copy_start, copy_end;                                                   \
1959   u16 copy_buffer[240];                                                       \
1960   u16 *copy_ptr                                                               \
1961
// copy_tile variant: the copy locals with the tile layer renderer table;
// map_space is unused.
1962 #define render_scanline_obj_extra_variables_copy_tile(map_space)              \
1963   render_scanline_obj_extra_variables_copy(tile)                              \
1964
// copy_bitmap variant: the copy locals with the bitmap layer renderer table;
// map_space is unused.
1965 #define render_scanline_obj_extra_variables_copy_bitmap(map_space)            \
1966   render_scanline_obj_extra_variables_copy(bitmap)                            \
1967
1968
// Dispatches one obj row to the matching renderer.
//  - Bit 8 of obj_attribute_0 (0x100) set: affine path; bit 13 of
//    obj_attribute_0 selects 8bpp (set) or 4bpp (clear).
//  - Otherwise: vertical_offset is the row inside the obj (vcount - obj_y),
//    mirrored to obj_height - vertical_offset - 1 when bit 13 of
//    obj_attribute_1 is set. The switch value combines bit 13 of
//    obj_attribute_0 (as 2: 8bpp instead of 4bpp) with bit 12 of
//    obj_attribute_1 (as 1: flip instead of noflip).
// Relies on vertical_offset, vcount, obj_y and obj_height from the expansion
// site, plus everything obj_render / obj_render_affine need.
1969 #define render_scanline_obj_main(combine_op, alpha_op, map_space)             \
1970   if(obj_attribute_0 & 0x100)                                                 \
1971   {                                                                           \
1972     if((obj_attribute_0 >> 13) & 0x01)                                        \
1973     {                                                                         \
1974       obj_render_affine(combine_op, 8bpp, alpha_op, map_space);               \
1975     }                                                                         \
1976     else                                                                      \
1977     {                                                                         \
1978       obj_render_affine(combine_op, 4bpp, alpha_op, map_space);               \
1979     }                                                                         \
1980   }                                                                           \
1981   else                                                                        \
1982   {                                                                           \
1983     vertical_offset = vcount - obj_y;                                         \
1984                                                                               \
1985     if((obj_attribute_1 >> 13) & 0x01)                                        \
1986       vertical_offset = obj_height - vertical_offset - 1;                     \
1987                                                                               \
1988     switch(((obj_attribute_0 >> 12) & 0x02) |                                 \
1989      ((obj_attribute_1 >> 12) & 0x01))                                        \
1990     {                                                                         \
1991       case 0x0:                                                               \
1992         obj_render(combine_op, 4bpp, alpha_op, map_space, noflip);            \
1993         break;                                                                \
1994                                                                               \
1995       case 0x1:                                                               \
1996         obj_render(combine_op, 4bpp, alpha_op, map_space, flip);              \
1997         break;                                                                \
1998                                                                               \
1999       case 0x2:                                                               \
2000         obj_render(combine_op, 8bpp, alpha_op, map_space, noflip);            \
2001         break;                                                                \
2002                                                                               \
2003       case 0x3:                                                               \
2004         obj_render(combine_op, 8bpp, alpha_op, map_space, flip);              \
2005         break;                                                                \
2006     }                                                                         \
2007   }                                                                           \
2008
/* Renderers built without per-object alpha selection: forward straight to
 * the shared per-object body using the alpha_op the renderer was built for. */
#define render_scanline_obj_no_partial_alpha(combine_op, alpha_op, map_space) \
  render_scanline_obj_main(combine_op, alpha_op, map_space)

/* Renderers with per-object alpha selection. The OBJ mode field (attribute 0,
 * bits 10-11) picks the path:
 *   mode == 0 : render as color32 with pixel_combine = base_pixel_combine
 *   mode != 0 : render as alpha_obj with pixel_combine = 0x300
 * The alpha_op argument is accepted for signature parity but is not used. */
#define render_scanline_obj_partial_alpha(combine_op, alpha_op, map_space) \
  if(((obj_attribute_0 >> 10) & 0x03) == 0) \
  { \
    pixel_combine = base_pixel_combine; \
    render_scanline_obj_main(combine_op, color32, map_space); \
  } \
  else \
  { \
    pixel_combine = 0x00000300; \
    render_scanline_obj_main(combine_op, alpha_obj, map_space); \
  }
2023
/* Transparent combine: no per-object setup is needed. */
#define render_scanline_obj_prologue_transparent(alpha_op)

/* Copy combine: work out the horizontal span the object covers (twice the
 * width when attribute 0 bit 9 is set), clamp it to [start, end), and render
 * that span into copy_buffer through render_scanline_conditional_<type>.
 * copy_ptr is left pointing at the first covered pixel of copy_buffer.
 * Must be expanded inside the per-object loop: an object whose span misses
 * [start, end) is skipped with `continue`. */
#define render_scanline_obj_prologue_copy_body(type) \
  copy_start = obj_x; \
  copy_end = obj_x + ((obj_attribute_0 & 0x200) ? (obj_width * 2) : obj_width); \
  \
  if(copy_start < start) \
    copy_start = start; \
  if(copy_end > end) \
    copy_end = end; \
  \
  if((copy_start >= end) || (copy_end <= start)) \
  { \
    continue; \
  } \
  \
  render_scanline_conditional_##type(copy_start, copy_end, copy_buffer, \
   obj_enable, dispcnt, bldcnt, layer_renderers); \
  copy_ptr = copy_buffer + copy_start;

/* Copy prologue for tile-mode renderers. */
#define render_scanline_obj_prologue_copy_tile() \
  render_scanline_obj_prologue_copy_body(tile)

/* Copy prologue for bitmap-mode renderers. */
#define render_scanline_obj_prologue_copy_bitmap() \
  render_scanline_obj_prologue_copy_body(bitmap)

/* Dispatch on the renderer's alpha_op name (copy_tile or copy_bitmap). */
#define render_scanline_obj_prologue_copy(alpha_op) \
  render_scanline_obj_prologue_##alpha_op()
2057
2058
2059 #define render_scanline_obj_builder(combine_op, alpha_op, map_space,          \
2060  partial_alpha_op)                                                            \
2061 static void render_scanline_obj_##alpha_op##_##map_space(u32 priority,        \
2062  u32 start, u32 end, render_scanline_dest_##alpha_op *scanline)               \
2063 {                                                                             \
2064   render_scanline_obj_extra_variables_##alpha_op(map_space);                  \
2065   s32 obj_num, i;                                                             \
2066   s32 obj_x, obj_y;                                                           \
2067   s32 obj_size;                                                               \
2068   s32 obj_width, obj_height;                                                  \
2069   u32 obj_attribute_0, obj_attribute_1, obj_attribute_2;                      \
2070   s32 vcount = io_registers[REG_VCOUNT];                                      \
2071   u32 tile_run;                                                               \
2072   u32 current_pixels;                                                         \
2073   u32 current_pixel;                                                          \
2074   u32 current_palette;                                                        \
2075   u32 vertical_offset;                                                        \
2076   u32 partial_tile_run, partial_tile_offset;                                  \
2077   u32 pixel_run;                                                              \
2078   u16 *oam_ptr;                                                               \
2079   render_scanline_dest_##alpha_op *dest_ptr;                                  \
2080   u8 *tile_base = vram + 0x10000;                                             \
2081   u8 *tile_ptr;                                                               \
2082   u32 obj_count = obj_priority_count[priority][vcount];                       \
2083   u8 *obj_list = obj_priority_list[priority][vcount];                         \
2084                                                                               \
2085   for(obj_num = 0; obj_num < obj_count; obj_num++)                            \
2086   {                                                                           \
2087     oam_ptr = oam_ram + (obj_list[obj_num] * 4);                              \
2088     obj_attribute_0 = oam_ptr[0];                                             \
2089     obj_attribute_1 = oam_ptr[1];                                             \
2090     obj_attribute_2 = oam_ptr[2];                                             \
2091     obj_size = ((obj_attribute_0 >> 12) & 0x0C) | (obj_attribute_1 >> 14);    \
2092                                                                               \
2093     obj_x = (s32)(obj_attribute_1 << 23) >> 23;                               \
2094     obj_width = obj_width_table[obj_size];                                    \
2095                                                                               \
2096     render_scanline_obj_prologue_##combine_op(alpha_op);                      \
2097                                                                               \
2098     obj_y = obj_attribute_0 & 0xFF;                                           \
2099                                                                               \
2100     if(obj_y > 160)                                                           \
2101       obj_y -= 256;                                                           \
2102                                                                               \
2103     obj_height = obj_height_table[obj_size];                                  \
2104     render_scanline_obj_##partial_alpha_op(combine_op, alpha_op, map_space);  \
2105   }                                                                           \
2106 }                                                                             \
2107
2108 render_scanline_obj_builder(transparent, normal, 1D, no_partial_alpha);
2109 render_scanline_obj_builder(transparent, normal, 2D, no_partial_alpha);
2110 render_scanline_obj_builder(transparent, color16, 1D, no_partial_alpha);
2111 render_scanline_obj_builder(transparent, color16, 2D, no_partial_alpha);
2112 render_scanline_obj_builder(transparent, color32, 1D, no_partial_alpha);
2113 render_scanline_obj_builder(transparent, color32, 2D, no_partial_alpha);
2114 render_scanline_obj_builder(transparent, alpha_obj, 1D, no_partial_alpha);
2115 render_scanline_obj_builder(transparent, alpha_obj, 2D, no_partial_alpha);
2116 render_scanline_obj_builder(transparent, partial_alpha, 1D, partial_alpha);
2117 render_scanline_obj_builder(transparent, partial_alpha, 2D, partial_alpha);
2118 render_scanline_obj_builder(copy, copy_tile, 1D, no_partial_alpha);
2119 render_scanline_obj_builder(copy, copy_tile, 2D, no_partial_alpha);
2120 render_scanline_obj_builder(copy, copy_bitmap, 1D, no_partial_alpha);
2121 render_scanline_obj_builder(copy, copy_bitmap, 2D, no_partial_alpha);
2122
2123
2124
2125 static void order_obj(u32 video_mode)
2126 {
2127   s32 obj_num, priority, row;
2128   s32 obj_x, obj_y;
2129   s32 obj_size, obj_mode;
2130   s32 obj_width, obj_height;
2131   u32 obj_priority;
2132   u32 obj_attribute_0, obj_attribute_1, obj_attribute_2;
2133   u32 current_count;
2134   u16 *oam_ptr = oam_ram + 508;
2135
2136   for(priority = 0; priority < 5; priority++)
2137   {
2138     for(row = 0; row < 160; row++)
2139     {
2140       obj_priority_count[priority][row] = 0;
2141     }
2142   }
2143
2144   for(row = 0; row < 160; row++)
2145   {
2146     obj_alpha_count[row] = 0;
2147   }
2148
2149   for(obj_num = 127; obj_num >= 0; obj_num--, oam_ptr -= 4)
2150   {
2151     obj_attribute_0 = oam_ptr[0];
2152     obj_attribute_2 = oam_ptr[2];
2153     obj_size = obj_attribute_0 & 0xC000;
2154     obj_priority = (obj_attribute_2 >> 10) & 0x03;
2155     obj_mode = (obj_attribute_0 >> 10) & 0x03;
2156
2157     if(((obj_attribute_0 & 0x0300) != 0x0200) && (obj_size != 0xC000) &&
2158      (obj_mode != 3) && ((video_mode < 3) ||
2159      ((obj_attribute_2 & 0x3FF) >= 512)))
2160     {
2161       obj_y = obj_attribute_0 & 0xFF;
2162       if(obj_y > 160)
2163         obj_y -= 256;
2164
2165       obj_attribute_1 = oam_ptr[1];
2166       obj_size = ((obj_size >> 12) & 0x0C) | (obj_attribute_1 >> 14);
2167       obj_height = obj_height_table[obj_size];
2168       obj_width = obj_width_table[obj_size];
2169
2170       if(obj_attribute_0 & 0x200)
2171       {
2172         obj_height *= 2;
2173         obj_width *= 2;
2174       }
2175
2176       if(((obj_y + obj_height) > 0) && (obj_y < 160))
2177       {
2178         obj_x = (s32)(obj_attribute_1 << 23) >> 23;
2179
2180         if(((obj_x + obj_width) > 0) && (obj_x < 240))
2181         {
2182           if(obj_y < 0)
2183           {
2184             obj_height += obj_y;
2185             obj_y = 0;
2186           }
2187
2188           if((obj_y + obj_height) >= 160)
2189           {
2190             obj_height = 160 - obj_y;
2191           }
2192
2193           if(obj_mode == 1)
2194           {
2195             for(row = obj_y; row < obj_y + obj_height; row++)
2196             {
2197               current_count = obj_priority_count[obj_priority][row];
2198               obj_priority_list[obj_priority][row][current_count] = obj_num;
2199               obj_priority_count[obj_priority][row] = current_count + 1;
2200               obj_alpha_count[row]++;
2201             }
2202           }
2203           else
2204           {
2205             if(obj_mode == 2)
2206             {
2207               obj_priority = 4;
2208             }
2209
2210             for(row = obj_y; row < obj_y + obj_height; row++)
2211             {
2212               current_count = obj_priority_count[obj_priority][row];
2213               obj_priority_list[obj_priority][row][current_count] = obj_num;
2214               obj_priority_count[obj_priority][row] = current_count + 1;
2215             }
2216           }
2217         }
2218       }
2219     }
2220   }
2221 }
2222
2223 u32 layer_order[16];
2224 u32 layer_count;
2225
2226 static void order_layers(u32 layer_flags)
2227 {
2228   s32 priority, layer_number;
2229   layer_count = 0;
2230
2231   for(priority = 3; priority >= 0; priority--)
2232   {
2233     for(layer_number = 3; layer_number >= 0; layer_number--)
2234     {
2235       if(((layer_flags >> layer_number) & 1) &&
2236        ((io_registers[REG_BG0CNT + layer_number] & 0x03) == priority))
2237       {
2238         layer_order[layer_count] = layer_number;
2239         layer_count++;
2240       }
2241     }
2242
2243     if((obj_priority_count[priority][io_registers[REG_VCOUNT]] > 0)
2244      && (layer_flags & 0x10))
2245     {
2246       layer_order[layer_count] = priority | 0x04;
2247       layer_count++;
2248     }
2249   }
2250 }
2251
2252 #define fill_line(_start, _end)                                               \
2253   u32 i;                                                                      \
2254                                                                               \
2255   for(i = _start; i < _end; i++)                                              \
2256   {                                                                           \
2257     dest_ptr[i] = color;                                                      \
2258   }                                                                           \
2259
2260
2261 #define fill_line_color_normal()                                              \
2262   color = palette_ram_converted[color]                                        \
2263
2264 #define fill_line_color_alpha()                                               \
2265
2266 #define fill_line_color_color16()                                             \
2267
2268 #define fill_line_color_color32()                                             \
2269
2270 #define fill_line_builder(type)                                               \
2271 static void fill_line_##type(u16 color, render_scanline_dest_##type *dest_ptr,\
2272  u32 start, u32 end)                                                          \
2273 {                                                                             \
2274   fill_line_color_##type();                                                   \
2275   fill_line(start, end);                                                      \
2276 }                                                                             \
2277
2278 fill_line_builder(normal);
2279 fill_line_builder(alpha);
2280 fill_line_builder(color16);
2281 fill_line_builder(color32);
2282
2283
// Pixel math below works on a "spread" 32-bit form of a 16-bit color:
// (p | (p << 16)) & 0x07E0F81F keeps the 0x07E0 field in the upper half and
// the 0xF81F fields in the lower half, leaving spare bits above each field so
// all three can be scaled with a single multiply.

// Alpha blend two pixels (pixel_top and pixel_bottom). pixel_top must already
// be in spread form; pixel_bottom is looked up from the upper half of
// pixel_pair. The result is (top * blend_a + bottom * blend_b) / 16.

#define blend_pixel() \
  pixel_bottom = palette_ram_converted[(pixel_pair >> 16) & 0x1FF]; \
  pixel_bottom = (pixel_bottom | (pixel_bottom << 16)) & 0x07E0F81F; \
  pixel_top = ((pixel_top * blend_a) + (pixel_bottom * blend_b)) >> 4


// Alpha blend two pixels, allowing for saturation (individual channels > 31).
// The operation is optimized towards saturation not occurring: the per-field
// overflow bits are tested together first and only then one by one, clamping
// each overflowed field to all ones.

#define blend_saturate_pixel() \
  blend_pixel(); \
  if(pixel_top & 0x08010020) \
  { \
    if(pixel_top & 0x08000000) \
      pixel_top |= 0x07E00000; \
    \
    if(pixel_top & 0x00010000) \
      pixel_top |= 0x0000F800; \
    \
    if(pixel_top & 0x00000020) \
      pixel_top |= 0x0000001F; \
  }

// Scale a spread pixel by blend / 16 and add the precomputed `upper` offset.
#define brighten_pixel() \
  pixel_top = ((pixel_top * blend) >> 4) + upper;

// Scale a spread pixel by blend / 16.
#define darken_pixel() \
  pixel_top *= blend; \
  pixel_top >>= 4;

// True when both flag bits 0x04000000 and 0x00000200 are set in pixel_pair.
#define effect_condition_alpha \
  ((pixel_pair & 0x04000200) == 0x04000200)

// True when flag bit 0x200 is set in pixel_source.
#define effect_condition_fade(pixel_source) \
  (((pixel_source) & 0x00000200) != 0)

// Apply <expand_type>_pixel() to pixel_top: spread it, run the effect, mask
// off anything that leaked between fields, then fold the upper half back
// down so the low 16 bits hold the finished color.
#define expand_pixel_no_dest(expand_type, pixel_source) \
  pixel_top |= pixel_top << 16; \
  pixel_top &= 0x07E0F81F; \
  expand_type##_pixel(); \
  pixel_top &= 0x07E0F81F; \
  pixel_top |= pixel_top >> 16
2328
/* Look up the palette entry selected by the low 9 bits of pixel_source, run
 * the <expand_type> effect on it and store the result at *screen_dest_ptr. */
#define expand_pixel(expand_type, pixel_source) \
  pixel_top = palette_ram_converted[(pixel_source) & 0x1FF]; \
  expand_pixel_no_dest(expand_type, pixel_source); \
  *screen_dest_ptr = pixel_top

/* Convert pixels [start, end) from screen_src_ptr into screen_dest_ptr.
 * Each source value is loaded into pixel_source; when effect_condition holds
 * the <expand_type> effect is applied, otherwise the pixel is a plain
 * palette lookup. Advances both pointers and rewrites `end` into a count. */
#define expand_loop(expand_type, effect_condition, pixel_source) \
  screen_src_ptr += start; \
  screen_dest_ptr += start; \
  \
  for(i = 0, end -= start; i < end; \
   i++, screen_src_ptr++, screen_dest_ptr++) \
  { \
    pixel_source = *screen_src_ptr; \
    if(!(effect_condition)) \
    { \
      *screen_dest_ptr = palette_ram_converted[pixel_source & 0x1FF]; \
      continue; \
    } \
    \
    expand_pixel(expand_type, pixel_source); \
  }
2356
2357
/* Convert pixels [start, end) where each source entry is a pixel pair.
 *   fade flag clear          -> plain palette lookup
 *   fade and alpha flags set -> <alpha_expand> effect
 *   fade flag only           -> <expand_type> effect
 * Advances both pointers and rewrites `end` into a count. */
#define expand_loop_partial_alpha(alpha_expand, expand_type) \
  screen_src_ptr += start; \
  screen_dest_ptr += start; \
  \
  for(i = 0, end -= start; i < end; \
   i++, screen_src_ptr++, screen_dest_ptr++) \
  { \
    pixel_pair = *screen_src_ptr; \
    if(!effect_condition_fade(pixel_pair)) \
    { \
      *screen_dest_ptr = palette_ram_converted[pixel_pair & 0x1FF]; \
    } \
    else if(effect_condition_alpha) \
    { \
      expand_pixel(alpha_expand, pixel_pair); \
    } \
    else \
    { \
      expand_pixel(expand_type, pixel_pair); \
    } \
  }


/* Run the partial-alpha loop, using the saturating blend only when the two
 * blend weights can sum past 16 (i.e. a channel could overflow). */
#define expand_partial_alpha(expand_type) \
  if((blend_a + blend_b) <= 16) \
  { \
    expand_loop_partial_alpha(blend, expand_type); \
  } \
  else \
  { \
    expand_loop_partial_alpha(blend_saturate, expand_type); \
  }
2398
2399
2400
2401 // Blend top two pixels of scanline with each other.
2402
#ifdef RENDER_COLOR16_NORMAL

#ifndef ARM_ARCH

/* Post-pass for scanlines rendered in "normal" mode.
 *
 * Intentionally a no-op. The previous body returned unconditionally before
 * its loop, so the loop (which replaced each entry in [start, end) with its
 * palette_ram_converted lookup) never executed; that dead code and its
 * unused locals are removed here without changing behavior.
 * NOTE(review): presumably the conversion was disabled because the normal
 * path already stores converted colors (fill_line_color_normal does a
 * palette_ram_converted lookup) - confirm before reinstating a pass here.
 *
 * screen_ptr: scanline buffer (untouched).
 * start, end: pixel range (ignored).
 */
void expand_normal(u16 *screen_ptr, u32 start, u32 end)
{
  (void)screen_ptr;
  (void)start;
  (void)end;
}

#endif

#else

/* Without RENDER_COLOR16_NORMAL the call compiles away entirely. */
#define expand_normal(screen_ptr, start, end)

#endif
2432
2433
2434 void expand_blend(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2435  u32 start, u32 end);
2436
2437 #ifndef ARM_ARCH
2438
2439 void expand_blend(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2440  u32 start, u32 end)
2441 {
2442   u32 pixel_pair;
2443   u32 pixel_top, pixel_bottom;
2444   u32 bldalpha = io_registers[REG_BLDALPHA];
2445   u32 blend_a = bldalpha & 0x1F;
2446   u32 blend_b = (bldalpha >> 8) & 0x1F;
2447   u32 i;
2448
2449   if(blend_a > 16)
2450     blend_a = 16;
2451
2452   if(blend_b > 16)
2453     blend_b = 16;
2454
2455   // The individual colors can saturate over 31, this should be taken
2456   // care of in an alternate pass as it incurs a huge additional speedhit.
2457   if((blend_a + blend_b) > 16)
2458   {
2459     expand_loop(blend_saturate, effect_condition_alpha, pixel_pair);
2460   }
2461   else
2462   {
2463     expand_loop(blend, effect_condition_alpha, pixel_pair);
2464   }
2465 }
2466
2467 #endif
2468
2469 // Blend scanline with white.
2470
2471 static void expand_darken(u16 *screen_src_ptr, u16 *screen_dest_ptr,
2472  u32 start, u32 end)
2473 {
2474   u32 pixel_top;
2475   s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);
2476   u32 i;
2477
2478   if(blend < 0)
2479     blend = 0;
2480
2481   expand_loop(darken, effect_condition_fade(pixel_top), pixel_top);
2482 }
2483
2484
2485 // Blend scanline with black.
2486
2487 static void expand_brighten(u16 *screen_src_ptr, u16 *screen_dest_ptr,
2488  u32 start, u32 end)
2489 {
2490   u32 pixel_top;
2491   u32 blend = io_registers[REG_BLDY] & 0x1F;
2492   u32 upper;
2493   u32 i;
2494
2495   if(blend > 16)
2496     blend = 16;
2497
2498   upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;
2499   blend = 16 - blend;
2500
2501   expand_loop(brighten, effect_condition_fade(pixel_top), pixel_top);
2502
2503 }
2504
2505
2506 // Expand scanline such that if both top and bottom pass it's alpha,
2507 // if only top passes it's as specified, and if neither pass it's normal.
2508
2509 static void expand_darken_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2510  u32 start, u32 end)
2511 {
2512   s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);
2513   u32 pixel_pair;
2514   u32 pixel_top, pixel_bottom;
2515   u32 bldalpha = io_registers[REG_BLDALPHA];
2516   u32 blend_a = bldalpha & 0x1F;
2517   u32 blend_b = (bldalpha >> 8) & 0x1F;
2518   u32 i;
2519
2520   if(blend < 0)
2521     blend = 0;
2522
2523   if(blend_a > 16)
2524     blend_a = 16;
2525
2526   if(blend_b > 16)
2527     blend_b = 16;
2528
2529   expand_partial_alpha(darken);
2530 }
2531
2532
2533 static void expand_brighten_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2534  u32 start, u32 end)
2535 {
2536   s32 blend = io_registers[REG_BLDY] & 0x1F;
2537   u32 pixel_pair;
2538   u32 pixel_top, pixel_bottom;
2539   u32 bldalpha = io_registers[REG_BLDALPHA];
2540   u32 blend_a = bldalpha & 0x1F;
2541   u32 blend_b = (bldalpha >> 8) & 0x1F;
2542   u32 upper;
2543   u32 i;
2544
2545   if(blend > 16)
2546     blend = 16;
2547
2548   upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;
2549   blend = 16 - blend;
2550
2551   if(blend_a > 16)
2552     blend_a = 16;
2553
2554   if(blend_b > 16)
2555     blend_b = 16;
2556
2557   expand_partial_alpha(brighten);
2558 }
2559
2560
// Render an OBJ layer from _start to _end, using the 1D or 2D renderer
// according to dispcnt bit 6. Clears the OBJ marker (bit 2) from
// current_layer first, leaving the bare priority that is passed on.
//
// Wrapped in do { } while(0) so the whole expansion is a single statement:
// the macro stays correct under an unbraced if/else, and callers still
// terminate it with ';' exactly as before.

#define render_obj_layer(type, dest, _start, _end) \
  do \
  { \
    current_layer &= ~0x04; \
    if(dispcnt & 0x40) \
      render_scanline_obj_##type##_1D(current_layer, _start, _end, dest); \
    else \
      render_scanline_obj_##type##_2D(current_layer, _start, _end, dest); \
  } while(0)
2570
2571
// Fill a target from _start to _end with color index 0, using the fill_line
// variant that matches the target type.

#define fill_line_bg(type, dest, _start, _end) \
  fill_line_##type(0, dest, _start, _end)


// Render all layers as they appear in the layer order, across the full
// 240 pixel line. The first layer establishes the base of the line: a BG
// uses its base renderer, while an OBJ layer is drawn over a background
// fill. Every following layer is drawn transparently on top.

#define render_layers(tile_alpha, obj_alpha, dest) \
{ \
  current_layer = layer_order[0]; \
  if(!(current_layer & 0x04)) \
  { \
    /* BG first: it provides the base of the line. */ \
    layer_renderers[current_layer].tile_alpha##_render_base(current_layer, \
     0, 240, dest); \
  } \
  else \
  { \
    /* OBJ first: lay down the background, then the objects. */ \
    fill_line_bg(tile_alpha, dest, 0, 240); \
    render_obj_layer(obj_alpha, dest, 0, 240); \
  } \
  \
  /* Remaining layers, in order. */ \
  layer_order_pos = 1; \
  while(layer_order_pos < layer_count) \
  { \
    current_layer = layer_order[layer_order_pos]; \
    if(!(current_layer & 0x04)) \
    { \
      layer_renderers[current_layer]. \
       tile_alpha##_render_transparent(current_layer, 0, 240, dest); \
    } \
    else \
    { \
      render_obj_layer(obj_alpha, dest, 0, 240); \
    } \
    layer_order_pos++; \
  } \
}
2612
/* Alpha blending is worth rendering only when REG_BLDALPHA (masked with
 * 0x1F1F) is not 0x001F, and REG_BLDCNT has at least one bit set in both its
 * 0x3F and 0x3F00 fields. */
#define render_condition_alpha \
  (((io_registers[REG_BLDALPHA] & 0x1F1F) != 0x001F) && \
   (io_registers[REG_BLDCNT] & 0x3F) && \
   (io_registers[REG_BLDCNT] & 0x3F00))

/* Fading is worth rendering only when the low 5 bits of REG_BLDY are
 * non-zero and REG_BLDCNT has at least one bit set in its 0x3F field. */
#define render_condition_fade \
  ((io_registers[REG_BLDY] & 0x1F) && \
   (io_registers[REG_BLDCNT] & 0x3F))
2621
// Statement macro: renders columns _start.._end of the current scanline,
// choosing the code path from the effect mode in bits 6-7 of `bldcnt`
// (0x01 alpha blend, 0x02 fade to white, 0x03 fade to black).
//
//   renderer        - macro invoked as renderer(tile_mode, obj_mode, dest) to
//                     draw the layers into dest.
//   layer_condition - when false no layer is drawn; the span is filled with
//                     palette_ram_converted[0] instead, after the fade
//                     branches have had the chance to process pixel_top.
//   alpha_condition - must be true for effect mode 0x01 to be applied.
//   fade_condition  - must be true for effect modes 0x02 / 0x03 to be applied.
//   _start, _end    - forwarded to the expand_* and fill_line_color16 helpers.
//
// The macro reads `bldcnt` and `scanline` from the enclosing scope. Every
// effect branch that fires ends with `return;`, so the macro may only be
// expanded inside a function returning void. When no effect branch fires the
// macro falls through to a default render and does not return.
//
// When obj_alpha_count[io_registers[REG_VCOUNT]] is greater than zero, all
// paths render into a local u32 screen_buffer[240] and then expand it into
// `scanline`. Otherwise only the alpha-blend path uses screen_buffer; the
// fade paths render 16-bit colour straight into `scanline` and brighten or
// darken it in place, and the default path renders with the `normal`
// renderers followed by expand_normal.
//
// NOTE(review): in the no-layer branch the locals `blend` and `upper` are set
// but never named again in the visible text; presumably expand_pixel_no_dest
// (defined outside this view) consumes them by name - confirm before renaming.
// NOTE(review): color_combine_mask_a(5) presumably tests whether the backdrop
// takes part in the effect - confirm against its definition.
2622 #define render_layers_color_effect(renderer, layer_condition,                 \
2623  alpha_condition, fade_condition, _start, _end)                               \
2624 {                                                                             \
2625   if(layer_condition)                                                         \
2626   {                                                                           \
2627     if(obj_alpha_count[io_registers[REG_VCOUNT]] > 0)                         \
2628     {                                                                         \
2629       /* Render based on special effects mode. */                             \
2630       u32 screen_buffer[240];                                                 \
2631       switch((bldcnt >> 6) & 0x03)                                            \
2632       {                                                                       \
2633         /* Alpha blend */                                                     \
2634         case 0x01:                                                            \
2635         {                                                                     \
2636           if(alpha_condition)                                                 \
2637           {                                                                   \
2638             renderer(alpha, alpha_obj, screen_buffer);                        \
2639             expand_blend(screen_buffer, scanline, _start, _end);              \
2640             return;                                                           \
2641           }                                                                   \
2642           break;                                                              \
2643         }                                                                     \
2644                                                                               \
2645         /* Fade to white */                                                   \
2646         case 0x02:                                                            \
2647         {                                                                     \
2648           if(fade_condition)                                                  \
2649           {                                                                   \
2650             renderer(color32, partial_alpha, screen_buffer);                  \
2651             expand_brighten_partial_alpha(screen_buffer, scanline,            \
2652              _start, _end);                                                   \
2653             return;                                                           \
2654           }                                                                   \
2655           break;                                                              \
2656         }                                                                     \
2657                                                                               \
2658         /* Fade to black */                                                   \
2659         case 0x03:                                                            \
2660         {                                                                     \
2661           if(fade_condition)                                                  \
2662           {                                                                   \
2663             renderer(color32, partial_alpha, screen_buffer);                  \
2664             expand_darken_partial_alpha(screen_buffer, scanline,              \
2665              _start, _end);                                                   \
2666             return;                                                           \
2667           }                                                                   \
2668           break;                                                              \
2669         }                                                                     \
2670       }                                                                       \
2671                                                                               \
2672       renderer(color32, partial_alpha, screen_buffer);                        \
2673       expand_blend(screen_buffer, scanline, _start, _end);                    \
2674     }                                                                         \
2675     else                                                                      \
2676     {                                                                         \
2677       /* Render based on special effects mode. */                             \
2678       switch((bldcnt >> 6) & 0x03)                                            \
2679       {                                                                       \
2680         /* Alpha blend */                                                     \
2681         case 0x01:                                                            \
2682         {                                                                     \
2683           if(alpha_condition)                                                 \
2684           {                                                                   \
2685             u32 screen_buffer[240];                                           \
2686             renderer(alpha, alpha_obj, screen_buffer);                        \
2687             expand_blend(screen_buffer, scanline, _start, _end);              \
2688             return;                                                           \
2689           }                                                                   \
2690           break;                                                              \
2691         }                                                                     \
2692                                                                               \
2693         /* Fade to white */                                                   \
2694         case 0x02:                                                            \
2695         {                                                                     \
2696           if(fade_condition)                                                  \
2697           {                                                                   \
2698             renderer(color16, color16, scanline);                             \
2699             expand_brighten(scanline, scanline, _start, _end);                \
2700             return;                                                           \
2701           }                                                                   \
2702           break;                                                              \
2703         }                                                                     \
2704                                                                               \
2705         /* Fade to black */                                                   \
2706         case 0x03:                                                            \
2707         {                                                                     \
2708           if(fade_condition)                                                  \
2709           {                                                                   \
2710             renderer(color16, color16, scanline);                             \
2711             expand_darken(scanline, scanline, _start, _end);                  \
2712             return;                                                           \
2713           }                                                                   \
2714           break;                                                              \
2715         }                                                                     \
2716       }                                                                       \
2717                                                                               \
2718       renderer(normal, normal, scanline);                                     \
2719       expand_normal(scanline, _start, _end);                                  \
2720     }                                                                         \
2721   }                                                                           \
2722   else                                                                        \
2723   {                                                                           \
2724     u32 pixel_top = palette_ram_converted[0];                                 \
2725     switch((bldcnt >> 6) & 0x03)                                              \
2726     {                                                                         \
2727       /* Fade to white */                                                     \
2728       case 0x02:                                                              \
2729       {                                                                       \
2730         if(color_combine_mask_a(5))                                           \
2731         {                                                                     \
2732           u32 blend = io_registers[REG_BLDY] & 0x1F;                          \
2733           u32 upper;                                                          \
2734                                                                               \
2735           if(blend > 16)                                                      \
2736             blend = 16;                                                       \
2737                                                                               \
2738           upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;                   \
2739           blend = 16 - blend;                                                 \
2740                                                                               \
2741           expand_pixel_no_dest(brighten, pixel_top);                          \
2742         }                                                                     \
2743         break;                                                                \
2744       }                                                                       \
2745                                                                               \
2746       /* Fade to black */                                                     \
2747       case 0x03:                                                              \
2748       {                                                                       \
2749         if(color_combine_mask_a(5))                                           \
2750         {                                                                     \
2751           s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);                   \
2752                                                                               \
2753           if(blend < 0)                                                       \
2754             blend = 0;                                                        \
2755                                                                               \
2756           expand_pixel_no_dest(darken, pixel_top);                            \
2757         }                                                                     \
2758         break;                                                                \
2759       }                                                                       \
2760     }                                                                         \
2761     fill_line_color16(pixel_top, scanline, _start, _end);                     \
2762   }                                                                           \
2763 }                                                                             \
2764
2765
2766 // Renders an entire scanline from 0 to 240, based on current color mode.
2767
// Parameters:
//   scanline - destination buffer for the rendered line.
//   dispcnt  - not referenced directly in this body.
//              NOTE(review): presumably read by name inside the macros
//              expanded below - confirm before removing or renaming.
//
// The body is entirely macro expansions. render_layers_color_effect reads
// `bldcnt` and `scanline` by name, so `bldcnt` is loaded from REG_BLDCNT here.
// `current_layer` and `layer_order_pos` are left uninitialised;
// NOTE(review): presumably they are scratch variables for the render_layers
// macro - confirm. The effect macro may `return` from this function on its own.
2768 static void render_scanline_tile(u16 *scanline, u32 dispcnt)
2769 {
2770   u32 current_layer;
2771   u32 layer_order_pos;
2772   u32 bldcnt = io_registers[REG_BLDCNT];
2773   render_scanline_layer_functions_tile();
2774
2775   render_layers_color_effect(render_layers, layer_count,
2776    render_condition_alpha, render_condition_fade, 0, 240);
2777 }
2778
// Renders a full bitmap-mode scanline (columns 0 to 240) without any colour
// effect: the line is first filled via fill_line_bg, then every entry of
// layer_order[0 .. layer_count) is drawn in order. Entries with bit 0x04 set
// are drawn with render_obj_layer; all others go through the single
// layer_renderers->normal_render callback.
//
// Parameters:
//   scanline - destination buffer for the rendered line.
//   dispcnt  - not referenced directly in this body.
//              NOTE(review): presumably read by name inside render_obj_layer
//              or render_scanline_layer_functions_bitmap - confirm.
//
// NOTE(review): `layer_renderers` is not declared in the visible text;
// presumably render_scanline_layer_functions_bitmap() declares it - confirm.
2779 static void render_scanline_bitmap(u16 *scanline, u32 dispcnt)
2780 {
2781   render_scanline_layer_functions_bitmap();
2782   u32 current_layer;
2783   u32 layer_order_pos;
2784
2785   fill_line_bg(normal, scanline, 0, 240);
2786
2787   for(layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++)
2788   {
2789     current_layer = layer_order[layer_order_pos];
2790     if(current_layer & 0x04)
2791     {
2792       render_obj_layer(normal, scanline, 0, 240);
2793     }
2794     else
2795     {
2796       layer_renderers->normal_render(0, 240, scanline);
2797     }
2798   }
2799 }
2800
2801 // Render the layers from start to end, drawing only those that the enable
2802 // flags allow.
2803
// Statement macro with the renderer(tile_mode, obj_mode, dest) shape expected
// by render_layers_color_effect.
//
//   tile_alpha - pasted into layer_renderers[n].<tile_alpha>_render_base and
//                <tile_alpha>_render_transparent, and passed to fill_line_bg.
//   obj_alpha  - passed to render_obj_layer.
//   dest       - destination buffer.
//
// Names taken from the enclosing scope rather than from parameters:
// current_layer, layer_order_pos, layer_order, layer_count, enable_flags,
// start, end and layer_renderers. layer_order_pos is NOT reset here; scanning
// begins at whatever value the caller left in it.
//
// Behaviour:
//   - enable_flags bit 0x10 clear: entries with bit 0x04 set, and entries
//     whose bit (1 << layer) is clear in enable_flags, are skipped. The first
//     surviving layer is drawn with _render_base, later ones with
//     _render_transparent.
//   - enable_flags bit 0x10 set: entries with bit 0x04 set are always drawn
//     via render_obj_layer; if such an entry comes first, the line is filled
//     with fill_line_bg beforehand. Other layers still need their enable bit.
//   - If the scan for a first layer reaches layer_count, the span is filled
//     with fill_line_bg and control jumps to the local label `skip`.
//
// `__label__ skip;` is the GCC local-label extension; it keeps the label
// private to each expansion so the macro can be expanded more than once in
// one function.
//
// NOTE(review): in both scanning loops layer_order[layer_order_pos] is read
// right after the increment and before the `== layer_count` test, so the
// element at index layer_count is read once. Confirm layer_order always has
// at least layer_count + 1 elements.
2804 #define render_layers_conditional(tile_alpha, obj_alpha, dest)                \
2805 {                                                                             \
2806   __label__ skip;                                                             \
2807   current_layer = layer_order[layer_order_pos];                               \
2808   /* If OBJ aren't enabled skip to the first non-OBJ layer */                 \
2809   if(!(enable_flags & 0x10))                                                  \
2810   {                                                                           \
2811     while((current_layer & 0x04) || !((1 << current_layer) & enable_flags))   \
2812     {                                                                         \
2813       layer_order_pos++;                                                      \
2814       current_layer = layer_order[layer_order_pos];                           \
2815                                                                               \
2816       /* Oops, ran out of layers, render the background. */                   \
2817       if(layer_order_pos == layer_count)                                      \
2818       {                                                                       \
2819         fill_line_bg(tile_alpha, dest, start, end);                           \
2820         goto skip;                                                            \
2821       }                                                                       \
2822     }                                                                         \
2823                                                                               \
2824     /* Render the first valid layer */                                        \
2825     layer_renderers[current_layer].tile_alpha##_render_base(current_layer,    \
2826      start, end, dest);                                                       \
2827                                                                               \
2828     layer_order_pos++;                                                        \
2829                                                                               \
2830     /* Render the rest of the layers if active, skipping OBJ ones. */         \
2831     for(; layer_order_pos < layer_count; layer_order_pos++)                   \
2832     {                                                                         \
2833       current_layer = layer_order[layer_order_pos];                           \
2834       if(!(current_layer & 0x04) && ((1 << current_layer) & enable_flags))    \
2835       {                                                                       \
2836         layer_renderers[current_layer].                                       \
2837          tile_alpha##_render_transparent(current_layer, start, end, dest);    \
2838       }                                                                       \
2839     }                                                                         \
2840   }                                                                           \
2841   else                                                                        \
2842   {                                                                           \
2843     /* Find the first active layer, skip all of the inactive ones */          \
2844     while(!((current_layer & 0x04) || ((1 << current_layer) & enable_flags))) \
2845     {                                                                         \
2846       layer_order_pos++;                                                      \
2847       current_layer = layer_order[layer_order_pos];                           \
2848                                                                               \
2849       /* Oops, ran out of layers, render the background. */                   \
2850       if(layer_order_pos == layer_count)                                      \
2851       {                                                                       \
2852         fill_line_bg(tile_alpha, dest, start, end);                           \
2853         goto skip;                                                            \
2854       }                                                                       \
2855     }                                                                         \
2856                                                                               \
2857     if(current_layer & 0x04)                                                  \
2858     {                                                                         \
2859       /* If the first one is OBJ render the background then render it. */     \
2860       fill_line_bg(tile_alpha, dest, start, end);                             \
2861       render_obj_layer(obj_alpha, dest, start, end);                          \
2862     }                                                                         \
2863     else                                                                      \
2864     {                                                                         \
2865       /* Otherwise render a base layer. */                                    \
2866       layer_renderers[current_layer].                                         \
2867        tile_alpha##_render_base(current_layer, start, end, dest);             \
2868     }                                                                         \
2869                                                                               \
2870     layer_order_pos++;                                                        \
2871                                                                               \
2872     /* Render the rest of the layers. */                                      \
2873     for(; layer_order_pos < layer_count; layer_order_pos++)                   \
2874     {                                                                         \
2875       current_layer = layer_order[layer_order_pos];                           \
2876       if(current_layer & 0x04)                                                \
2877       {                                                                       \
2878         render_obj_layer(obj_alpha, dest, start, end);                        \
2879       }                                                                       \
2880       else                                                                    \
2881       {                                                                       \
2882         if(enable_flags & (1 << current_layer))                               \
2883         {                                                                     \
2884           layer_renderers[current_layer].                                     \
2885            tile_alpha##_render_transparent(current_layer, start, end, dest);  \
2886         }                                                                     \
2887       }                                                                       \
2888     }                                                                         \
2889   }                                                                           \
2890                                                                               \
2891   skip:                                                                       \
2892     ;                                                                         \
2893 }                                                                             \
2894
2895
2896 // Render all of the BG and OBJ in a tiled scanline from start to end ONLY if
2897 // enable_flag allows that layer/OBJ. Also conditionally render color effects.
2898
// Parameters:
//   start, end      - column span to render.
//   scanline        - destination buffer for the rendered line.
//   enable_flags    - bits 0-4 (0x1F) choose what may be drawn; if none is set
//                     (or layer_count is zero) only the backdrop fill path of
//                     render_layers_color_effect runs. Bit 0x10 is the OBJ
//                     gate inside render_layers_conditional. Bit 0x20 must be
//                     set for either the alpha or the fade effect to apply.
//   dispcnt         - not referenced directly in this body.
//                     NOTE(review): presumably read by name inside
//                     render_obj_layer - confirm.
//   bldcnt          - effect-mode source read by render_layers_color_effect.
//   layer_renderers - table indexed by layer number inside
//                     render_layers_conditional.
//
// layer_order_pos must start at 0 because render_layers_conditional continues
// from its current value. The effect macro may `return` from this function.
2899 static void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline,
2900  u32 enable_flags, u32 dispcnt, u32 bldcnt, const tile_layer_render_struct
2901  *layer_renderers)
2902 {
2903   u32 current_layer;
2904   u32 layer_order_pos = 0;
2905
2906   render_layers_color_effect(render_layers_conditional,
2907    (layer_count && (enable_flags & 0x1F)),
2908    ((enable_flags & 0x20) && render_condition_alpha),
2909    ((enable_flags & 0x20) && render_condition_fade), start, end);
2910 }
2911
2912
2913 // Render the BG and OBJ in a bitmap scanline from start to end ONLY if
2914 // enable_flag allows that layer/OBJ. Also conditionally render color effects.
2915
// Parameters:
//   start, end      - column span to render.
//   scanline        - destination buffer for the rendered line.
//   enable_flags    - bit 0x10 gates entries of layer_order that have bit 0x04
//                     set (drawn via render_obj_layer); bit 0x04 gates every
//                     other entry (drawn via layer_renderers->normal_render).
//   dispcnt, bldcnt - not referenced directly in this body; no colour effect
//                     is applied here.
//                     NOTE(review): dispcnt is presumably read by name inside
//                     render_obj_layer - confirm.
//   layer_renderers - supplies the single normal_render callback.
//
// The span is always filled with fill_line_bg first, so it is overwritten
// even when enable_flags disables everything.
2916 static void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline,
2917  u32 enable_flags, u32 dispcnt, u32 bldcnt, const bitmap_layer_render_struct
2918  *layer_renderers)
2919 {
2920   u32 current_layer;
2921   u32 layer_order_pos;
2922
2923   fill_line_bg(normal, scanline, start, end);
2924
2925   for(layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++)
2926   {
2927     current_layer = layer_order[layer_order_pos];
2928     if(current_layer & 0x04)
2929     {
2930       if(enable_flags & 0x10)
2931       {
2932         render_obj_layer(normal, scanline, start, end);
2933       }
2934     }
2935     else
2936     {
2937       if(enable_flags & 0x04)
2938         layer_renderers->normal_render(start, end, scanline);
2939     }
2940   }
2941 }
2942
2943
// Statement macro: loads the horizontal extent and enable bits of window
// <window_number> into variables that must already exist in scope.
//   window_N_x1     = high byte of REG_WIN<N>H
//   window_N_x2     = low byte of REG_WIN<N>H
//   window_N_enable = six bits of `winin`, taken from bit (window_number * 8)
// Both x values are then clamped to at most 240.
//
// Expects `winin` in the enclosing scope. The final assignment has no
// terminating semicolon, so the caller supplies it. The expansion is several
// statements plus two bare `if`s and is not wrapped in braces; use it only
// inside a braced block.
2944 #define window_x_coords(window_number)                                        \
2945   window_##window_number##_x1 =                                               \
2946    io_registers[REG_WIN##window_number##H] >> 8;                              \
2947   window_##window_number##_x2 =                                               \
2948    io_registers[REG_WIN##window_number##H] & 0xFF;                            \
2949   window_##window_number##_enable =                                           \
2950    (winin >> (window_number * 8)) & 0x3F;                                     \
2951                                                                               \
2952   if(window_##window_number##_x1 > 240)                                       \
2953     window_##window_number##_x1 = 240;                                        \
2954                                                                               \
2955   if(window_##window_number##_x2 > 240)                                       \
2956     window_##window_number##_x2 = 240                                         \
2957
// Declaration macro: declares window_N_x1, _x2, _y1, _y2 and _enable (all
// u32, _enable starting at 0) for window <window_number>, then decides
// whether that window covers the scanline `vcount`.
//
// y1 and y2 are the high and low bytes of REG_WIN<N>V. If the window is judged
// to cover the line, window_x_coords() fills in x1, x2 and the enable bits.
// Otherwise x1 and x2 are both set to 240 and _enable stays 0. Callers such as
// render_window_clip_1 test x1 != 240 to decide whether to split the line.
//
// Coverage test (in both cases y1 must be <= 227, and y2 > 227 also counts as
// covering the line):
//   y1 >  y2 (wrapped):   vcount <= y2  ||  vcount >  y1
//   y1 <= y2 (unwrapped): vcount >= y1  &&  vcount <  y2
//
// Expects `vcount` in scope, plus `winin` for window_x_coords. Because it
// declares variables it must be expanded at block scope, once per window
// number.
//
// NOTE(review): the wrapped case uses `<=` / `>` where the unwrapped case uses
// `>=` / `<`, so the two treat lines y1 and y2 differently. Confirm against
// the hardware documentation whether this asymmetry is intended.
2958 #define window_coords(window_number)                                          \
2959   u32 window_##window_number##_x1, window_##window_number##_x2;               \
2960   u32 window_##window_number##_y1, window_##window_number##_y2;               \
2961   u32 window_##window_number##_enable = 0;                                    \
2962   window_##window_number##_y1 =                                               \
2963    io_registers[REG_WIN##window_number##V] >> 8;                              \
2964   window_##window_number##_y2 =                                               \
2965    io_registers[REG_WIN##window_number##V] & 0xFF;                            \
2966                                                                               \
2967   if(window_##window_number##_y1 > window_##window_number##_y2)               \
2968   {                                                                           \
2969     if((((vcount <= window_##window_number##_y2) ||                           \
2970      (vcount > window_##window_number##_y1)) ||                               \
2971      (window_##window_number##_y2 > 227)) &&                                  \
2972      (window_##window_number##_y1 <= 227))                                    \
2973     {                                                                         \
2974       window_x_coords(window_number);                                         \
2975     }                                                                         \
2976     else                                                                      \
2977     {                                                                         \
2978       window_##window_number##_x1 = 240;                                      \
2979       window_##window_number##_x2 = 240;                                      \
2980     }                                                                         \
2981   }                                                                           \
2982   else                                                                        \
2983   {                                                                           \
2984     if((((vcount >= window_##window_number##_y1) &&                           \
2985      (vcount < window_##window_number##_y2)) ||                               \
2986      (window_##window_number##_y2 > 227)) &&                                  \
2987      (window_##window_number##_y1 <= 227))                                    \
2988     {                                                                         \
2989       window_x_coords(window_number);                                         \
2990     }                                                                         \
2991     else                                                                      \
2992     {                                                                         \
2993       window_##window_number##_x1 = 240;                                      \
2994       window_##window_number##_x2 = 240;                                      \
2995     }                                                                         \
2996   }                                                                           \
2997
// Statement macro: renders columns start..end with the enable flags of window
// <window_type>, doing nothing when start == end.
//
//   type        - pasted to select render_scanline_conditional_<type>.
//   window_type - pasted to select window_<window_type>_enable.
//
// Expects `scanline`, `dispcnt`, `bldcnt` and `layer_renderers` in scope.
// The expansion is a bare `if` with no `else`; wrap the use in braces when it
// sits directly in front of an `else`.
2998 #define render_window_segment(type, start, end, window_type)                  \
2999   if(start != end)                                                            \
3000   {                                                                           \
3001     render_scanline_conditional_##type(start, end, scanline,                  \
3002      window_##window_type##_enable, dispcnt, bldcnt, layer_renderers);        \
3003   }                                                                           \
3004
// Expression macro: same call as render_window_segment but without the
// start != end test, for callers that have already ruled out an empty span.
// Expands to a single call with no trailing semicolon. Expects `scanline`,
// `dispcnt`, `bldcnt` and `layer_renderers` in scope.
3005 #define render_window_segment_unequal(type, start, end, window_type)          \
3006   render_scanline_conditional_##type(start, end, scanline,                    \
3007    window_##window_type##_enable, dispcnt, bldcnt, layer_renderers)           \
3008
// Statement macro: renders the part of the segment start..end that lies
// inside clip_start..clip_end, using the enable flags of window
// <window_type>. Nothing is rendered when start == end or when the segment
// lies entirely outside the clip range.
//
// Cases, in order:
//   start <  clip_start, end >  clip_end  -> render clip_start..clip_end
//   start <  clip_start, end >  clip_start-> render clip_start..end
//   start <  clip_start, end <= clip_start-> nothing
//   start >= clip_start, end >  clip_end  -> render start..clip_end, but only
//                                            if start < clip_end
//   start >= clip_start, end <= clip_end  -> render start..end
//
// The arguments are expanded several times without parentheses, so pass only
// plain variables or constants. Note the argument order: the clip range comes
// before the segment range.
3009 #define render_window_segment_clip(type, clip_start, clip_end, start, end,    \
3010  window_type)                                                                 \
3011 {                                                                             \
3012   if(start != end)                                                            \
3013   {                                                                           \
3014     if(start < clip_start)                                                    \
3015     {                                                                         \
3016       if(end > clip_start)                                                    \
3017       {                                                                       \
3018         if(end > clip_end)                                                    \
3019         {                                                                     \
3020           render_window_segment_unequal(type, clip_start, clip_end,           \
3021            window_type);                                                      \
3022         }                                                                     \
3023         else                                                                  \
3024         {                                                                     \
3025           render_window_segment_unequal(type, clip_start, end, window_type);  \
3026         }                                                                     \
3027       }                                                                       \
3028     }                                                                         \
3029     else                                                                      \
3030                                                                               \
3031     if(end > clip_end)                                                        \
3032     {                                                                         \
3033       if(start < clip_end)                                                    \
3034         render_window_segment_unequal(type, start, clip_end, window_type);    \
3035     }                                                                         \
3036     else                                                                      \
3037     {                                                                         \
3038       render_window_segment_unequal(type, start, end, window_type);           \
3039     }                                                                         \
3040   }                                                                           \
3041 }                                                                             \
3042
// Statement macro: renders columns start..end, split at the horizontal edges
// of window 1 and clipped to start..end by render_window_segment_clip.
//
//   window_1_x1 == 240 -> the whole span is rendered with the `out` flags.
//   window_1_x1 >  x2  -> 0..x2 and x1..240 use the window-1 flags and
//                         x2..x1 uses the `out` flags (wrapped window).
//   otherwise          -> x1..x2 uses the window-1 flags; 0..x1 and x2..240
//                         use the `out` flags.
//
// Expects window_1_x1, window_1_x2, window_1_enable and window_out_enable in
// scope, plus everything render_window_segment needs. The expansion is an
// unbraced if/else; use it only inside a braced block.
3043 #define render_window_clip_1(type, start, end)                                \
3044   if(window_1_x1 != 240)                                                      \
3045   {                                                                           \
3046     if(window_1_x1 > window_1_x2)                                             \
3047     {                                                                         \
3048       render_window_segment_clip(type, start, end, 0, window_1_x2, 1);        \
3049       render_window_segment_clip(type, start, end, window_1_x2, window_1_x1,  \
3050        out);                                                                  \
3051       render_window_segment_clip(type, start, end, window_1_x1, 240, 1);      \
3052     }                                                                         \
3053     else                                                                      \
3054     {                                                                         \
3055       render_window_segment_clip(type, start, end, 0, window_1_x1, out);      \
3056       render_window_segment_clip(type, start, end, window_1_x1, window_1_x2,  \
3057        1);                                                                    \
3058       render_window_segment_clip(type, start, end, window_1_x2, 240, out);    \
3059     }                                                                         \
3060   }                                                                           \
3061   else                                                                        \
3062   {                                                                           \
3063     render_window_segment(type, start, end, out);                             \
3064   }                                                                           \
3065
/* Renders the span start..end with the "out" window settings and then runs
 * the OBJ copy pass over the same span.  DISPCNT bit 0x40 selects the _1D
 * variant of render_scanline_obj_copy_<type>, otherwise the _2D variant is
 * used.  The leading argument 4 is passed through unchanged; its meaning is
 * not visible from here.
 *
 * Expects `dispcnt` and `scanline` to be in scope at the expansion site.
 * The caller supplies the terminating semicolon.
 *
 * The definition previously had a stray ';' directly after the parameter
 * list, which became the first token of every expansion (an empty
 * statement); it has been removed. */
#define render_window_clip_obj(type, start, end)                              \
  render_window_segment(type, start, end, out);                               \
  if(dispcnt & 0x40)                                                          \
    render_scanline_obj_copy_##type##_1D(4, start, end, scanline);            \
  else                                                                        \
    render_scanline_obj_copy_##type##_2D(4, start, end, scanline)
3072
3073
/* Clips the span start..end against clip_start..clip_end and hands the
 * overlapping part to render_window_clip_obj.
 *
 * Nothing is rendered when start == end, when the span begins before
 * clip_start and does not extend past it, or when the span extends past
 * clip_end but does not begin before it.  A span lying entirely between
 * clip_start and clip_end is passed through unchanged.
 *
 * Note the bare "else" followed by an empty continuation line: together
 * with the following "if" it forms an else-if chain. */
3074 #define render_window_segment_clip_obj(type, clip_start, clip_end, start,     \
3075  end)                                                                         \
3076 {                                                                             \
3077   if(start != end)                                                            \
3078   {                                                                           \
3079     if(start < clip_start)                                                    \
3080     {                                                                         \
3081       if(end > clip_start)                                                    \
3082       {                                                                       \
3083         if(end > clip_end)                                                    \
3084         {                                                                     \
3085           render_window_clip_obj(type, clip_start, clip_end);                 \
3086         }                                                                     \
3087         else                                                                  \
3088         {                                                                     \
3089           render_window_clip_obj(type, clip_start, end);                      \
3090         }                                                                     \
3091       }                                                                       \
3092     }                                                                         \
3093     else                                                                      \
3094                                                                               \
3095     if(end > clip_end)                                                        \
3096     {                                                                         \
3097       if(start < clip_end)                                                    \
3098       {                                                                       \
3099         render_window_clip_obj(type, start, clip_end);                        \
3100       }                                                                       \
3101     }                                                                         \
3102     else                                                                      \
3103     {                                                                         \
3104       render_window_clip_obj(type, start, end);                               \
3105     }                                                                         \
3106   }                                                                           \
3107 }                                                                             \
3108
3109
/* Variant of render_window_clip_1 used when the OBJ window is enabled as
 * well: x ranges that fall inside window 1 are rendered with window type 1
 * through render_window_segment_clip, while ranges outside window 1 go
 * through the OBJ-window path (render_window_segment_clip_obj, or
 * render_window_clip_obj for the whole span when window_1_x1 is 240).
 * x1 > x2 is treated as a window wrapping around the right screen edge. */
#define render_window_clip_1_obj(type, start, end)                            \
  if(window_1_x1 == 240)                                                      \
  {                                                                           \
    render_window_clip_obj(type, start, end);                                 \
  }                                                                           \
  else                                                                        \
                                                                              \
  if(window_1_x1 <= window_1_x2)                                              \
  {                                                                           \
    render_window_segment_clip_obj(type, start, end, 0, window_1_x1);         \
    render_window_segment_clip(type, start, end, window_1_x1, window_1_x2,    \
     1);                                                                      \
    render_window_segment_clip_obj(type, start, end, window_1_x2, 240);       \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    render_window_segment_clip(type, start, end, 0, window_1_x2, 1);          \
    render_window_segment_clip_obj(type, start, end, window_1_x2,             \
     window_1_x1);                                                            \
    render_window_segment_clip(type, start, end, window_1_x1, 240, 1);        \
  }
3132
3133
3134
/* Renders a scanline when exactly one rectangular window (window_number,
 * 0 or 1) is enabled.  Reads WININ into `winin`, lets window_coords set up
 * the window_<n>_x1 / window_<n>_x2 bounds, then draws three x ranges:
 * the part inside the window with the window's own settings and the parts
 * outside with the "out" settings.  x1 > x2 means the window wraps around
 * the right edge, so the inside/outside roles of the ranges swap.
 *
 * Declares a local (`winin`), so it must be expanded at the start of its
 * own block. */
#define render_window_single(type, window_number)                             \
  u32 winin = io_registers[REG_WININ];                                        \
  window_coords(window_number);                                               \
  if(window_##window_number##_x1 <= window_##window_number##_x2)              \
  {                                                                           \
    render_window_segment(type, 0, window_##window_number##_x1, out);         \
    render_window_segment(type, window_##window_number##_x1,                  \
     window_##window_number##_x2, window_number);                             \
    render_window_segment(type, window_##window_number##_x2, 240, out);       \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    render_window_segment(type, 0, window_##window_number##_x2,               \
     window_number);                                                          \
    render_window_segment(type, window_##window_number##_x2,                  \
     window_##window_number##_x1, out);                                       \
    render_window_segment(type, window_##window_number##_x1, 240,             \
     window_number);                                                          \
  }
3154
/* Renders a scanline when a front window is combined with a second window
 * source.  `front` is the window number whose interior always wins; `back`
 * names the render_window_clip_<back> macro (1, obj or 1_obj) that draws
 * whatever lies outside the front window.  x1 > x2 on the front window is
 * treated as wrapping around the right screen edge.  The window_<front>_x1 /
 * _x2 bounds must already have been set up by the caller. */
#define render_window_multi(type, front, back)                                \
  if(window_##front##_x1 <= window_##front##_x2)                              \
  {                                                                           \
    render_window_clip_##back(type, 0, window_##front##_x1);                  \
    render_window_segment(type, window_##front##_x1, window_##front##_x2,     \
     front);                                                                  \
    render_window_clip_##back(type, window_##front##_x2, 240);                \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    render_window_segment(type, 0, window_##front##_x2, front);               \
    render_window_clip_##back(type, window_##front##_x2,                      \
     window_##front##_x1);                                                    \
    render_window_segment(type, window_##front##_x1, 240, front);             \
  }
3170
/* Generates render_scanline_window_<type>(scanline, dispcnt), the scanline
 * renderer used when at least one window is enabled.
 *
 * The function switches on dispcnt >> 13 (bit 0 = window 0, bit 1 =
 * window 1, bit 2 = OBJ window) and expands the matching window macro.
 * There is no case for 0: update_scanline only calls the generated
 * functions when dispcnt >> 13 is non-zero.
 *
 * The locals vcount, winout, bldcnt and window_out_enable are not used
 * directly in this text; presumably the nested macros reference them -
 * confirm before removing any of them. */
3171 #define render_scanline_window_builder(type)                                  \
3172 static void render_scanline_window_##type(u16 *scanline, u32 dispcnt)         \
3173 {                                                                             \
3174   u32 vcount = io_registers[REG_VCOUNT];                                      \
3175   u32 winout = io_registers[REG_WINOUT];                                      \
3176   u32 bldcnt = io_registers[REG_BLDCNT];                                      \
3177   u32 window_out_enable = winout & 0x3F;                                      \
3178                                                                               \
3179   render_scanline_layer_functions_##type();                                   \
3180                                                                               \
3181   switch(dispcnt >> 13)                                                       \
3182   {                                                                           \
3183     /* Just window 0 */                                                       \
3184     case 0x01:                                                                \
3185     {                                                                         \
3186       render_window_single(type, 0);                                          \
3187       break;                                                                  \
3188     }                                                                         \
3189                                                                               \
3190     /* Just window 1 */                                                       \
3191     case 0x02:                                                                \
3192     {                                                                         \
3193       render_window_single(type, 1);                                          \
3194       break;                                                                  \
3195     }                                                                         \
3196                                                                               \
3197     /* Windows 0 and 1 */                                                     \
3198     case 0x03:                                                                \
3199     {                                                                         \
3200       u32 winin = io_registers[REG_WININ];                                    \
3201       window_coords(0);                                                       \
3202       window_coords(1);                                                       \
3203       render_window_multi(type, 0, 1);                                        \
3204       break;                                                                  \
3205     }                                                                         \
3206                                                                               \
3207     /* Just OBJ windows */                                                    \
3208     case 0x04:                                                                \
3209     {                                                                         \
3210       render_window_clip_obj(type, 0, 240);                                   \
3211       break;                                                                  \
3212     }                                                                         \
3213                                                                               \
3214     /* Window 0 and OBJ window */                                             \
3215     case 0x05:                                                                \
3216     {                                                                         \
3217       u32 winin = io_registers[REG_WININ];                                    \
3218       window_coords(0);                                                       \
3219       render_window_multi(type, 0, obj);                                      \
3220       break;                                                                  \
3221     }                                                                         \
3222                                                                               \
3223     /* Window 1 and OBJ window */                                             \
3224     case 0x06:                                                                \
3225     {                                                                         \
3226       u32 winin = io_registers[REG_WININ];                                    \
3227       window_coords(1);                                                       \
3228       render_window_multi(type, 1, obj);                                      \
3229       break;                                                                  \
3230     }                                                                         \
3231                                                                               \
3232     /* Window 0, 1, and OBJ window */                                         \
3233     case 0x07:                                                                \
3234     {                                                                         \
3235       u32 winin = io_registers[REG_WININ];                                    \
3236       window_coords(0);                                                       \
3237       window_coords(1);                                                       \
3238       render_window_multi(type, 0, 1_obj);                                    \
3239       break;                                                                  \
3240     }                                                                         \
3241   }                                                                           \
3242 }                                                                             \
3243
/* Instantiate the windowed scanline renderer for tile and bitmap modes. */
3244 render_scanline_window_builder(tile);
3245 render_scanline_window_builder(bitmap);
3246
/* Per-video-mode mask of layer enable bits.  update_scanline ANDs the
 * selected entry with dispcnt >> 8 before passing it to order_layers.  The
 * table has six entries (modes 0-5) and must not be indexed with 6 or 7. */
3247 static const u32 active_layers[6] = { 0x1F, 0x17, 0x1C, 0x14, 0x14, 0x14 };
3248
/* Dimensions of the native GBA-sized output; the WIZ and GP2X flip_screen
 * code compares them with the current resolution to decide whether the
 * scaled paths apply. */
3249 u32 small_resolution_width = 240;
3250 u32 small_resolution_height = 160;
/* Current output resolution, set by the video_resolution_* functions. */
3251 u32 resolution_width, resolution_height;
3252
3253 void update_scanline()
3254 {
3255   u32 pitch = get_screen_pitch();
3256   u32 dispcnt = io_registers[REG_DISPCNT];
3257   u32 vcount = io_registers[REG_VCOUNT];
3258   u16 *screen_offset = get_screen_pixels() + (vcount * pitch);
3259   u32 video_mode = dispcnt & 0x07;
3260
3261   // If OAM has been modified since the last scanline has been updated then
3262   // reorder and reprofile the OBJ lists.
3263   if(oam_update)
3264   {
3265     order_obj(video_mode);
3266     oam_update = 0;
3267   }
3268
3269   order_layers((dispcnt >> 8) & active_layers[video_mode]);
3270
3271   if(skip_next_frame)
3272     return;
3273
3274 #ifdef WIZ_BUILD
3275   if (screen_scale == unscaled_rot || screen_scale == scaled_aspect_rot)
3276   {
3277     if (rot_line_count == rot_lines_total)
3278     {
3279       rot_line_count = 0;
3280       if (vcount - rot_lines_total < FONT_HEIGHT && rot_msg_buff[0])
3281       {
3282         print_string_ext(rot_msg_buff, 0xFFFF, 0x0000, 0, 0,
3283           rot_buffer, 240, 0, vcount - rot_lines_total, rot_lines_total);
3284         if (vcount >= FONT_HEIGHT)
3285           rot_msg_buff[0] = 0;
3286       }
3287       if (screen_scale == unscaled_rot)
3288         do_rotated_blit(gpsp_gp2x_screen, rot_buffer, vcount);
3289       else
3290         upscale_aspect_row(gpsp_gp2x_screen, rot_buffer, vcount/3);
3291     }
3292     screen_offset = &rot_buffer[rot_line_count++ * 240];
3293   }
3294 #endif
3295
3296   // If the screen is in in forced blank draw pure white.
3297   if(dispcnt & 0x80)
3298   {
3299     fill_line_color16(0xFFFF, screen_offset, 0, 240);
3300   }
3301   else
3302   {
3303     if(video_mode < 3)
3304     {
3305       if(dispcnt >> 13)
3306       {
3307         render_scanline_window_tile(screen_offset, dispcnt);
3308       }
3309       else
3310       {
3311         render_scanline_tile(screen_offset, dispcnt);
3312       }
3313     }
3314     else
3315     {
3316       if(dispcnt >> 13)
3317         render_scanline_window_bitmap(screen_offset, dispcnt);
3318       else
3319         render_scanline_bitmap(screen_offset, dispcnt);
3320     }
3321   }
3322
3323   affine_reference_x[0] += (s16)io_registers[REG_BG2PB];
3324   affine_reference_y[0] += (s16)io_registers[REG_BG2PD];
3325   affine_reference_x[1] += (s16)io_registers[REG_BG3PB];
3326   affine_reference_y[1] += (s16)io_registers[REG_BG3PD];
3327 }
3328
3329 #ifdef PSP_BUILD
3330
3331 u32 screen_flip = 0;
3332
3333 void flip_screen()
3334 {
3335   if(video_direct == 0)
3336   {
3337     u32 *old_ge_cmd_ptr = ge_cmd_ptr;
3338     sceKernelDcacheWritebackAll();
3339
3340     // Render the current screen
3341     ge_cmd_ptr = ge_cmd + 2;
3342     GE_CMD(TBP0, ((u32)screen_pixels & 0x00FFFFFF));
3343     GE_CMD(TBW0, (((u32)screen_pixels & 0xFF000000) >> 8) |
3344      GBA_SCREEN_WIDTH);
3345     ge_cmd_ptr = old_ge_cmd_ptr;
3346
3347     sceGeListEnQueue(ge_cmd, ge_cmd_ptr, gecbid, NULL);
3348
3349     // Flip to the next screen
3350     screen_flip ^= 1;
3351
3352     if(screen_flip)
3353       screen_pixels = screen_texture + (240 * 160 * 2);
3354     else
3355       screen_pixels = screen_texture;
3356   }
3357 }
3358
3359 #elif defined(WIZ_BUILD)
3360
3361 void flip_screen()
3362 {
3363   if((resolution_width == small_resolution_width) &&
3364    (resolution_height == small_resolution_height))
3365   {
3366     switch(screen_scale)
3367     {
3368       case unscaled:
3369         break;
3370       case scaled_aspect:
3371         upscale_aspect(gpsp_gp2x_screen, screen_pixels);
3372         break;
3373       case unscaled_rot:
3374         do_rotated_blit(gpsp_gp2x_screen, rot_buffer, 160);
3375         rot_line_count = 0;
3376         goto no_clean;
3377       case scaled_aspect_rot:
3378         rot_line_count = 0;
3379         goto no_clean;
3380     }
3381   }
3382   warm_cache_op_all(WOP_D_CLEAN);
3383
3384 no_clean:
3385   pollux_video_flip();
3386   screen_pixels = (u16 *)gpsp_gp2x_screen + screen_offset;
3387 }
3388
3389 #elif defined(PND_BUILD)
3390
// Pandora: flips via fb_flip_screen() and stores its return value as the
// new screen_pixels (presumably the buffer to draw the next frame into -
// confirm against fb_flip_screen).
3391 void flip_screen()
3392 {
3393   screen_pixels = fb_flip_screen();
3394 }
3395
3396 #else
3397
/* integer_scale_copy_N(): store current_pixel into the N destination slots
 * ending at index x2 of current_scanline_ptr, then move x2 down by N.
 * Expects current_scanline_ptr, current_pixel and x2 at the expansion site. */
#define integer_scale_copy_2()                                                \
  current_scanline_ptr[x2] = current_pixel;                                   \
  current_scanline_ptr[x2 - 1] = current_pixel;                               \
  x2 -= 2

#define integer_scale_copy_3()                                                \
  current_scanline_ptr[x2] = current_pixel;                                   \
  current_scanline_ptr[x2 - 1] = current_pixel;                               \
  current_scanline_ptr[x2 - 2] = current_pixel;                               \
  x2 -= 3

#define integer_scale_copy_4()                                                \
  current_scanline_ptr[x2] = current_pixel;                                   \
  current_scanline_ptr[x2 - 1] = current_pixel;                               \
  current_scanline_ptr[x2 - 2] = current_pixel;                               \
  current_scanline_ptr[x2 - 3] = current_pixel;                               \
  x2 -= 4

/* Widens 160 scanlines of 240 pixels in place by repeating every pixel
 * scale_factor times.  Each line is processed right to left so that a source
 * pixel is always read before the widened output reaches its position.
 *
 * Expects x, y, x2 (signed), current_pixel, current_scanline_ptr, pitch and
 * video_scale at the expansion site; current_scanline_ptr is advanced by
 * pitch per line and ends up 160 lines past its starting value.
 *
 * Three leftover stores that used to follow the copy step have been removed:
 * they ran after x2 had already been decremented, so they overwrote source
 * pixels that had not been read yet (corrupting the leftmost columns) and,
 * at the start of each line, wrote to indices -1..-3, i.e. in front of the
 * scanline. */
#define integer_scale_horizontal(scale_factor)                                \
  for(y = 0; y < 160; y++)                                                    \
  {                                                                           \
    for(x = 239, x2 = (240 * video_scale) - 1; x >= 0; x--)                   \
    {                                                                         \
      current_pixel = current_scanline_ptr[x];                                \
      integer_scale_copy_##scale_factor();                                    \
    }                                                                         \
    current_scanline_ptr += pitch;                                            \
  }
3429
3430 void flip_screen()
3431 {
3432   if((video_scale != 1) && (current_scale != unscaled))
3433   {
3434     s32 x, y;
3435     s32 x2, y2;
3436     u16 *screen_ptr = get_screen_pixels();
3437     u16 *current_scanline_ptr = screen_ptr;
3438     u32 pitch = get_screen_pitch();
3439     u16 current_pixel;
3440     u32 i;
3441
3442     switch(video_scale)
3443     {
3444       case 2:
3445         integer_scale_horizontal(2);
3446         break;
3447
3448       case 3:
3449         integer_scale_horizontal(3);
3450         break;
3451
3452       default:
3453       case 4:
3454         integer_scale_horizontal(4);
3455         break;
3456
3457     }
3458
3459     for(y = 159, y2 = (160 * video_scale) - 1; y >= 0; y--)
3460     {
3461       for(i = 0; i < video_scale; i++)
3462       {
3463         memcpy(screen_ptr + (y2 * pitch),
3464          screen_ptr + (y * pitch), 480 * video_scale);
3465         y2--;
3466       }
3467     }
3468   }
3469 #ifdef GP2X_BUILD
3470   {
3471     if((resolution_width == small_resolution_width) &&
3472      (resolution_height == small_resolution_height))
3473     {
3474       switch (screen_scale)
3475       {
3476         case unscaled:
3477         {
3478           SDL_Rect srect = {0, 0, 240, 160};
3479           SDL_Rect drect = {40, 40, 240, 160};
3480           warm_cache_op_all(WOP_D_CLEAN);
3481           SDL_BlitSurface(screen, &srect, hw_screen, &drect);
3482           return;
3483         }
3484         case scaled_aspect:
3485         {
3486           SDL_Rect drect = {0, 10, 0, 0};
3487           warm_cache_op_all(WOP_D_CLEAN);
3488           SDL_BlitSurface(screen, NULL, hw_screen, &drect);
3489           return;
3490         }
3491         case scaled_aspect_sw:
3492         {
3493           upscale_aspect(hw_screen->pixels, get_screen_pixels());
3494           return;
3495         }
3496         case fullscreen:
3497           break;
3498       }
3499     }
3500     warm_cache_op_all(WOP_D_CLEAN);
3501     SDL_BlitSurface(screen, NULL, hw_screen, NULL);
3502   }
3503 #else
3504   SDL_Flip(screen);
3505 #endif
3506 }
3507
3508 #endif
3509
3510 u32 frame_to_render;
3511
3512 void update_screen()
3513 {
3514   if(!skip_next_frame)
3515     flip_screen();
3516 }
3517
3518 #ifdef PSP_BUILD
3519
// PSP: one-time video initialisation.
//
// Sets the display mode and frame buffer, initialises the GU with the render
// state used for blitting the GBA screen texture (scissor, RGB565 texture
// mode, replace texturing, linear filtering, no blending), registers the GE
// finish callback, fills screen_vertex with the default full-screen sprite
// and finally emits the GE commands that draw that sprite through GE_CMD
// (presumably appending to the list at ge_cmd_ptr - confirm against the
// GE_CMD definition). The statement order matters; do not reorder.
3520 void init_video()
3521 {
3522   sceDisplaySetMode(0, PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT);
3523
3524   sceDisplayWaitVblankStart();
3525   sceDisplaySetFrameBuf((void*)psp_gu_vram_base, PSP_LINE_SIZE,
3526    PSP_DISPLAY_PIXEL_FORMAT_565, PSP_DISPLAY_SETBUF_NEXTFRAME);
3527
3528   sceGuInit();
3529
3530   sceGuStart(GU_DIRECT, display_list);
3531   sceGuDrawBuffer(GU_PSM_5650, (void*)0, PSP_LINE_SIZE);
3532   sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3533    (void*)0, PSP_LINE_SIZE);
3534   sceGuClear(GU_COLOR_BUFFER_BIT);
3535
3536   sceGuOffset(2048 - (PSP_SCREEN_WIDTH / 2), 2048 - (PSP_SCREEN_HEIGHT / 2));
3537   sceGuViewport(2048, 2048, PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT);
3538
3539   sceGuScissor(0, 0, PSP_SCREEN_WIDTH + 1, PSP_SCREEN_HEIGHT + 1);
3540   sceGuEnable(GU_SCISSOR_TEST);
3541   sceGuTexMode(GU_PSM_5650, 0, 0, GU_FALSE);
3542   sceGuTexFunc(GU_TFX_REPLACE, GU_TCC_RGBA);
3543   sceGuTexFilter(GU_LINEAR, GU_LINEAR);
3544   sceGuEnable(GU_TEXTURE_2D);
3545
3546   sceGuFrontFace(GU_CW);
3547   sceGuDisable(GU_BLEND);
3548
3549   sceGuFinish();
3550   sceGuSync(0, 0);
3551
3552   sceDisplayWaitVblankStart();
3553   sceGuDisplay(GU_TRUE);
3554
  // Register the callback that runs when a queued GE list finishes.
3555   PspGeCallbackData gecb;
3556   gecb.signal_func = NULL;
3557   gecb.signal_arg = NULL;
3558   gecb.finish_func = Ge_Finish_Callback;
3559   gecb.finish_arg = NULL;
3560   gecbid = sceGeSetCallback(&gecb);
3561
  // Two vertices of five floats each. Given the vertex type programmed
  // below (float texture coordinates, float positions) these are assumed to
  // be u, v, x, y, z per vertex - TODO confirm against the GE vertex layout.
3562   screen_vertex[0] = 0 + 0.5;
3563   screen_vertex[1] = 0 + 0.5;
3564   screen_vertex[2] = 0 + 0.5;
3565   screen_vertex[3] = 0 + 0.5;
3566   screen_vertex[4] = 0;
3567   screen_vertex[5] = GBA_SCREEN_WIDTH - 0.5;
3568   screen_vertex[6] = GBA_SCREEN_HEIGHT - 0.5;
3569   screen_vertex[7] = PSP_SCREEN_WIDTH - 0.5;
3570   screen_vertex[8] = PSP_SCREEN_HEIGHT - 0.5;
3571   screen_vertex[9] = 0;
3572
3573   // Set framebuffer to PSP VRAM
3574   GE_CMD(FBP, ((u32)psp_gu_vram_base & 0x00FFFFFF));
3575   GE_CMD(FBW, (((u32)psp_gu_vram_base & 0xFF000000) >> 8) | PSP_LINE_SIZE);
3576   // Set texture 0 to the screen texture
3577   GE_CMD(TBP0, ((u32)screen_texture & 0x00FFFFFF));
3578   GE_CMD(TBW0, (((u32)screen_texture & 0xFF000000) >> 8) | GBA_SCREEN_WIDTH);
3579   // Set the texture size to 256 by 256 (2^8 by 2^8)
3580   GE_CMD(TSIZE0, (8 << 8) | 8);
3581   // Flush the texture cache
3582   GE_CMD(TFLUSH, 0);
3583   // Use 2D coordinates, no indices, no weights, 32bit float positions,
3584   // 32bit float texture coordinates
3585   GE_CMD(VTYPE, (1 << 23) | (0 << 11) | (0 << 9) |
3586    (3 << 7) | (0 << 5) | (0 << 2) | 3);
3587   // Set the base of the index list pointer to 0
3588   GE_CMD(BASE, 0);
3589   // Set the rest of index list pointer to 0 (not being used)
3590   GE_CMD(IADDR, 0);
3591   // Set the base of the screen vertex list pointer
3592   GE_CMD(BASE, ((u32)screen_vertex & 0xFF000000) >> 8);
3593   // Set the rest of the screen vertex list pointer
3594   GE_CMD(VADDR, ((u32)screen_vertex & 0x00FFFFFF));
3595   // Primitive kick: render sprite (primitive 6), 2 vertices
3596   GE_CMD(PRIM, (6 << 16) | 2);
3597   // Done with commands
3598   GE_CMD(FINISH, 0);
3599   // Raise signal interrupt
3600   GE_CMD(SIGNAL, 0);
3601   GE_CMD(NOP, 0);
3602   GE_CMD(NOP, 0);
3603 }
3604
3605 #elif defined(WIZ_BUILD) || defined(PND_BUILD)
3606
// WIZ / Pandora: intentionally empty; no video setup is performed here
// (presumably it is done by platform code elsewhere - confirm).
3607 void init_video()
3608 {
3609 }
3610
3611 #else
3612
3613 void init_video()
3614 {
3615   SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK | SDL_INIT_NOPARACHUTE);
3616
3617 #ifdef GP2X_BUILD
3618   SDL_GP2X_AllowGfxMemory(NULL, 0);
3619
3620   hw_screen = SDL_SetVideoMode(320 * video_scale, 240 * video_scale,
3621    16, SDL_HWSURFACE);
3622
3623   screen = SDL_CreateRGBSurface(SDL_HWSURFACE, 240 * video_scale,
3624    160 * video_scale, 16, 0xFFFF, 0xFFFF, 0xFFFF, 0);
3625
3626   warm_change_cb_upper(WCB_C_BIT|WCB_B_BIT, 1);
3627 #else
3628   screen = SDL_SetVideoMode(240 * video_scale, 160 * video_scale, 16, 0);
3629 #endif
3630   SDL_ShowCursor(0);
3631 }
3632
3633 #endif
3634
// Scaling mode requested for the GBA-resolution display.
3635 video_scale_type screen_scale = scaled_aspect;
// Scaling mode currently in effect; the SDL video_resolution_* functions
// set it to `unscaled` at the large resolution and to screen_scale at the
// small one.
3636 video_scale_type current_scale = scaled_aspect;
// Texture / framebuffer filter selections.
3637 video_filter_type screen_filter = filter_bilinear;
3638 video_filter_type2 screen_filter2 = filter2_none;
3639
3640
3641 #ifdef PSP_BUILD
3642
3643 void video_resolution_large()
3644 {
3645   if(video_direct != 1)
3646   {
3647     video_direct = 1;
3648     screen_pixels = psp_gu_vram_base;
3649     screen_pitch = 512;
3650     sceGuStart(GU_DIRECT, display_list);
3651     sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3652      (void*)0, PSP_LINE_SIZE);
3653     sceGuFinish();
3654   }
3655 }
3656
3657 void set_gba_resolution(video_scale_type scale)
3658 {
3659   u32 filter_linear = 0;
3660   screen_scale = scale;
3661   switch(scale)
3662   {
3663     case unscaled:
3664       screen_vertex[2] = 120 + 0.5;
3665       screen_vertex[3] = 56 + 0.5;
3666       screen_vertex[7] = GBA_SCREEN_WIDTH + 120 - 0.5;
3667       screen_vertex[8] = GBA_SCREEN_HEIGHT + 56 - 0.5;
3668       break;
3669
3670     case scaled_aspect:
3671       screen_vertex[2] = 36 + 0.5;
3672       screen_vertex[3] = 0 + 0.5;
3673       screen_vertex[7] = 408 + 36 - 0.5;
3674       screen_vertex[8] = PSP_SCREEN_HEIGHT - 0.5;
3675       break;
3676
3677     case fullscreen:
3678       screen_vertex[2] = 0;
3679       screen_vertex[3] = 0;
3680       screen_vertex[7] = PSP_SCREEN_WIDTH;
3681       screen_vertex[8] = PSP_SCREEN_HEIGHT;
3682       break;
3683   }
3684
3685   sceGuStart(GU_DIRECT, display_list);
3686   if(screen_filter == filter_bilinear)
3687     sceGuTexFilter(GU_LINEAR, GU_LINEAR);
3688   else
3689     sceGuTexFilter(GU_NEAREST, GU_NEAREST);
3690
3691   sceGuFinish();
3692   sceGuSync(0, 0);
3693
3694   clear_screen(0x0000);
3695 }
3696
3697 void video_resolution_small()
3698 {
3699   if(video_direct != 0)
3700   {
3701     set_gba_resolution(screen_scale);
3702     video_direct = 0;
3703     screen_pixels = screen_texture;
3704     screen_flip = 0;
3705     screen_pitch = 240;
3706     sceGuStart(GU_DIRECT, display_list);
3707     sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3708      (void*)0, PSP_LINE_SIZE);
3709     sceGuFinish();
3710   }
3711 }
3712
3713 void clear_screen(u16 color)
3714 {
3715   u32 i;
3716   u16 *src_ptr = get_screen_pixels();
3717
3718   sceGuSync(0, 0);
3719
3720   for(i = 0; i < (512 * 272); i++, src_ptr++)
3721   {
3722     *src_ptr = color;
3723   }
3724
3725   // I don't know why this doesn't work.
3726 /*  color = (((color & 0x1F) * 255 / 31) << 0) |
3727    ((((color >> 5) & 0x3F) * 255 / 63) << 8) |
3728    ((((color >> 11) & 0x1F) * 255 / 31) << 16) | (0xFF << 24);
3729
3730   sceGuStart(GU_DIRECT, display_list);
3731   sceGuDrawBuffer(GU_PSM_5650, (void*)0, PSP_LINE_SIZE);
3732   //sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3733   // (void*)0, PSP_LINE_SIZE);
3734   sceGuClearColor(color);
3735   sceGuClear(GU_COLOR_BUFFER_BIT);
3736   sceGuFinish();
3737   sceGuSync(0, 0); */
3738 }
3739
3740 #elif defined(WIZ_BUILD)
3741
// WIZ: switches to the 320x240 layout with no screen offset, a single
// framebuffer (fb_use_buffers(1)) and landscape LCD orientation, flipping
// and clearing the screen along the way.
3742 void video_resolution_large()
3743 {
3744   screen_offset = 0;
3745   resolution_width = 320;
3746   resolution_height = 240;
3747
3748   fb_use_buffers(1);
3749   flip_screen();
3750   clear_screen(0);
3751   wiz_lcd_set_portrait(0);
3752 }
3753
3754 void video_resolution_small()
3755 {
3756   fb_use_buffers(4);
3757
3758   switch (screen_scale)
3759   {
3760     case unscaled:
3761       screen_offset = 320*40 + 40;
3762       wiz_lcd_set_portrait(0);
3763       break;
3764     case scaled_aspect:
3765       screen_offset = 320*(80 - 14) + 80;
3766       wiz_lcd_set_portrait(0);
3767       break;
3768     case unscaled_rot:
3769       wiz_lcd_set_portrait(1);
3770       rot_lines_total = 4;
3771       rot_line_count = 0;
3772       break;
3773     case scaled_aspect_rot:
3774       wiz_lcd_set_portrait(1);
3775       rot_lines_total = 3;
3776       rot_line_count = 0;
3777       break;
3778   }
3779
3780   flip_screen();
3781   clear_screen(0);
3782
3783   resolution_width = 240;
3784   resolution_height = 160;
3785 }
3786
3787 void set_gba_resolution(video_scale_type scale)
3788 {
3789   screen_scale = scale;
3790 }
3791
3792 void clear_screen(u16 color)
3793 {
3794   u32 col = ((u32)color << 16) | color;
3795   u32 *p = gpsp_gp2x_screen;
3796   int c = 320*240/2;
3797   while (c-- > 0)
3798     *p++ = col;
3799 }
3800
3801 #elif defined(PND_BUILD)
3802
3803 void video_resolution_large()
3804 {
3805   resolution_width = 400;
3806   resolution_height = 272;
3807
3808   fb_set_mode(400, 272, 1, 15, screen_filter, screen_filter2);
3809   flip_screen();
3810   clear_screen(0);
3811 }
3812
3813 void video_resolution_small()
3814 {
3815   resolution_width = 240;
3816   resolution_height = 160;
3817
3818   fb_set_mode(240, 160, 3, screen_scale, screen_filter, screen_filter2);
3819   flip_screen();
3820   clear_screen(0);
3821 }
3822
3823 void set_gba_resolution(video_scale_type scale)
3824 {
3825   screen_scale = scale;
3826 }
3827
3828 void clear_screen(u16 color)
3829 {
3830   u32 col = ((u32)color << 16) | color;
3831   u32 *p = (u32 *)get_screen_pixels();
3832   int c = resolution_width * resolution_height / 2;
3833   while (c-- > 0)
3834     *p++ = col;
3835 }
3836
3837 #else
3838
3839 void video_resolution_large()
3840 {
3841   current_scale = unscaled;
3842
3843 #ifdef GP2X_BUILD
3844   SDL_FreeSurface(screen);
3845   SDL_GP2X_AllowGfxMemory(NULL, 0);
3846     hw_screen = SDL_SetVideoMode(320, 240, 16, SDL_HWSURFACE);
3847   screen = SDL_CreateRGBSurface(SDL_HWSURFACE, 320, 240, 16, 0xFFFF,
3848    0xFFFF, 0xFFFF, 0);
3849   resolution_width = 320;
3850     resolution_height = 240;
3851   SDL_ShowCursor(0);
3852
3853   warm_change_cb_upper(WCB_C_BIT|WCB_B_BIT, 1);
3854 #else
3855   screen = SDL_SetVideoMode(480, 272, 16, 0);
3856   resolution_width = 480;
3857   resolution_height = 272;
3858 #endif
3859 }
3860
3861 void video_resolution_small()
3862 {
3863   current_scale = screen_scale;
3864
3865 #ifdef GP2X_BUILD
3866   int w, h;
3867   SDL_FreeSurface(screen);
3868   SDL_GP2X_AllowGfxMemory(NULL, 0);
3869
3870   w = 320; h = 240;
3871   if (screen_scale == scaled_aspect || screen_scale == fullscreen)
3872   {
3873     w = small_resolution_width * video_scale;
3874     h = small_resolution_height * video_scale;
3875   }
3876   if (screen_scale == scaled_aspect) h += 20;
3877   hw_screen = SDL_SetVideoMode(w, h, 16, SDL_HWSURFACE);
3878
3879   w = small_resolution_width * video_scale;
3880   if (screen_scale == scaled_aspect_sw)
3881     w = 320;
3882   screen = SDL_CreateRGBSurface(SDL_HWSURFACE,
3883    w, small_resolution_height * video_scale,
3884    16, 0xFFFF, 0xFFFF, 0xFFFF, 0);
3885
3886   SDL_ShowCursor(0);
3887
3888   warm_change_cb_upper(WCB_C_BIT|WCB_B_BIT, 1);
3889 #else
3890   screen = SDL_SetVideoMode(small_resolution_width * video_scale,
3891    small_resolution_height * video_scale, 16, 0);
3892 #endif
3893   resolution_width = small_resolution_width;
3894   resolution_height = small_resolution_height;
3895 }
3896
3897 void set_gba_resolution(video_scale_type scale)
3898 {
3899   if(screen_scale != scale)
3900   {
3901     screen_scale = scale;
3902     small_resolution_width = 240 * video_scale;
3903     small_resolution_height = 160 * video_scale;
3904   }
3905 }
3906
3907 void clear_screen(u16 color)
3908 {
3909   u16 *dest_ptr = get_screen_pixels();
3910   u32 line_skip = get_screen_pitch() - screen->w;
3911   u32 x, y;
3912
3913   for(y = 0; y < screen->h; y++)
3914   {
3915     for(x = 0; x < screen->w; x++, dest_ptr++)
3916     {
3917       *dest_ptr = color;
3918     }
3919     dest_ptr += line_skip;
3920   }
3921 }
3922
3923 #endif
3924
3925 u16 *copy_screen()
3926 {
3927   u16 *copy = malloc(240 * 160 * 2);
3928   memcpy(copy, get_screen_pixels(), 240 * 160 * 2);
3929   return copy;
3930 }
3931
3932 void blit_to_screen(u16 *src, u32 w, u32 h, u32 dest_x, u32 dest_y)
3933 {
3934   u32 pitch = get_screen_pitch();
3935   u16 *dest_ptr = get_screen_pixels() + dest_x + (dest_y * pitch);
3936
3937   s32 w1 = dest_x + w > pitch ? pitch - dest_x : w;
3938   u16 *src_ptr = src;
3939   s32 x, y;
3940
3941   for(y = 0; y < h; y++)
3942   {
3943     for(x = 0; x < w1; x++)
3944     {
3945       dest_ptr[x] = src_ptr[x];
3946     }
3947     src_ptr += w;
3948     dest_ptr += pitch;
3949   }
3950 }
3951
3952 void print_string_ext(const char *str, u16 fg_color, u16 bg_color,
3953  u32 x, u32 y, void *_dest_ptr, u32 pitch, u32 pad, u32 h_offset, u32 height)
3954 {
3955   u16 *dest_ptr = (u16 *)_dest_ptr + (y * pitch) + x;
3956   u8 current_char = str[0];
3957   u32 current_row;
3958   u32 glyph_offset;
3959   u32 i = 0, i2, i3, h;
3960   u32 str_index = 1;
3961   u32 current_x = x;
3962
3963   if(y + height > resolution_height)
3964       return;
3965
3966   while(current_char)
3967   {
3968     if(current_char == '\n')
3969     {
3970       y += FONT_HEIGHT;
3971       current_x = x;
3972       dest_ptr = get_screen_pixels() + (y * pitch) + x;
3973     }
3974     else
3975     {
3976       glyph_offset = _font_offset[current_char];
3977       current_x += FONT_WIDTH;
3978       glyph_offset += h_offset;
3979       for(i2 = h_offset, h = 0; i2 < FONT_HEIGHT && h < height; i2++, h++, glyph_offset++)
3980       {
3981         current_row = _font_bits[glyph_offset];
3982         for(i3 = 0; i3 < FONT_WIDTH; i3++)
3983         {
3984           if((current_row >> (15 - i3)) & 0x01)
3985             *dest_ptr = fg_color;
3986           else
3987             *dest_ptr = bg_color;
3988           dest_ptr++;
3989         }
3990         dest_ptr += (pitch - FONT_WIDTH);
3991       }
3992       dest_ptr = dest_ptr - (pitch * h) + FONT_WIDTH;
3993     }
3994
3995     i++;
3996
3997     current_char = str[str_index];
3998
3999     if((i < pad) && (current_char == 0))
4000     {
4001       current_char = ' ';
4002     }
4003     else
4004     {
4005       str_index++;
4006     }
4007
4008     if(current_x + FONT_WIDTH > resolution_width /* EDIT */)
4009     {
4010       while (current_char && current_char != '\n')
4011       {
4012         current_char = str[str_index++];
4013       }
4014     }
4015   }
4016 }
4017
4018 void print_string(const char *str, u16 fg_color, u16 bg_color,
4019  u32 x, u32 y)
4020 {
4021 #ifdef WIZ_BUILD
4022   if ((screen_scale == unscaled_rot || screen_scale == scaled_aspect_rot) &&
4023    (resolution_width == small_resolution_width) &&
4024    (resolution_height == small_resolution_height))
4025   {
4026     snprintf(rot_msg_buff, sizeof(rot_msg_buff), "%s", str);
4027     return;
4028   }
4029 #endif
4030   print_string_ext(str, fg_color, bg_color, x, y, get_screen_pixels(),
4031    get_screen_pitch(), 0, 0, FONT_HEIGHT);
4032 }
4033
4034 void print_string_pad(const char *str, u16 fg_color, u16 bg_color,
4035  u32 x, u32 y, u32 pad)
4036 {
4037   print_string_ext(str, fg_color, bg_color, x, y, get_screen_pixels(),
4038    get_screen_pitch(), pad, 0, FONT_HEIGHT);
4039 }
4040
4041 u32 debug_cursor_x = 0;
4042 u32 debug_cursor_y = 0;
4043
4044 #ifdef STDIO_DEBUG
4045
// STDIO_DEBUG build: debug output goes to stdout, so there is nothing to
// clear.
void debug_screen_clear()
{
}
4049
// STDIO_DEBUG build: no video mode change is made for stdout output.
void debug_screen_start()
{
}
4053
// STDIO_DEBUG build: no video mode was changed, so nothing is restored.
void debug_screen_end()
{
}
4057
// STDIO_DEBUG build: nothing is drawn on screen, so no flip is performed.
void debug_screen_update()
{
}
4061
// STDIO_DEBUG build: formats the message printf-style and writes it to
// stdout.
void debug_screen_printf(const char *format, ...)
{
  va_list args;

  va_start(args, format);
  vprintf(format, args);
  va_end(args);
}
4070
4071 void debug_screen_newline(u32 count)
4072 {
4073   printf("\n");
4074 }
4075
4076
4077 #else
4078
4079 void debug_screen_clear()
4080 {
4081   debug_cursor_x = 0;
4082   debug_cursor_y = 0;
4083   clear_screen(0x0000);
4084 }
4085
// Enters the debug console: switches to the large resolution, then clears
// the screen and resets the debug cursor.
void debug_screen_start()
{
  video_resolution_large();
  debug_screen_clear();
}
4091
// Leaves the debug console by switching back to the small (game) resolution.
void debug_screen_end()
{
  video_resolution_small();
}
4096
// Presents the debug console contents by flipping the screen.
void debug_screen_update()
{
  flip_screen();
}
4101
4102 void debug_screen_printf(const char *format, ...)
4103 {
4104   char str_buffer[512];
4105   u32 str_buffer_length;
4106   va_list ap;
4107
4108   va_start(ap, format);
4109   str_buffer_length = vsnprintf(str_buffer, 512, format, ap);
4110   va_end(ap);
4111
4112   printf("printing debug string %s at %d %d\n", str_buffer,
4113    debug_cursor_x, debug_cursor_y);
4114
4115   print_string(str_buffer, 0xFFFF, 0x0000, debug_cursor_x, debug_cursor_y);
4116   debug_cursor_x += FONT_WIDTH * str_buffer_length;
4117 }
4118
4119 void debug_screen_newline(u32 count)
4120 {
4121   debug_cursor_x = 0;
4122   debug_cursor_y += FONT_HEIGHT * count;
4123 }
4124
4125 #endif
4126
// Prints one formatted line to the debug console: formats the message
// printf-style, prints it through debug_screen_printf() and then moves to
// the next line. The message is truncated to 511 characters.
void debug_screen_printl(const char *format, ...)
{
  char line_buffer[512];
  va_list ap;

  // debug_screen_printf() is variadic and has no va_list variant, so a
  // va_list must not be handed to it as an argument. Format the text here
  // and pass the finished string through "%s" instead.
  va_start(ap, format);
  vsnprintf(line_buffer, sizeof(line_buffer), format, ap);
  va_end(ap);

  debug_screen_printf("%s", line_buffer);
  debug_screen_newline(1);
}
4137
4138
4139 #define video_savestate_builder(type)                                         \
4140 void video_##type##_savestate(file_tag_type savestate_file)                   \
4141 {                                                                             \
4142   file_##type##_array(savestate_file, affine_reference_x);                    \
4143   file_##type##_array(savestate_file, affine_reference_y);                    \
4144 }                                                                             \
4145
4146 video_savestate_builder(read);
4147 video_savestate_builder(write_mem);
4148
4149