revive PC build, support Linux
[gpsp.git] / video.c
1 /* gameplaySP
2  *
3  * Copyright (C) 2006 Exophase <exophase@gmail.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of
8  * the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
19
20 #include "common.h"
21 #include "font.h"
22
23 #ifdef PSP_BUILD
24
25 #include <pspctrl.h>
26
27 #include <pspkernel.h>
28 #include <pspdebug.h>
29 #include <pspdisplay.h>
30
31 #include <pspgu.h>
32 #include <psppower.h>
33 #include <psprtc.h>
34
35 static float *screen_vertex = (float *)0x441FC100;
36 static u32 *ge_cmd = (u32 *)0x441FC000;
37 static u16 *psp_gu_vram_base = (u16 *)(0x44000000);
38 static u32 *ge_cmd_ptr = (u32 *)0x441FC000;
39 static u32 gecbid;
40 static u32 video_direct = 0;
41
42 static u32 __attribute__((aligned(16))) display_list[32];
43
44 #define GBA_SCREEN_WIDTH 240
45 #define GBA_SCREEN_HEIGHT 160
46
47 #define PSP_SCREEN_WIDTH 480
48 #define PSP_SCREEN_HEIGHT 272
49 #define PSP_LINE_SIZE 512
50
51 #define PSP_ALL_BUTTON_MASK 0xFFFF
52
53 #define GE_CMD_FBP    0x9C
54 #define GE_CMD_FBW    0x9D
55 #define GE_CMD_TBP0   0xA0
56 #define GE_CMD_TBW0   0xA8
57 #define GE_CMD_TSIZE0 0xB8
58 #define GE_CMD_TFLUSH 0xCB
59 #define GE_CMD_CLEAR  0xD3
60 #define GE_CMD_VTYPE  0x12
61 #define GE_CMD_BASE   0x10
62 #define GE_CMD_VADDR  0x01
63 #define GE_CMD_IADDR  0x02
64 #define GE_CMD_PRIM   0x04
65 #define GE_CMD_FINISH 0x0F
66 #define GE_CMD_SIGNAL 0x0C
67 #define GE_CMD_NOP    0x00
68
69 #define GE_CMD(cmd, operand)                                                \
70   *ge_cmd_ptr = (((GE_CMD_##cmd) << 24) | (operand));                       \
71   ge_cmd_ptr++                                                              \
72
73 static u16 *screen_texture = (u16 *)(0x4000000 + (512 * 272 * 2));
74 static u16 *current_screen_texture = (u16 *)(0x4000000 + (512 * 272 * 2));
75 static u16 *screen_pixels = (u16 *)(0x4000000 + (512 * 272 * 2));
76 static u32 screen_pitch = 240;
77
78 static void Ge_Finish_Callback(int id, void *arg)
79 {
80 }
81
82 #define get_screen_pixels()                                                   \
83   screen_pixels                                                               \
84
85 #define get_screen_pitch()                                                    \
86   screen_pitch                                                                \
87
88 #elif defined(WIZ_BUILD)
89
90 static u16 rot_buffer[240*4];
91 static u32 rot_lines_total = 4;
92 static u32 rot_line_count = 0;
93 static char rot_msg_buff[64];
94
95 static u32 screen_offset = 0;
96 static u16 *screen_pixels = NULL;
97 const u32 screen_pitch = 320;
98
99 #define get_screen_pixels()                                                   \
100   screen_pixels                                                               \
101
102 #define get_screen_pitch()                                                    \
103   screen_pitch                                                                \
104
105 #else
106
107 #ifdef GP2X_BUILD
108 #include "SDL_gp2x.h"
109 SDL_Surface *hw_screen;
110 #endif
111 SDL_Surface *screen;
112 const u32 video_scale = 1;
113
114 #define get_screen_pixels()                                                   \
115   ((u16 *)screen->pixels)                                                     \
116
117 #define get_screen_pitch()                                                    \
118   (screen->pitch / 2)                                                         \
119
120 #endif
121
122 void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline,
123  u32 enable_flags, u32 dispcnt, u32 bldcnt, tile_layer_render_struct
124  *layer_renderers);
125 void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline,
126  u32 enable_flags, u32 dispcnt, u32 bldcnt, bitmap_layer_render_struct
127  *layer_renderers);
128
129 #define no_op                                                                 \
130
131 // This old version is not necessary if the palette is either being converted
132 // transparently or the ABGR 1555 format is being used natively. The direct
133 // version (without conversion) is much faster.
134
135 #define tile_lookup_palette_full(palette, source)                             \
136   current_pixel = palette[source];                                            \
137   convert_palette(current_pixel)                                              \
138
139 #define tile_lookup_palette(palette, source)                                  \
140   current_pixel = palette[source];                                            \
141
142
143 #ifdef RENDER_COLOR16_NORMAL
144
145 #define tile_expand_base_normal(index)                                        \
146   tile_expand_base_color16(index)                                             \
147
148 #else
149
150 #define tile_expand_base_normal(index)                                        \
151   tile_lookup_palette(palette, current_pixel);                                \
152   dest_ptr[index] = current_pixel                                             \
153
154 #endif
155
156 #define tile_expand_transparent_normal(index)                                 \
157   tile_expand_base_normal(index)                                              \
158
159 #define tile_expand_copy(index)                                               \
160   dest_ptr[index] = copy_ptr[index]                                           \
161
162
163 #define advance_dest_ptr_base(delta)                                          \
164   dest_ptr += delta                                                           \
165
166 #define advance_dest_ptr_transparent(delta)                                   \
167   advance_dest_ptr_base(delta)                                                \
168
169 #define advance_dest_ptr_copy(delta)                                          \
170   advance_dest_ptr_base(delta);                                               \
171   copy_ptr += delta                                                           \
172
173
174 #define color_combine_mask_a(layer)                                           \
175   ((io_registers[REG_BLDCNT] >> layer) & 0x01)                                \
176
177 // For color blending operations, will create a mask that has in bit
178 // 10 if the layer is target B, and bit 9 if the layer is target A.
179
180 #define color_combine_mask(layer)                                             \
181   (color_combine_mask_a(layer) |                                              \
182    ((io_registers[REG_BLDCNT] >> (layer + 7)) & 0x02)) << 9                   \
183
184 // For alpha blending renderers, draw the palette index (9bpp) and
185 // layer bits rather than the raw RGB. For the base this should write to
186 // the 32bit location directly.
187
188 #define tile_expand_base_alpha(index)                                         \
189   dest_ptr[index] = current_pixel | pixel_combine                             \
190
191 #define tile_expand_base_bg(index)                                            \
192   dest_ptr[index] = bg_combine                                                \
193
194
195 // For layered (transparent) writes this should shift the "stack" and write
196 // to the bottom. This will preserve the topmost pixel and the most recent
197 // one.
198
199 #define tile_expand_transparent_alpha(index)                                  \
200   dest_ptr[index] = (dest_ptr[index] << 16) | current_pixel | pixel_combine   \
201
202
203 // OBJ should only shift if the top isn't already OBJ
204 #define tile_expand_transparent_alpha_obj(index)                              \
205   dest = dest_ptr[index];                                                     \
206   if(dest & 0x00000100)                                                       \
207   {                                                                           \
208     dest_ptr[index] = (dest & 0xFFFF0000) | current_pixel | pixel_combine;    \
209   }                                                                           \
210   else                                                                        \
211   {                                                                           \
212     dest_ptr[index] = (dest << 16) | current_pixel | pixel_combine;           \
213   }                                                                           \
214
215
216 // For color effects that don't need to preserve the previous layer.
217 // The color32 version should be used with 32bit wide dest_ptr so as to be
218 // compatible with alpha combine on top of it.
219
220 #define tile_expand_base_color16(index)                                       \
221   dest_ptr[index] = current_pixel | pixel_combine                             \
222
223 #define tile_expand_transparent_color16(index)                                \
224   tile_expand_base_color16(index)                                             \
225
226 #define tile_expand_base_color32(index)                                       \
227   tile_expand_base_color16(index)                                             \
228
229 #define tile_expand_transparent_color32(index)                                \
230   tile_expand_base_color16(index)                                             \
231
232
233 // Operations for isolation 8bpp pixels within 32bpp pixel blocks.
234
235 #define tile_8bpp_pixel_op_mask(op_param)                                     \
236   current_pixel = current_pixels & 0xFF                                       \
237
238 #define tile_8bpp_pixel_op_shift_mask(shift)                                  \
239   current_pixel = (current_pixels >> shift) & 0xFF                            \
240
241 #define tile_8bpp_pixel_op_shift(shift)                                       \
242   current_pixel = current_pixels >> shift                                     \
243
244 #define tile_8bpp_pixel_op_none(shift)                                        \
245
246 // Base should always draw raw in 8bpp mode; color 0 will be drawn where
247 // color 0 is.
248
249 #define tile_8bpp_draw_base_normal(index)                                     \
250   tile_expand_base_normal(index)                                              \
251
252 #define tile_8bpp_draw_base_alpha(index)                                      \
253   if(current_pixel)                                                           \
254   {                                                                           \
255     tile_expand_base_alpha(index);                                            \
256   }                                                                           \
257   else                                                                        \
258   {                                                                           \
259     tile_expand_base_bg(index);                                               \
260   }                                                                           \
261
262
263 #define tile_8bpp_draw_base_color16(index)                                    \
264   tile_8bpp_draw_base_alpha(index)                                            \
265
266 #define tile_8bpp_draw_base_color32(index)                                    \
267   tile_8bpp_draw_base_alpha(index)                                            \
268
269
270 #define tile_8bpp_draw_base(index, op, op_param, alpha_op)                    \
271   tile_8bpp_pixel_op_##op(op_param);                                          \
272   tile_8bpp_draw_base_##alpha_op(index)                                       \
273
274 // Transparent (layered) writes should only replace what is there if the
275 // pixel is not transparent (zero)
276
277 #define tile_8bpp_draw_transparent(index, op, op_param, alpha_op)             \
278   tile_8bpp_pixel_op_##op(op_param);                                          \
279   if(current_pixel)                                                           \
280   {                                                                           \
281     tile_expand_transparent_##alpha_op(index);                                \
282   }                                                                           \
283
284 #define tile_8bpp_draw_copy(index, op, op_param, alpha_op)                    \
285   tile_8bpp_pixel_op_##op(op_param);                                          \
286   if(current_pixel)                                                           \
287   {                                                                           \
288     tile_expand_copy(index);                                                  \
289   }                                                                           \
290
291 // Get the current tile from the map in 8bpp mode
292
293 #define get_tile_8bpp()                                                       \
294   current_tile = *map_ptr;                                                    \
295   tile_ptr = tile_base + ((current_tile & 0x3FF) * 64)                        \
296
297
298 // Draw half of a tile in 8bpp mode, for base renderer
299
300 #define tile_8bpp_draw_four_noflip(index, combine_op, alpha_op)               \
301   tile_8bpp_draw_##combine_op(index + 0, mask, 0, alpha_op);                  \
302   tile_8bpp_draw_##combine_op(index + 1, shift_mask, 8, alpha_op);            \
303   tile_8bpp_draw_##combine_op(index + 2, shift_mask, 16, alpha_op);           \
304   tile_8bpp_draw_##combine_op(index + 3, shift, 24, alpha_op)                 \
305
306
307 // Like the above, but draws the half-tile horizontally flipped
308
309 #define tile_8bpp_draw_four_flip(index, combine_op, alpha_op)                 \
310   tile_8bpp_draw_##combine_op(index + 3, mask, 0, alpha_op);                  \
311   tile_8bpp_draw_##combine_op(index + 2, shift_mask, 8, alpha_op);            \
312   tile_8bpp_draw_##combine_op(index + 1, shift_mask, 16, alpha_op);           \
313   tile_8bpp_draw_##combine_op(index + 0, shift, 24, alpha_op)                 \
314
315 #define tile_8bpp_draw_four_base(index, alpha_op, flip_op)                    \
316   tile_8bpp_draw_four_##flip_op(index, base, alpha_op)                        \
317
318
319 // Draw half of a tile in 8bpp mode, for transparent renderer; as an
320 // optimization the entire thing is checked against zero (in transparent
321 // capable renders it is more likely for the pixels to be transparent than
322 // opaque)
323
324 #define tile_8bpp_draw_four_transparent(index, alpha_op, flip_op)             \
325   if(current_pixels != 0)                                                     \
326   {                                                                           \
327     tile_8bpp_draw_four_##flip_op(index, transparent, alpha_op);              \
328   }                                                                           \
329
330 #define tile_8bpp_draw_four_copy(index, alpha_op, flip_op)                    \
331   if(current_pixels != 0)                                                     \
332   {                                                                           \
333     tile_8bpp_draw_four_##flip_op(index, copy, alpha_op);                     \
334   }                                                                           \
335
336 // Helper macro for drawing 8bpp tiles clipped against the edge of the screen
337
338 #define partial_tile_8bpp(combine_op, alpha_op)                               \
339   for(i = 0; i < partial_tile_run; i++)                                       \
340   {                                                                           \
341     tile_8bpp_draw_##combine_op(0, mask, 0, alpha_op);                        \
342     current_pixels >>= 8;                                                     \
343     advance_dest_ptr_##combine_op(1);                                         \
344   }                                                                           \
345
346
347 // Draws 8bpp tiles clipped against the left side of the screen,
348 // partial_tile_offset indicates how much clipped in it is, partial_tile_run
349 // indicates how much it should draw.
350
351 #define partial_tile_right_noflip_8bpp(combine_op, alpha_op)                  \
352   if(partial_tile_offset >= 4)                                                \
353   {                                                                           \
354     current_pixels = *((u32 *)(tile_ptr + 4)) >>                              \
355      ((partial_tile_offset - 4) * 8);                                         \
356     partial_tile_8bpp(combine_op, alpha_op);                                  \
357   }                                                                           \
358   else                                                                        \
359   {                                                                           \
360     partial_tile_run -= 4;                                                    \
361     current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 8);         \
362     partial_tile_8bpp(combine_op, alpha_op);                                  \
363     current_pixels = *((u32 *)(tile_ptr + 4));                                \
364     tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip);                    \
365     advance_dest_ptr_##combine_op(4);                                         \
366   }                                                                           \
367
368
369 // Draws 8bpp tiles clipped against both the left and right side of the
370 // screen, IE, runs of less than 8 - partial_tile_offset.
371
372 #define partial_tile_mid_noflip_8bpp(combine_op, alpha_op)                    \
373   if(partial_tile_offset >= 4)                                                \
374   {                                                                           \
375     current_pixels = *((u32 *)(tile_ptr + 4)) >>                              \
376      ((partial_tile_offset - 4) * 8);                                         \
377     partial_tile_8bpp(combine_op, alpha_op);                                  \
378   }                                                                           \
379   else                                                                        \
380   {                                                                           \
381     current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 8);         \
382     if((partial_tile_offset + partial_tile_run) > 4)                          \
383     {                                                                         \
384       u32 old_run = partial_tile_run;                                         \
385       partial_tile_run = 4 - partial_tile_offset;                             \
386       partial_tile_8bpp(combine_op, alpha_op);                                \
387       partial_tile_run = old_run - partial_tile_run;                          \
388       current_pixels = *((u32 *)(tile_ptr + 4));                              \
389       partial_tile_8bpp(combine_op, alpha_op);                                \
390     }                                                                         \
391     else                                                                      \
392     {                                                                         \
393       partial_tile_8bpp(combine_op, alpha_op);                                \
394     }                                                                         \
395   }                                                                           \
396
397
398 // Draws 8bpp tiles clipped against the right side of the screen,
399 // partial_tile_run indicates how much there is to draw.
400
401 #define partial_tile_left_noflip_8bpp(combine_op, alpha_op)                   \
402   if(partial_tile_run >= 4)                                                   \
403   {                                                                           \
404     current_pixels = *((u32 *)tile_ptr);                                      \
405     tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip);                    \
406     advance_dest_ptr_##combine_op(4);                                         \
407     tile_ptr += 4;                                                            \
408     partial_tile_run -= 4;                                                    \
409   }                                                                           \
410                                                                               \
411   current_pixels = *((u32 *)(tile_ptr));                                      \
412   partial_tile_8bpp(combine_op, alpha_op)                                     \
413
414
415 // Draws a non-clipped (complete) 8bpp tile.
416
417 #define tile_noflip_8bpp(combine_op, alpha_op)                                \
418   current_pixels = *((u32 *)tile_ptr);                                        \
419   tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip);                      \
420   current_pixels = *((u32 *)(tile_ptr + 4));                                  \
421   tile_8bpp_draw_four_##combine_op(4, alpha_op, noflip)                       \
422
423
424 // Like the above versions but draws flipped tiles.
425
426 #define partial_tile_flip_8bpp(combine_op, alpha_op)                          \
427   for(i = 0; i < partial_tile_run; i++)                                       \
428   {                                                                           \
429     tile_8bpp_draw_##combine_op(0, shift, 24, alpha_op);                      \
430     current_pixels <<= 8;                                                     \
431     advance_dest_ptr_##combine_op(1);                                         \
432   }                                                                           \
433
434 #define partial_tile_right_flip_8bpp(combine_op, alpha_op)                    \
435   if(partial_tile_offset >= 4)                                                \
436   {                                                                           \
437     current_pixels = *((u32 *)tile_ptr) << ((partial_tile_offset - 4) * 8);   \
438     partial_tile_flip_8bpp(combine_op, alpha_op);                             \
439   }                                                                           \
440   else                                                                        \
441   {                                                                           \
442     partial_tile_run -= 4;                                                    \
443     current_pixels = *((u32 *)(tile_ptr + 4)) <<                              \
444      ((partial_tile_offset - 4) * 8);                                         \
445     partial_tile_flip_8bpp(combine_op, alpha_op);                             \
446     current_pixels = *((u32 *)tile_ptr);                                      \
447     tile_8bpp_draw_four_##combine_op(0, alpha_op, flip);                      \
448     advance_dest_ptr_##combine_op(4);                                         \
449   }                                                                           \
450
451 #define partial_tile_mid_flip_8bpp(combine_op, alpha_op)                      \
452   if(partial_tile_offset >= 4)                                                \
453   {                                                                           \
454     current_pixels = *((u32 *)tile_ptr) << ((partial_tile_offset - 4) * 8);   \
455     partial_tile_flip_8bpp(combine_op, alpha_op);                             \
456   }                                                                           \
457   else                                                                        \
458   {                                                                           \
459     current_pixels = *((u32 *)(tile_ptr + 4)) <<                              \
460      ((partial_tile_offset - 4) * 8);                                         \
461                                                                               \
462     if((partial_tile_offset + partial_tile_run) > 4)                          \
463     {                                                                         \
464       u32 old_run = partial_tile_run;                                         \
465       partial_tile_run = 4 - partial_tile_offset;                             \
466       partial_tile_flip_8bpp(combine_op, alpha_op);                           \
467       partial_tile_run = old_run - partial_tile_run;                          \
468       current_pixels = *((u32 *)(tile_ptr));                                  \
469       partial_tile_flip_8bpp(combine_op, alpha_op);                           \
470     }                                                                         \
471     else                                                                      \
472     {                                                                         \
473       partial_tile_flip_8bpp(combine_op, alpha_op);                           \
474     }                                                                         \
475   }                                                                           \
476
477 #define partial_tile_left_flip_8bpp(combine_op, alpha_op)                     \
478   if(partial_tile_run >= 4)                                                   \
479   {                                                                           \
480     current_pixels = *((u32 *)(tile_ptr + 4));                                \
481     tile_8bpp_draw_four_##combine_op(0, alpha_op, flip);                      \
482     advance_dest_ptr_##combine_op(4);                                         \
483     tile_ptr -= 4;                                                            \
484     partial_tile_run -= 4;                                                    \
485   }                                                                           \
486                                                                               \
487   current_pixels = *((u32 *)(tile_ptr + 4));                                  \
488   partial_tile_flip_8bpp(combine_op, alpha_op)                                \
489
490 #define tile_flip_8bpp(combine_op, alpha_op)                                  \
491   current_pixels = *((u32 *)(tile_ptr + 4));                                  \
492   tile_8bpp_draw_four_##combine_op(0, alpha_op, flip);                        \
493   current_pixels = *((u32 *)tile_ptr);                                        \
494   tile_8bpp_draw_four_##combine_op(4, alpha_op, flip)                         \
495
496
497 // Operations for isolating 4bpp tiles in a 32bit block
498
499 #define tile_4bpp_pixel_op_mask(op_param)                                     \
500   current_pixel = current_pixels & 0x0F                                       \
501
502 #define tile_4bpp_pixel_op_shift_mask(shift)                                  \
503   current_pixel = (current_pixels >> shift) & 0x0F                            \
504
505 #define tile_4bpp_pixel_op_shift(shift)                                       \
506   current_pixel = current_pixels >> shift                                     \
507
508 #define tile_4bpp_pixel_op_none(op_param)                                     \
509
510 // Draws a single 4bpp pixel as base, normal renderer; checks to see if the
511 // pixel is zero because if so the current palette should not be applied.
512 // These ifs can be replaced with a lookup table, may or may not be superior
513 // this way, should be benchmarked. The lookup table would be from 0-255
514 // identity map except for multiples of 16, which would map to 0.
515
516 #define tile_4bpp_draw_base_normal(index)                                     \
517   if(current_pixel)                                                           \
518   {                                                                           \
519     current_pixel |= current_palette;                                         \
520     tile_expand_base_normal(index);                                           \
521   }                                                                           \
522   else                                                                        \
523   {                                                                           \
524     tile_expand_base_normal(index);                                           \
525   }                                                                           \
526
527
528 #define tile_4bpp_draw_base_alpha(index)                                      \
529   if(current_pixel)                                                           \
530   {                                                                           \
531     current_pixel |= current_palette;                                         \
532     tile_expand_base_alpha(index);                                            \
533   }                                                                           \
534   else                                                                        \
535   {                                                                           \
536     tile_expand_base_bg(index);                                               \
537   }                                                                           \
538
539 #define tile_4bpp_draw_base_color16(index)                                    \
540   tile_4bpp_draw_base_alpha(index)                                            \
541
542 #define tile_4bpp_draw_base_color32(index)                                    \
543   tile_4bpp_draw_base_alpha(index)                                            \
544
545
546 #define tile_4bpp_draw_base(index, op, op_param, alpha_op)                    \
547   tile_4bpp_pixel_op_##op(op_param);                                          \
548   tile_4bpp_draw_base_##alpha_op(index)                                       \
549
550
551 // Draws a single 4bpp pixel as layered, if not transparent.
552
553 #define tile_4bpp_draw_transparent(index, op, op_param, alpha_op)             \
554   tile_4bpp_pixel_op_##op(op_param);                                          \
555   if(current_pixel)                                                           \
556   {                                                                           \
557     current_pixel |= current_palette;                                         \
558     tile_expand_transparent_##alpha_op(index);                                \
559   }                                                                           \
560
561 #define tile_4bpp_draw_copy(index, op, op_param, alpha_op)                    \
562   tile_4bpp_pixel_op_##op(op_param);                                          \
563   if(current_pixel)                                                           \
564   {                                                                           \
565     current_pixel |= current_palette;                                         \
566     tile_expand_copy(index);                                                  \
567   }                                                                           \
568
569
570 // Draws eight background pixels in transparent mode, for alpha or normal
571 // renderers.
572
573 #define tile_4bpp_draw_eight_base_zero(value)                                 \
574   dest_ptr[0] = value;                                                        \
575   dest_ptr[1] = value;                                                        \
576   dest_ptr[2] = value;                                                        \
577   dest_ptr[3] = value;                                                        \
578   dest_ptr[4] = value;                                                        \
579   dest_ptr[5] = value;                                                        \
580   dest_ptr[6] = value;                                                        \
581   dest_ptr[7] = value                                                         \
582
583
584 // Draws eight background pixels for the alpha renderer, basically color zero
585 // with the background flag high.
586
587 #define tile_4bpp_draw_eight_base_zero_alpha()                                \
588   tile_4bpp_draw_eight_base_zero(bg_combine)                                  \
589
590 #define tile_4bpp_draw_eight_base_zero_color16()                              \
591   tile_4bpp_draw_eight_base_zero_alpha()                                      \
592
593 #define tile_4bpp_draw_eight_base_zero_color32()                              \
594   tile_4bpp_draw_eight_base_zero_alpha()                                      \
595
596
597 // Draws eight background pixels for the normal renderer, just a bunch of
598 // zeros.
599
600 #ifdef RENDER_COLOR16_NORMAL
601
602 #define tile_4bpp_draw_eight_base_zero_normal()                               \
603   current_pixel = 0;                                                          \
604   tile_4bpp_draw_eight_base_zero(current_pixel)                               \
605
606 #else
607
608 #define tile_4bpp_draw_eight_base_zero_normal()                               \
609   current_pixel = palette[0];                                                 \
610   tile_4bpp_draw_eight_base_zero(current_pixel)                               \
611
612 #endif
613
614
615 // Draws eight 4bpp pixels.
616
617 #define tile_4bpp_draw_eight_noflip(combine_op, alpha_op)                     \
618   tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op);                          \
619   tile_4bpp_draw_##combine_op(1, shift_mask, 4, alpha_op);                    \
620   tile_4bpp_draw_##combine_op(2, shift_mask, 8, alpha_op);                    \
621   tile_4bpp_draw_##combine_op(3, shift_mask, 12, alpha_op);                   \
622   tile_4bpp_draw_##combine_op(4, shift_mask, 16, alpha_op);                   \
623   tile_4bpp_draw_##combine_op(5, shift_mask, 20, alpha_op);                   \
624   tile_4bpp_draw_##combine_op(6, shift_mask, 24, alpha_op);                   \
625   tile_4bpp_draw_##combine_op(7, shift, 28, alpha_op)                         \
626
627
628 // Draws eight 4bpp pixels in reverse order (for hflip).
629
630 #define tile_4bpp_draw_eight_flip(combine_op, alpha_op)                       \
631   tile_4bpp_draw_##combine_op(7, mask, 0, alpha_op);                          \
632   tile_4bpp_draw_##combine_op(6, shift_mask, 4, alpha_op);                    \
633   tile_4bpp_draw_##combine_op(5, shift_mask, 8, alpha_op);                    \
634   tile_4bpp_draw_##combine_op(4, shift_mask, 12, alpha_op);                   \
635   tile_4bpp_draw_##combine_op(3, shift_mask, 16, alpha_op);                   \
636   tile_4bpp_draw_##combine_op(2, shift_mask, 20, alpha_op);                   \
637   tile_4bpp_draw_##combine_op(1, shift_mask, 24, alpha_op);                   \
638   tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op)                         \
639
640
641 // Draws eight 4bpp pixels in base mode, checks if all are zero, if so draws
642 // the appropriate background pixels.
643
644 #define tile_4bpp_draw_eight_base(alpha_op, flip_op)                          \
645   if(current_pixels != 0)                                                     \
646   {                                                                           \
647     tile_4bpp_draw_eight_##flip_op(base, alpha_op);                           \
648   }                                                                           \
649   else                                                                        \
650   {                                                                           \
651     tile_4bpp_draw_eight_base_zero_##alpha_op();                              \
652   }                                                                           \
653
654
655 // Draws eight 4bpp pixels in transparent (layered) mode, checks if all are
656 // zero and if so draws nothing.
657
658 #define tile_4bpp_draw_eight_transparent(alpha_op, flip_op)                   \
659   if(current_pixels != 0)                                                     \
660   {                                                                           \
661     tile_4bpp_draw_eight_##flip_op(transparent, alpha_op);                    \
662   }                                                                           \
663
664
665 #define tile_4bpp_draw_eight_copy(alpha_op, flip_op)                          \
666   if(current_pixels != 0)                                                     \
667   {                                                                           \
668     tile_4bpp_draw_eight_##flip_op(copy, alpha_op);                           \
669   }                                                                           \
670
671 // Gets the current tile in 4bpp mode, also getting the current palette and
672 // the pixel block.
673
674 #define get_tile_4bpp()                                                       \
675   current_tile = *map_ptr;                                                    \
676   current_palette = (current_tile >> 12) << 4;                                \
677   tile_ptr = tile_base + ((current_tile & 0x3FF) * 32);                       \
678
679
680 // Helper macro for drawing clipped 4bpp tiles.
681
682 #define partial_tile_4bpp(combine_op, alpha_op)                               \
683   for(i = 0; i < partial_tile_run; i++)                                       \
684   {                                                                           \
685     tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op);                        \
686     current_pixels >>= 4;                                                     \
687     advance_dest_ptr_##combine_op(1);                                         \
688   }                                                                           \
689
690
691 // Draws a 4bpp tile clipped against the left edge of the screen.
692 // partial_tile_offset is how far in it's clipped, partial_tile_run is
693 // how many to draw.
694
695 #define partial_tile_right_noflip_4bpp(combine_op, alpha_op)                  \
696   current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 4);           \
697   partial_tile_4bpp(combine_op, alpha_op)                                     \
698
699
700 // Draws a 4bpp tile clipped against both edges of the screen, same as right.
701
702 #define partial_tile_mid_noflip_4bpp(combine_op, alpha_op)                    \
703   partial_tile_right_noflip_4bpp(combine_op, alpha_op)                        \
704
705
706 // Draws a 4bpp tile clipped against the right edge of the screen.
707 // partial_tile_offset is how many to draw.
708
709 #define partial_tile_left_noflip_4bpp(combine_op, alpha_op)                   \
710   current_pixels = *((u32 *)tile_ptr);                                        \
711   partial_tile_4bpp(combine_op, alpha_op)                                     \
712
713
714 // Draws a complete 4bpp tile row (not clipped)
715 #define tile_noflip_4bpp(combine_op, alpha_op)                                \
716   current_pixels = *((u32 *)tile_ptr);                                        \
717   tile_4bpp_draw_eight_##combine_op(alpha_op, noflip)                         \
718
719
720 // Like the above, but draws flipped tiles.
721
722 #define partial_tile_flip_4bpp(combine_op, alpha_op)                          \
723   for(i = 0; i < partial_tile_run; i++)                                       \
724   {                                                                           \
725     tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op);                      \
726     current_pixels <<= 4;                                                     \
727     advance_dest_ptr_##combine_op(1);                                         \
728   }                                                                           \
729
730 #define partial_tile_right_flip_4bpp(combine_op, alpha_op)                    \
731   current_pixels = *((u32 *)tile_ptr) << (partial_tile_offset * 4);           \
732   partial_tile_flip_4bpp(combine_op, alpha_op)                                \
733
734 #define partial_tile_mid_flip_4bpp(combine_op, alpha_op)                      \
735   partial_tile_right_flip_4bpp(combine_op, alpha_op)                          \
736
737 #define partial_tile_left_flip_4bpp(combine_op, alpha_op)                     \
738   current_pixels = *((u32 *)tile_ptr);                                        \
739   partial_tile_flip_4bpp(combine_op, alpha_op)                                \
740
741 #define tile_flip_4bpp(combine_op, alpha_op)                                  \
742   current_pixels = *((u32 *)tile_ptr);                                        \
743   tile_4bpp_draw_eight_##combine_op(alpha_op, flip)                           \
744
745
746 // Draws a single (partial or complete) tile from the tilemap, flipping
747 // as necessary.
748
749 #define single_tile_map(tile_type, combine_op, color_depth, alpha_op)         \
750   get_tile_##color_depth();                                                   \
751   if(current_tile & 0x800)                                                    \
752     tile_ptr += vertical_pixel_flip;                                          \
753                                                                               \
754   if(current_tile & 0x400)                                                    \
755   {                                                                           \
756     tile_type##_flip_##color_depth(combine_op, alpha_op);                     \
757   }                                                                           \
758   else                                                                        \
759   {                                                                           \
760     tile_type##_noflip_##color_depth(combine_op, alpha_op);                   \
761   }                                                                           \
762
763
764 // Draws multiple sequential tiles from the tilemap, hflips and vflips as
765 // necessary.
766
767 #define multiple_tile_map(combine_op, color_depth, alpha_op)                  \
768   for(i = 0; i < tile_run; i++)                                               \
769   {                                                                           \
770     single_tile_map(tile, combine_op, color_depth, alpha_op);                 \
771     advance_dest_ptr_##combine_op(8);                                         \
772     map_ptr++;                                                                \
773   }                                                                           \
774
775 // Draws a partial tile from a tilemap clipped against the left edge of the
776 // screen.
777
778 #define partial_tile_right_map(combine_op, color_depth, alpha_op)             \
779   single_tile_map(partial_tile_right, combine_op, color_depth, alpha_op);     \
780   map_ptr++                                                                   \
781
782 // Draws a partial tile from a tilemap clipped against both edges of the
783 // screen.
784
785 #define partial_tile_mid_map(combine_op, color_depth, alpha_op)               \
786   single_tile_map(partial_tile_mid, combine_op, color_depth, alpha_op)        \
787
788 // Draws a partial tile from a tilemap clipped against the right edge of the
789 // screen.
790
791 #define partial_tile_left_map(combine_op, color_depth, alpha_op)              \
792   single_tile_map(partial_tile_left, combine_op, color_depth, alpha_op)       \
793
794
795 // Advances a non-flipped 4bpp obj to the next tile.
796
797 #define obj_advance_noflip_4bpp()                                             \
798   tile_ptr += 32                                                              \
799
800
801 // Advances a non-flipped 8bpp obj to the next tile.
802
803 #define obj_advance_noflip_8bpp()                                             \
804   tile_ptr += 64                                                              \
805
806
807 // Advances a flipped 4bpp obj to the next tile.
808
809 #define obj_advance_flip_4bpp()                                               \
810   tile_ptr -= 32                                                              \
811
812
813 // Advances a flipped 8bpp obj to the next tile.
814
815 #define obj_advance_flip_8bpp()                                               \
816   tile_ptr -= 64                                                              \
817
818
819
820 // Draws multiple sequential tiles from an obj, flip_op determines if it should
821 // be flipped or not (set to flip or noflip)
822
823 #define multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op)         \
824   for(i = 0; i < tile_run; i++)                                               \
825   {                                                                           \
826     tile_##flip_op##_##color_depth(combine_op, alpha_op);                     \
827     obj_advance_##flip_op##_##color_depth();                                  \
828     advance_dest_ptr_##combine_op(8);                                         \
829   }                                                                           \
830
831
832 // Draws an obj's tile clipped against the left side of the screen
833
834 #define partial_tile_right_obj(combine_op, color_depth, alpha_op, flip_op)    \
835   partial_tile_right_##flip_op##_##color_depth(combine_op, alpha_op);         \
836   obj_advance_##flip_op##_##color_depth()                                     \
837
838 // Draws an obj's tile clipped against both sides of the screen
839
840 #define partial_tile_mid_obj(combine_op, color_depth, alpha_op, flip_op)      \
841   partial_tile_mid_##flip_op##_##color_depth(combine_op, alpha_op)            \
842
843 // Draws an obj's tile clipped against the right side of the screen
844
845 #define partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op)     \
846   partial_tile_left_##flip_op##_##color_depth(combine_op, alpha_op)           \
847
848
849 // Extra variables specific for 8bpp/4bpp tile renderers.
850
851 #define tile_extra_variables_8bpp()                                           \
852
853 #define tile_extra_variables_4bpp()                                           \
854   u32 current_palette                                                         \
855
856
857 // Byte lengths of complete tiles and tile rows in 4bpp and 8bpp.
858
859 #define tile_width_4bpp 4
860 #define tile_size_4bpp 32
861 #define tile_width_8bpp 8
862 #define tile_size_8bpp 64
863
864
865 // Render a single scanline of text tiles
866
867 #define tile_render(color_depth, combine_op, alpha_op)                        \
868 {                                                                             \
869   u32 vertical_pixel_offset = (vertical_offset % 8) *                         \
870    tile_width_##color_depth;                                                  \
871   u32 vertical_pixel_flip =                                                   \
872    ((tile_size_##color_depth - tile_width_##color_depth) -                    \
873    vertical_pixel_offset) - vertical_pixel_offset;                            \
874   tile_extra_variables_##color_depth();                                       \
875   u8 *tile_base = vram + (((bg_control >> 2) & 0x03) * (1024 * 16)) +         \
876    vertical_pixel_offset;                                                     \
877   u32 pixel_run = 256 - (horizontal_offset % 256);                            \
878   u32 current_tile;                                                           \
879                                                                               \
880   map_base += ((vertical_offset % 256) / 8) * 32;                             \
881   partial_tile_offset = (horizontal_offset % 8);                              \
882                                                                               \
883   if(pixel_run >= end)                                                        \
884   {                                                                           \
885     if(partial_tile_offset)                                                   \
886     {                                                                         \
887       partial_tile_run = 8 - partial_tile_offset;                             \
888       if(end < partial_tile_run)                                              \
889       {                                                                       \
890         partial_tile_run = end;                                               \
891         partial_tile_mid_map(combine_op, color_depth, alpha_op);              \
892         return;                                                               \
893       }                                                                       \
894       else                                                                    \
895       {                                                                       \
896         end -= partial_tile_run;                                              \
897         partial_tile_right_map(combine_op, color_depth, alpha_op);            \
898       }                                                                       \
899     }                                                                         \
900                                                                               \
901     tile_run = end / 8;                                                       \
902     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
903                                                                               \
904     partial_tile_run = end % 8;                                               \
905                                                                               \
906     if(partial_tile_run)                                                      \
907     {                                                                         \
908       partial_tile_left_map(combine_op, color_depth, alpha_op);               \
909     }                                                                         \
910   }                                                                           \
911   else                                                                        \
912   {                                                                           \
913     if(partial_tile_offset)                                                   \
914     {                                                                         \
915       partial_tile_run = 8 - partial_tile_offset;                             \
916       partial_tile_right_map(combine_op, color_depth, alpha_op);              \
917     }                                                                         \
918                                                                               \
919     tile_run = (pixel_run - partial_tile_run) / 8;                            \
920     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
921     map_ptr = second_ptr;                                                     \
922     end -= pixel_run;                                                         \
923     tile_run = end / 8;                                                       \
924     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
925                                                                               \
926     partial_tile_run = end % 8;                                               \
927     if(partial_tile_run)                                                      \
928     {                                                                         \
929       partial_tile_left_map(combine_op, color_depth, alpha_op);               \
930     }                                                                         \
931   }                                                                           \
932 }                                                                             \
933
934 #define render_scanline_dest_normal         u16
935 #define render_scanline_dest_alpha          u32
936 #define render_scanline_dest_alpha_obj      u32
937 #define render_scanline_dest_color16        u16
938 #define render_scanline_dest_color32        u32
939 #define render_scanline_dest_partial_alpha  u32
940 #define render_scanline_dest_copy_tile      u16
941 #define render_scanline_dest_copy_bitmap    u16
942
943
944 // If rendering a scanline that is not a target A then there's no point in
945 // keeping what's underneath it because it can't blend with it.
946
947 #define render_scanline_skip_alpha(bg_type, combine_op)                       \
948   if((pixel_combine & 0x00000200) == 0)                                       \
949   {                                                                           \
950     render_scanline_##bg_type##_##combine_op##_color32(layer,                 \
951      start, end, scanline);                                                   \
952     return;                                                                   \
953   }                                                                           \
954
955
956 #ifdef RENDER_COLOR16_NORMAL
957
958 #define render_scanline_extra_variables_base_normal(bg_type)                  \
959   const u32 pixel_combine = 0                                                 \
960
961 #else
962
963 #define render_scanline_extra_variables_base_normal(bg_type)                  \
964   u16 *palette = palette_ram_converted                                        \
965
966 #endif
967
968
969 #define render_scanline_extra_variables_base_alpha(bg_type)                   \
970   u32 bg_combine = color_combine_mask(5);                                     \
971   u32 pixel_combine = color_combine_mask(layer) | (bg_combine << 16);         \
972   render_scanline_skip_alpha(bg_type, base)                                   \
973
974 #define render_scanline_extra_variables_base_color()                          \
975   u32 bg_combine = color_combine_mask(5);                                     \
976   u32 pixel_combine = color_combine_mask(layer)                               \
977
978 #define render_scanline_extra_variables_base_color16(bg_type)                 \
979   render_scanline_extra_variables_base_color()                                \
980
981 #define render_scanline_extra_variables_base_color32(bg_type)                 \
982   render_scanline_extra_variables_base_color()                                \
983
984
985 #define render_scanline_extra_variables_transparent_normal(bg_type)           \
986   render_scanline_extra_variables_base_normal(bg_type)                        \
987
988 #define render_scanline_extra_variables_transparent_alpha(bg_type)            \
989   u32 pixel_combine = color_combine_mask(layer);                              \
990   render_scanline_skip_alpha(bg_type, transparent)                            \
991
992 #define render_scanline_extra_variables_transparent_color()                   \
993   u32 pixel_combine = color_combine_mask(layer)                               \
994
995 #define render_scanline_extra_variables_transparent_color16(bg_type)          \
996   render_scanline_extra_variables_transparent_color()                         \
997
998 #define render_scanline_extra_variables_transparent_color32(bg_type)          \
999   render_scanline_extra_variables_transparent_color()                         \
1000
1001
1002
1003
1004
1005 // Map widths and heights
1006
1007 u32 map_widths[] = { 256, 512, 256, 512 };
1008 u32 map_heights[] = { 256, 256, 512, 512 };
1009
1010 // Build text scanline rendering functions.
1011
1012 #define render_scanline_text_builder(combine_op, alpha_op)                    \
1013 void render_scanline_text_##combine_op##_##alpha_op(u32 layer,                \
1014  u32 start, u32 end, void *scanline)                                          \
1015 {                                                                             \
1016   render_scanline_extra_variables_##combine_op##_##alpha_op(text);            \
1017   u32 bg_control = io_registers[REG_BG0CNT + layer];                          \
1018   u32 map_size = (bg_control >> 14) & 0x03;                                   \
1019   u32 map_width = map_widths[map_size];                                       \
1020   u32 map_height = map_heights[map_size];                                     \
1021   u32 horizontal_offset =                                                     \
1022    (io_registers[REG_BG0HOFS + (layer * 2)] + start) % 512;                   \
1023   u32 vertical_offset = (io_registers[REG_VCOUNT] +                           \
1024    io_registers[REG_BG0VOFS + (layer * 2)]) % 512;                            \
1025   u32 current_pixel;                                                          \
1026   u32 current_pixels;                                                         \
1027   u32 partial_tile_run = 0;                                                   \
1028   u32 partial_tile_offset;                                                    \
1029   u32 tile_run;                                                               \
1030   u32 i;                                                                      \
1031   render_scanline_dest_##alpha_op *dest_ptr =                                 \
1032    ((render_scanline_dest_##alpha_op *)scanline) + start;                     \
1033                                                                               \
1034   u16 *map_base = (u16 *)(vram + ((bg_control >> 8) & 0x1F) * (1024 * 2));    \
1035   u16 *map_ptr, *second_ptr;                                                  \
1036   u8 *tile_ptr;                                                               \
1037                                                                               \
1038   end -= start;                                                               \
1039                                                                               \
1040   if((map_size & 0x02) && (vertical_offset >= 256))                           \
1041   {                                                                           \
1042     map_base += ((map_width / 8) * 32) +                                      \
1043      (((vertical_offset - 256) / 8) * 32);                                    \
1044   }                                                                           \
1045   else                                                                        \
1046   {                                                                           \
1047     map_base += (((vertical_offset % 256) / 8) * 32);                         \
1048   }                                                                           \
1049                                                                               \
1050   if(map_size & 0x01)                                                         \
1051   {                                                                           \
1052     if(horizontal_offset >= 256)                                              \
1053     {                                                                         \
1054       horizontal_offset -= 256;                                               \
1055       map_ptr = map_base + (32 * 32) + (horizontal_offset / 8);               \
1056       second_ptr = map_base;                                                  \
1057     }                                                                         \
1058     else                                                                      \
1059     {                                                                         \
1060       map_ptr = map_base + (horizontal_offset / 8);                           \
1061       second_ptr = map_base + (32 * 32);                                      \
1062     }                                                                         \
1063   }                                                                           \
1064   else                                                                        \
1065   {                                                                           \
1066     horizontal_offset %= 256;                                                 \
1067     map_ptr = map_base + (horizontal_offset / 8);                             \
1068     second_ptr = map_base;                                                    \
1069   }                                                                           \
1070                                                                               \
1071   if(bg_control & 0x80)                                                       \
1072   {                                                                           \
1073     tile_render(8bpp, combine_op, alpha_op);                                  \
1074   }                                                                           \
1075   else                                                                        \
1076   {                                                                           \
1077     tile_render(4bpp, combine_op, alpha_op);                                  \
1078   }                                                                           \
1079 }                                                                             \
1080
1081 render_scanline_text_builder(base, normal);
1082 render_scanline_text_builder(transparent, normal);
1083 render_scanline_text_builder(base, color16);
1084 render_scanline_text_builder(transparent, color16);
1085 render_scanline_text_builder(base, color32);
1086 render_scanline_text_builder(transparent, color32);
1087 render_scanline_text_builder(base, alpha);
1088 render_scanline_text_builder(transparent, alpha);
1089
1090
1091 s32 affine_reference_x[2];
1092 s32 affine_reference_y[2];
1093
1094 #define affine_render_bg_pixel_normal()                                       \
1095   current_pixel = palette_ram_converted[0]                                    \
1096
1097 #define affine_render_bg_pixel_alpha()                                        \
1098   current_pixel = bg_combine                                                  \
1099
1100 #define affine_render_bg_pixel_color16()                                      \
1101   affine_render_bg_pixel_alpha()                                              \
1102
1103 #define affine_render_bg_pixel_color32()                                      \
1104   affine_render_bg_pixel_alpha()                                              \
1105
1106 #define affine_render_bg_pixel_base(alpha_op)                                 \
1107   affine_render_bg_pixel_##alpha_op()                                         \
1108
1109 #define affine_render_bg_pixel_transparent(alpha_op)                          \
1110
1111 #define affine_render_bg_pixel_copy(alpha_op)                                 \
1112
1113 #define affine_render_bg_base(alpha_op)                                       \
1114   dest_ptr[0] = current_pixel
1115
1116 #define affine_render_bg_transparent(alpha_op)                                \
1117
1118 #define affine_render_bg_copy(alpha_op)                                       \
1119
1120 #define affine_render_bg_remainder_base(alpha_op)                             \
1121   affine_render_bg_pixel_##alpha_op();                                        \
1122   for(; i < end; i++)                                                         \
1123   {                                                                           \
1124     affine_render_bg_base(alpha_op);                                          \
1125     advance_dest_ptr_base(1);                                                 \
1126   }                                                                           \
1127
1128 #define affine_render_bg_remainder_transparent(alpha_op)                      \
1129
1130 #define affine_render_bg_remainder_copy(alpha_op)                             \
1131
1132 #define affine_render_next(combine_op)                                        \
1133   source_x += dx;                                                             \
1134   source_y += dy;                                                             \
1135   advance_dest_ptr_##combine_op(1)                                            \
1136
1137 #define affine_render_scale_offset()                                          \
1138   tile_base += ((pixel_y % 8) * 8);                                           \
1139   map_base += (pixel_y / 8) << map_pitch                                      \
1140
1141 #define affine_render_scale_pixel(combine_op, alpha_op)                       \
1142   map_offset = (pixel_x / 8);                                                 \
1143   if(map_offset != last_map_offset)                                           \
1144   {                                                                           \
1145     tile_ptr = tile_base + (map_base[map_offset] * 64);                       \
1146     last_map_offset = map_offset;                                             \
1147   }                                                                           \
1148   tile_ptr = tile_base + (map_base[(pixel_x / 8)] * 64);                      \
1149   current_pixel = tile_ptr[(pixel_x % 8)];                                    \
1150   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op);                          \
1151   affine_render_next(combine_op)                                              \
1152
1153 #define affine_render_scale(combine_op, alpha_op)                             \
1154 {                                                                             \
1155   pixel_y = source_y >> 8;                                                    \
1156   u32 i = 0;                                                                  \
1157   affine_render_bg_pixel_##combine_op(alpha_op);                              \
1158   if((u32)pixel_y < (u32)width_height)                                        \
1159   {                                                                           \
1160     affine_render_scale_offset();                                             \
1161     for(; i < end; i++)                                                       \
1162     {                                                                         \
1163       pixel_x = source_x >> 8;                                                \
1164                                                                               \
1165       if((u32)pixel_x < (u32)width_height)                                    \
1166       {                                                                       \
1167         break;                                                                \
1168       }                                                                       \
1169                                                                               \
1170       affine_render_bg_##combine_op(alpha_op);                                \
1171       affine_render_next(combine_op);                                         \
1172     }                                                                         \
1173                                                                               \
1174     for(; i < end; i++)                                                       \
1175     {                                                                         \
1176       pixel_x = source_x >> 8;                                                \
1177                                                                               \
1178       if((u32)pixel_x >= (u32)width_height)                                   \
1179         break;                                                                \
1180                                                                               \
1181       affine_render_scale_pixel(combine_op, alpha_op);                        \
1182     }                                                                         \
1183   }                                                                           \
1184   affine_render_bg_remainder_##combine_op(alpha_op);                          \
1185 }                                                                             \
1186
1187 #define affine_render_scale_wrap(combine_op, alpha_op)                        \
1188 {                                                                             \
1189   u32 wrap_mask = width_height - 1;                                           \
1190   pixel_y = (source_y >> 8) & wrap_mask;                                      \
1191   if((u32)pixel_y < (u32)width_height)                                        \
1192   {                                                                           \
1193     affine_render_scale_offset();                                             \
1194     for(i = 0; i < end; i++)                                                  \
1195     {                                                                         \
1196       pixel_x = (source_x >> 8) & wrap_mask;                                  \
1197       affine_render_scale_pixel(combine_op, alpha_op);                        \
1198     }                                                                         \
1199   }                                                                           \
1200 }                                                                             \
1201
1202
1203 #define affine_render_rotate_pixel(combine_op, alpha_op)                      \
1204   map_offset = (pixel_x / 8) + ((pixel_y / 8) << map_pitch);                  \
1205   if(map_offset != last_map_offset)                                           \
1206   {                                                                           \
1207     tile_ptr = tile_base + (map_base[map_offset] * 64);                       \
1208     last_map_offset = map_offset;                                             \
1209   }                                                                           \
1210                                                                               \
1211   current_pixel = tile_ptr[(pixel_x % 8) + ((pixel_y % 8) * 8)];              \
1212   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op);                          \
1213   affine_render_next(combine_op)                                              \
1214
1215 #define affine_render_rotate(combine_op, alpha_op)                            \
1216 {                                                                             \
1217   affine_render_bg_pixel_##combine_op(alpha_op);                              \
1218   for(i = 0; i < end; i++)                                                    \
1219   {                                                                           \
1220     pixel_x = source_x >> 8;                                                  \
1221     pixel_y = source_y >> 8;                                                  \
1222                                                                               \
1223     if(((u32)pixel_x < (u32)width_height) &&                                  \
1224      ((u32)pixel_y < (u32)width_height))                                      \
1225     {                                                                         \
1226       break;                                                                  \
1227     }                                                                         \
1228     affine_render_bg_##combine_op(alpha_op);                                  \
1229     affine_render_next(combine_op);                                           \
1230   }                                                                           \
1231                                                                               \
1232   for(; i < end; i++)                                                         \
1233   {                                                                           \
1234     pixel_x = source_x >> 8;                                                  \
1235     pixel_y = source_y >> 8;                                                  \
1236                                                                               \
1237     if(((u32)pixel_x >= (u32)width_height) ||                                 \
1238      ((u32)pixel_y >= (u32)width_height))                                     \
1239     {                                                                         \
1240       affine_render_bg_remainder_##combine_op(alpha_op);                      \
1241       break;                                                                  \
1242     }                                                                         \
1243                                                                               \
1244     affine_render_rotate_pixel(combine_op, alpha_op);                         \
1245   }                                                                           \
1246 }                                                                             \
1247
1248 #define affine_render_rotate_wrap(combine_op, alpha_op)                       \
1249 {                                                                             \
1250   u32 wrap_mask = width_height - 1;                                           \
1251   for(i = 0; i < end; i++)                                                    \
1252   {                                                                           \
1253     pixel_x = (source_x >> 8) & wrap_mask;                                    \
1254     pixel_y = (source_y >> 8) & wrap_mask;                                    \
1255                                                                               \
1256     affine_render_rotate_pixel(combine_op, alpha_op);                         \
1257   }                                                                           \
1258 }                                                                             \
1259
1260
1261 // Build affine background renderers.
1262
1263 #define render_scanline_affine_builder(combine_op, alpha_op)                  \
1264 void render_scanline_affine_##combine_op##_##alpha_op(u32 layer,              \
1265  u32 start, u32 end, void *scanline)                                          \
1266 {                                                                             \
1267   render_scanline_extra_variables_##combine_op##_##alpha_op(affine);          \
1268   u32 bg_control = io_registers[REG_BG0CNT + layer];                          \
1269   u32 current_pixel;                                                          \
1270   s32 source_x, source_y;                                                     \
1271   u32 vcount = io_registers[REG_VCOUNT];                                      \
1272   u32 pixel_x, pixel_y;                                                       \
1273   u32 layer_offset = (layer - 2) * 8;                                         \
1274   s32 dx, dy;                                                                 \
1275   u32 map_size = (bg_control >> 14) & 0x03;                                   \
1276   u32 width_height = 1 << (7 + map_size);                                     \
1277   u32 map_pitch = map_size + 4;                                               \
1278   u8 *map_base = vram + (((bg_control >> 8) & 0x1F) * (1024 * 2));            \
1279   u8 *tile_base = vram + (((bg_control >> 2) & 0x03) * (1024 * 16));          \
1280   u8 *tile_ptr;                                                               \
1281   u32 map_offset, last_map_offset = (u32)-1;                                  \
1282   u32 i;                                                                      \
1283   render_scanline_dest_##alpha_op *dest_ptr =                                 \
1284    ((render_scanline_dest_##alpha_op *)scanline) + start;                     \
1285                                                                               \
1286   dx = (s16)io_registers[REG_BG2PA + layer_offset];                           \
1287   dy = (s16)io_registers[REG_BG2PC + layer_offset];                           \
1288   source_x = affine_reference_x[layer - 2] + (start * dx);                    \
1289   source_y = affine_reference_y[layer - 2] + (start * dy);                    \
1290                                                                               \
1291   end -= start;                                                               \
1292                                                                               \
1293   switch(((bg_control >> 12) & 0x02) | (dy != 0))                             \
1294   {                                                                           \
1295     case 0x00:                                                                \
1296       affine_render_scale(combine_op, alpha_op);                              \
1297       break;                                                                  \
1298                                                                               \
1299     case 0x01:                                                                \
1300       affine_render_rotate(combine_op, alpha_op);                             \
1301       break;                                                                  \
1302                                                                               \
1303     case 0x02:                                                                \
1304       affine_render_scale_wrap(combine_op, alpha_op);                         \
1305       break;                                                                  \
1306                                                                               \
1307     case 0x03:                                                                \
1308       affine_render_rotate_wrap(combine_op, alpha_op);                        \
1309       break;                                                                  \
1310   }                                                                           \
1311 }                                                                             \
1312
1313 render_scanline_affine_builder(base, normal);
1314 render_scanline_affine_builder(transparent, normal);
1315 render_scanline_affine_builder(base, color16);
1316 render_scanline_affine_builder(transparent, color16);
1317 render_scanline_affine_builder(base, color32);
1318 render_scanline_affine_builder(transparent, color32);
1319 render_scanline_affine_builder(base, alpha);
1320 render_scanline_affine_builder(transparent, alpha);
1321
1322
1323 #define bitmap_render_pixel_mode3(alpha_op)                                   \
1324   convert_palette(current_pixel);                                             \
1325   *dest_ptr = current_pixel                                                   \
1326
1327 #define bitmap_render_pixel_mode4(alpha_op)                                   \
1328   tile_expand_base_##alpha_op(0)                                              \
1329
1330 #define bitmap_render_pixel_mode5(alpha_op)                                   \
1331   bitmap_render_pixel_mode3(alpha_op)                                         \
1332
1333
1334 #define bitmap_render_scale(type, alpha_op, width, height)                    \
1335   pixel_y = (source_y >> 8);                                                  \
1336   if((u32)pixel_y < (u32)height)                                              \
1337   {                                                                           \
1338     pixel_x = (source_x >> 8);                                                \
1339     src_ptr += (pixel_y * width);                                             \
1340     if(dx == 0x100)                                                           \
1341     {                                                                         \
1342       if(pixel_x < 0)                                                         \
1343       {                                                                       \
1344         end += pixel_x;                                                       \
1345         dest_ptr -= pixel_x;                                                  \
1346         pixel_x = 0;                                                          \
1347       }                                                                       \
1348       else                                                                    \
1349                                                                               \
1350       if(pixel_x > 0)                                                         \
1351       {                                                                       \
1352         src_ptr += pixel_x;                                                   \
1353       }                                                                       \
1354                                                                               \
1355       if((pixel_x + end) >= width)                                            \
1356         end = (width - pixel_x);                                              \
1357                                                                               \
1358       for(i = 0; (s32)i < (s32)end; i++)                                      \
1359       {                                                                       \
1360         current_pixel = *src_ptr;                                             \
1361         bitmap_render_pixel_##type(alpha_op);                                 \
1362         src_ptr++;                                                            \
1363         dest_ptr++;                                                           \
1364       }                                                                       \
1365     }                                                                         \
1366     else                                                                      \
1367     {                                                                         \
1368       if((u32)(source_y >> 8) < (u32)height)                                  \
1369       {                                                                       \
1370         for(i = 0; i < end; i++)                                              \
1371         {                                                                     \
1372           pixel_x = (source_x >> 8);                                          \
1373                                                                               \
1374           if((u32)pixel_x < (u32)width)                                       \
1375             break;                                                            \
1376                                                                               \
1377           source_x += dx;                                                     \
1378           dest_ptr++;                                                         \
1379         }                                                                     \
1380                                                                               \
1381         for(; i < end; i++)                                                   \
1382         {                                                                     \
1383           pixel_x = (source_x >> 8);                                          \
1384                                                                               \
1385           if((u32)pixel_x >= (u32)width)                                      \
1386             break;                                                            \
1387                                                                               \
1388           current_pixel = src_ptr[pixel_x];                                   \
1389           bitmap_render_pixel_##type(alpha_op);                               \
1390                                                                               \
1391           source_x += dx;                                                     \
1392           dest_ptr++;                                                         \
1393         }                                                                     \
1394       }                                                                       \
1395     }                                                                         \
1396   }                                                                           \
1397
1398 #define bitmap_render_rotate(type, alpha_op, width, height)                   \
1399   for(i = 0; i < end; i++)                                                    \
1400   {                                                                           \
1401     pixel_x = source_x >> 8;                                                  \
1402     pixel_y = source_y >> 8;                                                  \
1403                                                                               \
1404     if(((u32)pixel_x < (u32)width) && ((u32)pixel_y < (u32)height))           \
1405       break;                                                                  \
1406                                                                               \
1407     source_x += dx;                                                           \
1408     source_y += dy;                                                           \
1409     dest_ptr++;                                                               \
1410   }                                                                           \
1411                                                                               \
1412   for(; i < end; i++)                                                         \
1413   {                                                                           \
1414     pixel_x = (source_x >> 8);                                                \
1415     pixel_y = (source_y >> 8);                                                \
1416                                                                               \
1417     if(((u32)pixel_x >= (u32)width) || ((u32)pixel_y >= (u32)height))         \
1418       break;                                                                  \
1419                                                                               \
1420     current_pixel = src_ptr[pixel_x + (pixel_y * width)];                     \
1421      bitmap_render_pixel_##type(alpha_op);                                    \
1422                                                                               \
1423     source_x += dx;                                                           \
1424     source_y += dy;                                                           \
1425     dest_ptr++;                                                               \
1426   }                                                                           \
1427
1428
1429 #define render_scanline_vram_setup_mode3()                                    \
1430   u16 *src_ptr = (u16 *)vram                                                  \
1431
1432 #define render_scanline_vram_setup_mode5()                                    \
1433   u16 *src_ptr;                                                               \
1434   if(io_registers[REG_DISPCNT] & 0x10)                                        \
1435     src_ptr = (u16 *)(vram + 0xA000);                                         \
1436   else                                                                        \
1437     src_ptr = (u16 *)vram                                                     \
1438
1439
1440 #ifdef RENDER_COLOR16_NORMAL
1441
1442 #define render_scanline_vram_setup_mode4()                                    \
1443   const u32 pixel_combine = 0;                                                \
1444   u8 *src_ptr;                                                                \
1445   if(io_registers[REG_DISPCNT] & 0x10)                                        \
1446     src_ptr = vram + 0xA000;                                                  \
1447   else                                                                        \
1448     src_ptr = vram                                                            \
1449
1450
1451 #else
1452
1453 #define render_scanline_vram_setup_mode4()                                    \
1454   u16 *palette = palette_ram_converted;                                       \
1455   u8 *src_ptr;                                                                \
1456   if(io_registers[REG_DISPCNT] & 0x10)                                        \
1457     src_ptr = vram + 0xA000;                                                  \
1458   else                                                                        \
1459     src_ptr = vram                                                            \
1460
1461 #endif
1462
1463
1464
1465 // Build bitmap scanline rendering functions.
1466
1467 #define render_scanline_bitmap_builder(type, alpha_op, width, height)         \
1468 void render_scanline_bitmap_##type##_##alpha_op(u32 start, u32 end,           \
1469  void *scanline)                                                              \
1470 {                                                                             \
1471   u32 bg_control = io_registers[REG_BG2CNT];                                  \
1472   u32 current_pixel;                                                          \
1473   s32 source_x, source_y;                                                     \
1474   u32 vcount = io_registers[REG_VCOUNT];                                      \
1475   s32 pixel_x, pixel_y;                                                       \
1476                                                                               \
1477   s32 dx = (s16)io_registers[REG_BG2PA];                                      \
1478   s32 dy = (s16)io_registers[REG_BG2PC];                                      \
1479                                                                               \
1480   u32 i;                                                                      \
1481                                                                               \
1482   render_scanline_dest_##alpha_op *dest_ptr =                                 \
1483    ((render_scanline_dest_##alpha_op *)scanline) + start;                     \
1484   render_scanline_vram_setup_##type();                                        \
1485                                                                               \
1486   end -= start;                                                               \
1487                                                                               \
1488   source_x = affine_reference_x[0] + (start * dx);                            \
1489   source_y = affine_reference_y[0] + (start * dy);                            \
1490                                                                               \
1491   if(dy == 0)                                                                 \
1492   {                                                                           \
1493     bitmap_render_scale(type, alpha_op, width, height);                       \
1494   }                                                                           \
1495   else                                                                        \
1496   {                                                                           \
1497     bitmap_render_rotate(type, alpha_op, width, height);                      \
1498   }                                                                           \
1499 }                                                                             \
1500
1501 render_scanline_bitmap_builder(mode3, normal, 240, 160);
1502 render_scanline_bitmap_builder(mode4, normal, 240, 160);
1503 render_scanline_bitmap_builder(mode5, normal, 160, 128);
1504
1505
1506 // Fill in the renderers for a layer based on the mode type,
1507
1508 #define tile_layer_render_functions(type)                                     \
1509 {                                                                             \
1510   render_scanline_##type##_base_normal,                                       \
1511   render_scanline_##type##_transparent_normal,                                \
1512   render_scanline_##type##_base_alpha,                                        \
1513   render_scanline_##type##_transparent_alpha,                                 \
1514   render_scanline_##type##_base_color16,                                      \
1515   render_scanline_##type##_transparent_color16,                               \
1516   render_scanline_##type##_base_color32,                                      \
1517   render_scanline_##type##_transparent_color32                                \
1518 }                                                                             \
1519
1520
1521 // Use if a layer is unsupported for that mode.
1522
1523 #define tile_layer_render_null()                                              \
1524 {                                                                             \
1525   NULL, NULL, NULL, NULL                                                      \
1526 }                                                                             \
1527
1528 #define bitmap_layer_render_functions(type)                                   \
1529 {                                                                             \
1530   render_scanline_bitmap_##type##_normal                                      \
1531 }                                                                             \
1532
1533 // Structs containing functions to render the layers for each mode, for
1534 // each render type.
1535 tile_layer_render_struct tile_mode_renderers[3][4] =
1536 {
1537   {
1538     tile_layer_render_functions(text), tile_layer_render_functions(text),
1539     tile_layer_render_functions(text), tile_layer_render_functions(text)
1540   },
1541   {
1542     tile_layer_render_functions(text), tile_layer_render_functions(text),
1543     tile_layer_render_functions(affine), tile_layer_render_functions(text)
1544   },
1545   {
1546     tile_layer_render_functions(text), tile_layer_render_functions(text),
1547     tile_layer_render_functions(affine), tile_layer_render_functions(affine)
1548   }
1549 };
1550
1551 bitmap_layer_render_struct bitmap_mode_renderers[3] =
1552 {
1553   bitmap_layer_render_functions(mode3),
1554   bitmap_layer_render_functions(mode4),
1555   bitmap_layer_render_functions(mode5)
1556 };
1557
1558
1559 #define render_scanline_layer_functions_tile()                                \
1560   tile_layer_render_struct *layer_renderers =                                 \
1561    tile_mode_renderers[dispcnt & 0x07]                                        \
1562
1563 #define render_scanline_layer_functions_bitmap()                              \
1564   bitmap_layer_render_struct *layer_renderers =                               \
1565    bitmap_mode_renderers + ((dispcnt & 0x07) - 3)                             \
1566
1567
1568 // Adjust a flipped obj's starting position
1569
1570 #define obj_tile_offset_noflip(color_depth)                                   \
1571
1572 #define obj_tile_offset_flip(color_depth)                                     \
1573   + (tile_size_##color_depth * ((obj_width - 8) / 8))                         \
1574
1575
1576 // Adjust the obj's starting point if it goes too far off the left edge of    \
1577 // the screen.                                                                \
1578
1579 #define obj_tile_right_offset_noflip(color_depth)                             \
1580   tile_ptr += (partial_tile_offset / 8) * tile_size_##color_depth             \
1581
1582 #define obj_tile_right_offset_flip(color_depth)                               \
1583   tile_ptr -= (partial_tile_offset / 8) * tile_size_##color_depth             \
1584
1585 // Get the current row offset into an obj in 1D map space
1586
1587 #define obj_tile_offset_1D(color_depth, flip_op)                              \
1588   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1589    + ((vertical_offset / 8) * (obj_width / 8) * tile_size_##color_depth)      \
1590    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1591    obj_tile_offset_##flip_op(color_depth)                                     \
1592
1593 // Get the current row offset into an obj in 2D map space
1594
1595 #define obj_tile_offset_2D(color_depth, flip_op)                              \
1596   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1597    + ((vertical_offset / 8) * 1024)                                           \
1598    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1599    obj_tile_offset_##flip_op(color_depth)                                     \
1600
1601
1602 // Get the palette for 4bpp obj.
1603
1604 #define obj_get_palette_4bpp()                                                \
1605   current_palette = (obj_attribute_2 >> 8) & 0xF0                             \
1606
1607 #define obj_get_palette_8bpp()                                                \
1608
1609
1610 // Render the current row of an obj.
1611
1612 #define obj_render(combine_op, color_depth, alpha_op, map_space, flip_op)     \
1613 {                                                                             \
1614   obj_get_palette_##color_depth();                                            \
1615   obj_tile_offset_##map_space(color_depth, flip_op);                          \
1616                                                                               \
1617   if(obj_x < (s32)start)                                                      \
1618   {                                                                           \
1619     dest_ptr = scanline + start;                                              \
1620     pixel_run = obj_width - (start - obj_x);                                  \
1621     if((s32)pixel_run > 0)                                                    \
1622     {                                                                         \
1623       if((obj_x + obj_width) >= end)                                          \
1624       {                                                                       \
1625         pixel_run = end - start;                                              \
1626         partial_tile_offset = start - obj_x;                                  \
1627         obj_tile_right_offset_##flip_op(color_depth);                         \
1628         partial_tile_offset %= 8;                                             \
1629                                                                               \
1630         if(partial_tile_offset)                                               \
1631         {                                                                     \
1632           partial_tile_run = 8 - partial_tile_offset;                         \
1633           if((s32)pixel_run < (s32)partial_tile_run)                          \
1634           {                                                                   \
1635             if((s32)pixel_run > 0)                                            \
1636             {                                                                 \
1637               partial_tile_run = pixel_run;                                   \
1638               partial_tile_mid_obj(combine_op, color_depth, alpha_op,         \
1639                flip_op);                                                      \
1640             }                                                                 \
1641             continue;                                                         \
1642           }                                                                   \
1643           else                                                                \
1644           {                                                                   \
1645             pixel_run -= partial_tile_run;                                    \
1646             partial_tile_right_obj(combine_op, color_depth, alpha_op,         \
1647              flip_op);                                                        \
1648           }                                                                   \
1649         }                                                                     \
1650         tile_run = pixel_run / 8;                                             \
1651         multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);        \
1652         partial_tile_run = pixel_run % 8;                                     \
1653         if(partial_tile_run)                                                  \
1654         {                                                                     \
1655           partial_tile_left_obj(combine_op, color_depth, alpha_op,            \
1656            flip_op);                                                          \
1657         }                                                                     \
1658       }                                                                       \
1659       else                                                                    \
1660       {                                                                       \
1661         partial_tile_offset = start - obj_x;                                  \
1662         obj_tile_right_offset_##flip_op(color_depth);                         \
1663         partial_tile_offset %= 8;                                             \
1664         if(partial_tile_offset)                                               \
1665         {                                                                     \
1666           partial_tile_run = 8 - partial_tile_offset;                         \
1667           partial_tile_right_obj(combine_op, color_depth, alpha_op,           \
1668            flip_op);                                                          \
1669         }                                                                     \
1670         tile_run = pixel_run / 8;                                             \
1671         multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);        \
1672       }                                                                       \
1673     }                                                                         \
1674   }                                                                           \
1675   else                                                                        \
1676                                                                               \
1677   if((obj_x + obj_width) >= end)                                              \
1678   {                                                                           \
1679     pixel_run = end - obj_x;                                                  \
1680     if((s32)pixel_run > 0)                                                    \
1681     {                                                                         \
1682       dest_ptr = scanline + obj_x;                                            \
1683       tile_run = pixel_run / 8;                                               \
1684       multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);          \
1685       partial_tile_run = pixel_run % 8;                                       \
1686       if(partial_tile_run)                                                    \
1687       {                                                                       \
1688         partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op);    \
1689       }                                                                       \
1690     }                                                                         \
1691   }                                                                           \
1692   else                                                                        \
1693   {                                                                           \
1694     dest_ptr = scanline + obj_x;                                              \
1695     tile_run = obj_width / 8;                                                 \
1696     multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);            \
1697   }                                                                           \
1698 }                                                                             \
1699
1700 #define obj_scale_offset_1D(color_depth)                                      \
1701   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1702    + ((vertical_offset / 8) * (max_x / 8) * tile_size_##color_depth)          \
1703    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1704
1705 // Get the current row offset into an obj in 2D map space
1706
1707 #define obj_scale_offset_2D(color_depth)                                      \
1708   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1709    + ((vertical_offset / 8) * 1024)                                           \
1710    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1711
1712 #define obj_render_scale_pixel_4bpp(combine_op, alpha_op)                     \
1713   if(tile_x & 0x01)                                                           \
1714   {                                                                           \
1715     current_pixel = tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)] >> 4;  \
1716   }                                                                           \
1717   else                                                                        \
1718   {                                                                           \
1719     current_pixel =                                                           \
1720      tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)] & 0x0F;               \
1721   }                                                                           \
1722                                                                               \
1723   tile_4bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1724
1725
1726 #define obj_render_scale_pixel_8bpp(combine_op, alpha_op)                     \
1727   current_pixel = tile_ptr[tile_map_offset + (tile_x & 0x07)];                \
1728   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op);                          \
1729
1730 #define obj_render_scale(combine_op, color_depth, alpha_op, map_space)        \
1731 {                                                                             \
1732   u32 vertical_offset;                                                        \
1733   source_y += (y_delta * dmy);                                                \
1734   vertical_offset = (source_y >> 8);                                          \
1735   if((u32)vertical_offset < (u32)max_y)                                       \
1736   {                                                                           \
1737     obj_scale_offset_##map_space(color_depth);                                \
1738     source_x += (y_delta * dmx) - (middle_x * dx);                            \
1739                                                                               \
1740     for(i = 0; i < obj_width; i++)                                            \
1741     {                                                                         \
1742       tile_x = (source_x >> 8);                                               \
1743                                                                               \
1744       if((u32)tile_x < (u32)max_x)                                            \
1745         break;                                                                \
1746                                                                               \
1747       source_x += dx;                                                         \
1748       advance_dest_ptr_##combine_op(1);                                       \
1749     }                                                                         \
1750                                                                               \
1751     for(; i < obj_width; i++)                                                 \
1752     {                                                                         \
1753       tile_x = (source_x >> 8);                                               \
1754                                                                               \
1755       if((u32)tile_x >= (u32)max_x)                                           \
1756         break;                                                                \
1757                                                                               \
1758       tile_map_offset = (tile_x >> 3) * tile_size_##color_depth;              \
1759       obj_render_scale_pixel_##color_depth(combine_op, alpha_op);             \
1760                                                                               \
1761       source_x += dx;                                                         \
1762       advance_dest_ptr_##combine_op(1);                                       \
1763     }                                                                         \
1764   }                                                                           \
1765 }                                                                             \
1766
1767
1768 #define obj_rotate_offset_1D(color_depth)                                     \
1769   obj_tile_pitch = (max_x / 8) * tile_size_##color_depth                      \
1770
1771 #define obj_rotate_offset_2D(color_depth)                                     \
1772   obj_tile_pitch = 1024                                                       \
1773
1774 #define obj_render_rotate_pixel_4bpp(combine_op, alpha_op)                    \
1775   if(tile_x & 0x01)                                                           \
1776   {                                                                           \
1777     current_pixel = tile_ptr[tile_map_offset +                                \
1778      ((tile_x >> 1) & 0x03) + ((tile_y & 0x07) * obj_pitch)] >> 4;            \
1779   }                                                                           \
1780   else                                                                        \
1781   {                                                                           \
1782     current_pixel = tile_ptr[tile_map_offset +                                \
1783      ((tile_x >> 1) & 0x03) + ((tile_y & 0x07) * obj_pitch)] & 0x0F;          \
1784   }                                                                           \
1785                                                                               \
1786   tile_4bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1787
1788 #define obj_render_rotate_pixel_8bpp(combine_op, alpha_op)                    \
1789   current_pixel = tile_ptr[tile_map_offset +                                  \
1790    (tile_x & 0x07) + ((tile_y & 0x07) * obj_pitch)];                          \
1791                                                                               \
1792   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1793
1794 #define obj_render_rotate(combine_op, color_depth, alpha_op, map_space)       \
1795 {                                                                             \
1796   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32);                    \
1797   obj_rotate_offset_##map_space(color_depth);                                 \
1798                                                                               \
1799   source_x += (y_delta * dmx) - (middle_x * dx);                              \
1800   source_y += (y_delta * dmy) - (middle_x * dy);                              \
1801                                                                               \
1802   for(i = 0; i < obj_width; i++)                                              \
1803   {                                                                           \
1804     tile_x = (source_x >> 8);                                                 \
1805     tile_y = (source_y >> 8);                                                 \
1806                                                                               \
1807     if(((u32)tile_x < (u32)max_x) && ((u32)tile_y < (u32)max_y))              \
1808       break;                                                                  \
1809                                                                               \
1810     source_x += dx;                                                           \
1811     source_y += dy;                                                           \
1812     advance_dest_ptr_##combine_op(1);                                         \
1813   }                                                                           \
1814                                                                               \
1815   for(; i < obj_width; i++)                                                   \
1816   {                                                                           \
1817     tile_x = (source_x >> 8);                                                 \
1818     tile_y = (source_y >> 8);                                                 \
1819                                                                               \
1820     if(((u32)tile_x >= (u32)max_x) || ((u32)tile_y >= (u32)max_y))            \
1821       break;                                                                  \
1822                                                                               \
1823     tile_map_offset = ((tile_x >> 3) * tile_size_##color_depth) +             \
1824     ((tile_y >> 3) * obj_tile_pitch);                                         \
1825     obj_render_rotate_pixel_##color_depth(combine_op, alpha_op);              \
1826                                                                               \
1827     source_x += dx;                                                           \
1828     source_y += dy;                                                           \
1829     advance_dest_ptr_##combine_op(1);                                         \
1830   }                                                                           \
1831 }                                                                             \
1832
1833 // Render the current row of an affine transformed OBJ.
1834
1835 #define obj_render_affine(combine_op, color_depth, alpha_op, map_space)       \
1836 {                                                                             \
1837   s16 *params = oam_ram + (((obj_attribute_1 >> 9) & 0x1F) * 16);             \
1838   s32 dx = params[3];                                                         \
1839   s32 dmx = params[7];                                                        \
1840   s32 dy = params[11];                                                        \
1841   s32 dmy = params[15];                                                       \
1842   s32 source_x, source_y;                                                     \
1843   s32 tile_x, tile_y;                                                         \
1844   u32 tile_offset;                                                            \
1845   u32 tile_map_offset;                                                        \
1846   s32 middle_x;                                                               \
1847   s32 middle_y;                                                               \
1848   s32 max_x = obj_width;                                                      \
1849   s32 max_y = obj_height;                                                     \
1850   s32 y_delta;                                                                \
1851   u32 obj_pitch = tile_width_##color_depth;                                   \
1852   u32 obj_tile_pitch;                                                         \
1853                                                                               \
1854   middle_x = (obj_width / 2);                                                 \
1855   middle_y = (obj_height / 2);                                                \
1856                                                                               \
1857   source_x = (middle_x << 8);                                                 \
1858   source_y = (middle_y << 8);                                                 \
1859                                                                               \
1860                                                                               \
1861   if(obj_attribute_0 & 0x200)                                                 \
1862   {                                                                           \
1863     obj_width *= 2;                                                           \
1864     obj_height *= 2;                                                          \
1865     middle_x *= 2;                                                            \
1866     middle_y *= 2;                                                            \
1867   }                                                                           \
1868                                                                               \
1869   if((s32)obj_x < (s32)start)                                                 \
1870   {                                                                           \
1871     u32 x_delta = start - obj_x;                                              \
1872     middle_x -= x_delta;                                                      \
1873     obj_width -= x_delta;                                                     \
1874     obj_x = start;                                                            \
1875                                                                               \
1876     if((s32)obj_width <= 0)                                                   \
1877       continue;                                                               \
1878   }                                                                           \
1879                                                                               \
1880   if((s32)(obj_x + obj_width) >= (s32)end)                                    \
1881   {                                                                           \
1882     obj_width = end - obj_x;                                                  \
1883                                                                               \
1884     if((s32)obj_width <= 0)                                                   \
1885       continue;                                                               \
1886   }                                                                           \
1887   dest_ptr = scanline + obj_x;                                                \
1888                                                                               \
1889   y_delta = vcount - (obj_y + middle_y);                                      \
1890                                                                               \
1891   obj_get_palette_##color_depth();                                            \
1892                                                                               \
1893   if(dy == 0)                                                                 \
1894   {                                                                           \
1895     obj_render_scale(combine_op, color_depth, alpha_op, map_space);           \
1896   }                                                                           \
1897   else                                                                        \
1898   {                                                                           \
1899     obj_render_rotate(combine_op, color_depth, alpha_op, map_space);          \
1900   }                                                                           \
1901 }                                                                             \
1902
1903 u32 obj_width_table[] = { 8, 16, 32, 64, 16, 32, 32, 64, 8, 8, 16, 32 };
1904 u32 obj_height_table[] = { 8, 16, 32, 64, 8, 8, 16, 32, 16, 32, 32, 64 };
1905
1906 u8 obj_priority_list[5][160][128];
1907 u32 obj_priority_count[5][160];
1908 u32 obj_alpha_count[160];
1909
1910
1911 // Build obj rendering functions
1912
1913 #ifdef RENDER_COLOR16_NORMAL
1914
1915 #define render_scanline_obj_extra_variables_normal(bg_type)                   \
1916   const u32 pixel_combine = (1 << 8)                                          \
1917
1918 #else
1919
1920 #define render_scanline_obj_extra_variables_normal(bg_type)                   \
1921   u16 *palette = palette_ram_converted + 256                                  \
1922
1923 #endif
1924
1925
1926 #define render_scanline_obj_extra_variables_color()                           \
1927   u32 dest;                                                                   \
1928   u32 pixel_combine = color_combine_mask(4) | (1 << 8)                        \
1929
1930 #define render_scanline_obj_extra_variables_alpha_obj(map_space)              \
1931   render_scanline_obj_extra_variables_color();                                \
1932   if((pixel_combine & 0x00000200) == 0)                                       \
1933   {                                                                           \
1934     render_scanline_obj_color32_##map_space(priority, start, end, scanline);  \
1935     return;                                                                   \
1936   }                                                                           \
1937
1938 #define render_scanline_obj_extra_variables_color16(map_space)                \
1939   render_scanline_obj_extra_variables_color()                                 \
1940
1941 #define render_scanline_obj_extra_variables_color32(map_space)                \
1942   render_scanline_obj_extra_variables_color()                                 \
1943
1944 #define render_scanline_obj_extra_variables_partial_alpha(map_space)          \
1945   render_scanline_obj_extra_variables_color();                                \
1946   u32 base_pixel_combine = pixel_combine                                      \
1947
1948 #define render_scanline_obj_extra_variables_copy(type)                        \
1949   u32 bldcnt = io_registers[REG_BLDCNT];                                      \
1950   u32 dispcnt = io_registers[REG_DISPCNT];                                    \
1951   u32 obj_enable = io_registers[REG_WINOUT] >> 8;                             \
1952   render_scanline_layer_functions_##type();                                   \
1953   u32 copy_start, copy_end;                                                   \
1954   u16 copy_buffer[240];                                                       \
1955   u16 *copy_ptr                                                               \
1956
1957 #define render_scanline_obj_extra_variables_copy_tile(map_space)              \
1958   render_scanline_obj_extra_variables_copy(tile)                              \
1959
1960 #define render_scanline_obj_extra_variables_copy_bitmap(map_space)            \
1961   render_scanline_obj_extra_variables_copy(bitmap)                            \
1962
1963
1964 #define render_scanline_obj_main(combine_op, alpha_op, map_space)             \
1965   if(obj_attribute_0 & 0x100)                                                 \
1966   {                                                                           \
1967     if((obj_attribute_0 >> 13) & 0x01)                                        \
1968     {                                                                         \
1969       obj_render_affine(combine_op, 8bpp, alpha_op, map_space);               \
1970     }                                                                         \
1971     else                                                                      \
1972     {                                                                         \
1973       obj_render_affine(combine_op, 4bpp, alpha_op, map_space);               \
1974     }                                                                         \
1975   }                                                                           \
1976   else                                                                        \
1977   {                                                                           \
1978     vertical_offset = vcount - obj_y;                                         \
1979                                                                               \
1980     if((obj_attribute_1 >> 13) & 0x01)                                        \
1981       vertical_offset = obj_height - vertical_offset - 1;                     \
1982                                                                               \
1983     switch(((obj_attribute_0 >> 12) & 0x02) |                                 \
1984      ((obj_attribute_1 >> 12) & 0x01))                                        \
1985     {                                                                         \
1986       case 0x0:                                                               \
1987         obj_render(combine_op, 4bpp, alpha_op, map_space, noflip);            \
1988         break;                                                                \
1989                                                                               \
1990       case 0x1:                                                               \
1991         obj_render(combine_op, 4bpp, alpha_op, map_space, flip);              \
1992         break;                                                                \
1993                                                                               \
1994       case 0x2:                                                               \
1995         obj_render(combine_op, 8bpp, alpha_op, map_space, noflip);            \
1996         break;                                                                \
1997                                                                               \
1998       case 0x3:                                                               \
1999         obj_render(combine_op, 8bpp, alpha_op, map_space, flip);              \
2000         break;                                                                \
2001     }                                                                         \
2002   }                                                                           \
2003
2004 #define render_scanline_obj_no_partial_alpha(combine_op, alpha_op, map_space) \
2005   render_scanline_obj_main(combine_op, alpha_op, map_space)                   \
2006
2007 #define render_scanline_obj_partial_alpha(combine_op, alpha_op, map_space)    \
2008   if((obj_attribute_0 >> 10) & 0x03)                                          \
2009   {                                                                           \
2010     pixel_combine = 0x00000300;                                               \
2011     render_scanline_obj_main(combine_op, alpha_obj, map_space);               \
2012   }                                                                           \
2013   else                                                                        \
2014   {                                                                           \
2015     pixel_combine = base_pixel_combine;                                       \
2016     render_scanline_obj_main(combine_op, color32, map_space);                 \
2017   }                                                                           \
2018
2019 #define render_scanline_obj_prologue_transparent(alpha_op)                    \
2020
2021 #define render_scanline_obj_prologue_copy_body(type)                          \
2022   copy_start = obj_x;                                                         \
2023   if(obj_attribute_0 & 0x200)                                                 \
2024     copy_end = obj_x + (obj_width * 2);                                       \
2025   else                                                                        \
2026     copy_end = obj_x + obj_width;                                             \
2027                                                                               \
2028   if(copy_start < start)                                                      \
2029     copy_start = start;                                                       \
2030   if(copy_end > end)                                                          \
2031     copy_end = end;                                                           \
2032                                                                               \
2033   if((copy_start < end) && (copy_end > start))                                \
2034   {                                                                           \
2035     render_scanline_conditional_##type(copy_start, copy_end, copy_buffer,     \
2036      obj_enable, dispcnt, bldcnt, layer_renderers);                           \
2037     copy_ptr = copy_buffer + copy_start;                                      \
2038   }                                                                           \
2039   else                                                                        \
2040   {                                                                           \
2041     continue;                                                                 \
2042   }                                                                           \
2043
2044 #define render_scanline_obj_prologue_copy_tile()                              \
2045   render_scanline_obj_prologue_copy_body(tile)                                \
2046
2047 #define render_scanline_obj_prologue_copy_bitmap()                            \
2048   render_scanline_obj_prologue_copy_body(bitmap)                              \
2049
2050 #define render_scanline_obj_prologue_copy(alpha_op)                           \
2051   render_scanline_obj_prologue_##alpha_op()                                   \
2052
2053
2054 #define render_scanline_obj_builder(combine_op, alpha_op, map_space,          \
2055  partial_alpha_op)                                                            \
2056 void render_scanline_obj_##alpha_op##_##map_space(u32 priority,               \
2057  u32 start, u32 end, render_scanline_dest_##alpha_op *scanline)               \
2058 {                                                                             \
2059   render_scanline_obj_extra_variables_##alpha_op(map_space);                  \
2060   s32 obj_num, i;                                                             \
2061   s32 obj_x, obj_y;                                                           \
2062   s32 obj_size;                                                               \
2063   s32 obj_width, obj_height;                                                  \
2064   u32 obj_attribute_0, obj_attribute_1, obj_attribute_2;                      \
2065   s32 vcount = io_registers[REG_VCOUNT];                                      \
2066   u32 tile_run;                                                               \
2067   u32 current_pixels;                                                         \
2068   u32 current_pixel;                                                          \
2069   u32 current_palette;                                                        \
2070   u32 vertical_offset;                                                        \
2071   u32 partial_tile_run, partial_tile_offset;                                  \
2072   u32 pixel_run;                                                              \
2073   u16 *oam_ptr;                                                               \
2074   render_scanline_dest_##alpha_op *dest_ptr;                                  \
2075   u8 *tile_base = vram + 0x10000;                                             \
2076   u8 *tile_ptr;                                                               \
2077   u32 obj_count = obj_priority_count[priority][vcount];                       \
2078   u8 *obj_list = obj_priority_list[priority][vcount];                         \
2079                                                                               \
2080   for(obj_num = 0; obj_num < obj_count; obj_num++)                            \
2081   {                                                                           \
2082     oam_ptr = oam_ram + (obj_list[obj_num] * 4);                              \
2083     obj_attribute_0 = oam_ptr[0];                                             \
2084     obj_attribute_1 = oam_ptr[1];                                             \
2085     obj_attribute_2 = oam_ptr[2];                                             \
2086     obj_size = ((obj_attribute_0 >> 12) & 0x0C) | (obj_attribute_1 >> 14);    \
2087                                                                               \
2088     obj_x = (s32)(obj_attribute_1 << 23) >> 23;                               \
2089     obj_width = obj_width_table[obj_size];                                    \
2090                                                                               \
2091     render_scanline_obj_prologue_##combine_op(alpha_op);                      \
2092                                                                               \
2093     obj_y = obj_attribute_0 & 0xFF;                                           \
2094                                                                               \
2095     if(obj_y > 160)                                                           \
2096       obj_y -= 256;                                                           \
2097                                                                               \
2098     obj_height = obj_height_table[obj_size];                                  \
2099     render_scanline_obj_##partial_alpha_op(combine_op, alpha_op, map_space);  \
2100   }                                                                           \
2101 }                                                                             \
2102
2103 render_scanline_obj_builder(transparent, normal, 1D, no_partial_alpha);
2104 render_scanline_obj_builder(transparent, normal, 2D, no_partial_alpha);
2105 render_scanline_obj_builder(transparent, color16, 1D, no_partial_alpha);
2106 render_scanline_obj_builder(transparent, color16, 2D, no_partial_alpha);
2107 render_scanline_obj_builder(transparent, color32, 1D, no_partial_alpha);
2108 render_scanline_obj_builder(transparent, color32, 2D, no_partial_alpha);
2109 render_scanline_obj_builder(transparent, alpha_obj, 1D, no_partial_alpha);
2110 render_scanline_obj_builder(transparent, alpha_obj, 2D, no_partial_alpha);
2111 render_scanline_obj_builder(transparent, partial_alpha, 1D, partial_alpha);
2112 render_scanline_obj_builder(transparent, partial_alpha, 2D, partial_alpha);
2113 render_scanline_obj_builder(copy, copy_tile, 1D, no_partial_alpha);
2114 render_scanline_obj_builder(copy, copy_tile, 2D, no_partial_alpha);
2115 render_scanline_obj_builder(copy, copy_bitmap, 1D, no_partial_alpha);
2116 render_scanline_obj_builder(copy, copy_bitmap, 2D, no_partial_alpha);
2117
2118
2119
2120 void order_obj(u32 video_mode)
2121 {
2122   s32 obj_num, priority, row;
2123   s32 obj_x, obj_y;
2124   s32 obj_size, obj_mode;
2125   s32 obj_width, obj_height;
2126   u32 obj_priority;
2127   u32 obj_attribute_0, obj_attribute_1, obj_attribute_2;
2128   s32 vcount = io_registers[REG_VCOUNT];
2129   u32 partial_tile_run, partial_tile_offset;
2130   u32 pixel_run;
2131   u32 current_count;
2132   u16 *oam_ptr = oam_ram + 508;
2133   u16 *dest_ptr;
2134   u8 *tile_base = vram + 0x10000;
2135   u8 *tile_ptr;
2136
2137   for(priority = 0; priority < 5; priority++)
2138   {
2139     for(row = 0; row < 160; row++)
2140     {
2141       obj_priority_count[priority][row] = 0;
2142     }
2143   }
2144
2145   for(row = 0; row < 160; row++)
2146   {
2147     obj_alpha_count[row] = 0;
2148   }
2149
2150   for(obj_num = 127; obj_num >= 0; obj_num--, oam_ptr -= 4)
2151   {
2152     obj_attribute_0 = oam_ptr[0];
2153     obj_attribute_2 = oam_ptr[2];
2154     obj_size = obj_attribute_0 & 0xC000;
2155     obj_priority = (obj_attribute_2 >> 10) & 0x03;
2156     obj_mode = (obj_attribute_0 >> 10) & 0x03;
2157
2158     if(((obj_attribute_0 & 0x0300) != 0x0200) && (obj_size != 0xC000) &&
2159      (obj_mode != 3) && ((video_mode < 3) ||
2160      ((obj_attribute_2 & 0x3FF) >= 512)))
2161     {
2162       obj_y = obj_attribute_0 & 0xFF;
2163       if(obj_y > 160)
2164         obj_y -= 256;
2165
2166       obj_attribute_1 = oam_ptr[1];
2167       obj_size = ((obj_size >> 12) & 0x0C) | (obj_attribute_1 >> 14);
2168       obj_height = obj_height_table[obj_size];
2169       obj_width = obj_width_table[obj_size];
2170
2171       if(obj_attribute_0 & 0x200)
2172       {
2173         obj_height *= 2;
2174         obj_width *= 2;
2175       }
2176
2177       if(((obj_y + obj_height) > 0) && (obj_y < 160))
2178       {
2179         obj_x = (s32)(obj_attribute_1 << 23) >> 23;
2180
2181         if(((obj_x + obj_width) > 0) && (obj_x < 240))
2182         {
2183           if(obj_y < 0)
2184           {
2185             obj_height += obj_y;
2186             obj_y = 0;
2187           }
2188
2189           if((obj_y + obj_height) >= 160)
2190           {
2191             obj_height = 160 - obj_y;
2192           }
2193
2194           if(obj_mode == 1)
2195           {
2196             for(row = obj_y; row < obj_y + obj_height; row++)
2197             {
2198               current_count = obj_priority_count[obj_priority][row];
2199               obj_priority_list[obj_priority][row][current_count] = obj_num;
2200               obj_priority_count[obj_priority][row] = current_count + 1;
2201               obj_alpha_count[row]++;
2202             }
2203           }
2204           else
2205           {
2206             if(obj_mode == 2)
2207             {
2208               obj_priority = 4;
2209             }
2210
2211             for(row = obj_y; row < obj_y + obj_height; row++)
2212             {
2213               current_count = obj_priority_count[obj_priority][row];
2214               obj_priority_list[obj_priority][row][current_count] = obj_num;
2215               obj_priority_count[obj_priority][row] = current_count + 1;
2216             }
2217           }
2218         }
2219       }
2220     }
2221   }
2222 }
2223
2224 u32 layer_order[16];
2225 u32 layer_count;
2226
2227 u32 order_layers(u32 layer_flags)
2228 {
2229   s32 priority, layer_number;
2230   layer_count = 0;
2231
2232   for(priority = 3; priority >= 0; priority--)
2233   {
2234     for(layer_number = 3; layer_number >= 0; layer_number--)
2235     {
2236       if(((layer_flags >> layer_number) & 1) &&
2237        ((io_registers[REG_BG0CNT + layer_number] & 0x03) == priority))
2238       {
2239         layer_order[layer_count] = layer_number;
2240         layer_count++;
2241       }
2242     }
2243
2244     if((obj_priority_count[priority][io_registers[REG_VCOUNT]] > 0)
2245      && (layer_flags & 0x10))
2246     {
2247       layer_order[layer_count] = priority | 0x04;
2248       layer_count++;
2249     }
2250   }
2251 }
2252
2253 #define fill_line(_start, _end)                                               \
2254   u32 i;                                                                      \
2255                                                                               \
2256   for(i = _start; i < _end; i++)                                              \
2257   {                                                                           \
2258     dest_ptr[i] = color;                                                      \
2259   }                                                                           \
2260
2261
2262 #define fill_line_color_normal()                                              \
2263   color = palette_ram_converted[color]                                        \
2264
2265 #define fill_line_color_alpha()                                               \
2266
2267 #define fill_line_color_color16()                                             \
2268
2269 #define fill_line_color_color32()                                             \
2270
2271 #define fill_line_builder(type)                                               \
2272 void fill_line_##type(u16 color, render_scanline_dest_##type *dest_ptr,       \
2273  u32 start, u32 end)                                                          \
2274 {                                                                             \
2275   fill_line_color_##type();                                                   \
2276   fill_line(start, end);                                                      \
2277 }                                                                             \
2278
2279 fill_line_builder(normal);
2280 fill_line_builder(alpha);
2281 fill_line_builder(color16);
2282 fill_line_builder(color32);
2283
2284
2285 // Alpha blend two pixels (pixel_top and pixel_bottom).
2286
2287 #define blend_pixel()                                                         \
2288   pixel_bottom = palette_ram_converted[(pixel_pair >> 16) & 0x1FF];           \
2289   pixel_bottom = (pixel_bottom | (pixel_bottom << 16)) & 0x07E0F81F;          \
2290   pixel_top = ((pixel_top * blend_a) + (pixel_bottom * blend_b)) >> 4         \
2291
2292
2293 // Alpha blend two pixels, allowing for saturation (individual channels > 31).
2294 // The operation is optimized towards saturation not occuring.
2295
2296 #define blend_saturate_pixel()                                                \
2297   pixel_bottom = palette_ram_converted[(pixel_pair >> 16) & 0x1FF];           \
2298   pixel_bottom = (pixel_bottom | (pixel_bottom << 16)) & 0x07E0F81F;          \
2299   pixel_top = ((pixel_top * blend_a) + (pixel_bottom * blend_b)) >> 4;        \
2300   if(pixel_top & 0x08010020)                                                  \
2301   {                                                                           \
2302     if(pixel_top & 0x08000000)                                                \
2303       pixel_top |= 0x07E00000;                                                \
2304                                                                               \
2305     if(pixel_top & 0x00010000)                                                \
2306       pixel_top |= 0x0000F800;                                                \
2307                                                                               \
2308     if(pixel_top & 0x00000020)                                                \
2309       pixel_top |= 0x0000001F;                                                \
2310   }                                                                           \
2311
2312 #define brighten_pixel()                                                      \
2313   pixel_top = upper + ((pixel_top * blend) >> 4);                             \
2314
2315 #define darken_pixel()                                                        \
2316   pixel_top = (pixel_top * blend) >> 4;                                       \
2317
2318 #define effect_condition_alpha                                                \
2319   ((pixel_pair & 0x04000200) == 0x04000200)                                   \
2320
2321 #define effect_condition_fade(pixel_source)                                   \
2322   ((pixel_source & 0x00000200) == 0x00000200)                                 \
2323
2324 #define expand_pixel_no_dest(expand_type, pixel_source)                       \
2325   pixel_top = (pixel_top | (pixel_top << 16)) & 0x07E0F81F;                   \
2326   expand_type##_pixel();                                                      \
2327   pixel_top &= 0x07E0F81F;                                                    \
2328   pixel_top = (pixel_top >> 16) | pixel_top                                   \
2329
2330 #define expand_pixel(expand_type, pixel_source)                               \
2331   pixel_top = palette_ram_converted[pixel_source & 0x1FF];                    \
2332   expand_pixel_no_dest(expand_type, pixel_source);                            \
2333   *screen_dest_ptr = pixel_top                                                \
2334
2335 #define expand_loop(expand_type, effect_condition, pixel_source)              \
2336   screen_src_ptr += start;                                                    \
2337   screen_dest_ptr += start;                                                   \
2338                                                                               \
2339   end -= start;                                                               \
2340                                                                               \
2341   for(i = 0; i < end; i++)                                                    \
2342   {                                                                           \
2343     pixel_source = *screen_src_ptr;                                           \
2344     if(effect_condition)                                                      \
2345     {                                                                         \
2346       expand_pixel(expand_type, pixel_source);                                \
2347     }                                                                         \
2348     else                                                                      \
2349     {                                                                         \
2350       *screen_dest_ptr =                                                      \
2351        palette_ram_converted[pixel_source & 0x1FF];                           \
2352     }                                                                         \
2353                                                                               \
2354     screen_src_ptr++;                                                         \
2355     screen_dest_ptr++;                                                        \
2356   }                                                                           \
2357
2358
2359 #define expand_loop_partial_alpha(alpha_expand, expand_type)                  \
2360   screen_src_ptr += start;                                                    \
2361   screen_dest_ptr += start;                                                   \
2362                                                                               \
2363   end -= start;                                                               \
2364                                                                               \
2365   for(i = 0; i < end; i++)                                                    \
2366   {                                                                           \
2367     pixel_pair = *screen_src_ptr;                                             \
2368     if(effect_condition_fade(pixel_pair))                                     \
2369     {                                                                         \
2370       if(effect_condition_alpha)                                              \
2371       {                                                                       \
2372         expand_pixel(alpha_expand, pixel_pair);                               \
2373       }                                                                       \
2374       else                                                                    \
2375       {                                                                       \
2376         expand_pixel(expand_type, pixel_pair);                                \
2377       }                                                                       \
2378     }                                                                         \
2379     else                                                                      \
2380     {                                                                         \
2381       *screen_dest_ptr =                                                      \
2382        palette_ram_converted[pixel_pair & 0x1FF];                             \
2383     }                                                                         \
2384                                                                               \
2385     screen_src_ptr++;                                                         \
2386     screen_dest_ptr++;                                                        \
2387   }                                                                           \
2388
2389
2390 #define expand_partial_alpha(expand_type)                                     \
2391   if((blend_a + blend_b) > 16)                                                \
2392   {                                                                           \
2393     expand_loop_partial_alpha(blend_saturate, expand_type);                   \
2394   }                                                                           \
2395   else                                                                        \
2396   {                                                                           \
2397     expand_loop_partial_alpha(blend, expand_type);                            \
2398   }                                                                           \
2399
2400
2401
2402 // Blend top two pixels of scanline with each other.
2403
2404 #ifdef RENDER_COLOR16_NORMAL
2405
2406 #ifndef ARM_ARCH
2407
2408 void expand_normal(u16 *screen_ptr, u32 start, u32 end)
2409 {
2410   u32 i, pixel_source;
2411   screen_ptr += start;
2412
2413   return;
2414
2415   end -= start;
2416
2417   for(i = 0; i < end; i++)
2418   {
2419     pixel_source = *screen_ptr;
2420     *screen_ptr = palette_ram_converted[pixel_source];
2421
2422     screen_ptr++;
2423   }
2424 }
2425
2426 #endif
2427
2428 #else
2429
2430 #define expand_normal(screen_ptr, start, end)
2431
2432 #endif
2433
2434
2435 void expand_blend(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2436  u32 start, u32 end);
2437
2438 #ifndef ARM_ARCH
2439
2440 void expand_blend(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2441  u32 start, u32 end)
2442 {
2443   u32 pixel_pair;
2444   u32 pixel_top, pixel_bottom;
2445   u32 bldalpha = io_registers[REG_BLDALPHA];
2446   u32 blend_a = bldalpha & 0x1F;
2447   u32 blend_b = (bldalpha >> 8) & 0x1F;
2448   u32 i;
2449
2450   if(blend_a > 16)
2451     blend_a = 16;
2452
2453   if(blend_b > 16)
2454     blend_b = 16;
2455
2456   // The individual colors can saturate over 31, this should be taken
2457   // care of in an alternate pass as it incurs a huge additional speedhit.
2458   if((blend_a + blend_b) > 16)
2459   {
2460     expand_loop(blend_saturate, effect_condition_alpha, pixel_pair);
2461   }
2462   else
2463   {
2464     expand_loop(blend, effect_condition_alpha, pixel_pair);
2465   }
2466 }
2467
2468 #endif
2469
2470 // Blend scanline with white.
2471
2472 void expand_darken(u16 *screen_src_ptr, u16 *screen_dest_ptr,
2473  u32 start, u32 end)
2474 {
2475   u32 pixel_top;
2476   s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);
2477   u32 i;
2478
2479   if(blend < 0)
2480     blend = 0;
2481
2482   expand_loop(darken, effect_condition_fade(pixel_top), pixel_top);
2483 }
2484
2485
2486 // Blend scanline with black.
2487
2488 void expand_brighten(u16 *screen_src_ptr, u16 *screen_dest_ptr,
2489  u32 start, u32 end)
2490 {
2491   u32 pixel_top;
2492   u32 blend = io_registers[REG_BLDY] & 0x1F;
2493   u32 upper;
2494   u32 i;
2495
2496   if(blend > 16)
2497     blend = 16;
2498
2499   upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;
2500   blend = 16 - blend;
2501
2502   expand_loop(brighten, effect_condition_fade(pixel_top), pixel_top);
2503
2504 }
2505
2506
2507 // Expand scanline such that if both top and bottom pass it's alpha,
2508 // if only top passes it's as specified, and if neither pass it's normal.
2509
2510 void expand_darken_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2511  u32 start, u32 end)
2512 {
2513   s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);
2514   u32 pixel_pair;
2515   u32 pixel_top, pixel_bottom;
2516   u32 bldalpha = io_registers[REG_BLDALPHA];
2517   u32 blend_a = bldalpha & 0x1F;
2518   u32 blend_b = (bldalpha >> 8) & 0x1F;
2519   u32 i;
2520
2521   if(blend < 0)
2522     blend = 0;
2523
2524   if(blend_a > 16)
2525     blend_a = 16;
2526
2527   if(blend_b > 16)
2528     blend_b = 16;
2529
2530   expand_partial_alpha(darken);
2531 }
2532
2533
2534 void expand_brighten_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2535  u32 start, u32 end)
2536 {
2537   s32 blend = io_registers[REG_BLDY] & 0x1F;
2538   u32 pixel_pair;
2539   u32 pixel_top, pixel_bottom;
2540   u32 bldalpha = io_registers[REG_BLDALPHA];
2541   u32 blend_a = bldalpha & 0x1F;
2542   u32 blend_b = (bldalpha >> 8) & 0x1F;
2543   u32 upper;
2544   u32 i;
2545
2546   if(blend > 16)
2547     blend = 16;
2548
2549   upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;
2550   blend = 16 - blend;
2551
2552   if(blend_a > 16)
2553     blend_a = 16;
2554
2555   if(blend_b > 16)
2556     blend_b = 16;
2557
2558   expand_partial_alpha(brighten);
2559 }
2560
2561
2562 // Render an OBJ layer from start to end, depending on the type (1D or 2D)
2563 // stored in dispcnt.
2564
2565 #define render_obj_layer(type, dest, _start, _end)                            \
2566   current_layer &= ~0x04;                                                     \
2567   if(dispcnt & 0x40)                                                          \
2568     render_scanline_obj_##type##_1D(current_layer, _start, _end, dest);       \
2569   else                                                                        \
2570     render_scanline_obj_##type##_2D(current_layer, _start, _end, dest)        \
2571
2572
2573 // Render a target all the way with the background color as taken from the
2574 // palette.
2575
2576 #define fill_line_bg(type, dest, _start, _end)                                \
2577   fill_line_##type(0, dest, _start, _end)                                     \
2578
2579
2580 // Render all layers as they appear in the layer order.
2581
2582 #define render_layers(tile_alpha, obj_alpha, dest)                            \
2583 {                                                                             \
2584   current_layer = layer_order[0];                                             \
2585   if(current_layer & 0x04)                                                    \
2586   {                                                                           \
2587     /* If the first one is OBJ render the background then render it. */       \
2588     fill_line_bg(tile_alpha, dest, 0, 240);                                   \
2589     render_obj_layer(obj_alpha, dest, 0, 240);                                \
2590   }                                                                           \
2591   else                                                                        \
2592   {                                                                           \
2593     /* Otherwise render a base layer. */                                      \
2594     layer_renderers[current_layer].tile_alpha##_render_base(current_layer,    \
2595      0, 240, dest);                                                           \
2596   }                                                                           \
2597                                                                               \
2598   /* Render the rest of the layers. */                                        \
2599   for(layer_order_pos = 1; layer_order_pos < layer_count; layer_order_pos++)  \
2600   {                                                                           \
2601     current_layer = layer_order[layer_order_pos];                             \
2602     if(current_layer & 0x04)                                                  \
2603     {                                                                         \
2604       render_obj_layer(obj_alpha, dest, 0, 240);                              \
2605     }                                                                         \
2606     else                                                                      \
2607     {                                                                         \
2608       layer_renderers[current_layer].                                         \
2609        tile_alpha##_render_transparent(current_layer, 0, 240, dest);          \
2610     }                                                                         \
2611   }                                                                           \
2612 }                                                                             \
2613
2614 #define render_condition_alpha                                                \
2615   (((io_registers[REG_BLDALPHA] & 0x1F1F) != 0x001F) &&                       \
2616    ((io_registers[REG_BLDCNT] & 0x3F) != 0) &&                                \
2617    ((io_registers[REG_BLDCNT] & 0x3F00) != 0))                                \
2618
2619 #define render_condition_fade                                                 \
2620   (((io_registers[REG_BLDY] & 0x1F) != 0) &&                                  \
2621    ((io_registers[REG_BLDCNT] & 0x3F) != 0))                                  \
2622
2623 #define render_layers_color_effect(renderer, layer_condition,                 \
2624  alpha_condition, fade_condition, _start, _end)                               \
2625 {                                                                             \
2626   if(layer_condition)                                                         \
2627   {                                                                           \
2628     if(obj_alpha_count[io_registers[REG_VCOUNT]] > 0)                         \
2629     {                                                                         \
2630       /* Render based on special effects mode. */                             \
2631       u32 screen_buffer[240];                                                 \
2632       switch((bldcnt >> 6) & 0x03)                                            \
2633       {                                                                       \
2634         /* Alpha blend */                                                     \
2635         case 0x01:                                                            \
2636         {                                                                     \
2637           if(alpha_condition)                                                 \
2638           {                                                                   \
2639             renderer(alpha, alpha_obj, screen_buffer);                        \
2640             expand_blend(screen_buffer, scanline, _start, _end);              \
2641             return;                                                           \
2642           }                                                                   \
2643           break;                                                              \
2644         }                                                                     \
2645                                                                               \
2646         /* Fade to white */                                                   \
2647         case 0x02:                                                            \
2648         {                                                                     \
2649           if(fade_condition)                                                  \
2650           {                                                                   \
2651             renderer(color32, partial_alpha, screen_buffer);                  \
2652             expand_brighten_partial_alpha(screen_buffer, scanline,            \
2653              _start, _end);                                                   \
2654             return;                                                           \
2655           }                                                                   \
2656           break;                                                              \
2657         }                                                                     \
2658                                                                               \
2659         /* Fade to black */                                                   \
2660         case 0x03:                                                            \
2661         {                                                                     \
2662           if(fade_condition)                                                  \
2663           {                                                                   \
2664             renderer(color32, partial_alpha, screen_buffer);                  \
2665             expand_darken_partial_alpha(screen_buffer, scanline,              \
2666              _start, _end);                                                   \
2667             return;                                                           \
2668           }                                                                   \
2669           break;                                                              \
2670         }                                                                     \
2671       }                                                                       \
2672                                                                               \
2673       renderer(color32, partial_alpha, screen_buffer);                        \
2674       expand_blend(screen_buffer, scanline, _start, _end);                    \
2675     }                                                                         \
2676     else                                                                      \
2677     {                                                                         \
2678       /* Render based on special effects mode. */                             \
2679       switch((bldcnt >> 6) & 0x03)                                            \
2680       {                                                                       \
2681         /* Alpha blend */                                                     \
2682         case 0x01:                                                            \
2683         {                                                                     \
2684           if(alpha_condition)                                                 \
2685           {                                                                   \
2686             u32 screen_buffer[240];                                           \
2687             renderer(alpha, alpha_obj, screen_buffer);                        \
2688             expand_blend(screen_buffer, scanline, _start, _end);              \
2689             return;                                                           \
2690           }                                                                   \
2691           break;                                                              \
2692         }                                                                     \
2693                                                                               \
2694         /* Fade to white */                                                   \
2695         case 0x02:                                                            \
2696         {                                                                     \
2697           if(fade_condition)                                                  \
2698           {                                                                   \
2699             renderer(color16, color16, scanline);                             \
2700             expand_brighten(scanline, scanline, _start, _end);                \
2701             return;                                                           \
2702           }                                                                   \
2703           break;                                                              \
2704         }                                                                     \
2705                                                                               \
2706         /* Fade to black */                                                   \
2707         case 0x03:                                                            \
2708         {                                                                     \
2709           if(fade_condition)                                                  \
2710           {                                                                   \
2711             renderer(color16, color16, scanline);                             \
2712             expand_darken(scanline, scanline, _start, _end);                  \
2713             return;                                                           \
2714           }                                                                   \
2715           break;                                                              \
2716         }                                                                     \
2717       }                                                                       \
2718                                                                               \
2719       renderer(normal, normal, scanline);                                     \
2720       expand_normal(scanline, _start, _end);                                  \
2721     }                                                                         \
2722   }                                                                           \
2723   else                                                                        \
2724   {                                                                           \
2725     u32 pixel_top = palette_ram_converted[0];                                 \
2726     switch((bldcnt >> 6) & 0x03)                                              \
2727     {                                                                         \
2728       /* Fade to white */                                                     \
2729       case 0x02:                                                              \
2730       {                                                                       \
2731         if(color_combine_mask_a(5))                                           \
2732         {                                                                     \
2733           u32 blend = io_registers[REG_BLDY] & 0x1F;                          \
2734           u32 upper;                                                          \
2735                                                                               \
2736           if(blend > 16)                                                      \
2737             blend = 16;                                                       \
2738                                                                               \
2739           upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;                   \
2740           blend = 16 - blend;                                                 \
2741                                                                               \
2742           expand_pixel_no_dest(brighten, pixel_top);                          \
2743         }                                                                     \
2744         break;                                                                \
2745       }                                                                       \
2746                                                                               \
2747       /* Fade to black */                                                     \
2748       case 0x03:                                                              \
2749       {                                                                       \
2750         if(color_combine_mask_a(5))                                           \
2751         {                                                                     \
2752           s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);                   \
2753                                                                               \
2754           if(blend < 0)                                                       \
2755             blend = 0;                                                        \
2756                                                                               \
2757           expand_pixel_no_dest(darken, pixel_top);                            \
2758         }                                                                     \
2759         break;                                                                \
2760       }                                                                       \
2761     }                                                                         \
2762     fill_line_color16(pixel_top, scanline, _start, _end);                     \
2763   }                                                                           \
2764 }                                                                             \
2765
2766
2767 // Renders an entire scanline from 0 to 240, based on current color mode.
2768
2769 void render_scanline_tile(u16 *scanline, u32 dispcnt)
2770 {
2771   u32 current_layer;
2772   u32 layer_order_pos;
2773   u32 bldcnt = io_registers[REG_BLDCNT];
2774   render_scanline_layer_functions_tile();
2775
2776   render_layers_color_effect(render_layers, layer_count,
2777    render_condition_alpha, render_condition_fade, 0, 240);
2778 }
2779
2780 void render_scanline_bitmap(u16 *scanline, u32 dispcnt)
2781 {
2782   u32 bldcnt = io_registers[REG_BLDCNT];
2783   render_scanline_layer_functions_bitmap();
2784   u32 current_layer;
2785   u32 layer_order_pos;
2786
2787   fill_line_bg(normal, scanline, 0, 240);
2788
2789   for(layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++)
2790   {
2791     current_layer = layer_order[layer_order_pos];
2792     if(current_layer & 0x04)
2793     {
2794       render_obj_layer(normal, scanline, 0, 240);
2795     }
2796     else
2797     {
2798       layer_renderers->normal_render(0, 240, scanline);
2799     }
2800   }
2801 }
2802
2803 // Render layers from start to end based on if they're allowed in the
2804 // enable flags.
2805
2806 #define render_layers_conditional(tile_alpha, obj_alpha, dest)                \
2807 {                                                                             \
2808   __label__ skip;                                                             \
2809   current_layer = layer_order[layer_order_pos];                               \
2810   /* If OBJ aren't enabled skip to the first non-OBJ layer */                 \
2811   if(!(enable_flags & 0x10))                                                  \
2812   {                                                                           \
2813     while((current_layer & 0x04) || !((1 << current_layer) & enable_flags))   \
2814     {                                                                         \
2815       layer_order_pos++;                                                      \
2816       current_layer = layer_order[layer_order_pos];                           \
2817                                                                               \
2818       /* Oops, ran out of layers, render the background. */                   \
2819       if(layer_order_pos == layer_count)                                      \
2820       {                                                                       \
2821         fill_line_bg(tile_alpha, dest, start, end);                           \
2822         goto skip;                                                            \
2823       }                                                                       \
2824     }                                                                         \
2825                                                                               \
2826     /* Render the first valid layer */                                        \
2827     layer_renderers[current_layer].tile_alpha##_render_base(current_layer,    \
2828      start, end, dest);                                                       \
2829                                                                               \
2830     layer_order_pos++;                                                        \
2831                                                                               \
2832     /* Render the rest of the layers if active, skipping OBJ ones. */         \
2833     for(; layer_order_pos < layer_count; layer_order_pos++)                   \
2834     {                                                                         \
2835       current_layer = layer_order[layer_order_pos];                           \
2836       if(!(current_layer & 0x04) && ((1 << current_layer) & enable_flags))    \
2837       {                                                                       \
2838         layer_renderers[current_layer].                                       \
2839          tile_alpha##_render_transparent(current_layer, start, end, dest);    \
2840       }                                                                       \
2841     }                                                                         \
2842   }                                                                           \
2843   else                                                                        \
2844   {                                                                           \
2845     /* Find the first active layer, skip all of the inactive ones */          \
2846     while(!((current_layer & 0x04) || ((1 << current_layer) & enable_flags))) \
2847     {                                                                         \
2848       layer_order_pos++;                                                      \
2849       current_layer = layer_order[layer_order_pos];                           \
2850                                                                               \
2851       /* Oops, ran out of layers, render the background. */                   \
2852       if(layer_order_pos == layer_count)                                      \
2853       {                                                                       \
2854         fill_line_bg(tile_alpha, dest, start, end);                           \
2855         goto skip;                                                            \
2856       }                                                                       \
2857     }                                                                         \
2858                                                                               \
2859     if(current_layer & 0x04)                                                  \
2860     {                                                                         \
2861       /* If the first one is OBJ render the background then render it. */     \
2862       fill_line_bg(tile_alpha, dest, start, end);                             \
2863       render_obj_layer(obj_alpha, dest, start, end);                          \
2864     }                                                                         \
2865     else                                                                      \
2866     {                                                                         \
2867       /* Otherwise render a base layer. */                                    \
2868       layer_renderers[current_layer].                                         \
2869        tile_alpha##_render_base(current_layer, start, end, dest);             \
2870     }                                                                         \
2871                                                                               \
2872     layer_order_pos++;                                                        \
2873                                                                               \
2874     /* Render the rest of the layers. */                                      \
2875     for(; layer_order_pos < layer_count; layer_order_pos++)                   \
2876     {                                                                         \
2877       current_layer = layer_order[layer_order_pos];                           \
2878       if(current_layer & 0x04)                                                \
2879       {                                                                       \
2880         render_obj_layer(obj_alpha, dest, start, end);                        \
2881       }                                                                       \
2882       else                                                                    \
2883       {                                                                       \
2884         if(enable_flags & (1 << current_layer))                               \
2885         {                                                                     \
2886           layer_renderers[current_layer].                                     \
2887            tile_alpha##_render_transparent(current_layer, start, end, dest);  \
2888         }                                                                     \
2889       }                                                                       \
2890     }                                                                         \
2891   }                                                                           \
2892                                                                               \
2893   skip:                                                                       \
2894     ;                                                                         \
2895 }                                                                             \
2896
2897
2898 // Render all of the BG and OBJ in a tiled scanline from start to end ONLY if
2899 // enable_flag allows that layer/OBJ. Also conditionally render color effects.
2900
2901 void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline,
2902  u32 enable_flags, u32 dispcnt, u32 bldcnt, tile_layer_render_struct
2903  *layer_renderers)
2904 {
2905   u32 current_layer;
2906   u32 layer_order_pos = 0;
2907
2908   render_layers_color_effect(render_layers_conditional,
2909    (layer_count && (enable_flags & 0x1F)),
2910    ((enable_flags & 0x20) && render_condition_alpha),
2911    ((enable_flags & 0x20) && render_condition_fade), start, end);
2912 }
2913
2914
2915 // Render the BG and OBJ in a bitmap scanline from start to end ONLY if
2916 // enable_flag allows that layer/OBJ. Also conditionally render color effects.
2917
2918 void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline,
2919  u32 enable_flags, u32 dispcnt, u32 bldcnt, bitmap_layer_render_struct
2920  *layer_renderers)
2921 {
2922   u32 current_layer;
2923   u32 layer_order_pos;
2924
2925   fill_line_bg(normal, scanline, start, end);
2926
2927   for(layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++)
2928   {
2929     current_layer = layer_order[layer_order_pos];
2930     if(current_layer & 0x04)
2931     {
2932       if(enable_flags & 0x10)
2933       {
2934         render_obj_layer(normal, scanline, start, end);
2935       }
2936     }
2937     else
2938     {
2939       if(enable_flags & 0x04)
2940         layer_renderers->normal_render(start, end, scanline);
2941     }
2942   }
2943 }
2944
2945
2946 #define window_x_coords(window_number)                                        \
2947   window_##window_number##_x1 =                                               \
2948    io_registers[REG_WIN##window_number##H] >> 8;                              \
2949   window_##window_number##_x2 =                                               \
2950    io_registers[REG_WIN##window_number##H] & 0xFF;                            \
2951   window_##window_number##_enable =                                           \
2952    (winin >> (window_number * 8)) & 0x3F;                                     \
2953                                                                               \
2954   if(window_##window_number##_x1 > 240)                                       \
2955     window_##window_number##_x1 = 240;                                        \
2956                                                                               \
2957   if(window_##window_number##_x2 > 240)                                       \
2958     window_##window_number##_x2 = 240                                         \
2959
2960 #define window_coords(window_number)                                          \
2961   u32 window_##window_number##_x1, window_##window_number##_x2;               \
2962   u32 window_##window_number##_y1, window_##window_number##_y2;               \
2963   u32 window_##window_number##_enable;                                        \
2964   window_##window_number##_y1 =                                               \
2965    io_registers[REG_WIN##window_number##V] >> 8;                              \
2966   window_##window_number##_y2 =                                               \
2967    io_registers[REG_WIN##window_number##V] & 0xFF;                            \
2968                                                                               \
2969   if(window_##window_number##_y1 > window_##window_number##_y2)               \
2970   {                                                                           \
2971     if((((vcount <= window_##window_number##_y2) ||                           \
2972      (vcount > window_##window_number##_y1)) ||                               \
2973      (window_##window_number##_y2 > 227)) &&                                  \
2974      (window_##window_number##_y1 <= 227))                                    \
2975     {                                                                         \
2976       window_x_coords(window_number);                                         \
2977     }                                                                         \
2978     else                                                                      \
2979     {                                                                         \
2980       window_##window_number##_x1 = 240;                                      \
2981       window_##window_number##_x2 = 240;                                      \
2982     }                                                                         \
2983   }                                                                           \
2984   else                                                                        \
2985   {                                                                           \
2986     if((((vcount >= window_##window_number##_y1) &&                           \
2987      (vcount < window_##window_number##_y2)) ||                               \
2988      (window_##window_number##_y2 > 227)) &&                                  \
2989      (window_##window_number##_y1 <= 227))                                    \
2990     {                                                                         \
2991       window_x_coords(window_number);                                         \
2992     }                                                                         \
2993     else                                                                      \
2994     {                                                                         \
2995       window_##window_number##_x1 = 240;                                      \
2996       window_##window_number##_x2 = 240;                                      \
2997     }                                                                         \
2998   }                                                                           \
2999
3000 #define render_window_segment(type, start, end, window_type)                  \
3001   if(start != end)                                                            \
3002   {                                                                           \
3003     render_scanline_conditional_##type(start, end, scanline,                  \
3004      window_##window_type##_enable, dispcnt, bldcnt, layer_renderers);        \
3005   }                                                                           \
3006
3007 #define render_window_segment_unequal(type, start, end, window_type)          \
3008   render_scanline_conditional_##type(start, end, scanline,                    \
3009    window_##window_type##_enable, dispcnt, bldcnt, layer_renderers)           \
3010
3011 #define render_window_segment_clip(type, clip_start, clip_end, start, end,    \
3012  window_type)                                                                 \
3013 {                                                                             \
3014   if(start != end)                                                            \
3015   {                                                                           \
3016     if(start < clip_start)                                                    \
3017     {                                                                         \
3018       if(end > clip_start)                                                    \
3019       {                                                                       \
3020         if(end > clip_end)                                                    \
3021         {                                                                     \
3022           render_window_segment_unequal(type, clip_start, clip_end,           \
3023            window_type);                                                      \
3024         }                                                                     \
3025         else                                                                  \
3026         {                                                                     \
3027           render_window_segment_unequal(type, clip_start, end, window_type);  \
3028         }                                                                     \
3029       }                                                                       \
3030     }                                                                         \
3031     else                                                                      \
3032                                                                               \
3033     if(end > clip_end)                                                        \
3034     {                                                                         \
3035       if(start < clip_end)                                                    \
3036         render_window_segment_unequal(type, start, clip_end, window_type);    \
3037     }                                                                         \
3038     else                                                                      \
3039     {                                                                         \
3040       render_window_segment_unequal(type, start, end, window_type);           \
3041     }                                                                         \
3042   }                                                                           \
3043 }                                                                             \
3044
3045 #define render_window_clip_1(type, start, end)                                \
3046   if(window_1_x1 != 240)                                                      \
3047   {                                                                           \
3048     if(window_1_x1 > window_1_x2)                                             \
3049     {                                                                         \
3050       render_window_segment_clip(type, start, end, 0, window_1_x2, 1);        \
3051       render_window_segment_clip(type, start, end, window_1_x2, window_1_x1,  \
3052        out);                                                                  \
3053       render_window_segment_clip(type, start, end, window_1_x1, 240, 1);      \
3054     }                                                                         \
3055     else                                                                      \
3056     {                                                                         \
3057       render_window_segment_clip(type, start, end, 0, window_1_x1, out);      \
3058       render_window_segment_clip(type, start, end, window_1_x1, window_1_x2,  \
3059        1);                                                                    \
3060       render_window_segment_clip(type, start, end, window_1_x2, 240, out);    \
3061     }                                                                         \
3062   }                                                                           \
3063   else                                                                        \
3064   {                                                                           \
3065     render_window_segment(type, start, end, out);                             \
3066   }                                                                           \
3067
3068 #define render_window_clip_obj(type, start, end);                             \
3069   render_window_segment(type, start, end, out);                               \
3070   if(dispcnt & 0x40)                                                          \
3071     render_scanline_obj_copy_##type##_1D(4, start, end, scanline);            \
3072   else                                                                        \
3073     render_scanline_obj_copy_##type##_2D(4, start, end, scanline)             \
3074
3075
3076 #define render_window_segment_clip_obj(type, clip_start, clip_end, start,     \
3077  end)                                                                         \
3078 {                                                                             \
3079   if(start != end)                                                            \
3080   {                                                                           \
3081     if(start < clip_start)                                                    \
3082     {                                                                         \
3083       if(end > clip_start)                                                    \
3084       {                                                                       \
3085         if(end > clip_end)                                                    \
3086         {                                                                     \
3087           render_window_clip_obj(type, clip_start, clip_end);                 \
3088         }                                                                     \
3089         else                                                                  \
3090         {                                                                     \
3091           render_window_clip_obj(type, clip_start, end);                      \
3092         }                                                                     \
3093       }                                                                       \
3094     }                                                                         \
3095     else                                                                      \
3096                                                                               \
3097     if(end > clip_end)                                                        \
3098     {                                                                         \
3099       if(start < clip_end)                                                    \
3100       {                                                                       \
3101         render_window_clip_obj(type, start, clip_end);                        \
3102       }                                                                       \
3103     }                                                                         \
3104     else                                                                      \
3105     {                                                                         \
3106       render_window_clip_obj(type, start, end);                               \
3107     }                                                                         \
3108   }                                                                           \
3109 }                                                                             \
3110
3111
3112 #define render_window_clip_1_obj(type, start, end)                            \
3113   if(window_1_x1 != 240)                                                      \
3114   {                                                                           \
3115     if(window_1_x1 > window_1_x2)                                             \
3116     {                                                                         \
3117       render_window_segment_clip(type, start, end, 0, window_1_x2, 1);        \
3118       render_window_segment_clip_obj(type, start, end, window_1_x2,           \
3119        window_1_x1);                                                          \
3120       render_window_segment_clip(type, start, end, window_1_x1, 240, 1);      \
3121     }                                                                         \
3122     else                                                                      \
3123     {                                                                         \
3124       render_window_segment_clip_obj(type, start, end, 0, window_1_x1);       \
3125       render_window_segment_clip(type, start, end, window_1_x1, window_1_x2,  \
3126        1);                                                                    \
3127       render_window_segment_clip_obj(type, start, end, window_1_x2, 240);     \
3128     }                                                                         \
3129   }                                                                           \
3130   else                                                                        \
3131   {                                                                           \
3132     render_window_clip_obj(type, start, end);                                 \
3133   }                                                                           \
3134
3135
3136
3137 #define render_window_single(type, window_number)                             \
3138   u32 winin = io_registers[REG_WININ];                                        \
3139   window_coords(window_number);                                               \
3140   if(window_##window_number##_x1 > window_##window_number##_x2)               \
3141   {                                                                           \
3142     render_window_segment(type, 0, window_##window_number##_x2,               \
3143      window_number);                                                          \
3144     render_window_segment(type, window_##window_number##_x2,                  \
3145      window_##window_number##_x1, out);                                       \
3146     render_window_segment(type, window_##window_number##_x1, 240,             \
3147      window_number);                                                          \
3148   }                                                                           \
3149   else                                                                        \
3150   {                                                                           \
3151     render_window_segment(type, 0, window_##window_number##_x1, out);         \
3152     render_window_segment(type, window_##window_number##_x1,                  \
3153      window_##window_number##_x2, window_number);                             \
3154     render_window_segment(type, window_##window_number##_x2, 240, out);       \
3155   }                                                                           \
3156
3157 #define render_window_multi(type, front, back)                                \
3158   if(window_##front##_x1 > window_##front##_x2)                               \
3159   {                                                                           \
3160     render_window_segment(type, 0, window_##front##_x2, front);               \
3161     render_window_clip_##back(type, window_##front##_x2,                      \
3162      window_##front##_x1);                                                    \
3163     render_window_segment(type, window_##front##_x1, 240, front);             \
3164   }                                                                           \
3165   else                                                                        \
3166   {                                                                           \
3167     render_window_clip_##back(type, 0, window_##front##_x1);                  \
3168     render_window_segment(type, window_##front##_x1, window_##front##_x2,     \
3169      front);                                                                  \
3170     render_window_clip_##back(type, window_##front##_x2, 240);                \
3171   }                                                                           \
3172
3173 #define render_scanline_window_builder(type)                                  \
3174 void render_scanline_window_##type(u16 *scanline, u32 dispcnt)                \
3175 {                                                                             \
3176   u32 vcount = io_registers[REG_VCOUNT];                                      \
3177   u32 winout = io_registers[REG_WINOUT];                                      \
3178   u32 bldcnt = io_registers[REG_BLDCNT];                                      \
3179   u32 window_out_enable = winout & 0x3F;                                      \
3180                                                                               \
3181   render_scanline_layer_functions_##type();                                   \
3182                                                                               \
3183   switch(dispcnt >> 13)                                                       \
3184   {                                                                           \
3185     /* Just window 0 */                                                       \
3186     case 0x01:                                                                \
3187     {                                                                         \
3188       render_window_single(type, 0);                                          \
3189       break;                                                                  \
3190     }                                                                         \
3191                                                                               \
3192     /* Just window 1 */                                                       \
3193     case 0x02:                                                                \
3194     {                                                                         \
3195       render_window_single(type, 1);                                          \
3196       break;                                                                  \
3197     }                                                                         \
3198                                                                               \
3199     /* Windows 1 and 2 */                                                     \
3200     case 0x03:                                                                \
3201     {                                                                         \
3202       u32 winin = io_registers[REG_WININ];                                    \
3203       window_coords(0);                                                       \
3204       window_coords(1);                                                       \
3205       render_window_multi(type, 0, 1);                                        \
3206       break;                                                                  \
3207     }                                                                         \
3208                                                                               \
3209     /* Just OBJ windows */                                                    \
3210     case 0x04:                                                                \
3211     {                                                                         \
3212       u32 window_obj_enable = winout >> 8;                                    \
3213       render_window_clip_obj(type, 0, 240);                                   \
3214       break;                                                                  \
3215     }                                                                         \
3216                                                                               \
3217     /* Window 0 and OBJ window */                                             \
3218     case 0x05:                                                                \
3219     {                                                                         \
3220       u32 window_obj_enable = winout >> 8;                                    \
3221       u32 winin = io_registers[REG_WININ];                                    \
3222       window_coords(0);                                                       \
3223       render_window_multi(type, 0, obj);                                      \
3224       break;                                                                  \
3225     }                                                                         \
3226                                                                               \
3227     /* Window 1 and OBJ window */                                             \
3228     case 0x06:                                                                \
3229     {                                                                         \
3230       u32 window_obj_enable = winout >> 8;                                    \
3231       u32 winin = io_registers[REG_WININ];                                    \
3232       window_coords(1);                                                       \
3233       render_window_multi(type, 1, obj);                                      \
3234       break;                                                                  \
3235     }                                                                         \
3236                                                                               \
3237     /* Window 0, 1, and OBJ window */                                         \
3238     case 0x07:                                                                \
3239     {                                                                         \
3240       u32 window_obj_enable = winout >> 8;                                    \
3241       u32 winin = io_registers[REG_WININ];                                    \
3242       window_coords(0);                                                       \
3243       window_coords(1);                                                       \
3244       render_window_multi(type, 0, 1_obj);                                    \
3245       break;                                                                  \
3246     }                                                                         \
3247   }                                                                           \
3248 }                                                                             \
3249
3250 render_scanline_window_builder(tile);
3251 render_scanline_window_builder(bitmap);
3252
3253 u32 active_layers[6] = { 0x1F, 0x17, 0x1C, 0x14, 0x14, 0x14 };
3254
3255 u32 small_resolution_width = 240;
3256 u32 small_resolution_height = 160;
3257 u32 resolution_width, resolution_height;
3258
3259 void update_scanline()
3260 {
3261   u32 pitch = get_screen_pitch();
3262   u32 dispcnt = io_registers[REG_DISPCNT];
3263   u32 display_flags = (dispcnt >> 8) & 0x1F;
3264   u32 vcount = io_registers[REG_VCOUNT];
3265   u16 *screen_offset = get_screen_pixels() + (vcount * pitch);
3266   u32 video_mode = dispcnt & 0x07;
3267   u32 current_layer;
3268
3269   // If OAM has been modified since the last scanline has been updated then
3270   // reorder and reprofile the OBJ lists.
3271   if(oam_update)
3272   {
3273     order_obj(video_mode);
3274     oam_update = 0;
3275   }
3276
3277   order_layers((dispcnt >> 8) & active_layers[video_mode]);
3278
3279   if(skip_next_frame)
3280     return;
3281
3282 #ifdef WIZ_BUILD
3283   if (screen_scale == unscaled_rot || screen_scale == scaled_aspect_rot)
3284   {
3285     if (rot_line_count == rot_lines_total)
3286     {
3287       rot_line_count = 0;
3288       if (vcount - rot_lines_total < FONT_HEIGHT && rot_msg_buff[0])
3289       {
3290         print_string_ext(rot_msg_buff, 0xFFFF, 0x0000, 0, 0,
3291           rot_buffer, 240, 0, vcount - rot_lines_total, rot_lines_total);
3292         if (vcount >= FONT_HEIGHT)
3293           rot_msg_buff[0] = 0;
3294       }
3295       if (screen_scale == unscaled_rot)
3296         do_rotated_blit(gpsp_gp2x_screen, rot_buffer, vcount);
3297       else
3298         upscale_aspect_row(gpsp_gp2x_screen, rot_buffer, vcount/3);
3299     }
3300     screen_offset = &rot_buffer[rot_line_count++ * 240];
3301   }
3302 #endif
3303
3304   // If the screen is in in forced blank draw pure white.
3305   if(dispcnt & 0x80)
3306   {
3307     fill_line_color16(0xFFFF, screen_offset, 0, 240);
3308   }
3309   else
3310   {
3311     if(video_mode < 3)
3312     {
3313       if(dispcnt >> 13)
3314       {
3315         render_scanline_window_tile(screen_offset, dispcnt);
3316       }
3317       else
3318       {
3319         render_scanline_tile(screen_offset, dispcnt);
3320       }
3321     }
3322     else
3323     {
3324       if(dispcnt >> 13)
3325         render_scanline_window_bitmap(screen_offset, dispcnt);
3326       else
3327         render_scanline_bitmap(screen_offset, dispcnt);
3328     }
3329   }
3330
3331   affine_reference_x[0] += (s16)io_registers[REG_BG2PB];
3332   affine_reference_y[0] += (s16)io_registers[REG_BG2PD];
3333   affine_reference_x[1] += (s16)io_registers[REG_BG3PB];
3334   affine_reference_y[1] += (s16)io_registers[REG_BG3PD];
3335 }
3336
3337 #ifdef PSP_BUILD
3338
3339 u32 screen_flip = 0;
3340
3341 void flip_screen()
3342 {
3343   if(video_direct == 0)
3344   {
3345     u32 *old_ge_cmd_ptr = ge_cmd_ptr;
3346     sceKernelDcacheWritebackAll();
3347
3348     // Render the current screen
3349     ge_cmd_ptr = ge_cmd + 2;
3350     GE_CMD(TBP0, ((u32)screen_pixels & 0x00FFFFFF));
3351     GE_CMD(TBW0, (((u32)screen_pixels & 0xFF000000) >> 8) |
3352      GBA_SCREEN_WIDTH);
3353     ge_cmd_ptr = old_ge_cmd_ptr;
3354
3355     sceGeListEnQueue(ge_cmd, ge_cmd_ptr, gecbid, NULL);
3356
3357     // Flip to the next screen
3358     screen_flip ^= 1;
3359
3360     if(screen_flip)
3361       screen_pixels = screen_texture + (240 * 160 * 2);
3362     else
3363       screen_pixels = screen_texture;
3364   }
3365 }
3366
3367 #elif defined(WIZ_BUILD)
3368
3369 void flip_screen()
3370 {
3371   if((resolution_width == small_resolution_width) &&
3372    (resolution_height == small_resolution_height))
3373   {
3374     switch(screen_scale)
3375     {
3376       case scaled_aspect:
3377         upscale_aspect(gpsp_gp2x_screen, screen_pixels);
3378         break;
3379       case unscaled_rot:
3380         do_rotated_blit(gpsp_gp2x_screen, rot_buffer, 160);
3381         rot_line_count = 0;
3382         goto no_clean;
3383       case scaled_aspect_rot:
3384         rot_line_count = 0;
3385         goto no_clean;
3386     }
3387   }
3388   warm_cache_op_all(WOP_D_CLEAN);
3389
3390 no_clean:
3391   pollux_video_flip();
3392   screen_pixels = (u16 *)gpsp_gp2x_screen + screen_offset;
3393 }
3394
3395 #else
3396
3397 #define integer_scale_copy_2()                                                \
3398   current_scanline_ptr[x2] = current_pixel;                                   \
3399   current_scanline_ptr[x2 - 1] = current_pixel;                               \
3400   x2 -= 2                                                                     \
3401
3402 #define integer_scale_copy_3()                                                \
3403   current_scanline_ptr[x2] = current_pixel;                                   \
3404   current_scanline_ptr[x2 - 1] = current_pixel;                               \
3405   current_scanline_ptr[x2 - 2] = current_pixel;                               \
3406   x2 -= 3                                                                     \
3407
3408 #define integer_scale_copy_4()                                                \
3409   current_scanline_ptr[x2] = current_pixel;                                   \
3410   current_scanline_ptr[x2 - 1] = current_pixel;                               \
3411   current_scanline_ptr[x2 - 2] = current_pixel;                               \
3412   current_scanline_ptr[x2 - 3] = current_pixel;                               \
3413   x2 -= 4                                                                     \
3414
3415 #define integer_scale_horizontal(scale_factor)                                \
3416   for(y = 0; y < 160; y++)                                                    \
3417   {                                                                           \
3418     for(x = 239, x2 = (240 * video_scale) - 1; x >= 0; x--)                   \
3419     {                                                                         \
3420       current_pixel = current_scanline_ptr[x];                                \
3421       integer_scale_copy_##scale_factor();                                    \
3422       current_scanline_ptr[x2] = current_scanline_ptr[x];                     \
3423       current_scanline_ptr[x2 - 1] = current_scanline_ptr[x];                 \
3424       current_scanline_ptr[x2 - 2] = current_scanline_ptr[x];                 \
3425     }                                                                         \
3426     current_scanline_ptr += pitch;                                            \
3427   }                                                                           \
3428
3429 void flip_screen()
3430 {
3431   if((video_scale != 1) && (current_scale != unscaled))
3432   {
3433     s32 x, y;
3434     s32 x2, y2;
3435     u16 *screen_ptr = get_screen_pixels();
3436     u16 *current_scanline_ptr = screen_ptr;
3437     u32 pitch = get_screen_pitch();
3438     u16 current_pixel;
3439     u32 i;
3440
3441     switch(video_scale)
3442     {
3443       case 2:
3444         integer_scale_horizontal(2);
3445         break;
3446
3447       case 3:
3448         integer_scale_horizontal(3);
3449         break;
3450
3451       default:
3452       case 4:
3453         integer_scale_horizontal(4);
3454         break;
3455
3456     }
3457
3458     for(y = 159, y2 = (160 * video_scale) - 1; y >= 0; y--)
3459     {
3460       for(i = 0; i < video_scale; i++)
3461       {
3462         memcpy(screen_ptr + (y2 * pitch),
3463          screen_ptr + (y * pitch), 480 * video_scale);
3464         y2--;
3465       }
3466     }
3467   }
3468 #ifdef GP2X_BUILD
3469   {
3470     if((resolution_width == small_resolution_width) &&
3471      (resolution_height == small_resolution_height))
3472     {
3473       switch (screen_scale)
3474       {
3475         case unscaled:
3476         {
3477           SDL_Rect srect = {0, 0, 240, 160};
3478           SDL_Rect drect = {40, 40, 240, 160};
3479           warm_cache_op_all(WOP_D_CLEAN);
3480           SDL_BlitSurface(screen, &srect, hw_screen, &drect);
3481           return;
3482         }
3483         case scaled_aspect:
3484         {
3485           SDL_Rect drect = {0, 10, 0, 0};
3486           warm_cache_op_all(WOP_D_CLEAN);
3487           SDL_BlitSurface(screen, NULL, hw_screen, &drect);
3488           return;
3489         }
3490         case scaled_aspect_sw:
3491         {
3492           upscale_aspect(hw_screen->pixels, get_screen_pixels());
3493           return;
3494         }
3495         case fullscreen:
3496           break;
3497       }
3498     }
3499     warm_cache_op_all(WOP_D_CLEAN);
3500     SDL_BlitSurface(screen, NULL, hw_screen, NULL);
3501   }
3502 #else
3503   SDL_Flip(screen);
3504 #endif
3505 }
3506
3507 #endif
3508
3509 u32 frame_to_render;
3510
3511 void update_screen()
3512 {
3513   if(!skip_next_frame)
3514     flip_screen();
3515 }
3516
3517 #ifdef PSP_BUILD
3518
3519 void init_video()
3520 {
3521   sceDisplaySetMode(0, PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT);
3522
3523   sceDisplayWaitVblankStart();
3524   sceDisplaySetFrameBuf((void*)psp_gu_vram_base, PSP_LINE_SIZE,
3525    PSP_DISPLAY_PIXEL_FORMAT_565, PSP_DISPLAY_SETBUF_NEXTFRAME);
3526
3527   sceGuInit();
3528
3529   sceGuStart(GU_DIRECT, display_list);
3530   sceGuDrawBuffer(GU_PSM_5650, (void*)0, PSP_LINE_SIZE);
3531   sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3532    (void*)0, PSP_LINE_SIZE);
3533   sceGuClear(GU_COLOR_BUFFER_BIT);
3534
3535   sceGuOffset(2048 - (PSP_SCREEN_WIDTH / 2), 2048 - (PSP_SCREEN_HEIGHT / 2));
3536   sceGuViewport(2048, 2048, PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT);
3537
3538   sceGuScissor(0, 0, PSP_SCREEN_WIDTH + 1, PSP_SCREEN_HEIGHT + 1);
3539   sceGuEnable(GU_SCISSOR_TEST);
3540   sceGuTexMode(GU_PSM_5650, 0, 0, GU_FALSE);
3541   sceGuTexFunc(GU_TFX_REPLACE, GU_TCC_RGBA);
3542   sceGuTexFilter(GU_LINEAR, GU_LINEAR);
3543   sceGuEnable(GU_TEXTURE_2D);
3544
3545   sceGuFrontFace(GU_CW);
3546   sceGuDisable(GU_BLEND);
3547
3548   sceGuFinish();
3549   sceGuSync(0, 0);
3550
3551   sceDisplayWaitVblankStart();
3552   sceGuDisplay(GU_TRUE);
3553
3554   PspGeCallbackData gecb;
3555   gecb.signal_func = NULL;
3556   gecb.signal_arg = NULL;
3557   gecb.finish_func = Ge_Finish_Callback;
3558   gecb.finish_arg = NULL;
3559   gecbid = sceGeSetCallback(&gecb);
3560
3561   screen_vertex[0] = 0 + 0.5;
3562   screen_vertex[1] = 0 + 0.5;
3563   screen_vertex[2] = 0 + 0.5;
3564   screen_vertex[3] = 0 + 0.5;
3565   screen_vertex[4] = 0;
3566   screen_vertex[5] = GBA_SCREEN_WIDTH - 0.5;
3567   screen_vertex[6] = GBA_SCREEN_HEIGHT - 0.5;
3568   screen_vertex[7] = PSP_SCREEN_WIDTH - 0.5;
3569   screen_vertex[8] = PSP_SCREEN_HEIGHT - 0.5;
3570   screen_vertex[9] = 0;
3571
3572   // Set framebuffer to PSP VRAM
3573   GE_CMD(FBP, ((u32)psp_gu_vram_base & 0x00FFFFFF));
3574   GE_CMD(FBW, (((u32)psp_gu_vram_base & 0xFF000000) >> 8) | PSP_LINE_SIZE);
3575   // Set texture 0 to the screen texture
3576   GE_CMD(TBP0, ((u32)screen_texture & 0x00FFFFFF));
3577   GE_CMD(TBW0, (((u32)screen_texture & 0xFF000000) >> 8) | GBA_SCREEN_WIDTH);
3578   // Set the texture size to 256 by 256 (2^8 by 2^8)
3579   GE_CMD(TSIZE0, (8 << 8) | 8);
3580   // Flush the texture cache
3581   GE_CMD(TFLUSH, 0);
3582   // Use 2D coordinates, no indeces, no weights, 32bit float positions,
3583   // 32bit float texture coordinates
3584   GE_CMD(VTYPE, (1 << 23) | (0 << 11) | (0 << 9) |
3585    (3 << 7) | (0 << 5) | (0 << 2) | 3);
3586   // Set the base of the index list pointer to 0
3587   GE_CMD(BASE, 0);
3588   // Set the rest of index list pointer to 0 (not being used)
3589   GE_CMD(IADDR, 0);
3590   // Set the base of the screen vertex list pointer
3591   GE_CMD(BASE, ((u32)screen_vertex & 0xFF000000) >> 8);
3592   // Set the rest of the screen vertex list pointer
3593   GE_CMD(VADDR, ((u32)screen_vertex & 0x00FFFFFF));
3594   // Primitive kick: render sprite (primitive 6), 2 vertices
3595   GE_CMD(PRIM, (6 << 16) | 2);
3596   // Done with commands
3597   GE_CMD(FINISH, 0);
3598   // Raise signal interrupt
3599   GE_CMD(SIGNAL, 0);
3600   GE_CMD(NOP, 0);
3601   GE_CMD(NOP, 0);
3602 }
3603
3604 #elif defined(WIZ_BUILD)
3605
3606 void init_video()
3607 {
3608 }
3609
3610 #else
3611
3612 void init_video()
3613 {
3614   SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK | SDL_INIT_NOPARACHUTE);
3615
3616 #ifdef GP2X_BUILD
3617   SDL_GP2X_AllowGfxMemory(NULL, 0);
3618
3619   hw_screen = SDL_SetVideoMode(320 * video_scale, 240 * video_scale,
3620    16, SDL_HWSURFACE);
3621
3622   screen = SDL_CreateRGBSurface(SDL_HWSURFACE, 240 * video_scale,
3623    160 * video_scale, 16, 0xFFFF, 0xFFFF, 0xFFFF, 0);
3624
3625   warm_change_cb_upper(WCB_C_BIT|WCB_B_BIT, 1);
3626 #else
3627   screen = SDL_SetVideoMode(240 * video_scale, 160 * video_scale, 16, 0);
3628 #endif
3629   SDL_ShowCursor(0);
3630 }
3631
3632 #endif
3633
3634 video_scale_type screen_scale = scaled_aspect;
3635 video_scale_type current_scale = scaled_aspect;
3636 video_filter_type screen_filter = filter_bilinear;
3637
3638
3639 #ifdef PSP_BUILD
3640
3641 void video_resolution_large()
3642 {
3643   if(video_direct != 1)
3644   {
3645     video_direct = 1;
3646     screen_pixels = psp_gu_vram_base;
3647     screen_pitch = 512;
3648     sceGuStart(GU_DIRECT, display_list);
3649     sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3650      (void*)0, PSP_LINE_SIZE);
3651     sceGuFinish();
3652   }
3653 }
3654
3655 void set_gba_resolution(video_scale_type scale)
3656 {
3657   u32 filter_linear = 0;
3658   screen_scale = scale;
3659   switch(scale)
3660   {
3661     case unscaled:
3662       screen_vertex[2] = 120 + 0.5;
3663       screen_vertex[3] = 56 + 0.5;
3664       screen_vertex[7] = GBA_SCREEN_WIDTH + 120 - 0.5;
3665       screen_vertex[8] = GBA_SCREEN_HEIGHT + 56 - 0.5;
3666       break;
3667
3668     case scaled_aspect:
3669       screen_vertex[2] = 36 + 0.5;
3670       screen_vertex[3] = 0 + 0.5;
3671       screen_vertex[7] = 408 + 36 - 0.5;
3672       screen_vertex[8] = PSP_SCREEN_HEIGHT - 0.5;
3673       break;
3674
3675     case fullscreen:
3676       screen_vertex[2] = 0;
3677       screen_vertex[3] = 0;
3678       screen_vertex[7] = PSP_SCREEN_WIDTH;
3679       screen_vertex[8] = PSP_SCREEN_HEIGHT;
3680       break;
3681   }
3682
3683   sceGuStart(GU_DIRECT, display_list);
3684   if(screen_filter == filter_bilinear)
3685     sceGuTexFilter(GU_LINEAR, GU_LINEAR);
3686   else
3687     sceGuTexFilter(GU_NEAREST, GU_NEAREST);
3688
3689   sceGuFinish();
3690   sceGuSync(0, 0);
3691
3692   clear_screen(0x0000);
3693 }
3694
3695 void video_resolution_small()
3696 {
3697   if(video_direct != 0)
3698   {
3699     set_gba_resolution(screen_scale);
3700     video_direct = 0;
3701     screen_pixels = screen_texture;
3702     screen_flip = 0;
3703     screen_pitch = 240;
3704     sceGuStart(GU_DIRECT, display_list);
3705     sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3706      (void*)0, PSP_LINE_SIZE);
3707     sceGuFinish();
3708   }
3709 }
3710
3711 void clear_screen(u16 color)
3712 {
3713   u32 i;
3714   u16 *src_ptr = get_screen_pixels();
3715
3716   sceGuSync(0, 0);
3717
3718   for(i = 0; i < (512 * 272); i++, src_ptr++)
3719   {
3720     *src_ptr = color;
3721   }
3722
3723   // I don't know why this doesn't work.
3724 /*  color = (((color & 0x1F) * 255 / 31) << 0) |
3725    ((((color >> 5) & 0x3F) * 255 / 63) << 8) |
3726    ((((color >> 11) & 0x1F) * 255 / 31) << 16) | (0xFF << 24);
3727
3728   sceGuStart(GU_DIRECT, display_list);
3729   sceGuDrawBuffer(GU_PSM_5650, (void*)0, PSP_LINE_SIZE);
3730   //sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3731   // (void*)0, PSP_LINE_SIZE);
3732   sceGuClearColor(color);
3733   sceGuClear(GU_COLOR_BUFFER_BIT);
3734   sceGuFinish();
3735   sceGuSync(0, 0); */
3736 }
3737
3738 #elif defined(WIZ_BUILD)
3739
3740 void video_resolution_large()
3741 {
3742   screen_offset = 0;
3743   resolution_width = 320;
3744   resolution_height = 240;
3745
3746   fb_use_buffers(1);
3747   flip_screen();
3748   clear_screen(0);
3749   wiz_lcd_set_portrait(0);
3750 }
3751
3752 void video_resolution_small()
3753 {
3754   fb_use_buffers(4);
3755
3756   switch (screen_scale)
3757   {
3758     case unscaled:
3759       screen_offset = 320*40 + 40;
3760       wiz_lcd_set_portrait(0);
3761       break;
3762     case scaled_aspect:
3763       screen_offset = 320*(80 - 14) + 80;
3764       wiz_lcd_set_portrait(0);
3765       break;
3766     case unscaled_rot:
3767       wiz_lcd_set_portrait(1);
3768       rot_lines_total = 4;
3769       rot_line_count = 0;
3770       break;
3771     case scaled_aspect_rot:
3772       wiz_lcd_set_portrait(1);
3773       rot_lines_total = 3;
3774       rot_line_count = 0;
3775       break;
3776   }
3777
3778   flip_screen();
3779   clear_screen(0);
3780
3781   resolution_width = 240;
3782   resolution_height = 160;
3783 }
3784
3785 void set_gba_resolution(video_scale_type scale)
3786 {
3787   screen_scale = scale;
3788 }
3789
3790 void clear_screen(u16 color)
3791 {
3792   u32 col = ((u32)color << 16) | color;
3793   u32 *p = gpsp_gp2x_screen;
3794   int c = 320*240/2;
3795   while (c-- > 0)
3796     *p++ = col;
3797 }
3798
3799 #else
3800
3801 void video_resolution_large()
3802 {
3803   current_scale = unscaled;
3804
3805 #ifdef GP2X_BUILD
3806   SDL_FreeSurface(screen);
3807   SDL_GP2X_AllowGfxMemory(NULL, 0);
3808     hw_screen = SDL_SetVideoMode(320, 240, 16, SDL_HWSURFACE);
3809   screen = SDL_CreateRGBSurface(SDL_HWSURFACE, 320, 240, 16, 0xFFFF,
3810    0xFFFF, 0xFFFF, 0);
3811   resolution_width = 320;
3812     resolution_height = 240;
3813   SDL_ShowCursor(0);
3814
3815   warm_change_cb_upper(WCB_C_BIT|WCB_B_BIT, 1);
3816 #else
3817   screen = SDL_SetVideoMode(480, 272, 16, 0);
3818   resolution_width = 480;
3819   resolution_height = 272;
3820 #endif
3821 }
3822
3823 void video_resolution_small()
3824 {
3825   current_scale = screen_scale;
3826
3827 #ifdef GP2X_BUILD
3828   int w, h;
3829   SDL_FreeSurface(screen);
3830   SDL_GP2X_AllowGfxMemory(NULL, 0);
3831
3832   w = 320; h = 240;
3833   if (screen_scale == scaled_aspect || screen_scale == fullscreen)
3834   {
3835     w = small_resolution_width * video_scale;
3836     h = small_resolution_height * video_scale;
3837   }
3838   if (screen_scale == scaled_aspect) h += 20;
3839   hw_screen = SDL_SetVideoMode(w, h, 16, SDL_HWSURFACE);
3840
3841   w = small_resolution_width * video_scale;
3842   if (screen_scale == scaled_aspect_sw)
3843     w = 320;
3844   screen = SDL_CreateRGBSurface(SDL_HWSURFACE,
3845    w, small_resolution_height * video_scale,
3846    16, 0xFFFF, 0xFFFF, 0xFFFF, 0);
3847
3848   SDL_ShowCursor(0);
3849
3850   warm_change_cb_upper(WCB_C_BIT|WCB_B_BIT, 1);
3851 #else
3852   screen = SDL_SetVideoMode(small_resolution_width * video_scale,
3853    small_resolution_height * video_scale, 16, 0);
3854 #endif
3855   resolution_width = small_resolution_width;
3856   resolution_height = small_resolution_height;
3857 }
3858
3859 void set_gba_resolution(video_scale_type scale)
3860 {
3861   if(screen_scale != scale)
3862   {
3863     screen_scale = scale;
3864     switch(scale)
3865     {
3866       case unscaled:
3867       case scaled_aspect:
3868       case fullscreen:
3869         small_resolution_width = 240 * video_scale;
3870         small_resolution_height = 160 * video_scale;
3871         break;
3872     }
3873   }
3874 }
3875
3876 void clear_screen(u16 color)
3877 {
3878   u16 *dest_ptr = get_screen_pixels();
3879   u32 line_skip = get_screen_pitch() - screen->w;
3880   u32 x, y;
3881
3882   for(y = 0; y < screen->h; y++)
3883   {
3884     for(x = 0; x < screen->w; x++, dest_ptr++)
3885     {
3886       *dest_ptr = color;
3887     }
3888     dest_ptr += line_skip;
3889   }
3890 }
3891
3892 #endif
3893
3894 u16 *copy_screen()
3895 {
3896   u16 *copy = malloc(240 * 160 * 2);
3897   memcpy(copy, get_screen_pixels(), 240 * 160 * 2);
3898   return copy;
3899 }
3900
3901 void blit_to_screen(u16 *src, u32 w, u32 h, u32 dest_x, u32 dest_y)
3902 {
3903   u32 pitch = get_screen_pitch();
3904   u16 *dest_ptr = get_screen_pixels() + dest_x + (dest_y * pitch);
3905
3906   u16 *src_ptr = src;
3907   u32 line_skip = pitch - w;
3908   u32 x, y;
3909
3910   for(y = 0; y < h; y++)
3911   {
3912     for(x = 0; x < w; x++, src_ptr++, dest_ptr++)
3913     {
3914       *dest_ptr = *src_ptr;
3915     }
3916     dest_ptr += line_skip;
3917   }
3918 }
3919
3920 void print_string_ext(const char *str, u16 fg_color, u16 bg_color,
3921  u32 x, u32 y, void *_dest_ptr, u32 pitch, u32 pad, u32 h_offset, u32 height)
3922 {
3923   u16 *dest_ptr = (u16 *)_dest_ptr + (y * pitch) + x;
3924   u8 current_char = str[0];
3925   u32 current_row;
3926   u32 glyph_offset;
3927   u32 i = 0, i2, i3, h;
3928   u32 str_index = 1;
3929   u32 current_x = x;
3930
3931   if(y + height > resolution_height)
3932       return;
3933
3934   while(current_char)
3935   {
3936     if(current_char == '\n')
3937     {
3938       y += FONT_HEIGHT;
3939       current_x = x;
3940       dest_ptr = get_screen_pixels() + (y * pitch) + x;
3941     }
3942     else
3943     {
3944       glyph_offset = _font_offset[current_char];
3945       current_x += FONT_WIDTH;
3946       glyph_offset += h_offset;
3947       for(i2 = h_offset, h = 0; i2 < FONT_HEIGHT && h < height; i2++, h++, glyph_offset++)
3948       {
3949         current_row = _font_bits[glyph_offset];
3950         for(i3 = 0; i3 < FONT_WIDTH; i3++)
3951         {
3952           if((current_row >> (15 - i3)) & 0x01)
3953             *dest_ptr = fg_color;
3954           else
3955             *dest_ptr = bg_color;
3956           dest_ptr++;
3957         }
3958         dest_ptr += (pitch - FONT_WIDTH);
3959       }
3960       dest_ptr = dest_ptr - (pitch * h) + FONT_WIDTH;
3961     }
3962
3963     i++;
3964
3965     current_char = str[str_index];
3966
3967     if((i < pad) && (current_char == 0))
3968     {
3969       current_char = ' ';
3970     }
3971     else
3972     {
3973       str_index++;
3974     }
3975
3976     if(current_x + FONT_WIDTH > resolution_width /* EDIT */)
3977     {
3978       while (current_char && current_char != '\n')
3979       {
3980         current_char = str[str_index++];
3981       }
3982     }
3983   }
3984 }
3985
3986 void print_string(const char *str, u16 fg_color, u16 bg_color,
3987  u32 x, u32 y)
3988 {
3989 #ifdef WIZ_BUILD
3990   if ((screen_scale == unscaled_rot || screen_scale == scaled_aspect_rot) &&
3991    (resolution_width == small_resolution_width) &&
3992    (resolution_height == small_resolution_height))
3993   {
3994     snprintf(rot_msg_buff, sizeof(rot_msg_buff), "%s", str);
3995     return;
3996   }
3997 #endif
3998   print_string_ext(str, fg_color, bg_color, x, y, get_screen_pixels(),
3999    get_screen_pitch(), 0, 0, FONT_HEIGHT);
4000 }
4001
4002 void print_string_pad(const char *str, u16 fg_color, u16 bg_color,
4003  u32 x, u32 y, u32 pad)
4004 {
4005   print_string_ext(str, fg_color, bg_color, x, y, get_screen_pixels(),
4006    get_screen_pitch(), pad, 0, FONT_HEIGHT);
4007 }
4008
4009 u32 debug_cursor_x = 0;
4010 u32 debug_cursor_y = 0;
4011
4012 #ifdef STDIO_DEBUG
4013
4014 void debug_screen_clear()
4015 {
4016 }
4017
4018 void debug_screen_start()
4019 {
4020 }
4021
4022 void debug_screen_end()
4023 {
4024 }
4025
4026 void debug_screen_update()
4027 {
4028 }
4029
4030 void debug_screen_printf(const char *format, ...)
4031 {
4032   va_list ap;
4033
4034   va_start(ap, format);
4035   vprintf(format, ap);
4036   va_end(ap);
4037 }
4038
4039 void debug_screen_newline(u32 count)
4040 {
4041   printf("\n");
4042 }
4043
4044
4045 #else
4046
4047 void debug_screen_clear()
4048 {
4049   debug_cursor_x = 0;
4050   debug_cursor_y = 0;
4051   clear_screen(0x0000);
4052 }
4053
4054 void debug_screen_start()
4055 {
4056   video_resolution_large();
4057   debug_screen_clear();
4058 }
4059
4060 void debug_screen_end()
4061 {
4062   video_resolution_small();
4063 }
4064
4065 void debug_screen_update()
4066 {
4067   flip_screen();
4068 }
4069
4070 void debug_screen_printf(const char *format, ...)
4071 {
4072   char str_buffer[512];
4073   u32 str_buffer_length;
4074   va_list ap;
4075
4076   va_start(ap, format);
4077   str_buffer_length = vsnprintf(str_buffer, 512, format, ap);
4078   va_end(ap);
4079
4080   printf("printing debug string %s at %d %d\n", str_buffer,
4081    debug_cursor_x, debug_cursor_y);
4082
4083   print_string(str_buffer, 0xFFFF, 0x0000, debug_cursor_x, debug_cursor_y);
4084   debug_cursor_x += FONT_WIDTH * str_buffer_length;
4085 }
4086
4087 void debug_screen_newline(u32 count)
4088 {
4089   debug_cursor_x = 0;
4090   debug_cursor_y += FONT_HEIGHT * count;
4091 }
4092
4093 #endif
4094
4095 void debug_screen_printl(const char *format, ...)
4096 {
4097   va_list ap;
4098
4099   va_start(ap, format);
4100   debug_screen_printf(format, ap);
4101   debug_screen_newline(1);
4102 //  debug_screen_printf("\n");
4103   va_end(ap);
4104 }
4105
4106
4107 #define video_savestate_builder(type)                                         \
4108 void video_##type##_savestate(file_tag_type savestate_file)                   \
4109 {                                                                             \
4110   file_##type##_array(savestate_file, affine_reference_x);                    \
4111   file_##type##_array(savestate_file, affine_reference_y);                    \
4112 }                                                                             \
4113
4114 video_savestate_builder(read);
4115 video_savestate_builder(write_mem);
4116
4117