tweaks from 2008 (gpsp09-2xb_3)
[gpsp.git] / video.c
1 /* gameplaySP
2  *
3  * Copyright (C) 2006 Exophase <exophase@gmail.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of
8  * the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
19
20 #include "common.h"
21 #include "font.h"
22
23 #ifdef PSP_BUILD
24
25 #include <pspctrl.h>
26
27 #include <pspkernel.h>
28 #include <pspdebug.h>
29 #include <pspdisplay.h>
30
31 #include <pspgu.h>
32 #include <psppower.h>
33 #include <psprtc.h>
34
35 static float *screen_vertex = (float *)0x441FC100;
36 static u32 *ge_cmd = (u32 *)0x441FC000;
37 static u16 *psp_gu_vram_base = (u16 *)(0x44000000);
38 static u32 *ge_cmd_ptr = (u32 *)0x441FC000;
39 static u32 gecbid;
40 static u32 video_direct = 0;
41
42 static u32 __attribute__((aligned(16))) display_list[32];
43
44 #define GBA_SCREEN_WIDTH 240
45 #define GBA_SCREEN_HEIGHT 160
46
47 #define PSP_SCREEN_WIDTH 480
48 #define PSP_SCREEN_HEIGHT 272
49 #define PSP_LINE_SIZE 512
50
51 #define PSP_ALL_BUTTON_MASK 0xFFFF
52
53 #define GE_CMD_FBP    0x9C
54 #define GE_CMD_FBW    0x9D
55 #define GE_CMD_TBP0   0xA0
56 #define GE_CMD_TBW0   0xA8
57 #define GE_CMD_TSIZE0 0xB8
58 #define GE_CMD_TFLUSH 0xCB
59 #define GE_CMD_CLEAR  0xD3
60 #define GE_CMD_VTYPE  0x12
61 #define GE_CMD_BASE   0x10
62 #define GE_CMD_VADDR  0x01
63 #define GE_CMD_IADDR  0x02
64 #define GE_CMD_PRIM   0x04
65 #define GE_CMD_FINISH 0x0F
66 #define GE_CMD_SIGNAL 0x0C
67 #define GE_CMD_NOP    0x00
68
// Appends one GE command word at ge_cmd_ptr and advances the pointer by one.
// The word is the GE_CMD_<cmd> opcode placed in the top 8 bits, ORed with
// operand. This is a single expression so that it stays one statement when
// used under an unbraced if/else, and the opcode is widened to u32 before the
// shift because opcodes of 0x80 and above do not fit in a signed int once
// shifted left by 24.

#define GE_CMD(cmd, operand)                                                  \
  (*ge_cmd_ptr++ = (((u32)(GE_CMD_##cmd) << 24) | (operand)))
72
73 static u16 *screen_texture = (u16 *)(0x4000000 + (512 * 272 * 2));
74 static u16 *current_screen_texture = (u16 *)(0x4000000 + (512 * 272 * 2));
75 static u16 *screen_pixels = (u16 *)(0x4000000 + (512 * 272 * 2));
76 static u32 screen_pitch = 240;
77
// Callback with an empty body: both arguments are ignored and nothing is
// done. NOTE(review): presumably registered as the GE "finish" callback
// (see gecbid) - confirm at the registration site.
static void Ge_Finish_Callback(int id, void *arg)
{
  (void)id;
  (void)arg;
}
81
82 #define get_screen_pixels()                                                   \
83   screen_pixels                                                               \
84
85 #define get_screen_pitch()                                                    \
86   screen_pitch                                                                \
87
88 #else
89
90 #ifdef GP2X_BUILD
91 #include "SDL_gp2x.h"
92 SDL_Surface *hw_screen;
93 #endif
94 SDL_Surface *screen;
95 const u32 video_scale = 1;
96 extern void gp2x_flush_cache(void *beginning_addr, void *end_addr, int flags);
97
98 #define get_screen_pixels()                                                   \
99   ((u16 *)screen->pixels)                                                     \
100
101 #define get_screen_pitch()                                                    \
102   (screen->pitch / 2)                                                         \
103
104 #endif
105
106 void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline,
107  u32 enable_flags, u32 dispcnt, u32 bldcnt, tile_layer_render_struct
108  *layer_renderers);
109 void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline,
110  u32 enable_flags, u32 dispcnt, u32 bldcnt, bitmap_layer_render_struct
111  *layer_renderers);
112
113 #define no_op                                                                 \
114
115 // This old version is not necessary if the palette is either being converted
116 // transparently or the ABGR 1555 format is being used natively. The direct
117 // version (without conversion) is much faster.
118
119 #define tile_lookup_palette_full(palette, source)                             \
120   current_pixel = palette[source];                                            \
121   convert_palette(current_pixel)                                              \
122
123 #define tile_lookup_palette(palette, source)                                  \
124   current_pixel = palette[source];                                            \
125
126
127 #ifdef RENDER_COLOR16_NORMAL
128
129 #define tile_expand_base_normal(index)                                        \
130   tile_expand_base_color16(index)                                             \
131
132 #else
133
134 #define tile_expand_base_normal(index)                                        \
135   tile_lookup_palette(palette, current_pixel);                                \
136   dest_ptr[index] = current_pixel                                             \
137
138 #endif
139
140 #define tile_expand_transparent_normal(index)                                 \
141   tile_expand_base_normal(index)                                              \
142
143 #define tile_expand_copy(index)                                               \
144   dest_ptr[index] = copy_ptr[index]                                           \
145
146
147 #define advance_dest_ptr_base(delta)                                          \
148   dest_ptr += delta                                                           \
149
150 #define advance_dest_ptr_transparent(delta)                                   \
151   advance_dest_ptr_base(delta)                                                \
152
153 #define advance_dest_ptr_copy(delta)                                          \
154   advance_dest_ptr_base(delta);                                               \
155   copy_ptr += delta                                                           \
156
157
// Evaluates to 1 if bit "layer" of BLDCNT is set, otherwise 0.

#define color_combine_mask_a(layer)                                           \
  ((io_registers[REG_BLDCNT] >> (layer)) & 0x01)

// For color blending operations, creates a mask that has bit 9 set if the
// layer is target A (BLDCNT bit "layer") and bit 10 set if the layer is
// target B (BLDCNT bit "layer + 8"). The argument and the whole expansion are
// parenthesized so the macro can be combined with neighboring operators.

#define color_combine_mask(layer)                                             \
  ((color_combine_mask_a(layer) |                                             \
   ((io_registers[REG_BLDCNT] >> ((layer) + 7)) & 0x02)) << 9)
167
168 // For alpha blending renderers, draw the palette index (9bpp) and
169 // layer bits rather than the raw RGB. For the base this should write to
170 // the 32bit location directly.
171
172 #define tile_expand_base_alpha(index)                                         \
173   dest_ptr[index] = current_pixel | pixel_combine                             \
174
175 #define tile_expand_base_bg(index)                                            \
176   dest_ptr[index] = bg_combine                                                \
177
178
179 // For layered (transparent) writes this should shift the "stack" and write
180 // to the bottom. This will preserve the topmost pixel and the most recent
181 // one.
182
183 #define tile_expand_transparent_alpha(index)                                  \
184   dest_ptr[index] = (dest_ptr[index] << 16) | current_pixel | pixel_combine   \
185
186
// Layered write for OBJ pixels in the alpha renderer. OBJ should only shift
// if the top isn't already OBJ: when the existing word has bit 8 set, its
// upper 16 bits are kept and only the low half is replaced; otherwise the
// existing word is shifted up by 16 first. Either way the low half becomes
// current_pixel | pixel_combine.

#define tile_expand_transparent_alpha_obj(index)                              \
  dest = dest_ptr[index];                                                     \
  dest_ptr[index] = ((dest & 0x00000100) ?                                    \
   (dest & 0xFFFF0000) : (dest << 16)) | current_pixel | pixel_combine
198
199
200 // For color effects that don't need to preserve the previous layer.
201 // The color32 version should be used with 32bit wide dest_ptr so as to be
202 // compatible with alpha combine on top of it.
203
204 #define tile_expand_base_color16(index)                                       \
205   dest_ptr[index] = current_pixel | pixel_combine                             \
206
207 #define tile_expand_transparent_color16(index)                                \
208   tile_expand_base_color16(index)                                             \
209
210 #define tile_expand_base_color32(index)                                       \
211   tile_expand_base_color16(index)                                             \
212
213 #define tile_expand_transparent_color32(index)                                \
214   tile_expand_base_color16(index)                                             \
215
216
// Operations for isolating 8bpp pixels within 32bpp pixel blocks.
218
219 #define tile_8bpp_pixel_op_mask(op_param)                                     \
220   current_pixel = current_pixels & 0xFF                                       \
221
222 #define tile_8bpp_pixel_op_shift_mask(shift)                                  \
223   current_pixel = (current_pixels >> shift) & 0xFF                            \
224
225 #define tile_8bpp_pixel_op_shift(shift)                                       \
226   current_pixel = current_pixels >> shift                                     \
227
228 #define tile_8bpp_pixel_op_none(shift)                                        \
229
230 // Base should always draw raw in 8bpp mode; color 0 will be drawn where
231 // color 0 is.
232
233 #define tile_8bpp_draw_base_normal(index)                                     \
234   tile_expand_base_normal(index)                                              \
235
236 #define tile_8bpp_draw_base_alpha(index)                                      \
237   if(current_pixel)                                                           \
238   {                                                                           \
239     tile_expand_base_alpha(index);                                            \
240   }                                                                           \
241   else                                                                        \
242   {                                                                           \
243     tile_expand_base_bg(index);                                               \
244   }                                                                           \
245
246
247 #define tile_8bpp_draw_base_color16(index)                                    \
248   tile_8bpp_draw_base_alpha(index)                                            \
249
250 #define tile_8bpp_draw_base_color32(index)                                    \
251   tile_8bpp_draw_base_alpha(index)                                            \
252
253
254 #define tile_8bpp_draw_base(index, op, op_param, alpha_op)                    \
255   tile_8bpp_pixel_op_##op(op_param);                                          \
256   tile_8bpp_draw_base_##alpha_op(index)                                       \
257
258 // Transparent (layered) writes should only replace what is there if the
259 // pixel is not transparent (zero)
260
261 #define tile_8bpp_draw_transparent(index, op, op_param, alpha_op)             \
262   tile_8bpp_pixel_op_##op(op_param);                                          \
263   if(current_pixel)                                                           \
264   {                                                                           \
265     tile_expand_transparent_##alpha_op(index);                                \
266   }                                                                           \
267
268 #define tile_8bpp_draw_copy(index, op, op_param, alpha_op)                    \
269   tile_8bpp_pixel_op_##op(op_param);                                          \
270   if(current_pixel)                                                           \
271   {                                                                           \
272     tile_expand_copy(index);                                                  \
273   }                                                                           \
274
275 // Get the current tile from the map in 8bpp mode
276
277 #define get_tile_8bpp()                                                       \
278   current_tile = *map_ptr;                                                    \
279   tile_ptr = tile_base + ((current_tile & 0x3FF) * 64)                        \
280
281
282 // Draw half of a tile in 8bpp mode, for base renderer
283
284 #define tile_8bpp_draw_four_noflip(index, combine_op, alpha_op)               \
285   tile_8bpp_draw_##combine_op(index + 0, mask, 0, alpha_op);                  \
286   tile_8bpp_draw_##combine_op(index + 1, shift_mask, 8, alpha_op);            \
287   tile_8bpp_draw_##combine_op(index + 2, shift_mask, 16, alpha_op);           \
288   tile_8bpp_draw_##combine_op(index + 3, shift, 24, alpha_op)                 \
289
290
291 // Like the above, but draws the half-tile horizontally flipped
292
293 #define tile_8bpp_draw_four_flip(index, combine_op, alpha_op)                 \
294   tile_8bpp_draw_##combine_op(index + 3, mask, 0, alpha_op);                  \
295   tile_8bpp_draw_##combine_op(index + 2, shift_mask, 8, alpha_op);            \
296   tile_8bpp_draw_##combine_op(index + 1, shift_mask, 16, alpha_op);           \
297   tile_8bpp_draw_##combine_op(index + 0, shift, 24, alpha_op)                 \
298
299 #define tile_8bpp_draw_four_base(index, alpha_op, flip_op)                    \
300   tile_8bpp_draw_four_##flip_op(index, base, alpha_op)                        \
301
302
303 // Draw half of a tile in 8bpp mode, for transparent renderer; as an
304 // optimization the entire thing is checked against zero (in transparent
305 // capable renders it is more likely for the pixels to be transparent than
306 // opaque)
307
308 #define tile_8bpp_draw_four_transparent(index, alpha_op, flip_op)             \
309   if(current_pixels != 0)                                                     \
310   {                                                                           \
311     tile_8bpp_draw_four_##flip_op(index, transparent, alpha_op);              \
312   }                                                                           \
313
314 #define tile_8bpp_draw_four_copy(index, alpha_op, flip_op)                    \
315   if(current_pixels != 0)                                                     \
316   {                                                                           \
317     tile_8bpp_draw_four_##flip_op(index, copy, alpha_op);                     \
318   }                                                                           \
319
320 // Helper macro for drawing 8bpp tiles clipped against the edge of the screen
321
322 #define partial_tile_8bpp(combine_op, alpha_op)                               \
323   for(i = 0; i < partial_tile_run; i++)                                       \
324   {                                                                           \
325     tile_8bpp_draw_##combine_op(0, mask, 0, alpha_op);                        \
326     current_pixels >>= 8;                                                     \
327     advance_dest_ptr_##combine_op(1);                                         \
328   }                                                                           \
329
330
// Draws 8bpp tiles clipped against the left side of the screen.
// partial_tile_offset is how many of the tile's leading pixels are clipped
// off; partial_tile_run is how many pixels should be drawn.
334
335 #define partial_tile_right_noflip_8bpp(combine_op, alpha_op)                  \
336   if(partial_tile_offset >= 4)                                                \
337   {                                                                           \
338     current_pixels = *((u32 *)(tile_ptr + 4)) >>                              \
339      ((partial_tile_offset - 4) * 8);                                         \
340     partial_tile_8bpp(combine_op, alpha_op);                                  \
341   }                                                                           \
342   else                                                                        \
343   {                                                                           \
344     partial_tile_run -= 4;                                                    \
345     current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 8);         \
346     partial_tile_8bpp(combine_op, alpha_op);                                  \
347     current_pixels = *((u32 *)(tile_ptr + 4));                                \
348     tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip);                    \
349     advance_dest_ptr_##combine_op(4);                                         \
350   }                                                                           \
351
352
// Draws 8bpp tiles clipped against both the left and right sides of the
// screen, i.e. runs shorter than (8 - partial_tile_offset) pixels.
355
356 #define partial_tile_mid_noflip_8bpp(combine_op, alpha_op)                    \
357   if(partial_tile_offset >= 4)                                                \
358   {                                                                           \
359     current_pixels = *((u32 *)(tile_ptr + 4)) >>                              \
360      ((partial_tile_offset - 4) * 8);                                         \
361     partial_tile_8bpp(combine_op, alpha_op);                                  \
362   }                                                                           \
363   else                                                                        \
364   {                                                                           \
365     current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 8);         \
366     if((partial_tile_offset + partial_tile_run) > 4)                          \
367     {                                                                         \
368       u32 old_run = partial_tile_run;                                         \
369       partial_tile_run = 4 - partial_tile_offset;                             \
370       partial_tile_8bpp(combine_op, alpha_op);                                \
371       partial_tile_run = old_run - partial_tile_run;                          \
372       current_pixels = *((u32 *)(tile_ptr + 4));                              \
373       partial_tile_8bpp(combine_op, alpha_op);                                \
374     }                                                                         \
375     else                                                                      \
376     {                                                                         \
377       partial_tile_8bpp(combine_op, alpha_op);                                \
378     }                                                                         \
379   }                                                                           \
380
381
382 // Draws 8bpp tiles clipped against the right side of the screen,
383 // partial_tile_run indicates how much there is to draw.
384
385 #define partial_tile_left_noflip_8bpp(combine_op, alpha_op)                   \
386   if(partial_tile_run >= 4)                                                   \
387   {                                                                           \
388     current_pixels = *((u32 *)tile_ptr);                                      \
389     tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip);                    \
390     advance_dest_ptr_##combine_op(4);                                         \
391     tile_ptr += 4;                                                            \
392     partial_tile_run -= 4;                                                    \
393   }                                                                           \
394                                                                               \
395   current_pixels = *((u32 *)(tile_ptr));                                      \
396   partial_tile_8bpp(combine_op, alpha_op)                                     \
397
398
399 // Draws a non-clipped (complete) 8bpp tile.
400
401 #define tile_noflip_8bpp(combine_op, alpha_op)                                \
402   current_pixels = *((u32 *)tile_ptr);                                        \
403   tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip);                      \
404   current_pixels = *((u32 *)(tile_ptr + 4));                                  \
405   tile_8bpp_draw_four_##combine_op(4, alpha_op, noflip)                       \
406
407
408 // Like the above versions but draws flipped tiles.
409
410 #define partial_tile_flip_8bpp(combine_op, alpha_op)                          \
411   for(i = 0; i < partial_tile_run; i++)                                       \
412   {                                                                           \
413     tile_8bpp_draw_##combine_op(0, shift, 24, alpha_op);                      \
414     current_pixels <<= 8;                                                     \
415     advance_dest_ptr_##combine_op(1);                                         \
416   }                                                                           \
417
// Draws a horizontally flipped 8bpp tile whose first partial_tile_offset
// pixels (in drawing order) are skipped. Flipped tiles are consumed from the
// top byte of the word downwards, second word (tile_ptr + 4) first, so
// skipping pixels means shifting the word left by 8 bits per pixel.
//
// When the offset is below 4 the skipped pixels lie in the second word, so
// the shift count is partial_tile_offset * 8. (It used to be
// (partial_tile_offset - 4) * 8, a negative/out-of-range shift count, which
// is undefined behavior in C.) After the partial word, the first word is
// drawn in full.

#define partial_tile_right_flip_8bpp(combine_op, alpha_op)                    \
  if(partial_tile_offset >= 4)                                                \
  {                                                                           \
    current_pixels = *((u32 *)tile_ptr) << ((partial_tile_offset - 4) * 8);   \
    partial_tile_flip_8bpp(combine_op, alpha_op);                             \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    partial_tile_run -= 4;                                                    \
    current_pixels = *((u32 *)(tile_ptr + 4)) << (partial_tile_offset * 8);   \
    partial_tile_flip_8bpp(combine_op, alpha_op);                             \
    current_pixels = *((u32 *)tile_ptr);                                      \
    tile_8bpp_draw_four_##combine_op(0, alpha_op, flip);                      \
    advance_dest_ptr_##combine_op(4);                                         \
  }
434
// Draws a run of partial_tile_run pixels from the middle of a horizontally
// flipped 8bpp tile, starting partial_tile_offset pixels in (drawing order).
// Flipped tiles are consumed from the top byte of the word downwards, second
// word (tile_ptr + 4) first.
//
// When the offset is below 4 the run starts in the second word, so that word
// is shifted left by partial_tile_offset * 8. (It used to be shifted by
// (partial_tile_offset - 4) * 8, a negative/out-of-range shift count, which
// is undefined behavior in C.) If the run crosses into the first word it is
// split in two: the pixels left in the second word, then the rest from the
// first word.

#define partial_tile_mid_flip_8bpp(combine_op, alpha_op)                      \
  if(partial_tile_offset >= 4)                                                \
  {                                                                           \
    current_pixels = *((u32 *)tile_ptr) << ((partial_tile_offset - 4) * 8);   \
    partial_tile_flip_8bpp(combine_op, alpha_op);                             \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    current_pixels = *((u32 *)(tile_ptr + 4)) << (partial_tile_offset * 8);   \
                                                                              \
    if((partial_tile_offset + partial_tile_run) > 4)                          \
    {                                                                         \
      u32 old_run = partial_tile_run;                                         \
      partial_tile_run = 4 - partial_tile_offset;                             \
      partial_tile_flip_8bpp(combine_op, alpha_op);                           \
      partial_tile_run = old_run - partial_tile_run;                          \
      current_pixels = *((u32 *)(tile_ptr));                                  \
      partial_tile_flip_8bpp(combine_op, alpha_op);                           \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      partial_tile_flip_8bpp(combine_op, alpha_op);                           \
    }                                                                         \
  }
460
461 #define partial_tile_left_flip_8bpp(combine_op, alpha_op)                     \
462   if(partial_tile_run >= 4)                                                   \
463   {                                                                           \
464     current_pixels = *((u32 *)(tile_ptr + 4));                                \
465     tile_8bpp_draw_four_##combine_op(0, alpha_op, flip);                      \
466     advance_dest_ptr_##combine_op(4);                                         \
467     tile_ptr -= 4;                                                            \
468     partial_tile_run -= 4;                                                    \
469   }                                                                           \
470                                                                               \
471   current_pixels = *((u32 *)(tile_ptr + 4));                                  \
472   partial_tile_flip_8bpp(combine_op, alpha_op)                                \
473
474 #define tile_flip_8bpp(combine_op, alpha_op)                                  \
475   current_pixels = *((u32 *)(tile_ptr + 4));                                  \
476   tile_8bpp_draw_four_##combine_op(0, alpha_op, flip);                        \
477   current_pixels = *((u32 *)tile_ptr);                                        \
478   tile_8bpp_draw_four_##combine_op(4, alpha_op, flip)                         \
479
480
481 // Operations for isolating 4bpp tiles in a 32bit block
482
483 #define tile_4bpp_pixel_op_mask(op_param)                                     \
484   current_pixel = current_pixels & 0x0F                                       \
485
486 #define tile_4bpp_pixel_op_shift_mask(shift)                                  \
487   current_pixel = (current_pixels >> shift) & 0x0F                            \
488
489 #define tile_4bpp_pixel_op_shift(shift)                                       \
490   current_pixel = current_pixels >> shift                                     \
491
492 #define tile_4bpp_pixel_op_none(op_param)                                     \
493
494 // Draws a single 4bpp pixel as base, normal renderer; checks to see if the
495 // pixel is zero because if so the current palette should not be applied.
496 // These ifs can be replaced with a lookup table, may or may not be superior
497 // this way, should be benchmarked. The lookup table would be from 0-255
498 // identity map except for multiples of 16, which would map to 0.
499
// Draws one 4bpp pixel as base for the normal renderer. A non-zero pixel has
// current_palette ORed into it; a zero pixel is left as zero so the palette
// is not applied to it. In both cases the result is then written through
// tile_expand_base_normal, so that call is made once after the check rather
// than being repeated in each branch.

#define tile_4bpp_draw_base_normal(index)                                     \
  if(current_pixel)                                                           \
  {                                                                           \
    current_pixel |= current_palette;                                         \
  }                                                                           \
  tile_expand_base_normal(index)
510
511
512 #define tile_4bpp_draw_base_alpha(index)                                      \
513   if(current_pixel)                                                           \
514   {                                                                           \
515     current_pixel |= current_palette;                                         \
516     tile_expand_base_alpha(index);                                            \
517   }                                                                           \
518   else                                                                        \
519   {                                                                           \
520     tile_expand_base_bg(index);                                               \
521   }                                                                           \
522
523 #define tile_4bpp_draw_base_color16(index)                                    \
524   tile_4bpp_draw_base_alpha(index)                                            \
525
526 #define tile_4bpp_draw_base_color32(index)                                    \
527   tile_4bpp_draw_base_alpha(index)                                            \
528
529
530 #define tile_4bpp_draw_base(index, op, op_param, alpha_op)                    \
531   tile_4bpp_pixel_op_##op(op_param);                                          \
532   tile_4bpp_draw_base_##alpha_op(index)                                       \
533
534
535 // Draws a single 4bpp pixel as layered, if not transparent.
536
537 #define tile_4bpp_draw_transparent(index, op, op_param, alpha_op)             \
538   tile_4bpp_pixel_op_##op(op_param);                                          \
539   if(current_pixel)                                                           \
540   {                                                                           \
541     current_pixel |= current_palette;                                         \
542     tile_expand_transparent_##alpha_op(index);                                \
543   }                                                                           \
544
545 #define tile_4bpp_draw_copy(index, op, op_param, alpha_op)                    \
546   tile_4bpp_pixel_op_##op(op_param);                                          \
547   if(current_pixel)                                                           \
548   {                                                                           \
549     current_pixel |= current_palette;                                         \
550     tile_expand_copy(index);                                                  \
551   }                                                                           \
552
553
554 // Fills the eight destination pixels dest_ptr[0] through dest_ptr[7] with
555 // `value`; shared by the alpha and normal base-zero helpers below.
// `value` is expanded once per store, so pass a plain variable or constant.
556
557 #define tile_4bpp_draw_eight_base_zero(value)                                 \
558   dest_ptr[0] = value;                                                        \
559   dest_ptr[1] = value;                                                        \
560   dest_ptr[2] = value;                                                        \
561   dest_ptr[3] = value;                                                        \
562   dest_ptr[4] = value;                                                        \
563   dest_ptr[5] = value;                                                        \
564   dest_ptr[6] = value;                                                        \
565   dest_ptr[7] = value                                                         \
566
567
568 // Draws eight background pixels for the alpha renderer, basically color zero
569 // with the background flag high.
// Concretely, all eight destination pixels are set to bg_combine, which must
// be in scope where the macro expands.
570
571 #define tile_4bpp_draw_eight_base_zero_alpha()                                \
572   tile_4bpp_draw_eight_base_zero(bg_combine)                                  \
573
// The color16 renderer uses the same fill as the alpha renderer.
574 #define tile_4bpp_draw_eight_base_zero_color16()                              \
575   tile_4bpp_draw_eight_base_zero_alpha()                                      \
576
// The color32 renderer uses the same fill as the alpha renderer.
577 #define tile_4bpp_draw_eight_base_zero_color32()                              \
578   tile_4bpp_draw_eight_base_zero_alpha()                                      \
579
580
581 // Draws eight background pixels for the normal renderer. When
582 // RENDER_COLOR16_NORMAL is defined they are written as 0; otherwise they are
// written as palette[0]. Either way current_pixel is overwritten with the
// value that was stored.
583
584 #ifdef RENDER_COLOR16_NORMAL
585
586 #define tile_4bpp_draw_eight_base_zero_normal()                               \
587   current_pixel = 0;                                                          \
588   tile_4bpp_draw_eight_base_zero(current_pixel)                               \
589
590 #else
591
592 #define tile_4bpp_draw_eight_base_zero_normal()                               \
593   current_pixel = palette[0];                                                 \
594   tile_4bpp_draw_eight_base_zero(current_pixel)                               \
595
596 #endif
597
598
599 // Draws eight 4bpp pixels.
// Destination index k is drawn with op_param 4 * k: index 0 uses the `mask`
// pixel op, indices 1 to 6 use `shift_mask`, and index 7 uses `shift`.
// Presumably op_param is the bit position of the pixel's nibble within
// current_pixels; the pixel ops themselves are defined outside this section.
600
601 #define tile_4bpp_draw_eight_noflip(combine_op, alpha_op)                     \
602   tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op);                          \
603   tile_4bpp_draw_##combine_op(1, shift_mask, 4, alpha_op);                    \
604   tile_4bpp_draw_##combine_op(2, shift_mask, 8, alpha_op);                    \
605   tile_4bpp_draw_##combine_op(3, shift_mask, 12, alpha_op);                   \
606   tile_4bpp_draw_##combine_op(4, shift_mask, 16, alpha_op);                   \
607   tile_4bpp_draw_##combine_op(5, shift_mask, 20, alpha_op);                   \
608   tile_4bpp_draw_##combine_op(6, shift_mask, 24, alpha_op);                   \
609   tile_4bpp_draw_##combine_op(7, shift, 28, alpha_op)                         \
610
611
612 // Draws eight 4bpp pixels in reverse order (for hflip).
// The op / op_param sequence is the same as in the noflip variant, but the
// destination indices run from 7 down to 0, so op_param 0 lands at index 7
// and op_param 28 lands at index 0.
613
614 #define tile_4bpp_draw_eight_flip(combine_op, alpha_op)                       \
615   tile_4bpp_draw_##combine_op(7, mask, 0, alpha_op);                          \
616   tile_4bpp_draw_##combine_op(6, shift_mask, 4, alpha_op);                    \
617   tile_4bpp_draw_##combine_op(5, shift_mask, 8, alpha_op);                    \
618   tile_4bpp_draw_##combine_op(4, shift_mask, 12, alpha_op);                   \
619   tile_4bpp_draw_##combine_op(3, shift_mask, 16, alpha_op);                   \
620   tile_4bpp_draw_##combine_op(2, shift_mask, 20, alpha_op);                   \
621   tile_4bpp_draw_##combine_op(1, shift_mask, 24, alpha_op);                   \
622   tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op)                         \
623
624
625 // Draws eight 4bpp pixels in base mode, checks if all are zero, if so draws
626 // the appropriate background pixels.
// A non-zero current_pixels row goes through tile_4bpp_draw_eight_<flip_op>
// with the base combine op; an all-zero row is filled by
// tile_4bpp_draw_eight_base_zero_<alpha_op>().
627
628 #define tile_4bpp_draw_eight_base(alpha_op, flip_op)                          \
629   if(current_pixels != 0)                                                     \
630   {                                                                           \
631     tile_4bpp_draw_eight_##flip_op(base, alpha_op);                           \
632   }                                                                           \
633   else                                                                        \
634   {                                                                           \
635     tile_4bpp_draw_eight_base_zero_##alpha_op();                              \
636   }                                                                           \
637
638
639 // Draws eight 4bpp pixels in transparent (layered) mode, checks if all are
640 // zero and if so draws nothing.
// flip_op (flip or noflip) picks the order in which the eight pixels are
// placed.
641
642 #define tile_4bpp_draw_eight_transparent(alpha_op, flip_op)                   \
643   if(current_pixels != 0)                                                     \
644   {                                                                           \
645     tile_4bpp_draw_eight_##flip_op(transparent, alpha_op);                    \
646   }                                                                           \
647
648
// Draws eight 4bpp pixels in copy mode; like the transparent variant, an
// all-zero current_pixels row draws nothing.
649 #define tile_4bpp_draw_eight_copy(alpha_op, flip_op)                          \
650   if(current_pixels != 0)                                                     \
651   {                                                                           \
652     tile_4bpp_draw_eight_##flip_op(copy, alpha_op);                           \
653   }                                                                           \
654
655 // Gets the current tile in 4bpp mode, also getting the current palette and
656 // the pixel block.
// current_tile is the map entry at map_ptr. current_palette is the entry's
// value shifted right by 12, then left by 4 (i.e. multiplied by 16).
// tile_ptr points (current_tile & 0x3FF) * 32 bytes past tile_base, 32 being
// the size of one 4bpp tile.
657
658 #define get_tile_4bpp()                                                       \
659   current_tile = *map_ptr;                                                    \
660   current_palette = (current_tile >> 12) << 4;                                \
661   tile_ptr = tile_base + ((current_tile & 0x3FF) * 32);                       \
662
663
664 // Helper macro for drawing clipped 4bpp tiles.
// Draws partial_tile_run pixels one at a time: each iteration draws through
// the `mask` op at destination index 0, shifts current_pixels right by one
// nibble, and advances the destination pointer by one pixel. Uses the loop
// counter i of the enclosing scope.
665
666 #define partial_tile_4bpp(combine_op, alpha_op)                               \
667   for(i = 0; i < partial_tile_run; i++)                                       \
668   {                                                                           \
669     tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op);                        \
670     current_pixels >>= 4;                                                     \
671     advance_dest_ptr_##combine_op(1);                                         \
672   }                                                                           \
673
674
675 // Draws a 4bpp tile clipped against the left edge of the screen.
676 // partial_tile_offset is how far in it's clipped, partial_tile_run is
677 // how many to draw.
// The 32-bit row is shifted right by partial_tile_offset nibbles so that the
// first pixel to draw sits in the lowest nibble.
// NOTE(review): the row is read through a u32 cast of tile_ptr; this assumes
// tile_ptr is suitably aligned for a 32-bit load - confirm.
678
679 #define partial_tile_right_noflip_4bpp(combine_op, alpha_op)                  \
680   current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 4);           \
681   partial_tile_4bpp(combine_op, alpha_op)                                     \
682
683
684 // Draws a 4bpp tile clipped against both edges of the screen, same as right.
// The caller limits the number of pixels through partial_tile_run.
685
686 #define partial_tile_mid_noflip_4bpp(combine_op, alpha_op)                    \
687   partial_tile_right_noflip_4bpp(combine_op, alpha_op)                        \
688
689
690 // Draws a 4bpp tile clipped against the right edge of the screen.
691 // partial_tile_run is how many to draw.
// The row is used unshifted, so drawing starts at the tile's first pixel;
// partial_tile_offset is not consulted here.
692
693 #define partial_tile_left_noflip_4bpp(combine_op, alpha_op)                   \
694   current_pixels = *((u32 *)tile_ptr);                                        \
695   partial_tile_4bpp(combine_op, alpha_op)                                     \
696
697
698 // Draws a complete 4bpp tile row (not clipped)
// Loads the 32-bit row at tile_ptr into current_pixels and draws all eight
// pixels through tile_4bpp_draw_eight_<combine_op> in noflip order.
699 #define tile_noflip_4bpp(combine_op, alpha_op)                                \
700   current_pixels = *((u32 *)tile_ptr);                                        \
701   tile_4bpp_draw_eight_##combine_op(alpha_op, noflip)                         \
702
703
704 // Like the above, but draws flipped tiles.
// The flipped helper draws partial_tile_run pixels through the `shift` op with
// op_param 28 and shifts current_pixels left by one nibble per pixel, so
// pixels are consumed from the top of the 32-bit row downwards.
705
706 #define partial_tile_flip_4bpp(combine_op, alpha_op)                          \
707   for(i = 0; i < partial_tile_run; i++)                                       \
708   {                                                                           \
709     tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op);                      \
710     current_pixels <<= 4;                                                     \
711     advance_dest_ptr_##combine_op(1);                                         \
712   }                                                                           \
713
// Flipped 4bpp tile clipped against the left edge of the screen: the row is
// shifted left by partial_tile_offset nibbles before partial_tile_run pixels
// are drawn.
714 #define partial_tile_right_flip_4bpp(combine_op, alpha_op)                    \
715   current_pixels = *((u32 *)tile_ptr) << (partial_tile_offset * 4);           \
716   partial_tile_flip_4bpp(combine_op, alpha_op)                                \
717
// Flipped 4bpp tile clipped against both edges; same as the right variant.
718 #define partial_tile_mid_flip_4bpp(combine_op, alpha_op)                      \
719   partial_tile_right_flip_4bpp(combine_op, alpha_op)                          \
720
// Flipped 4bpp tile clipped against the right edge of the screen: the row is
// used unshifted and partial_tile_run pixels are drawn.
721 #define partial_tile_left_flip_4bpp(combine_op, alpha_op)                     \
722   current_pixels = *((u32 *)tile_ptr);                                        \
723   partial_tile_flip_4bpp(combine_op, alpha_op)                                \
724
// Draws a complete flipped 4bpp tile row (not clipped): loads the 32-bit row
// at tile_ptr and draws all eight pixels in flip order.
725 #define tile_flip_4bpp(combine_op, alpha_op)                                  \
726   current_pixels = *((u32 *)tile_ptr);                                        \
727   tile_4bpp_draw_eight_##combine_op(alpha_op, flip)                           \
728
729
730 // Draws a single (partial or complete) tile from the tilemap, flipping
731 // as necessary.
// tile_type is the macro family prefix to dispatch to (tile,
// partial_tile_right, partial_tile_mid or partial_tile_left). After
// get_tile_<color_depth>() has loaded the map entry:
//  - bit 0x800 of current_tile adds vertical_pixel_flip to tile_ptr;
//  - bit 0x400 of current_tile selects the _flip_ drawer instead of the
//    _noflip_ one.
// map_ptr is not advanced here.
732
733 #define single_tile_map(tile_type, combine_op, color_depth, alpha_op)         \
734   get_tile_##color_depth();                                                   \
735   if(current_tile & 0x800)                                                    \
736     tile_ptr += vertical_pixel_flip;                                          \
737                                                                               \
738   if(current_tile & 0x400)                                                    \
739   {                                                                           \
740     tile_type##_flip_##color_depth(combine_op, alpha_op);                     \
741   }                                                                           \
742   else                                                                        \
743   {                                                                           \
744     tile_type##_noflip_##color_depth(combine_op, alpha_op);                   \
745   }                                                                           \
746
747
748 // Draws multiple sequential tiles from the tilemap, hflips and vflips as
749 // necessary.
// Runs tile_run times; each iteration draws one complete tile, advances the
// destination pointer by eight pixels and steps map_ptr to the next entry.
// Uses the loop counter i of the enclosing scope.
750
751 #define multiple_tile_map(combine_op, color_depth, alpha_op)                  \
752   for(i = 0; i < tile_run; i++)                                               \
753   {                                                                           \
754     single_tile_map(tile, combine_op, color_depth, alpha_op);                 \
755     advance_dest_ptr_##combine_op(8);                                         \
756     map_ptr++;                                                                \
757   }                                                                           \
758
759 // Draws a partial tile from a tilemap clipped against the left edge of the
760 // screen.
// map_ptr is stepped to the next entry afterwards, since more tiles follow.
761
762 #define partial_tile_right_map(combine_op, color_depth, alpha_op)             \
763   single_tile_map(partial_tile_right, combine_op, color_depth, alpha_op);     \
764   map_ptr++                                                                   \
765
766 // Draws a partial tile from a tilemap clipped against both edges of the
767 // screen.
// map_ptr is left unchanged.
768
769 #define partial_tile_mid_map(combine_op, color_depth, alpha_op)               \
770   single_tile_map(partial_tile_mid, combine_op, color_depth, alpha_op)        \
771
772 // Draws a partial tile from a tilemap clipped against the right edge of the
773 // screen.
// map_ptr is left unchanged.
774
775 #define partial_tile_left_map(combine_op, color_depth, alpha_op)              \
776   single_tile_map(partial_tile_left, combine_op, color_depth, alpha_op)       \
777
778
779 // Advances a non-flipped 4bpp obj to the next tile.
// (Moves tile_ptr forward by the 32 bytes of one 4bpp tile.)
780
781 #define obj_advance_noflip_4bpp()                                             \
782   tile_ptr += 32                                                              \
783
784
785 // Advances a non-flipped 8bpp obj to the next tile.
// (Moves tile_ptr forward by the 64 bytes of one 8bpp tile.)
786
787 #define obj_advance_noflip_8bpp()                                             \
788   tile_ptr += 64                                                              \
789
790
791 // Advances a flipped 4bpp obj to the next tile.
// (Moves tile_ptr backward by one 4bpp tile, as flipped objs are walked in
// reverse tile order.)
792
793 #define obj_advance_flip_4bpp()                                               \
794   tile_ptr -= 32                                                              \
795
796
797 // Advances a flipped 8bpp obj to the next tile.
// (Moves tile_ptr backward by one 8bpp tile.)
798
799 #define obj_advance_flip_8bpp()                                               \
800   tile_ptr -= 64                                                              \
801
802
803
804 // Draws multiple sequential tiles from an obj, flip_op determines if it should
805 // be flipped or not (set to flip or noflip)
// Runs tile_run times; each iteration draws one complete tile row, moves
// tile_ptr to the neighbouring tile via obj_advance_<flip_op>_<color_depth>
// and advances the destination pointer by eight pixels.
806
807 #define multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op)         \
808   for(i = 0; i < tile_run; i++)                                               \
809   {                                                                           \
810     tile_##flip_op##_##color_depth(combine_op, alpha_op);                     \
811     obj_advance_##flip_op##_##color_depth();                                  \
812     advance_dest_ptr_##combine_op(8);                                         \
813   }                                                                           \
814
815
816 // Draws an obj's tile clipped against the left side of the screen
// and then moves tile_ptr on to the obj's next tile.
817
818 #define partial_tile_right_obj(combine_op, color_depth, alpha_op, flip_op)    \
819   partial_tile_right_##flip_op##_##color_depth(combine_op, alpha_op);         \
820   obj_advance_##flip_op##_##color_depth()                                     \
821
822 // Draws an obj's tile clipped against both sides of the screen
// (tile_ptr is not advanced afterwards).
823
824 #define partial_tile_mid_obj(combine_op, color_depth, alpha_op, flip_op)      \
825   partial_tile_mid_##flip_op##_##color_depth(combine_op, alpha_op)            \
826
827 // Draws an obj's tile clipped against the right side of the screen
// (tile_ptr is not advanced afterwards).
828
829 #define partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op)     \
830   partial_tile_left_##flip_op##_##color_depth(combine_op, alpha_op)           \
831
832
833 // Extra variables specific for 8bpp/4bpp tile renderers.
// The 8bpp renderer needs none; the 4bpp renderer declares current_palette,
// which get_tile_4bpp fills in and the 4bpp pixel drawers OR into each pixel.
834
835 #define tile_extra_variables_8bpp()                                           \
836
837 #define tile_extra_variables_4bpp()                                           \
838   u32 current_palette                                                         \
839
840
841 // Byte lengths of complete tiles and tile rows in 4bpp and 8bpp.
// tile_width_* is the length of one row and tile_size_* the length of a whole
// tile (eight rows).
842
843 #define tile_width_4bpp 4
844 #define tile_size_4bpp 32
845 #define tile_width_8bpp 8
846 #define tile_size_8bpp 64
847
848
849 // Render a single scanline of text tiles
// Expands inside a render_scanline_text_* function body and relies on that
// function's locals (vertical_offset, horizontal_offset, bg_control, end,
// map_ptr, second_ptr, partial_tile_run, ...).
//  - vertical_pixel_offset is the byte offset of the wanted row inside a
//    tile; it is folded into tile_base. vertical_pixel_flip is the delta that
//    single_tile_map adds to tile_ptr for entries with bit 0x800 set, which
//    moves from row r to row 7 - r.
//  - pixel_run is the number of pixels left before horizontal_offset reaches
//    its next multiple of 256. If the span (end pixels) fits within it, the
//    span is drawn as an optional leading partial tile, a run of whole tiles
//    and an optional trailing partial tile. Otherwise the part up to the
//    boundary is drawn, map_ptr switches to second_ptr, and the remainder is
//    drawn from there.
//  - The `return` in the clipped-on-both-sides case returns from the
//    enclosing function.
//  - In the boundary-crossing branch tile_run is computed from
//    partial_tile_run even when no leading partial tile was drawn, so
//    partial_tile_run must already be 0 on entry in that case.
// NOTE(review): map_base is adjusted below but is not read again within this
// macro, and render_scanline_text_builder derives map_ptr and second_ptr
// from map_base before the macro expands - confirm whether the adjustment is
// still needed.
850
851 #define tile_render(color_depth, combine_op, alpha_op)                        \
852 {                                                                             \
853   u32 vertical_pixel_offset = (vertical_offset % 8) *                         \
854    tile_width_##color_depth;                                                  \
855   u32 vertical_pixel_flip =                                                   \
856    ((tile_size_##color_depth - tile_width_##color_depth) -                    \
857    vertical_pixel_offset) - vertical_pixel_offset;                            \
858   tile_extra_variables_##color_depth();                                       \
859   u8 *tile_base = vram + (((bg_control >> 2) & 0x03) * (1024 * 16)) +         \
860    vertical_pixel_offset;                                                     \
861   u32 pixel_run = 256 - (horizontal_offset % 256);                            \
862   u32 current_tile;                                                           \
863                                                                               \
864   map_base += ((vertical_offset % 256) / 8) * 32;                             \
865   partial_tile_offset = (horizontal_offset % 8);                              \
866                                                                               \
867   if(pixel_run >= end)                                                        \
868   {                                                                           \
869     if(partial_tile_offset)                                                   \
870     {                                                                         \
871       partial_tile_run = 8 - partial_tile_offset;                             \
872       if(end < partial_tile_run)                                              \
873       {                                                                       \
874         partial_tile_run = end;                                               \
875         partial_tile_mid_map(combine_op, color_depth, alpha_op);              \
876         return;                                                               \
877       }                                                                       \
878       else                                                                    \
879       {                                                                       \
880         end -= partial_tile_run;                                              \
881         partial_tile_right_map(combine_op, color_depth, alpha_op);            \
882       }                                                                       \
883     }                                                                         \
884                                                                               \
885     tile_run = end / 8;                                                       \
886     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
887                                                                               \
888     partial_tile_run = end % 8;                                               \
889                                                                               \
890     if(partial_tile_run)                                                      \
891     {                                                                         \
892       partial_tile_left_map(combine_op, color_depth, alpha_op);               \
893     }                                                                         \
894   }                                                                           \
895   else                                                                        \
896   {                                                                           \
897     if(partial_tile_offset)                                                   \
898     {                                                                         \
899       partial_tile_run = 8 - partial_tile_offset;                             \
900       partial_tile_right_map(combine_op, color_depth, alpha_op);              \
901     }                                                                         \
902                                                                               \
903     tile_run = (pixel_run - partial_tile_run) / 8;                            \
904     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
905     map_ptr = second_ptr;                                                     \
906     end -= pixel_run;                                                         \
907     tile_run = end / 8;                                                       \
908     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
909                                                                               \
910     partial_tile_run = end % 8;                                               \
911     if(partial_tile_run)                                                      \
912     {                                                                         \
913       partial_tile_left_map(combine_op, color_depth, alpha_op);               \
914     }                                                                         \
915   }                                                                           \
916 }                                                                             \
917
// Destination pixel type for each renderer flavour. The scanline builders
// cast their void *scanline argument to a pointer to
// render_scanline_dest_<alpha_op>.
918 #define render_scanline_dest_normal         u16
919 #define render_scanline_dest_alpha          u32
920 #define render_scanline_dest_alpha_obj      u32
921 #define render_scanline_dest_color16        u16
922 #define render_scanline_dest_color32        u32
923 #define render_scanline_dest_partial_alpha  u32
924 #define render_scanline_dest_copy_tile      u16
925 #define render_scanline_dest_copy_bitmap    u16
926
927
928 // If rendering a scanline that is not a target A then there's no point in
929 // keeping what's underneath it because it can't blend with it.
// When bit 0x200 of pixel_combine is clear, the work is delegated to
// render_scanline_<bg_type>_<combine_op>_color32 with the same layer, start,
// end and scanline, and the enclosing function returns immediately.
930
931 #define render_scanline_skip_alpha(bg_type, combine_op)                       \
932   if((pixel_combine & 0x00000200) == 0)                                       \
933   {                                                                           \
934     render_scanline_##bg_type##_##combine_op##_color32(layer,                 \
935      start, end, scanline);                                                   \
936     return;                                                                   \
937   }                                                                           \
938
939
// Extra locals for the base/normal renderer. With RENDER_COLOR16_NORMAL the
// renderer gets a constant zero pixel_combine; otherwise it gets `palette`,
// pointing at palette_ram_converted. bg_type is unused in both variants.
940 #ifdef RENDER_COLOR16_NORMAL
941
942 #define render_scanline_extra_variables_base_normal(bg_type)                  \
943   const u32 pixel_combine = 0                                                 \
944
945 #else
946
947 #define render_scanline_extra_variables_base_normal(bg_type)                  \
948   u16 *palette = palette_ram_converted                                        \
949
950 #endif
951
952
// Extra locals for the base/alpha renderer: bg_combine is the combine mask of
// index 5 (used for the backdrop pixels by the base-zero helpers), and
// pixel_combine carries this layer's combine mask in its low bits with
// bg_combine in the upper 16 bits. The skip_alpha check then falls back to
// the color32 renderer when bit 0x200 of pixel_combine is clear.
953 #define render_scanline_extra_variables_base_alpha(bg_type)                   \
954   u32 bg_combine = color_combine_mask(5);                                     \
955   u32 pixel_combine = color_combine_mask(layer) | (bg_combine << 16);         \
956   render_scanline_skip_alpha(bg_type, base)                                   \
957
// Extra locals shared by the base color16 and color32 renderers: the same two
// masks, but pixel_combine does not include bg_combine and there is no
// fallback check.
958 #define render_scanline_extra_variables_base_color()                          \
959   u32 bg_combine = color_combine_mask(5);                                     \
960   u32 pixel_combine = color_combine_mask(layer)                               \
961
962 #define render_scanline_extra_variables_base_color16(bg_type)                 \
963   render_scanline_extra_variables_base_color()                                \
964
965 #define render_scanline_extra_variables_base_color32(bg_type)                 \
966   render_scanline_extra_variables_base_color()                                \
967
968
// Extra locals for the transparent/normal renderer: identical to the
// base/normal set.
969 #define render_scanline_extra_variables_transparent_normal(bg_type)           \
970   render_scanline_extra_variables_base_normal(bg_type)                        \
971
// Extra locals for the transparent/alpha renderer: this layer's combine mask,
// followed by the skip_alpha fallback to the transparent color32 renderer.
// No bg_combine is declared, as transparent renderers draw no backdrop.
972 #define render_scanline_extra_variables_transparent_alpha(bg_type)            \
973   u32 pixel_combine = color_combine_mask(layer);                              \
974   render_scanline_skip_alpha(bg_type, transparent)                            \
975
// Extra locals shared by the transparent color16 and color32 renderers.
976 #define render_scanline_extra_variables_transparent_color()                   \
977   u32 pixel_combine = color_combine_mask(layer)                               \
978
979 #define render_scanline_extra_variables_transparent_color16(bg_type)          \
980   render_scanline_extra_variables_transparent_color()                         \
981
982 #define render_scanline_extra_variables_transparent_color32(bg_type)          \
983   render_scanline_extra_variables_transparent_color()                         \
984
985
986
987
988
989 // Map widths and heights
// Both tables are indexed by the two-bit map size field that the text
// renderers take from bits 14-15 of the background control register; the
// values are in pixels.
990
991 u32 map_widths[] = { 256, 512, 256, 512 };
992 u32 map_heights[] = { 256, 256, 512, 512 };
993
994 // Build text scanline rendering functions.
// Each expansion defines
//   void render_scanline_text_<combine_op>_<alpha_op>(u32 layer, u32 start,
//    u32 end, void *scanline)
// which draws pixels start up to (not including) end of the current line of
// text background `layer` into scanline, treated as an array of
// render_scanline_dest_<alpha_op>.
//  - bg_control bits 14-15 select the map size (indexing map_widths and
//    map_heights), bits 8-12 select the map base scaled by 1024 * 2, and
//    bit 0x80 selects the 8bpp tile renderer instead of the 4bpp one.
//  - The horizontal offset is the layer's HOFS register plus start and the
//    vertical offset is REG_VCOUNT plus the layer's VOFS register, both
//    wrapped to 512.
//  - For maps with map_size & 0x02 set, lines at vertical offset 256 and up
//    are found (map_width / 8) * 32 entries further into the map.
//  - map_ptr addresses the first map entry to draw and second_ptr the entry
//    tile_render continues from after crossing a 256 pixel boundary; for
//    maps with map_size & 0x01 set that is the other block of 32 * 32
//    entries, otherwise it is the start of the same row.
//  - end is converted into a pixel count (end -= start) before drawing.
//  - The extra-variables macro expanded first may return early (see
//    render_scanline_skip_alpha).
// NOTE(review): map_height is assigned but not read anywhere in this body.
995
996 #define render_scanline_text_builder(combine_op, alpha_op)                    \
997 void render_scanline_text_##combine_op##_##alpha_op(u32 layer,                \
998  u32 start, u32 end, void *scanline)                                          \
999 {                                                                             \
1000   render_scanline_extra_variables_##combine_op##_##alpha_op(text);            \
1001   u32 bg_control = io_registers[REG_BG0CNT + layer];                          \
1002   u32 map_size = (bg_control >> 14) & 0x03;                                   \
1003   u32 map_width = map_widths[map_size];                                       \
1004   u32 map_height = map_heights[map_size];                                     \
1005   u32 horizontal_offset =                                                     \
1006    (io_registers[REG_BG0HOFS + (layer * 2)] + start) % 512;                   \
1007   u32 vertical_offset = (io_registers[REG_VCOUNT] +                           \
1008    io_registers[REG_BG0VOFS + (layer * 2)]) % 512;                            \
1009   u32 current_pixel;                                                          \
1010   u32 current_pixels;                                                         \
1011   u32 partial_tile_run = 0;                                                   \
1012   u32 partial_tile_offset;                                                    \
1013   u32 tile_run;                                                               \
1014   u32 i;                                                                      \
1015   render_scanline_dest_##alpha_op *dest_ptr =                                 \
1016    ((render_scanline_dest_##alpha_op *)scanline) + start;                     \
1017                                                                               \
1018   u16 *map_base = (u16 *)(vram + ((bg_control >> 8) & 0x1F) * (1024 * 2));    \
1019   u16 *map_ptr, *second_ptr;                                                  \
1020   u8 *tile_ptr;                                                               \
1021                                                                               \
1022   end -= start;                                                               \
1023                                                                               \
1024   if((map_size & 0x02) && (vertical_offset >= 256))                           \
1025   {                                                                           \
1026     map_base += ((map_width / 8) * 32) +                                      \
1027      (((vertical_offset - 256) / 8) * 32);                                    \
1028   }                                                                           \
1029   else                                                                        \
1030   {                                                                           \
1031     map_base += (((vertical_offset % 256) / 8) * 32);                         \
1032   }                                                                           \
1033                                                                               \
1034   if(map_size & 0x01)                                                         \
1035   {                                                                           \
1036     if(horizontal_offset >= 256)                                              \
1037     {                                                                         \
1038       horizontal_offset -= 256;                                               \
1039       map_ptr = map_base + (32 * 32) + (horizontal_offset / 8);               \
1040       second_ptr = map_base;                                                  \
1041     }                                                                         \
1042     else                                                                      \
1043     {                                                                         \
1044       map_ptr = map_base + (horizontal_offset / 8);                           \
1045       second_ptr = map_base + (32 * 32);                                      \
1046     }                                                                         \
1047   }                                                                           \
1048   else                                                                        \
1049   {                                                                           \
1050     horizontal_offset %= 256;                                                 \
1051     map_ptr = map_base + (horizontal_offset / 8);                             \
1052     second_ptr = map_base;                                                    \
1053   }                                                                           \
1054                                                                               \
1055   if(bg_control & 0x80)                                                       \
1056   {                                                                           \
1057     tile_render(8bpp, combine_op, alpha_op);                                  \
1058   }                                                                           \
1059   else                                                                        \
1060   {                                                                           \
1061     tile_render(4bpp, combine_op, alpha_op);                                  \
1062   }                                                                           \
1063 }                                                                             \
1064
// Instantiate the text scanline renderers: base and transparent variants for
// each of the normal, color16, color32 and alpha flavours. The color32
// variants are also the fallback targets of render_scanline_skip_alpha and
// are defined before the alpha variants that call them.
1065 render_scanline_text_builder(base, normal);
1066 render_scanline_text_builder(transparent, normal);
1067 render_scanline_text_builder(base, color16);
1068 render_scanline_text_builder(transparent, color16);
1069 render_scanline_text_builder(base, color32);
1070 render_scanline_text_builder(transparent, color32);
1071 render_scanline_text_builder(base, alpha);
1072 render_scanline_text_builder(transparent, alpha);
1073
1074
// Two-entry x and y reference values for the affine renderers.
// NOTE(review): presumably one entry per affine-capable background; the code
// that reads and writes these arrays is outside this section - confirm.
1075 s32 affine_reference_x[2];
1076 s32 affine_reference_y[2];
1077
// Loads the backdrop pixel into current_pixel for the normal renderer:
// entry 0 of palette_ram_converted.
1078 #define affine_render_bg_pixel_normal()                                       \
1079   current_pixel = palette_ram_converted[0]                                    \
1080
// Loads the backdrop pixel for the alpha renderer: bg_combine.
1081 #define affine_render_bg_pixel_alpha()                                        \
1082   current_pixel = bg_combine                                                  \
1083
// The color16 and color32 renderers use the same backdrop pixel as alpha.
1084 #define affine_render_bg_pixel_color16()                                      \
1085   affine_render_bg_pixel_alpha()                                              \
1086
1087 #define affine_render_bg_pixel_color32()                                      \
1088   affine_render_bg_pixel_alpha()                                              \
1089
// Base combine mode: dispatches to the backdrop pixel loader for alpha_op.
1090 #define affine_render_bg_pixel_base(alpha_op)                                 \
1091   affine_render_bg_pixel_##alpha_op()                                         \
1092
// Transparent and copy combine modes have no backdrop pixel, so these expand
// to nothing.
1093 #define affine_render_bg_pixel_transparent(alpha_op)                          \
1094
1095 #define affine_render_bg_pixel_copy(alpha_op)                                 \
1096
// Writes the previously loaded backdrop pixel (current_pixel) to the current
// destination position; only the base combine mode does so.
1097 #define affine_render_bg_base(alpha_op)                                       \
1098   dest_ptr[0] = current_pixel
1099
// Transparent and copy combine modes leave the destination untouched.
1100 #define affine_render_bg_transparent(alpha_op)                                \
1101
1102 #define affine_render_bg_copy(alpha_op)                                       \
1103
// affine_render_bg_remainder_*: finish a span once the source has left the
// map. Continues from the current value of i up to end.

// Base layers reload the fill value for the alpha op, then write it to every
// remaining destination pixel.
1104 #define affine_render_bg_remainder_base(alpha_op)                             \
1105   affine_render_bg_pixel_##alpha_op();                                        \
1106   for(; i < end; i++)                                                         \
1107   {                                                                           \
1108     affine_render_bg_base(alpha_op);                                          \
1109     advance_dest_ptr_base(1);                                                 \
1110   }                                                                           \
1111
// Transparent layers have nothing left to draw: expands to nothing.
1112 #define affine_render_bg_remainder_transparent(alpha_op)                      \
1113
// Copy layers have nothing left to draw: expands to nothing.
1114 #define affine_render_bg_remainder_copy(alpha_op)                             \
1115
// Step to the next output pixel: advance the source position by (dx, dy) and
// move the destination pointer by one element using the combine op's
// advance_dest_ptr_* macro (defined elsewhere in the file).
1116 #define affine_render_next(combine_op)                                        \
1117   source_x += dx;                                                             \
1118   source_y += dy;                                                             \
1119   advance_dest_ptr_##combine_op(1)                                            \
1120
// One-time setup for a span whose pixel_y is constant: move tile_base to row
// (pixel_y % 8) inside each tile (8 bytes per row) and move map_base to map
// row (pixel_y / 8), where a map row holds (1 << map_pitch) entries.
// This modifies tile_base and map_base in place.
1121 #define affine_render_scale_offset()                                          \
1122   tile_base += ((pixel_y % 8) * 8);                                           \
1123   map_base += (pixel_y / 8) << map_pitch                                      \
1124
// Draw one pixel of an affine layer whose source row is fixed for the span,
// then step to the next pixel via affine_render_next.
//
// affine_render_scale_offset must already have advanced tile_base to the row
// inside each 8x8 tile and map_base to the current map row, so only pixel_x
// is needed to locate the pixel.
//
// The tile pointer is re-derived only when pixel_x moves into a different map
// entry; last_map_offset records which entry tile_ptr currently belongs to.
// Expects map_offset, last_map_offset, tile_ptr, tile_base, map_base, pixel_x
// and current_pixel in the expanding scope.
#define affine_render_scale_pixel(combine_op, alpha_op)                       \
  map_offset = (pixel_x / 8);                                                 \
  if(map_offset != last_map_offset)                                           \
  {                                                                           \
    /* Each map entry selects a 64 byte (8x8, one byte per pixel) tile. */    \
    tile_ptr = tile_base + (map_base[map_offset] * 64);                       \
    last_map_offset = map_offset;                                             \
  }                                                                           \
  current_pixel = tile_ptr[(pixel_x % 8)];                                    \
  tile_8bpp_draw_##combine_op(0, none, 0, alpha_op);                          \
  affine_render_next(combine_op)
// Render an affine layer span without wraparound when pixel_y is constant
// across the span (the builder selects this when dy == 0 and bit 13 of
// bg_control is clear). Three phases share the local counter i:
//   1. while pixel_x lies outside [0, width_height): emit the fill pixel
//      (a no-op for transparent/copy) and step;
//   2. while pixel_x lies inside the map: draw map pixels;
//   3. affine_render_bg_remainder_* handles whatever is left of the span.
// If pixel_y is outside the map, phases 1 and 2 are skipped and phase 3
// covers the whole span. The unsigned casts make negative coordinates fail
// the range test. Note that the local "u32 i" declared here shadows the i
// declared by the builder.
1137 #define affine_render_scale(combine_op, alpha_op)                             \
1138 {                                                                             \
1139   pixel_y = source_y >> 8;                                                    \
1140   u32 i = 0;                                                                  \
1141   affine_render_bg_pixel_##combine_op(alpha_op);                              \
1142   if((u32)pixel_y < (u32)width_height)                                        \
1143   {                                                                           \
1144     affine_render_scale_offset();                                             \
1145     for(; i < end; i++)                                                       \
1146     {                                                                         \
1147       pixel_x = source_x >> 8;                                                \
1148                                                                               \
1149       if((u32)pixel_x < (u32)width_height)                                    \
1150       {                                                                       \
1151         break;                                                                \
1152       }                                                                       \
1153                                                                               \
1154       affine_render_bg_##combine_op(alpha_op);                                \
1155       affine_render_next(combine_op);                                         \
1156     }                                                                         \
1157                                                                               \
1158     for(; i < end; i++)                                                       \
1159     {                                                                         \
1160       pixel_x = source_x >> 8;                                                \
1161                                                                               \
1162       if((u32)pixel_x >= (u32)width_height)                                   \
1163         break;                                                                \
1164                                                                               \
1165       affine_render_scale_pixel(combine_op, alpha_op);                        \
1166     }                                                                         \
1167   }                                                                           \
1168   affine_render_bg_remainder_##combine_op(alpha_op);                          \
1169 }                                                                             \
1170
// Render an affine layer span with wraparound when pixel_y is constant across
// the span (the builder selects this when dy == 0 and bit 13 of bg_control is
// set). Coordinates are wrapped by masking with (width_height - 1), which
// relies on width_height being a power of two (the builder computes it as
// 1 << (7 + map_size)). Every pixel of the span is drawn from the map.
// NOTE(review): after masking, pixel_y is always below width_height, so the
// range check below looks like it can never fail.
1171 #define affine_render_scale_wrap(combine_op, alpha_op)                        \
1172 {                                                                             \
1173   u32 wrap_mask = width_height - 1;                                           \
1174   pixel_y = (source_y >> 8) & wrap_mask;                                      \
1175   if((u32)pixel_y < (u32)width_height)                                        \
1176   {                                                                           \
1177     affine_render_scale_offset();                                             \
1178     for(i = 0; i < end; i++)                                                  \
1179     {                                                                         \
1180       pixel_x = (source_x >> 8) & wrap_mask;                                  \
1181       affine_render_scale_pixel(combine_op, alpha_op);                        \
1182     }                                                                         \
1183   }                                                                           \
1184 }                                                                             \
1185
1186
// Draw one pixel of an affine layer when both pixel_x and pixel_y may change
// from pixel to pixel, then step via affine_render_next.
// The map entry index combines the tile column (pixel_x / 8) with the tile
// row (pixel_y / 8) scaled by the map pitch; tile_ptr is only re-derived when
// that index differs from last_map_offset. Each map entry selects a 64 byte
// tile, addressed as 8 rows of 8 one-byte pixels.
1187 #define affine_render_rotate_pixel(combine_op, alpha_op)                      \
1188   map_offset = (pixel_x / 8) + ((pixel_y / 8) << map_pitch);                  \
1189   if(map_offset != last_map_offset)                                           \
1190   {                                                                           \
1191     tile_ptr = tile_base + (map_base[map_offset] * 64);                       \
1192     last_map_offset = map_offset;                                             \
1193   }                                                                           \
1194                                                                               \
1195   current_pixel = tile_ptr[(pixel_x % 8) + ((pixel_y % 8) * 8)];              \
1196   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op);                          \
1197   affine_render_next(combine_op)                                              \
1198
// Render an affine layer span without wraparound when pixel_y varies across
// the span (the builder selects this when dy != 0 and bit 13 of bg_control is
// clear).
//   1. First loop: while the source point is outside the map in either axis,
//      emit the fill pixel (a no-op for transparent/copy) and step.
//   2. Second loop: draw map pixels until the source point leaves the map; at
//      that point affine_render_bg_remainder_* finishes the span (for base
//      layers it advances i to end) and the loop is left.
// The unsigned casts make negative coordinates fail the range tests.
1199 #define affine_render_rotate(combine_op, alpha_op)                            \
1200 {                                                                             \
1201   affine_render_bg_pixel_##combine_op(alpha_op);                              \
1202   for(i = 0; i < end; i++)                                                    \
1203   {                                                                           \
1204     pixel_x = source_x >> 8;                                                  \
1205     pixel_y = source_y >> 8;                                                  \
1206                                                                               \
1207     if(((u32)pixel_x < (u32)width_height) &&                                  \
1208      ((u32)pixel_y < (u32)width_height))                                      \
1209     {                                                                         \
1210       break;                                                                  \
1211     }                                                                         \
1212     affine_render_bg_##combine_op(alpha_op);                                  \
1213     affine_render_next(combine_op);                                           \
1214   }                                                                           \
1215                                                                               \
1216   for(; i < end; i++)                                                         \
1217   {                                                                           \
1218     pixel_x = source_x >> 8;                                                  \
1219     pixel_y = source_y >> 8;                                                  \
1220                                                                               \
1221     if(((u32)pixel_x >= (u32)width_height) ||                                 \
1222      ((u32)pixel_y >= (u32)width_height))                                     \
1223     {                                                                         \
1224       affine_render_bg_remainder_##combine_op(alpha_op);                      \
1225       break;                                                                  \
1226     }                                                                         \
1227                                                                               \
1228     affine_render_rotate_pixel(combine_op, alpha_op);                         \
1229   }                                                                           \
1230 }                                                                             \
1231
// Render an affine layer span with wraparound when pixel_y varies across the
// span (the builder selects this when dy != 0 and bit 13 of bg_control is
// set). Both coordinates are wrapped by masking with (width_height - 1),
// which relies on width_height being a power of two, so every pixel of the
// span is drawn from the map.
1232 #define affine_render_rotate_wrap(combine_op, alpha_op)                       \
1233 {                                                                             \
1234   u32 wrap_mask = width_height - 1;                                           \
1235   for(i = 0; i < end; i++)                                                    \
1236   {                                                                           \
1237     pixel_x = (source_x >> 8) & wrap_mask;                                    \
1238     pixel_y = (source_y >> 8) & wrap_mask;                                    \
1239                                                                               \
1240     affine_render_rotate_pixel(combine_op, alpha_op);                         \
1241   }                                                                           \
1242 }                                                                             \
1243
1244
1245 // Build affine background renderers.
1246
// Defines render_scanline_affine_<combine_op>_<alpha_op>(layer, start, end,
// scanline), which draws pixels [start, end) of one affine layer into
// scanline (an array of render_scanline_dest_<alpha_op> elements).
//
// Setup, all derived from the layer's BGxCNT value:
//   - map_size (bits 14-15) gives a square map of 1 << (7 + map_size) pixels
//     per side, with 1 << map_pitch map entries per row;
//   - bits 8-12 select the map base in 2 KiB steps, bits 2-3 select the tile
//     base in 16 KiB steps, both relative to vram.
// dx and dy are the signed 16-bit values read from the PA and PC registers of
// the layer (register index offset by (layer - 2) * 8). The starting source
// point is the layer's affine reference point advanced by start steps.
// end is converted to a pixel count before rendering.
//
// The renderer variant is chosen from two bits: bit 1 is bit 13 of
// bg_control (wraparound), bit 0 is set when dy is non-zero.
// NOTE(review): layer is assumed to be 2 or 3 (it indexes two-entry arrays
// with layer - 2); vcount is read but not used by any macro visible here.
1247 #define render_scanline_affine_builder(combine_op, alpha_op)                  \
1248 void render_scanline_affine_##combine_op##_##alpha_op(u32 layer,              \
1249  u32 start, u32 end, void *scanline)                                          \
1250 {                                                                             \
1251   render_scanline_extra_variables_##combine_op##_##alpha_op(affine);          \
1252   u32 bg_control = io_registers[REG_BG0CNT + layer];                          \
1253   u32 current_pixel;                                                          \
1254   s32 source_x, source_y;                                                     \
1255   u32 vcount = io_registers[REG_VCOUNT];                                      \
1256   u32 pixel_x, pixel_y;                                                       \
1257   u32 layer_offset = (layer - 2) * 8;                                         \
1258   s32 dx, dy;                                                                 \
1259   u32 map_size = (bg_control >> 14) & 0x03;                                   \
1260   u32 width_height = 1 << (7 + map_size);                                     \
1261   u32 map_pitch = map_size + 4;                                               \
1262   u8 *map_base = vram + (((bg_control >> 8) & 0x1F) * (1024 * 2));            \
1263   u8 *tile_base = vram + (((bg_control >> 2) & 0x03) * (1024 * 16));          \
1264   u8 *tile_ptr;                                                               \
1265   u32 map_offset, last_map_offset = (u32)-1;                                  \
1266   u32 i;                                                                      \
1267   render_scanline_dest_##alpha_op *dest_ptr =                                 \
1268    ((render_scanline_dest_##alpha_op *)scanline) + start;                     \
1269                                                                               \
1270   dx = (s16)io_registers[REG_BG2PA + layer_offset];                           \
1271   dy = (s16)io_registers[REG_BG2PC + layer_offset];                           \
1272   source_x = affine_reference_x[layer - 2] + (start * dx);                    \
1273   source_y = affine_reference_y[layer - 2] + (start * dy);                    \
1274                                                                               \
1275   end -= start;                                                               \
1276                                                                               \
1277   switch(((bg_control >> 12) & 0x02) | (dy != 0))                             \
1278   {                                                                           \
1279     case 0x00:                                                                \
1280       affine_render_scale(combine_op, alpha_op);                              \
1281       break;                                                                  \
1282                                                                               \
1283     case 0x01:                                                                \
1284       affine_render_rotate(combine_op, alpha_op);                             \
1285       break;                                                                  \
1286                                                                               \
1287     case 0x02:                                                                \
1288       affine_render_scale_wrap(combine_op, alpha_op);                         \
1289       break;                                                                  \
1290                                                                               \
1291     case 0x03:                                                                \
1292       affine_render_rotate_wrap(combine_op, alpha_op);                        \
1293       break;                                                                  \
1294   }                                                                           \
1295 }                                                                             \
1296
// Define the eight affine scanline renderers: one per combine op (base,
// transparent) and alpha op (normal, color16, color32, alpha). These are the
// functions listed by tile_layer_render_functions(affine).
1297 render_scanline_affine_builder(base, normal);
1298 render_scanline_affine_builder(transparent, normal);
1299 render_scanline_affine_builder(base, color16);
1300 render_scanline_affine_builder(transparent, color16);
1301 render_scanline_affine_builder(base, color32);
1302 render_scanline_affine_builder(transparent, color32);
1303 render_scanline_affine_builder(base, alpha);
1304 render_scanline_affine_builder(transparent, alpha);
1305
1306
// bitmap_render_pixel_*: emit the pixel held in current_pixel for the given
// bitmap mode.

// Mode 3: run current_pixel through convert_palette (defined elsewhere) and
// store the result at *dest_ptr.
1307 #define bitmap_render_pixel_mode3(alpha_op)                                   \
1308   convert_palette(current_pixel);                                             \
1309   *dest_ptr = current_pixel                                                   \
1310
// Mode 4: delegate to the tile_expand_base_* macro for the alpha op (defined
// elsewhere); the source data for this mode is read one byte per pixel.
1311 #define bitmap_render_pixel_mode4(alpha_op)                                   \
1312   tile_expand_base_##alpha_op(0)                                              \
1313
// Mode 5: same pixel handling as mode 3.
1314 #define bitmap_render_pixel_mode5(alpha_op)                                   \
1315   bitmap_render_pixel_mode3(alpha_op)                                         \
1316
1317
// Render a span of a bitmap-mode background whose source row is constant
// across the span (the builder uses this when dy == 0).
//
// Expects in the expanding scope: source_x, source_y, dx, pixel_x, pixel_y
// (all signed), end and i (unsigned), current_pixel, src_ptr and dest_ptr.
// source_x / source_y are shifted right by 8 to obtain pixel coordinates.
// Nothing is drawn when the row lies outside [0, height); pixels whose
// column lies outside [0, width) are skipped and the destination is left
// untouched there.
//
// Two paths:
//   - dx == 0x100 (one source pixel per output pixel): the span is clipped
//     against the bitmap and the visible part is rendered from consecutive
//     source pixels. end, src_ptr and dest_ptr are modified by the clipping.
//   - any other dx: leading out-of-range pixels are skipped, then pixels are
//     fetched by index until the source column leaves the bitmap.
#define bitmap_render_scale(type, alpha_op, width, height)                    \
  pixel_y = (source_y >> 8);                                                  \
  if((u32)pixel_y < (u32)(height))                                            \
  {                                                                           \
    pixel_x = (source_x >> 8);                                                \
    src_ptr += (pixel_y * (width));                                           \
    if(dx == 0x100)                                                           \
    {                                                                         \
      if(pixel_x < 0)                                                         \
      {                                                                       \
        /* Drop the part of the span that lies left of the bitmap. */         \
        end += pixel_x;                                                       \
        dest_ptr -= pixel_x;                                                  \
        pixel_x = 0;                                                          \
      }                                                                       \
      else if(pixel_x > 0)                                                    \
      {                                                                       \
        src_ptr += pixel_x;                                                   \
      }                                                                       \
                                                                              \
      /* end is unsigned and is "negative" (wrapped) when the whole span  */  \
      /* lies left of the bitmap. Compare as signed so that case is not   */  \
      /* mistaken for a span running past the right edge, which would     */  \
      /* reset end to width and write beyond the destination span.        */  \
      if((s32)(pixel_x + end) >= (s32)(width))                                \
        end = ((width) - pixel_x);                                            \
                                                                              \
      /* Signed loop bound: a non-positive count draws nothing. */            \
      for(i = 0; (s32)i < (s32)end; i++)                                      \
      {                                                                       \
        current_pixel = *src_ptr;                                             \
        bitmap_render_pixel_##type(alpha_op);                                 \
        src_ptr++;                                                            \
        dest_ptr++;                                                           \
      }                                                                       \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      /* Skip output pixels until the source column enters the bitmap. */     \
      for(i = 0; i < end; i++)                                                \
      {                                                                       \
        pixel_x = (source_x >> 8);                                            \
                                                                              \
        if((u32)pixel_x < (u32)(width))                                       \
          break;                                                              \
                                                                              \
        source_x += dx;                                                       \
        dest_ptr++;                                                           \
      }                                                                       \
                                                                              \
      /* Draw until the source column leaves the bitmap again. */             \
      for(; i < end; i++)                                                     \
      {                                                                       \
        pixel_x = (source_x >> 8);                                            \
                                                                              \
        if((u32)pixel_x >= (u32)(width))                                      \
          break;                                                              \
                                                                              \
        current_pixel = src_ptr[pixel_x];                                     \
        bitmap_render_pixel_##type(alpha_op);                                 \
                                                                              \
        source_x += dx;                                                       \
        dest_ptr++;                                                           \
      }                                                                       \
    }                                                                         \
  }
// Render a span of a bitmap-mode background when the source row changes
// across the span (the builder uses this when dy != 0).
//   1. First loop: step past output pixels whose source point lies outside
//      the width x height bitmap; the destination is left untouched there.
//   2. Second loop: fetch and emit pixels until the source point leaves the
//      bitmap; the rest of the span is left untouched.
// The unsigned casts make negative coordinates fail the range tests.
1382 #define bitmap_render_rotate(type, alpha_op, width, height)                   \
1383   for(i = 0; i < end; i++)                                                    \
1384   {                                                                           \
1385     pixel_x = source_x >> 8;                                                  \
1386     pixel_y = source_y >> 8;                                                  \
1387                                                                               \
1388     if(((u32)pixel_x < (u32)width) && ((u32)pixel_y < (u32)height))           \
1389       break;                                                                  \
1390                                                                               \
1391     source_x += dx;                                                           \
1392     source_y += dy;                                                           \
1393     dest_ptr++;                                                               \
1394   }                                                                           \
1395                                                                               \
1396   for(; i < end; i++)                                                         \
1397   {                                                                           \
1398     pixel_x = (source_x >> 8);                                                \
1399     pixel_y = (source_y >> 8);                                                \
1400                                                                               \
1401     if(((u32)pixel_x >= (u32)width) || ((u32)pixel_y >= (u32)height))         \
1402       break;                                                                  \
1403                                                                               \
1404     current_pixel = src_ptr[pixel_x + (pixel_y * width)];                     \
1405      bitmap_render_pixel_##type(alpha_op);                                    \
1406                                                                               \
1407     source_x += dx;                                                           \
1408     source_y += dy;                                                           \
1409     dest_ptr++;                                                               \
1410   }                                                                           \
1411
1412
// render_scanline_vram_setup_*: declare src_ptr (and any per-mode helper
// variable) for the bitmap renderers.

// Mode 3: 16-bit pixels read from the start of vram.
1413 #define render_scanline_vram_setup_mode3()                                    \
1414   u16 *src_ptr = (u16 *)vram                                                  \
1415
// Mode 5: 16-bit pixels; bit 0x10 of DISPCNT selects the second frame at
// vram + 0xA000 instead of the start of vram.
1416 #define render_scanline_vram_setup_mode5()                                    \
1417   u16 *src_ptr;                                                               \
1418   if(io_registers[REG_DISPCNT] & 0x10)                                        \
1419     src_ptr = (u16 *)(vram + 0xA000);                                         \
1420   else                                                                        \
1421     src_ptr = (u16 *)vram                                                     \
1422
1423
1424 #ifdef RENDER_COLOR16_NORMAL
1425
// Mode 4 (RENDER_COLOR16_NORMAL build): 8-bit pixels with the same frame
// selection as mode 5. Also declares pixel_combine = 0, presumably consumed
// by tile_expand_base_normal in this configuration.
1426 #define render_scanline_vram_setup_mode4()                                    \
1427   const u32 pixel_combine = 0;                                                \
1428   u8 *src_ptr;                                                                \
1429   if(io_registers[REG_DISPCNT] & 0x10)                                        \
1430     src_ptr = vram + 0xA000;                                                  \
1431   else                                                                        \
1432     src_ptr = vram                                                            \
1433
1434
1435 #else
1436
// Mode 4 (default build): 8-bit pixels with the same frame selection as
// mode 5. Also declares palette pointing at palette_ram_converted, presumably
// consumed by tile_expand_base_normal in this configuration.
1437 #define render_scanline_vram_setup_mode4()                                    \
1438   u16 *palette = palette_ram_converted;                                       \
1439   u8 *src_ptr;                                                                \
1440   if(io_registers[REG_DISPCNT] & 0x10)                                        \
1441     src_ptr = vram + 0xA000;                                                  \
1442   else                                                                        \
1443     src_ptr = vram                                                            \
1444
1445 #endif
1446
1447
1448
1449 // Build bitmap scanline rendering functions.
1450
// Defines render_scanline_bitmap_<type>_<alpha_op>(start, end, scanline),
// which draws pixels [start, end) of the bitmap background into scanline (an
// array of render_scanline_dest_<alpha_op> elements). width and height are
// the bitmap dimensions in pixels for the mode.
//
// dx and dy are the signed 16-bit values of the BG2PA and BG2PC registers.
// The starting source point is affine reference point 0 advanced by start
// steps; end is converted to a pixel count before rendering. dy == 0 selects
// the constant-row renderer, anything else the general one.
// NOTE(review): bg_control and vcount are read but not used by any macro
// visible in this section.
1451 #define render_scanline_bitmap_builder(type, alpha_op, width, height)         \
1452 void render_scanline_bitmap_##type##_##alpha_op(u32 start, u32 end,           \
1453  void *scanline)                                                              \
1454 {                                                                             \
1455   u32 bg_control = io_registers[REG_BG2CNT];                                  \
1456   u32 current_pixel;                                                          \
1457   s32 source_x, source_y;                                                     \
1458   u32 vcount = io_registers[REG_VCOUNT];                                      \
1459   s32 pixel_x, pixel_y;                                                       \
1460                                                                               \
1461   s32 dx = (s16)io_registers[REG_BG2PA];                                      \
1462   s32 dy = (s16)io_registers[REG_BG2PC];                                      \
1463                                                                               \
1464   u32 i;                                                                      \
1465                                                                               \
1466   render_scanline_dest_##alpha_op *dest_ptr =                                 \
1467    ((render_scanline_dest_##alpha_op *)scanline) + start;                     \
1468   render_scanline_vram_setup_##type();                                        \
1469                                                                               \
1470   end -= start;                                                               \
1471                                                                               \
1472   source_x = affine_reference_x[0] + (start * dx);                            \
1473   source_y = affine_reference_y[0] + (start * dy);                            \
1474                                                                               \
1475   if(dy == 0)                                                                 \
1476   {                                                                           \
1477     bitmap_render_scale(type, alpha_op, width, height);                       \
1478   }                                                                           \
1479   else                                                                        \
1480   {                                                                           \
1481     bitmap_render_rotate(type, alpha_op, width, height);                      \
1482   }                                                                           \
1483 }                                                                             \
1484
// Define the bitmap scanline renderers (normal alpha op only): modes 3 and 4
// use a 240x160 bitmap, mode 5 a 160x128 bitmap.
1485 render_scanline_bitmap_builder(mode3, normal, 240, 160);
1486 render_scanline_bitmap_builder(mode4, normal, 240, 160);
1487 render_scanline_bitmap_builder(mode5, normal, 160, 128);
1488
1489
1490 // Fill in the renderers for a layer based on the mode type.
1491
// Expands to a brace initializer listing the eight renderers of the given
// type (text or affine) in this fixed order: base/transparent for normal,
// then alpha, then color16, then color32. The order has to match the member
// order of tile_layer_render_struct, which is declared elsewhere.
1492 #define tile_layer_render_functions(type)                                     \
1493 {                                                                             \
1494   render_scanline_##type##_base_normal,                                       \
1495   render_scanline_##type##_transparent_normal,                                \
1496   render_scanline_##type##_base_alpha,                                        \
1497   render_scanline_##type##_transparent_alpha,                                 \
1498   render_scanline_##type##_base_color16,                                      \
1499   render_scanline_##type##_transparent_color16,                               \
1500   render_scanline_##type##_base_color32,                                      \
1501   render_scanline_##type##_transparent_color32                                \
1502 }                                                                             \
1503
1504
1505 // Use if a layer is unsupported for that mode.
1506
// Only four initializers are given here while tile_layer_render_functions
// supplies eight; under C initialization rules any remaining members are
// zero-initialized, so all pointers end up NULL.
1507 #define tile_layer_render_null()                                              \
1508 {                                                                             \
1509   NULL, NULL, NULL, NULL                                                      \
1510 }                                                                             \
1511
// Expands to a brace initializer holding the single (normal) renderer for
// the given bitmap mode.
1512 #define bitmap_layer_render_functions(type)                                   \
1513 {                                                                             \
1514   render_scanline_bitmap_##type##_normal                                      \
1515 }                                                                             \
1516
1517 // Structs containing functions to render the layers for each mode, for
1518 // each render type.
// First index: tile mode 0-2 (looked up with dispcnt & 0x07); second index:
// one entry per layer, presumably layers 0-3. Mode 0 uses text renderers for
// every layer, mode 1 uses the affine renderers for the third entry only, and
// mode 2 uses them for the third and fourth entries.
1519 tile_layer_render_struct tile_mode_renderers[3][4] =
1520 {
1521   {
1522     tile_layer_render_functions(text), tile_layer_render_functions(text),
1523     tile_layer_render_functions(text), tile_layer_render_functions(text)
1524   },
1525   {
1526     tile_layer_render_functions(text), tile_layer_render_functions(text),
1527     tile_layer_render_functions(affine), tile_layer_render_functions(text)
1528   },
1529   {
1530     tile_layer_render_functions(text), tile_layer_render_functions(text),
1531     tile_layer_render_functions(affine), tile_layer_render_functions(affine)
1532   }
1533 };
1534
// Renderers for the bitmap modes, in the order mode 3, mode 4, mode 5; the
// lookup macro indexes this with (dispcnt & 0x07) - 3.
1535 bitmap_layer_render_struct bitmap_mode_renderers[3] =
1536 {
1537   bitmap_layer_render_functions(mode3),
1538   bitmap_layer_render_functions(mode4),
1539   bitmap_layer_render_functions(mode5)
1540 };
1541
1542
// Declare layer_renderers as the row of tile_mode_renderers for the current
// video mode (low three bits of dispcnt).
// NOTE(review): the table has three rows, so this presumably is only
// expanded when the mode is 0, 1 or 2 -- confirm at the call sites.
1543 #define render_scanline_layer_functions_tile()                                \
1544   tile_layer_render_struct *layer_renderers =                                 \
1545    tile_mode_renderers[dispcnt & 0x07]                                        \
1546
// Declare layer_renderers as the bitmap_mode_renderers entry for the current
// video mode; modes 3-5 map to entries 0-2.
// NOTE(review): presumably only expanded when the mode is 3, 4 or 5 --
// confirm at the call sites.
1547 #define render_scanline_layer_functions_bitmap()                              \
1548   bitmap_layer_render_struct *layer_renderers =                               \
1549    bitmap_mode_renderers + ((dispcnt & 0x07) - 3)                             \
1550
1551
1552 // Adjust a flipped obj's starting position. Both expand to a trailing term
1553 // of a tile_ptr expression: nothing if unflipped, the row's last tile if not.
1554 #define obj_tile_offset_noflip(color_depth)                                   \
1555
1556 #define obj_tile_offset_flip(color_depth)                                     \
1557   + (tile_size_##color_depth * ((obj_width - 8) / 8))                         \
1558
1559
1560 // Adjust the obj's starting point if it goes too far off the left edge of
1561 // the screen.
1562
1563 #define obj_tile_right_offset_noflip(color_depth) /* skip clipped tiles */    \
1564   tile_ptr += (partial_tile_offset / 8) * tile_size_##color_depth             \
1565
1566 #define obj_tile_right_offset_flip(color_depth) /* same, moving backward */   \
1567   tile_ptr -= (partial_tile_offset / 8) * tile_size_##color_depth             \
1568
1569 // Get the current row offset into an obj in 1D map space
1570 // (each tile row of the obj spans obj_width / 8 tiles; flip term goes last).
1571 #define obj_tile_offset_1D(color_depth, flip_op)                              \
1572   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1573    + ((vertical_offset / 8) * (obj_width / 8) * tile_size_##color_depth)      \
1574    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1575    obj_tile_offset_##flip_op(color_depth)                                     \
1576
1577 // Get the current row offset into an obj in 2D map space
1578 // (consecutive tile rows are 1024 elements apart; flip term goes last).
1579 #define obj_tile_offset_2D(color_depth, flip_op)                              \
1580   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1581    + ((vertical_offset / 8) * 1024)                                           \
1582    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1583    obj_tile_offset_##flip_op(color_depth)                                     \
1584
1585
1586 // Get the palette for 4bpp obj.
1587 // 4bpp: attribute 2 bits 12-15, kept as a multiple of 16. 8bpp: expands empty.
1588 #define obj_get_palette_4bpp()                                                \
1589   current_palette = (obj_attribute_2 >> 8) & 0xF0                             \
1590
1591 #define obj_get_palette_8bpp()                                                \
1592
1593
1594 // Render the current row of an obj.
1595 // Clips the row to start..end; must expand inside a loop (it uses continue).
1596 #define obj_render(combine_op, color_depth, alpha_op, map_space, flip_op)     \
1597 {                                                                             \
1598   obj_get_palette_##color_depth();                                            \
1599   obj_tile_offset_##map_space(color_depth, flip_op);                          \
1600                                                                               \
1601   if(obj_x < (s32)start) /* obj begins left of start */                       \
1602   {                                                                           \
1603     dest_ptr = scanline + start;                                              \
1604     pixel_run = obj_width - (start - obj_x);                                  \
1605     if((s32)pixel_run > 0)                                                    \
1606     {                                                                         \
1607       if((obj_x + obj_width) >= end) /* and reaches end */                    \
1608       {                                                                       \
1609         pixel_run = end - start;                                              \
1610         partial_tile_offset = start - obj_x;                                  \
1611         obj_tile_right_offset_##flip_op(color_depth);                         \
1612         partial_tile_offset %= 8;                                             \
1613                                                                               \
1614         if(partial_tile_offset)                                               \
1615         {                                                                     \
1616           partial_tile_run = 8 - partial_tile_offset;                         \
1617           if((s32)pixel_run < (s32)partial_tile_run)                          \
1618           {                                                                   \
1619             if((s32)pixel_run > 0)                                            \
1620             {                                                                 \
1621               partial_tile_run = pixel_run;                                   \
1622               partial_tile_mid_obj(combine_op, color_depth, alpha_op,         \
1623                flip_op);                                                      \
1624             }                                                                 \
1625             continue;                                                         \
1626           }                                                                   \
1627           else                                                                \
1628           {                                                                   \
1629             pixel_run -= partial_tile_run;                                    \
1630             partial_tile_right_obj(combine_op, color_depth, alpha_op,         \
1631              flip_op);                                                        \
1632           }                                                                   \
1633         }                                                                     \
1634         tile_run = pixel_run / 8;                                             \
1635         multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);        \
1636         partial_tile_run = pixel_run % 8;                                     \
1637         if(partial_tile_run)                                                  \
1638         {                                                                     \
1639           partial_tile_left_obj(combine_op, color_depth, alpha_op,            \
1640            flip_op);                                                          \
1641         }                                                                     \
1642       }                                                                       \
1643       else /* obj stops before end */                                         \
1644       {                                                                       \
1645         partial_tile_offset = start - obj_x;                                  \
1646         obj_tile_right_offset_##flip_op(color_depth);                         \
1647         partial_tile_offset %= 8;                                             \
1648         if(partial_tile_offset)                                               \
1649         {                                                                     \
1650           partial_tile_run = 8 - partial_tile_offset;                         \
1651           partial_tile_right_obj(combine_op, color_depth, alpha_op,           \
1652            flip_op);                                                          \
1653         }                                                                     \
1654         tile_run = pixel_run / 8;                                             \
1655         multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);        \
1656       }                                                                       \
1657     }                                                                         \
1658   }                                                                           \
1659   else                                                                        \
1660                                                                               \
1661   if((obj_x + obj_width) >= end) /* starts inside, cut at end */              \
1662   {                                                                           \
1663     pixel_run = end - obj_x;                                                  \
1664     if((s32)pixel_run > 0)                                                    \
1665     {                                                                         \
1666       dest_ptr = scanline + obj_x;                                            \
1667       tile_run = pixel_run / 8;                                               \
1668       multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);          \
1669       partial_tile_run = pixel_run % 8;                                       \
1670       if(partial_tile_run)                                                    \
1671       {                                                                       \
1672         partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op);    \
1673       }                                                                       \
1674     }                                                                         \
1675   }                                                                           \
1676   else /* lies fully inside the span */                                       \
1677   {                                                                           \
1678     dest_ptr = scanline + obj_x;                                              \
1679     tile_run = obj_width / 8;                                                 \
1680     multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);            \
1681   }                                                                           \
1682 }                                                                             \
1683
1684 #define obj_scale_offset_1D(color_depth) /* row start, 1D map space */        \
1685   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1686    + ((vertical_offset / 8) * (max_x / 8) * tile_size_##color_depth)          \
1687    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1688
1689 // Get the current row offset into an obj in 2D map space
1690 // (variant used by obj_render_scale: no flip term is appended).
1691 #define obj_scale_offset_2D(color_depth)                                      \
1692   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1693    + ((vertical_offset / 8) * 1024)                                           \
1694    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1695
1696 #define obj_render_scale_pixel_4bpp(combine_op, alpha_op)                     \
1697   if(tile_x & 0x01)                                                           \
1698   { /* odd tile_x: high nibble */                                             \
1699     current_pixel = tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)] >> 4;  \
1700   }                                                                           \
1701   else                                                                        \
1702   { /* even tile_x: low nibble */                                             \
1703     current_pixel =                                                           \
1704      tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)] & 0x0F;               \
1705   }                                                                           \
1706                                                                               \
1707   tile_4bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1708
1709 // 8bpp fetch: one tile_ptr element per pixel. NOTE(review): unlike the 4bpp form this ends in ';'.
1710 #define obj_render_scale_pixel_8bpp(combine_op, alpha_op)                     \
1711   current_pixel = tile_ptr[tile_map_offset + (tile_x & 0x07)];                \
1712   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op);                          \
1713
1714 #define obj_render_scale(combine_op, color_depth, alpha_op, map_space)        \
1715 { /* One row of an affine obj whose dy is 0, so source_y is fixed. */         \
1716   u32 vertical_offset;                                                        \
1717   source_y += (y_delta * dmy);                                                \
1718   vertical_offset = (source_y >> 8);                                          \
1719   if((u32)vertical_offset < (u32)max_y)                                       \
1720   {                                                                           \
1721     obj_scale_offset_##map_space(color_depth);                                \
1722     source_x += (y_delta * dmx) - (middle_x * dx);                            \
1723                                                                               \
1724     for(i = 0; i < obj_width; i++)                                            \
1725     { /* skip pixels that sample outside the obj */                           \
1726       tile_x = (source_x >> 8);                                               \
1727                                                                               \
1728       if((u32)tile_x < (u32)max_x)                                            \
1729         break;                                                                \
1730                                                                               \
1731       source_x += dx;                                                         \
1732       advance_dest_ptr_##combine_op(1);                                       \
1733     }                                                                         \
1734                                                                               \
1735     for(; i < obj_width; i++)                                                 \
1736     { /* draw until the sample leaves the obj */                              \
1737       tile_x = (source_x >> 8);                                               \
1738                                                                               \
1739       if((u32)tile_x >= (u32)max_x)                                           \
1740         break;                                                                \
1741                                                                               \
1742       tile_map_offset = (tile_x >> 3) * tile_size_##color_depth;              \
1743       obj_render_scale_pixel_##color_depth(combine_op, alpha_op);             \
1744                                                                               \
1745       source_x += dx;                                                         \
1746       advance_dest_ptr_##combine_op(1);                                       \
1747     }                                                                         \
1748   }                                                                           \
1749 }                                                                             \
1750
1751 // Set obj_tile_pitch: distance between vertically adjacent tiles of an obj.
1752 #define obj_rotate_offset_1D(color_depth)                                     \
1753   obj_tile_pitch = (max_x / 8) * tile_size_##color_depth                      \
1754
1755 #define obj_rotate_offset_2D(color_depth)                                     \
1756   obj_tile_pitch = 1024                                                       \
1757
1758 #define obj_render_rotate_pixel_4bpp(combine_op, alpha_op)                    \
1759   if(tile_x & 0x01)                                                           \
1760   { /* odd tile_x: high nibble */                                             \
1761     current_pixel = tile_ptr[tile_map_offset +                                \
1762      ((tile_x >> 1) & 0x03) + ((tile_y & 0x07) * obj_pitch)] >> 4;            \
1763   }                                                                           \
1764   else                                                                        \
1765   { /* even tile_x: low nibble */                                             \
1766     current_pixel = tile_ptr[tile_map_offset +                                \
1767      ((tile_x >> 1) & 0x03) + ((tile_y & 0x07) * obj_pitch)] & 0x0F;          \
1768   }                                                                           \
1769                                                                               \
1770   tile_4bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1771
1772 #define obj_render_rotate_pixel_8bpp(combine_op, alpha_op)                    \
1773   current_pixel = tile_ptr[tile_map_offset +                                  \
1774    (tile_x & 0x07) + ((tile_y & 0x07) * obj_pitch)];                          \
1775   /* draw the pixel just fetched */                                           \
1776   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1777
1778 #define obj_render_rotate(combine_op, color_depth, alpha_op, map_space)       \
1779 { /* One row of an affine obj, stepping both source_x and source_y. */        \
1780   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32);                    \
1781   obj_rotate_offset_##map_space(color_depth);                                 \
1782                                                                               \
1783   source_x += (y_delta * dmx) - (middle_x * dx);                              \
1784   source_y += (y_delta * dmy) - (middle_x * dy);                              \
1785                                                                               \
1786   for(i = 0; i < obj_width; i++)                                              \
1787   { /* skip pixels that sample outside the obj */                             \
1788     tile_x = (source_x >> 8);                                                 \
1789     tile_y = (source_y >> 8);                                                 \
1790                                                                               \
1791     if(((u32)tile_x < (u32)max_x) && ((u32)tile_y < (u32)max_y))              \
1792       break;                                                                  \
1793                                                                               \
1794     source_x += dx;                                                           \
1795     source_y += dy;                                                           \
1796     advance_dest_ptr_##combine_op(1);                                         \
1797   }                                                                           \
1798                                                                               \
1799   for(; i < obj_width; i++)                                                   \
1800   { /* draw until the sample leaves the obj */                                \
1801     tile_x = (source_x >> 8);                                                 \
1802     tile_y = (source_y >> 8);                                                 \
1803                                                                               \
1804     if(((u32)tile_x >= (u32)max_x) || ((u32)tile_y >= (u32)max_y))            \
1805       break;                                                                  \
1806                                                                               \
1807     tile_map_offset = ((tile_x >> 3) * tile_size_##color_depth) +             \
1808     ((tile_y >> 3) * obj_tile_pitch);                                         \
1809     obj_render_rotate_pixel_##color_depth(combine_op, alpha_op);              \
1810                                                                               \
1811     source_x += dx;                                                           \
1812     source_y += dy;                                                           \
1813     advance_dest_ptr_##combine_op(1);                                         \
1814   }                                                                           \
1815 }                                                                             \
1816
1817 // Render the current row of an affine transformed OBJ.
1818 // Must expand inside a loop: it uses continue once the clipped width is <= 0.
1819 #define obj_render_affine(combine_op, color_depth, alpha_op, map_space)       \
1820 {                                                                             \
1821   s16 *params = oam_ram + (((obj_attribute_1 >> 9) & 0x1F) * 16);             \
1822   s32 dx = params[3];                                                         \
1823   s32 dmx = params[7];                                                        \
1824   s32 dy = params[11];                                                        \
1825   s32 dmy = params[15];                                                       \
1826   s32 source_x, source_y;                                                     \
1827   s32 tile_x, tile_y;                                                         \
1828   u32 tile_offset;                                                            \
1829   u32 tile_map_offset;                                                        \
1830   s32 middle_x;                                                               \
1831   s32 middle_y;                                                               \
1832   s32 max_x = obj_width;                                                      \
1833   s32 max_y = obj_height;                                                     \
1834   s32 y_delta;                                                                \
1835   u32 obj_pitch = tile_width_##color_depth;                                   \
1836   u32 obj_tile_pitch;                                                         \
1837                                                                               \
1838   middle_x = (obj_width / 2);                                                 \
1839   middle_y = (obj_height / 2);                                                \
1840                                                                               \
1841   source_x = (middle_x << 8);                                                 \
1842   source_y = (middle_y << 8);                                                 \
1843                                                                               \
1844                                                                               \
1845   if(obj_attribute_0 & 0x200) /* bit 9 set: double the drawn size */          \
1846   {                                                                           \
1847     obj_width *= 2;                                                           \
1848     obj_height *= 2;                                                          \
1849     middle_x *= 2;                                                            \
1850     middle_y *= 2;                                                            \
1851   }                                                                           \
1852                                                                               \
1853   if((s32)obj_x < (s32)start) /* clip on the left */                          \
1854   {                                                                           \
1855     u32 x_delta = start - obj_x;                                              \
1856     middle_x -= x_delta;                                                      \
1857     obj_width -= x_delta;                                                     \
1858     obj_x = start;                                                            \
1859                                                                               \
1860     if((s32)obj_width <= 0)                                                   \
1861       continue;                                                               \
1862   }                                                                           \
1863                                                                               \
1864   if((s32)(obj_x + obj_width) >= (s32)end) /* clip on the right */            \
1865   {                                                                           \
1866     obj_width = end - obj_x;                                                  \
1867                                                                               \
1868     if((s32)obj_width <= 0)                                                   \
1869       continue;                                                               \
1870   }                                                                           \
1871   dest_ptr = scanline + obj_x;                                                \
1872                                                                               \
1873   y_delta = vcount - (obj_y + middle_y);                                      \
1874                                                                               \
1875   obj_get_palette_##color_depth();                                            \
1876                                                                               \
1877   if(dy == 0) /* source_y does not change along the row */                    \
1878   {                                                                           \
1879     obj_render_scale(combine_op, color_depth, alpha_op, map_space);           \
1880   }                                                                           \
1881   else                                                                        \
1882   {                                                                           \
1883     obj_render_rotate(combine_op, color_depth, alpha_op, map_space);          \
1884   }                                                                           \
1885 }                                                                             \
1886
1887 u32 obj_width_table[] = { 8, 16, 32, 64, 16, 32, 32, 64, 8, 8, 16, 32 }; // 12 obj widths
1888 u32 obj_height_table[] = { 8, 16, 32, 64, 8, 8, 16, 32, 16, 32, 32, 64 }; // 12 heights; NOTE(review): presumably same index as widths
1889 // NOTE(review): presumably [priority][scanline][slot]; 160 matches GBA_SCREEN_HEIGHT. Filled outside this chunk.
1890 u8 obj_priority_list[5][160][128];
1891 u32 obj_priority_count[5][160];
1892 u32 obj_alpha_count[160];
1893
1894
1895 // Build obj rendering functions
1896 // Extra locals for the normal variant; which one depends on RENDER_COLOR16_NORMAL.
1897 #ifdef RENDER_COLOR16_NORMAL
1898
1899 #define render_scanline_obj_extra_variables_normal(bg_type)                   \
1900   const u32 pixel_combine = (1 << 8)                                          \
1901
1902 #else
1903
1904 #define render_scanline_obj_extra_variables_normal(bg_type)                   \
1905   u16 *palette = palette_ram_converted + 256                                  \
1906
1907 #endif
1908
1909 // Locals shared by the color16, color32, alpha_obj and partial_alpha variants.
1910 #define render_scanline_obj_extra_variables_color()                           \
1911   u32 dest;                                                                   \
1912   u32 pixel_combine = color_combine_mask(4) | (1 << 8)                        \
1913
1914 #define render_scanline_obj_extra_variables_alpha_obj(map_space)              \
1915   render_scanline_obj_extra_variables_color();                                \
1916   if((pixel_combine & 0x00000200) == 0)                                       \
1917   { /* bit 9 of pixel_combine clear: use the color32 renderer */              \
1918     render_scanline_obj_color32_##map_space(priority, start, end, scanline);  \
1919     return;                                                                   \
1920   }                                                                           \
1921
1922 #define render_scanline_obj_extra_variables_color16(map_space)                \
1923   render_scanline_obj_extra_variables_color() /* map_space is unused */       \
1924
1925 #define render_scanline_obj_extra_variables_color32(map_space)                \
1926   render_scanline_obj_extra_variables_color() /* map_space is unused */       \
1927
1928 #define render_scanline_obj_extra_variables_partial_alpha(map_space)          \
1929   render_scanline_obj_extra_variables_color();                                \
1930   u32 base_pixel_combine = pixel_combine /* saved so it can be restored */    \
1931
1932 #define render_scanline_obj_extra_variables_copy(type)                        \
1933   u32 bldcnt = io_registers[REG_BLDCNT];                                      \
1934   u32 dispcnt = io_registers[REG_DISPCNT];                                    \
1935   u32 obj_enable = io_registers[REG_WINOUT] >> 8;                             \
1936   render_scanline_layer_functions_##type(); /* declares layer_renderers */    \
1937   u32 copy_start, copy_end;                                                   \
1938   u16 copy_buffer[240]; /* 240 matches GBA_SCREEN_WIDTH */                    \
1939   u16 *copy_ptr                                                               \
1940
1941 #define render_scanline_obj_extra_variables_copy_tile(map_space)              \
1942   render_scanline_obj_extra_variables_copy(tile) /* map_space is unused */    \
1943
1944 #define render_scanline_obj_extra_variables_copy_bitmap(map_space)            \
1945   render_scanline_obj_extra_variables_copy(bitmap) /* map_space is unused */  \
1946
1947 // Dispatch one obj: attr 0 bit 8 = affine, attr 0 bit 13 = 8bpp, attr 1 bit 12 / 13 = flip x / y.
1948 #define render_scanline_obj_main(combine_op, alpha_op, map_space)             \
1949   if(obj_attribute_0 & 0x100)                                                 \
1950   {                                                                           \
1951     if((obj_attribute_0 >> 13) & 0x01)                                        \
1952     {                                                                         \
1953       obj_render_affine(combine_op, 8bpp, alpha_op, map_space);               \
1954     }                                                                         \
1955     else                                                                      \
1956     {                                                                         \
1957       obj_render_affine(combine_op, 4bpp, alpha_op, map_space);               \
1958     }                                                                         \
1959   }                                                                           \
1960   else                                                                        \
1961   {                                                                           \
1962     vertical_offset = vcount - obj_y;                                         \
1963                                                                               \
1964     if((obj_attribute_1 >> 13) & 0x01)                                        \
1965       vertical_offset = obj_height - vertical_offset - 1;                     \
1966                                                                               \
1967     switch(((obj_attribute_0 >> 12) & 0x02) |                                 \
1968      ((obj_attribute_1 >> 12) & 0x01))                                        \
1969     {                                                                         \
1970       case 0x0:                                                               \
1971         obj_render(combine_op, 4bpp, alpha_op, map_space, noflip);            \
1972         break;                                                                \
1973                                                                               \
1974       case 0x1:                                                               \
1975         obj_render(combine_op, 4bpp, alpha_op, map_space, flip);              \
1976         break;                                                                \
1977                                                                               \
1978       case 0x2:                                                               \
1979         obj_render(combine_op, 8bpp, alpha_op, map_space, noflip);            \
1980         break;                                                                \
1981                                                                               \
1982       case 0x3:                                                               \
1983         obj_render(combine_op, 8bpp, alpha_op, map_space, flip);              \
1984         break;                                                                \
1985     }                                                                         \
1986   }                                                                           \
1987
// Per-OBJ body for builders without per-sprite alpha selection: forwards its
// three arguments unchanged to render_scanline_obj_main.
1988 #define render_scanline_obj_no_partial_alpha(combine_op, alpha_op, map_space) \
1989   render_scanline_obj_main(combine_op, alpha_op, map_space)                   \
1990
// Per-OBJ body for the partial_alpha builders. Bits 10-11 of obj_attribute_0
// pick the path: when they are non-zero, pixel_combine is set to 0x00000300
// and the OBJ is rendered through the alpha_obj variant; otherwise
// pixel_combine is reset to base_pixel_combine and the color32 variant is
// used. The alpha_op argument is accepted but not referenced in the body.
1991 #define render_scanline_obj_partial_alpha(combine_op, alpha_op, map_space)    \
1992   if((obj_attribute_0 >> 10) & 0x03)                                          \
1993   {                                                                           \
1994     pixel_combine = 0x00000300;                                               \
1995     render_scanline_obj_main(combine_op, alpha_obj, map_space);               \
1996   }                                                                           \
1997   else                                                                        \
1998   {                                                                           \
1999     pixel_combine = base_pixel_combine;                                       \
2000     render_scanline_obj_main(combine_op, color32, map_space);                 \
2001   }                                                                           \
2002
// Prologue used when combine_op is "transparent": expands to nothing.
2003 #define render_scanline_obj_prologue_transparent(alpha_op)                    \
2004
// Prologue body used when combine_op is "copy". It computes the horizontal
// span [copy_start, copy_end) of the current OBJ (width doubled when bit
// 0x200 of obj_attribute_0 is set) and clamps it to [start, end). If the
// clamped span still passes the range test below, it calls
// render_scanline_conditional_<type> over that span into copy_buffer and
// points copy_ptr at copy_buffer + copy_start. Otherwise it executes
// `continue`, so this macro may only be expanded inside a loop body.
2005 #define render_scanline_obj_prologue_copy_body(type)                          \
2006   copy_start = obj_x;                                                         \
2007   if(obj_attribute_0 & 0x200)                                                 \
2008     copy_end = obj_x + (obj_width * 2);                                       \
2009   else                                                                        \
2010     copy_end = obj_x + obj_width;                                             \
2011                                                                               \
2012   if(copy_start < start)                                                      \
2013     copy_start = start;                                                       \
2014   if(copy_end > end)                                                          \
2015     copy_end = end;                                                           \
2016                                                                               \
2017   if((copy_start < end) && (copy_end > start))                                \
2018   {                                                                           \
2019     render_scanline_conditional_##type(copy_start, copy_end, copy_buffer,     \
2020      obj_enable, dispcnt, bldcnt, layer_renderers);                           \
2021     copy_ptr = copy_buffer + copy_start;                                      \
2022   }                                                                           \
2023   else                                                                        \
2024   {                                                                           \
2025     continue;                                                                 \
2026   }                                                                           \
2027
// Copy prologue specialised for the "tile" conditional renderer.
2028 #define render_scanline_obj_prologue_copy_tile()                              \
2029   render_scanline_obj_prologue_copy_body(tile)                                \
2030
// Copy prologue specialised for the "bitmap" conditional renderer.
2031 #define render_scanline_obj_prologue_copy_bitmap()                            \
2032   render_scanline_obj_prologue_copy_body(bitmap)                              \
2033
// Dispatcher for the copy prologue: pastes alpha_op onto
// render_scanline_obj_prologue_, so alpha_op must name an existing prologue
// (copy_tile or copy_bitmap among the macros defined here).
2034 #define render_scanline_obj_prologue_copy(alpha_op)                           \
2035   render_scanline_obj_prologue_##alpha_op()                                   \
2036
2037
// Generates
//   void render_scanline_obj_<alpha_op>_<map_space>(u32 priority, u32 start,
//    u32 end, render_scanline_dest_<alpha_op> *scanline)
// The generated function reads the current line from REG_VCOUNT and walks the
// OBJ indices stored in obj_priority_list[priority][vcount]. For each one it
// loads the three OAM attribute halfwords, builds the size index from the top
// two bits of attributes 0 and 1, sign-extends the low 9 bits of attribute 1
// into obj_x, expands the combine_op prologue, derives obj_y from the low
// byte of attribute 0 (values above 160 have 256 subtracted), and finally
// expands render_scanline_obj_<partial_alpha_op> to draw the OBJ.
// Several locals declared here are not referenced in this macro's own text;
// presumably they are consumed by the nested render macros -- confirm against
// obj_render / render_scanline_obj_main before removing any of them.
2038 #define render_scanline_obj_builder(combine_op, alpha_op, map_space,          \
2039  partial_alpha_op)                                                            \
2040 void render_scanline_obj_##alpha_op##_##map_space(u32 priority,               \
2041  u32 start, u32 end, render_scanline_dest_##alpha_op *scanline)               \
2042 {                                                                             \
2043   render_scanline_obj_extra_variables_##alpha_op(map_space);                  \
2044   s32 obj_num, i;                                                             \
2045   s32 obj_x, obj_y;                                                           \
2046   s32 obj_size;                                                               \
2047   s32 obj_width, obj_height;                                                  \
2048   u32 obj_attribute_0, obj_attribute_1, obj_attribute_2;                      \
2049   s32 vcount = io_registers[REG_VCOUNT];                                      \
2050   u32 tile_run;                                                               \
2051   u32 current_pixels;                                                         \
2052   u32 current_pixel;                                                          \
2053   u32 current_palette;                                                        \
2054   u32 vertical_offset;                                                        \
2055   u32 partial_tile_run, partial_tile_offset;                                  \
2056   u32 pixel_run;                                                              \
2057   u16 *oam_ptr;                                                               \
2058   render_scanline_dest_##alpha_op *dest_ptr;                                  \
2059   u8 *tile_base = vram + 0x10000;                                             \
2060   u8 *tile_ptr;                                                               \
2061   u32 obj_count = obj_priority_count[priority][vcount];                       \
2062   u8 *obj_list = obj_priority_list[priority][vcount];                         \
2063                                                                               \
2064   for(obj_num = 0; obj_num < obj_count; obj_num++)                            \
2065   {                                                                           \
2066     oam_ptr = oam_ram + (obj_list[obj_num] * 4);                              \
2067     obj_attribute_0 = oam_ptr[0];                                             \
2068     obj_attribute_1 = oam_ptr[1];                                             \
2069     obj_attribute_2 = oam_ptr[2];                                             \
2070     obj_size = ((obj_attribute_0 >> 12) & 0x0C) | (obj_attribute_1 >> 14);    \
2071                                                                               \
2072     obj_x = (s32)(obj_attribute_1 << 23) >> 23;                               \
2073     obj_width = obj_width_table[obj_size];                                    \
2074                                                                               \
2075     render_scanline_obj_prologue_##combine_op(alpha_op);                      \
2076                                                                               \
2077     obj_y = obj_attribute_0 & 0xFF;                                           \
2078                                                                               \
2079     if(obj_y > 160)                                                           \
2080       obj_y -= 256;                                                           \
2081                                                                               \
2082     obj_height = obj_height_table[obj_size];                                  \
2083     render_scanline_obj_##partial_alpha_op(combine_op, alpha_op, map_space);  \
2084   }                                                                           \
2085 }                                                                             \
2086
// Instantiate the OBJ scanline renderers, each in a 1D and a 2D map_space
// flavour. Only the partial_alpha pair uses the partial_alpha per-OBJ body,
// and only the copy_tile / copy_bitmap pairs use the copy prologue.
2087 render_scanline_obj_builder(transparent, normal, 1D, no_partial_alpha);
2088 render_scanline_obj_builder(transparent, normal, 2D, no_partial_alpha);
2089 render_scanline_obj_builder(transparent, color16, 1D, no_partial_alpha);
2090 render_scanline_obj_builder(transparent, color16, 2D, no_partial_alpha);
2091 render_scanline_obj_builder(transparent, color32, 1D, no_partial_alpha);
2092 render_scanline_obj_builder(transparent, color32, 2D, no_partial_alpha);
2093 render_scanline_obj_builder(transparent, alpha_obj, 1D, no_partial_alpha);
2094 render_scanline_obj_builder(transparent, alpha_obj, 2D, no_partial_alpha);
2095 render_scanline_obj_builder(transparent, partial_alpha, 1D, partial_alpha);
2096 render_scanline_obj_builder(transparent, partial_alpha, 2D, partial_alpha);
2097 render_scanline_obj_builder(copy, copy_tile, 1D, no_partial_alpha);
2098 render_scanline_obj_builder(copy, copy_tile, 2D, no_partial_alpha);
2099 render_scanline_obj_builder(copy, copy_bitmap, 1D, no_partial_alpha);
2100 render_scanline_obj_builder(copy, copy_bitmap, 2D, no_partial_alpha);
2101
2102
2103
2104 void order_obj(u32 video_mode)
2105 {
2106   s32 obj_num, priority, row;
2107   s32 obj_x, obj_y;
2108   s32 obj_size, obj_mode;
2109   s32 obj_width, obj_height;
2110   u32 obj_priority;
2111   u32 obj_attribute_0, obj_attribute_1, obj_attribute_2;
2112   s32 vcount = io_registers[REG_VCOUNT];
2113   u32 partial_tile_run, partial_tile_offset;
2114   u32 pixel_run;
2115   u32 current_count;
2116   u16 *oam_ptr = oam_ram + 508;
2117   u16 *dest_ptr;
2118   u8 *tile_base = vram + 0x10000;
2119   u8 *tile_ptr;
2120
2121   for(priority = 0; priority < 5; priority++)
2122   {
2123     for(row = 0; row < 160; row++)
2124     {
2125       obj_priority_count[priority][row] = 0;
2126     }
2127   }
2128
2129   for(row = 0; row < 160; row++)
2130   {
2131     obj_alpha_count[row] = 0;
2132   }
2133
2134   for(obj_num = 127; obj_num >= 0; obj_num--, oam_ptr -= 4)
2135   {
2136     obj_attribute_0 = oam_ptr[0];
2137     obj_attribute_2 = oam_ptr[2];
2138     obj_size = obj_attribute_0 & 0xC000;
2139     obj_priority = (obj_attribute_2 >> 10) & 0x03;
2140     obj_mode = (obj_attribute_0 >> 10) & 0x03;
2141
2142     if(((obj_attribute_0 & 0x0300) != 0x0200) && (obj_size != 0xC000) &&
2143      (obj_mode != 3) && ((video_mode < 3) ||
2144      ((obj_attribute_2 & 0x3FF) >= 512)))
2145     {
2146       obj_y = obj_attribute_0 & 0xFF;
2147       if(obj_y > 160)
2148         obj_y -= 256;
2149
2150       obj_attribute_1 = oam_ptr[1];
2151       obj_size = ((obj_size >> 12) & 0x0C) | (obj_attribute_1 >> 14);
2152       obj_height = obj_height_table[obj_size];
2153       obj_width = obj_width_table[obj_size];
2154
2155       if(obj_attribute_0 & 0x200)
2156       {
2157         obj_height *= 2;
2158         obj_width *= 2;
2159       }
2160
2161       if(((obj_y + obj_height) > 0) && (obj_y < 160))
2162       {
2163         obj_x = (s32)(obj_attribute_1 << 23) >> 23;
2164
2165         if(((obj_x + obj_width) > 0) && (obj_x < 240))
2166         {
2167           if(obj_y < 0)
2168           {
2169             obj_height += obj_y;
2170             obj_y = 0;
2171           }
2172
2173           if((obj_y + obj_height) >= 160)
2174           {
2175             obj_height = 160 - obj_y;
2176           }
2177
2178           if(obj_mode == 1)
2179           {
2180             for(row = obj_y; row < obj_y + obj_height; row++)
2181             {
2182               current_count = obj_priority_count[obj_priority][row];
2183               obj_priority_list[obj_priority][row][current_count] = obj_num;
2184               obj_priority_count[obj_priority][row] = current_count + 1;
2185               obj_alpha_count[row]++;
2186             }
2187           }
2188           else
2189           {
2190             if(obj_mode == 2)
2191             {
2192               obj_priority = 4;
2193             }
2194
2195             for(row = obj_y; row < obj_y + obj_height; row++)
2196             {
2197               current_count = obj_priority_count[obj_priority][row];
2198               obj_priority_list[obj_priority][row][current_count] = obj_num;
2199               obj_priority_count[obj_priority][row] = current_count + 1;
2200             }
2201           }
2202         }
2203       }
2204     }
2205   }
2206 }
2207
// Layer identifiers in the order produced by order_layers(): plain BG numbers
// 0-3, or (priority | 0x04) for an OBJ layer of that priority.
2208 u32 layer_order[16];
// Number of valid entries at the front of layer_order.
2209 u32 layer_count;
2210
2211 u32 order_layers(u32 layer_flags)
2212 {
2213   s32 priority, layer_number;
2214   layer_count = 0;
2215
2216   for(priority = 3; priority >= 0; priority--)
2217   {
2218     for(layer_number = 3; layer_number >= 0; layer_number--)
2219     {
2220       if(((layer_flags >> layer_number) & 1) &&
2221        ((io_registers[REG_BG0CNT + layer_number] & 0x03) == priority))
2222       {
2223         layer_order[layer_count] = layer_number;
2224         layer_count++;
2225       }
2226     }
2227
2228     if((obj_priority_count[priority][io_registers[REG_VCOUNT]] > 0)
2229      && (layer_flags & 0x10))
2230     {
2231       layer_order[layer_count] = priority | 0x04;
2232       layer_count++;
2233     }
2234   }
2235 }
2236
// Stores `color` into dest_ptr[i] for every i in [_start, _end). The macro
// declares its own `u32 i`, so it can be expanded at most once per scope and
// relies on `color` and `dest_ptr` being in scope at the expansion site.
2237 #define fill_line(_start, _end)                                               \
2238   u32 i;                                                                      \
2239                                                                               \
2240   for(i = _start; i < _end; i++)                                              \
2241   {                                                                           \
2242     dest_ptr[i] = color;                                                      \
2243   }                                                                           \
2244
2245
// Per-type colour preparation for fill_line_builder. The "normal" variant
// replaces `color` with its palette_ram_converted entry; the alpha, color16
// and color32 variants expand to nothing, so `color` is stored as passed in.
2246 #define fill_line_color_normal()                                              \
2247   color = palette_ram_converted[color]                                        \
2248
2249 #define fill_line_color_alpha()                                               \
2250
2251 #define fill_line_color_color16()                                             \
2252
2253 #define fill_line_color_color32()                                             \
2254
// Generates
//   void fill_line_<type>(u16 color, render_scanline_dest_<type> *dest_ptr,
//    u32 start, u32 end)
// which applies fill_line_color_<type>() to `color` and then fills
// dest_ptr[start .. end-1] with the result.
2255 #define fill_line_builder(type)                                               \
2256 void fill_line_##type(u16 color, render_scanline_dest_##type *dest_ptr,       \
2257  u32 start, u32 end)                                                          \
2258 {                                                                             \
2259   fill_line_color_##type();                                                   \
2260   fill_line(start, end);                                                      \
2261 }                                                                             \
2262
// Instantiate one fill routine per destination type.
2263 fill_line_builder(normal);
2264 fill_line_builder(alpha);
2265 fill_line_builder(color16);
2266 fill_line_builder(color32);
2267
2268
2269 // Alpha blend two pixels (pixel_top and pixel_bottom).
2270
// Fetches the bottom pixel from palette_ram_converted using bits 16-24 of
// pixel_pair, spreads it with the 0x07E0F81F mask, and replaces pixel_top
// with (pixel_top * blend_a + pixel_bottom * blend_b) >> 4. pixel_top is
// expected to have been spread with the same mask already (expand_pixel_no_dest
// does this). The result is not re-masked here and the expansion carries no
// trailing semicolon.
2271 #define blend_pixel()                                                         \
2272   pixel_bottom = palette_ram_converted[(pixel_pair >> 16) & 0x1FF];           \
2273   pixel_bottom = (pixel_bottom | (pixel_bottom << 16)) & 0x07E0F81F;          \
2274   pixel_top = ((pixel_top * blend_a) + (pixel_bottom * blend_b)) >> 4         \
2275
2276
2277 // Alpha blend two pixels, allowing for saturation (individual channels > 31).
2278 // The operation is optimized towards saturation not occurring.
2279
// Same arithmetic as blend_pixel, followed by a saturation fix-up. In the
// spread layout selected by 0x07E0F81F the bit directly above each field
// (0x08000000, 0x00010000, 0x00000020) is set when that field overflowed;
// when any of them is set, the corresponding field is forced to all ones.
// The overflow bits themselves are left in place here; expand_pixel_no_dest
// masks the value again after this macro runs.
2280 #define blend_saturate_pixel()                                                \
2281   pixel_bottom = palette_ram_converted[(pixel_pair >> 16) & 0x1FF];           \
2282   pixel_bottom = (pixel_bottom | (pixel_bottom << 16)) & 0x07E0F81F;          \
2283   pixel_top = ((pixel_top * blend_a) + (pixel_bottom * blend_b)) >> 4;        \
2284   if(pixel_top & 0x08010020)                                                  \
2285   {                                                                           \
2286     if(pixel_top & 0x08000000)                                                \
2287       pixel_top |= 0x07E00000;                                                \
2288                                                                               \
2289     if(pixel_top & 0x00010000)                                                \
2290       pixel_top |= 0x0000F800;                                                \
2291                                                                               \
2292     if(pixel_top & 0x00000020)                                                \
2293       pixel_top |= 0x0000001F;                                                \
2294   }                                                                           \
2295
// Brighten: pixel_top = upper + (pixel_top * blend) / 16, where `upper` and
// `blend` are supplied by the enclosing function.
2296 #define brighten_pixel()                                                      \
2297   pixel_top = upper + ((pixel_top * blend) >> 4);                             \
2298
// Darken: pixel_top = (pixel_top * blend) / 16.
2299 #define darken_pixel()                                                        \
2300   pixel_top = (pixel_top * blend) >> 4;                                       \
2301
// True when pixel_pair has both bit 9 (0x00000200) and bit 26 (0x04000000)
// set; used to select pixels that get alpha blended.
2302 #define effect_condition_alpha                                                \
2303   ((pixel_pair & 0x04000200) == 0x04000200)                                   \
2304
// True when pixel_source has bit 9 (0x00000200) set; used to select pixels
// that get a fade (brighten/darken) applied.
2305 #define effect_condition_fade(pixel_source)                                   \
2306   ((pixel_source & 0x00000200) == 0x00000200)                                 \
2307
// Applies <expand_type>_pixel() to pixel_top: the 16-bit value is first
// spread over 32 bits with the 0x07E0F81F mask, the effect macro runs, and
// the result is masked and folded back into the low 16 bits. The
// pixel_source argument is not referenced in the body.
2308 #define expand_pixel_no_dest(expand_type, pixel_source)                       \
2309   pixel_top = (pixel_top | (pixel_top << 16)) & 0x07E0F81F;                   \
2310   expand_type##_pixel();                                                      \
2311   pixel_top &= 0x07E0F81F;                                                    \
2312   pixel_top = (pixel_top >> 16) | pixel_top                                   \
2313
// Loads pixel_top from palette_ram_converted using the low 9 bits of
// pixel_source, applies the effect via expand_pixel_no_dest, and stores the
// result through screen_dest_ptr (no trailing semicolon).
2314 #define expand_pixel(expand_type, pixel_source)                               \
2315   pixel_top = palette_ram_converted[pixel_source & 0x1FF];                    \
2316   expand_pixel_no_dest(expand_type, pixel_source);                            \
2317   *screen_dest_ptr = pixel_top                                                \
2318
// Converts pixels [start, end) from screen_src_ptr into screen_dest_ptr.
// For each pixel the source value is loaded into `pixel_source` (a variable
// named by the caller); when `effect_condition` holds, the <expand_type>
// effect is applied, otherwise the pixel is only palette-converted using its
// low 9 bits. Side effects on the caller's variables: both pointers are
// advanced and `end` is reduced by `start`.
2319 #define expand_loop(expand_type, effect_condition, pixel_source)              \
2320   screen_src_ptr += start;                                                    \
2321   screen_dest_ptr += start;                                                   \
2322                                                                               \
2323   end -= start;                                                               \
2324                                                                               \
2325   for(i = 0; i < end; i++)                                                    \
2326   {                                                                           \
2327     pixel_source = *screen_src_ptr;                                           \
2328     if(effect_condition)                                                      \
2329     {                                                                         \
2330       expand_pixel(expand_type, pixel_source);                                \
2331     }                                                                         \
2332     else                                                                      \
2333     {                                                                         \
2334       *screen_dest_ptr =                                                      \
2335        palette_ram_converted[pixel_source & 0x1FF];                           \
2336     }                                                                         \
2337                                                                               \
2338     screen_src_ptr++;                                                         \
2339     screen_dest_ptr++;                                                        \
2340   }                                                                           \
2341
2342
// Three-way variant of expand_loop operating on pixel_pair. For each pixel
// in [start, end):
//   - fade bit clear                    -> plain palette conversion;
//   - fade bit set, alpha condition met -> <alpha_expand> effect;
//   - fade bit set, alpha condition not met -> <expand_type> effect.
// Like expand_loop it advances both pointers and reduces `end` by `start`.
2343 #define expand_loop_partial_alpha(alpha_expand, expand_type)                  \
2344   screen_src_ptr += start;                                                    \
2345   screen_dest_ptr += start;                                                   \
2346                                                                               \
2347   end -= start;                                                               \
2348                                                                               \
2349   for(i = 0; i < end; i++)                                                    \
2350   {                                                                           \
2351     pixel_pair = *screen_src_ptr;                                             \
2352     if(effect_condition_fade(pixel_pair))                                     \
2353     {                                                                         \
2354       if(effect_condition_alpha)                                              \
2355       {                                                                       \
2356         expand_pixel(alpha_expand, pixel_pair);                               \
2357       }                                                                       \
2358       else                                                                    \
2359       {                                                                       \
2360         expand_pixel(expand_type, pixel_pair);                                \
2361       }                                                                       \
2362     }                                                                         \
2363     else                                                                      \
2364     {                                                                         \
2365       *screen_dest_ptr =                                                      \
2366        palette_ram_converted[pixel_pair & 0x1FF];                             \
2367     }                                                                         \
2368                                                                               \
2369     screen_src_ptr++;                                                         \
2370     screen_dest_ptr++;                                                        \
2371   }                                                                           \
2372
2373
// Runs expand_loop_partial_alpha with the saturating blend when
// blend_a + blend_b can exceed 16, and with the cheaper non-saturating blend
// otherwise. `expand_type` names the fade effect (darken or brighten) used
// for pixels that are not alpha blended.
2374 #define expand_partial_alpha(expand_type)                                     \
2375   if((blend_a + blend_b) > 16)                                                \
2376   {                                                                           \
2377     expand_loop_partial_alpha(blend_saturate, expand_type);                   \
2378   }                                                                           \
2379   else                                                                        \
2380   {                                                                           \
2381     expand_loop_partial_alpha(blend, expand_type);                            \
2382   }                                                                           \
2383
2384
2385
2386 // Blend top two pixels of scanline with each other.
2387
2388 #ifdef RENDER_COLOR16_NORMAL
2389
2390 #ifndef GP2X_BUILD
2391
// In-place palette expansion of screen_ptr[start .. end-1] through
// palette_ram_converted. NOTE(review): the unconditional `return` below makes
// the conversion loop unreachable, so as written this function has no
// observable effect -- presumably a deliberate disable; confirm before
// removing either the return or the dead loop.
2392 void expand_normal(u16 *screen_ptr, u32 start, u32 end)
2393 {
2394   u32 i, pixel_source;
2395   screen_ptr += start;
2396
// Early exit: nothing after this line executes.
2397   return;
2398
2399   end -= start;
2400
2401   for(i = 0; i < end; i++)
2402   {
2403     pixel_source = *screen_ptr;
2404     *screen_ptr = palette_ram_converted[pixel_source];
2405
2406     screen_ptr++;
2407   }
2408 }
2409
2410 #endif
2411
2412 #else
2413
2414 #define expand_normal(screen_ptr, start, end)
2415
2416 #endif
2417
2418
2419 #ifndef GP2X_BUILD
2420
2421 void expand_blend(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2422  u32 start, u32 end)
2423 {
2424   u32 pixel_pair;
2425   u32 pixel_top, pixel_bottom;
2426   u32 bldalpha = io_registers[REG_BLDALPHA];
2427   u32 blend_a = bldalpha & 0x1F;
2428   u32 blend_b = (bldalpha >> 8) & 0x1F;
2429   u32 i;
2430
2431   if(blend_a > 16)
2432     blend_a = 16;
2433
2434   if(blend_b > 16)
2435     blend_b = 16;
2436
2437   // The individual colors can saturate over 31, this should be taken
2438   // care of in an alternate pass as it incurs a huge additional speedhit.
2439   if((blend_a + blend_b) > 16)
2440   {
2441     expand_loop(blend_saturate, effect_condition_alpha, pixel_pair);
2442   }
2443   else
2444   {
2445     expand_loop(blend, effect_condition_alpha, pixel_pair);
2446   }
2447 }
2448
2449 #endif
2450
2451 // Blend scanline with black.
2452
2453 void expand_darken(u16 *screen_src_ptr, u16 *screen_dest_ptr,
2454  u32 start, u32 end)
2455 {
2456   u32 pixel_top;
2457   s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);
2458   u32 i;
2459
2460   if(blend < 0)
2461     blend = 0;
2462
2463   expand_loop(darken, effect_condition_fade(pixel_top), pixel_top);
2464 }
2465
2466
2467 // Blend scanline with white.
2468
2469 void expand_brighten(u16 *screen_src_ptr, u16 *screen_dest_ptr,
2470  u32 start, u32 end)
2471 {
2472   u32 pixel_top;
2473   u32 blend = io_registers[REG_BLDY] & 0x1F;
2474   u32 upper;
2475   u32 i;
2476
2477   if(blend > 16)
2478     blend = 16;
2479
2480   upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;
2481   blend = 16 - blend;
2482
2483   expand_loop(brighten, effect_condition_fade(pixel_top), pixel_top);
2484
2485 }
2486
2487
2488 // Expand scanline such that if both top and bottom pass it's alpha,
2489 // if only top passes it's as specified, and if neither pass it's normal.
2490
2491 void expand_darken_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2492  u32 start, u32 end)
2493 {
2494   s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);
2495   u32 pixel_pair;
2496   u32 pixel_top, pixel_bottom;
2497   u32 bldalpha = io_registers[REG_BLDALPHA];
2498   u32 blend_a = bldalpha & 0x1F;
2499   u32 blend_b = (bldalpha >> 8) & 0x1F;
2500   u32 i;
2501
2502   if(blend < 0)
2503     blend = 0;
2504
2505   if(blend_a > 16)
2506     blend_a = 16;
2507
2508   if(blend_b > 16)
2509     blend_b = 16;
2510
2511   expand_partial_alpha(darken);
2512 }
2513
2514
2515 void expand_brighten_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2516  u32 start, u32 end)
2517 {
2518   s32 blend = io_registers[REG_BLDY] & 0x1F;
2519   u32 pixel_pair;
2520   u32 pixel_top, pixel_bottom;
2521   u32 bldalpha = io_registers[REG_BLDALPHA];
2522   u32 blend_a = bldalpha & 0x1F;
2523   u32 blend_b = (bldalpha >> 8) & 0x1F;
2524   u32 upper;
2525   u32 i;
2526
2527   if(blend > 16)
2528     blend = 16;
2529
2530   upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;
2531   blend = 16 - blend;
2532
2533   if(blend_a > 16)
2534     blend_a = 16;
2535
2536   if(blend_b > 16)
2537     blend_b = 16;
2538
2539   expand_partial_alpha(brighten);
2540 }
2541
2542
2543 // Render an OBJ layer from start to end, depending on the type (1D or 2D)
2544 // stored in dispcnt.
2545
// Clears bit 0x04 of current_layer (the marker order_layers sets on OBJ
// entries), leaving the OBJ priority, then calls the _1D renderer when
// dispcnt bit 0x40 is set and the _2D renderer otherwise. The expansion is
// two statements ending in an unterminated if/else, so it must be used inside
// braces and followed by a semicolon.
2546 #define render_obj_layer(type, dest, _start, _end)                            \
2547   current_layer &= ~0x04;                                                     \
2548   if(dispcnt & 0x40)                                                          \
2549     render_scanline_obj_##type##_1D(current_layer, _start, _end, dest);       \
2550   else                                                                        \
2551     render_scanline_obj_##type##_2D(current_layer, _start, _end, dest)        \
2552
2553
2554 // Render a target all the way with the background color as taken from the
2555 // palette.
2556
// Calls fill_line_<type> with colour argument 0 over [_start, _end) of dest.
2557 #define fill_line_bg(type, dest, _start, _end)                                \
2558   fill_line_##type(0, dest, _start, _end)                                     \
2559
2560
2561 // Render all layers as they appear in the layer order.
2562
// Draws the full 0..240 scanline into `dest` following layer_order.
// The first entry establishes the base: an OBJ entry (bit 0x04 set) gets a
// background fill followed by the OBJ layer, a BG entry is drawn with its
// <tile_alpha>_render_base function. Every later entry is drawn on top, OBJ
// entries through render_obj_layer and BG entries through
// <tile_alpha>_render_transparent. Uses current_layer and layer_order_pos
// from the enclosing scope.
// NOTE(review): layer_order[0] is read without checking layer_count;
// presumably callers only expand this when at least one layer is present --
// confirm.
2563 #define render_layers(tile_alpha, obj_alpha, dest)                            \
2564 {                                                                             \
2565   current_layer = layer_order[0];                                             \
2566   if(current_layer & 0x04)                                                    \
2567   {                                                                           \
2568     /* If the first one is OBJ render the background then render it. */       \
2569     fill_line_bg(tile_alpha, dest, 0, 240);                                   \
2570     render_obj_layer(obj_alpha, dest, 0, 240);                                \
2571   }                                                                           \
2572   else                                                                        \
2573   {                                                                           \
2574     /* Otherwise render a base layer. */                                      \
2575     layer_renderers[current_layer].tile_alpha##_render_base(current_layer,    \
2576      0, 240, dest);                                                           \
2577   }                                                                           \
2578                                                                               \
2579   /* Render the rest of the layers. */                                        \
2580   for(layer_order_pos = 1; layer_order_pos < layer_count; layer_order_pos++)  \
2581   {                                                                           \
2582     current_layer = layer_order[layer_order_pos];                             \
2583     if(current_layer & 0x04)                                                  \
2584     {                                                                         \
2585       render_obj_layer(obj_alpha, dest, 0, 240);                              \
2586     }                                                                         \
2587     else                                                                      \
2588     {                                                                         \
2589       layer_renderers[current_layer].                                         \
2590        tile_alpha##_render_transparent(current_layer, 0, 240, dest);          \
2591     }                                                                         \
2592   }                                                                           \
2593 }                                                                             \
2594
// True when REG_BLDALPHA masked with 0x1F1F is not exactly 0x001F and
// REG_BLDCNT has at least one bit set in both its 0x3F and 0x3F00 fields
// (presumably the first- and second-target layer selections -- confirm
// against the register documentation).
2595 #define render_condition_alpha                                                \
2596   (((io_registers[REG_BLDALPHA] & 0x1F1F) != 0x001F) &&                       \
2597    ((io_registers[REG_BLDCNT] & 0x3F) != 0) &&                                \
2598    ((io_registers[REG_BLDCNT] & 0x3F00) != 0))                                \
2599
// True when the low five bits of REG_BLDY are non-zero and REG_BLDCNT has at
// least one bit set in its 0x3F field.
2600 #define render_condition_fade                                                 \
2601   (((io_registers[REG_BLDY] & 0x1F) != 0) &&                                  \
2602    ((io_registers[REG_BLDCNT] & 0x3F) != 0))                                  \
2603
// Renders one scanline span, choosing the color effect path from bldcnt.
//
//   renderer         macro invoked as renderer(tile_mode, obj_mode, dest)
//   layer_condition  expression; when false only palette entry 0 is drawn
//   alpha_condition  expression gating the alpha blend path (effect 0x01)
//   fade_condition   expression gating the fade paths (effects 0x02, 0x03)
//   _start, _end     span forwarded to the expand_* / fill_line_color16 calls
//
// Expects bldcnt and scanline to exist in the expanding scope. The effect is
// taken from (bldcnt >> 6) & 0x03. When obj_alpha_count for the current
// REG_VCOUNT is non-zero, every path renders into a local 240 entry u32
// buffer that is then expanded into scanline; otherwise only the alpha blend
// path uses such a buffer and the other paths render into scanline directly.
// If the selected effect's condition is false, control leaves the switch and
// reaches the plain render at the bottom of that branch.
//
// The effect paths end in a bare "return;", so this macro can only be
// expanded inside a function returning void, and nothing placed after the
// expansion runs when one of those paths is taken.
2604 #define render_layers_color_effect(renderer, layer_condition,                 \
2605  alpha_condition, fade_condition, _start, _end)                               \
2606 {                                                                             \
2607   if(layer_condition)                                                         \
2608   {                                                                           \
2609     if(obj_alpha_count[io_registers[REG_VCOUNT]] > 0)                         \
2610     {                                                                         \
2611       /* Render based on special effects mode. */                             \
2612       u32 screen_buffer[240];                                                 \
2613       switch((bldcnt >> 6) & 0x03)                                            \
2614       {                                                                       \
2615         /* Alpha blend */                                                     \
2616         case 0x01:                                                            \
2617         {                                                                     \
2618           if(alpha_condition)                                                 \
2619           {                                                                   \
2620             renderer(alpha, alpha_obj, screen_buffer);                        \
2621             expand_blend(screen_buffer, scanline, _start, _end);              \
2622             return;                                                           \
2623           }                                                                   \
2624           break;                                                              \
2625         }                                                                     \
2626                                                                               \
2627         /* Fade to white */                                                   \
2628         case 0x02:                                                            \
2629         {                                                                     \
2630           if(fade_condition)                                                  \
2631           {                                                                   \
2632             renderer(color32, partial_alpha, screen_buffer);                  \
2633             expand_brighten_partial_alpha(screen_buffer, scanline,            \
2634              _start, _end);                                                   \
2635             return;                                                           \
2636           }                                                                   \
2637           break;                                                              \
2638         }                                                                     \
2639                                                                               \
2640         /* Fade to black */                                                   \
2641         case 0x03:                                                            \
2642         {                                                                     \
2643           if(fade_condition)                                                  \
2644           {                                                                   \
2645             renderer(color32, partial_alpha, screen_buffer);                  \
2646             expand_darken_partial_alpha(screen_buffer, scanline,              \
2647              _start, _end);                                                   \
2648             return;                                                           \
2649           }                                                                   \
2650           break;                                                              \
2651         }                                                                     \
2652       }                                                                       \
2653                                                                               \
      /* No effect applied: still go through the 32bit buffer and blend. */   \
2654       renderer(color32, partial_alpha, screen_buffer);                        \
2655       expand_blend(screen_buffer, scanline, _start, _end);                    \
2656     }                                                                         \
2657     else                                                                      \
2658     {                                                                         \
2659       /* Render based on special effects mode. */                             \
2660       switch((bldcnt >> 6) & 0x03)                                            \
2661       {                                                                       \
2662         /* Alpha blend */                                                     \
2663         case 0x01:                                                            \
2664         {                                                                     \
2665           if(alpha_condition)                                                 \
2666           {                                                                   \
2667             u32 screen_buffer[240];                                           \
2668             renderer(alpha, alpha_obj, screen_buffer);                        \
2669             expand_blend(screen_buffer, scanline, _start, _end);              \
2670             return;                                                           \
2671           }                                                                   \
2672           break;                                                              \
2673         }                                                                     \
2674                                                                               \
2675         /* Fade to white */                                                   \
2676         case 0x02:                                                            \
2677         {                                                                     \
2678           if(fade_condition)                                                  \
2679           {                                                                   \
2680             renderer(color16, color16, scanline);                             \
2681             expand_brighten(scanline, scanline, _start, _end);                \
2682             return;                                                           \
2683           }                                                                   \
2684           break;                                                              \
2685         }                                                                     \
2686                                                                               \
2687         /* Fade to black */                                                   \
2688         case 0x03:                                                            \
2689         {                                                                     \
2690           if(fade_condition)                                                  \
2691           {                                                                   \
2692             renderer(color16, color16, scanline);                             \
2693             expand_darken(scanline, scanline, _start, _end);                  \
2694             return;                                                           \
2695           }                                                                   \
2696           break;                                                              \
2697         }                                                                     \
2698       }                                                                       \
2699                                                                               \
      /* No effect applied: plain render straight into scanline. */           \
2700       renderer(normal, normal, scanline);                                     \
2701       expand_normal(scanline, _start, _end);                                  \
2702     }                                                                         \
2703   }                                                                           \
2704   else                                                                        \
2705   {                                                                           \
    /* No layers: fill the span with palette entry 0, faded first when */     \
    /* color_combine_mask_a(5) holds for a fade effect. */                    \
2706     u32 pixel_top = palette_ram_converted[0];                                 \
2707     switch((bldcnt >> 6) & 0x03)                                              \
2708     {                                                                         \
2709       /* Fade to white */                                                     \
2710       case 0x02:                                                              \
2711       {                                                                       \
2712         if(color_combine_mask_a(5))                                           \
2713         {                                                                     \
2714           u32 blend = io_registers[REG_BLDY] & 0x1F;                          \
2715           u32 upper;                                                          \
2716                                                                               \
2717           if(blend > 16)                                                      \
2718             blend = 16;                                                       \
2719                                                                               \
2720           upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;                   \
2721           blend = 16 - blend;                                                 \
2722                                                                               \
          /* upper and blend are presumably read by the macro below. */       \
2723           expand_pixel_no_dest(brighten, pixel_top);                          \
2724         }                                                                     \
2725         break;                                                                \
2726       }                                                                       \
2727                                                                               \
2728       /* Fade to black */                                                     \
2729       case 0x03:                                                              \
2730       {                                                                       \
2731         if(color_combine_mask_a(5))                                           \
2732         {                                                                     \
2733           s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);                   \
2734                                                                               \
2735           if(blend < 0)                                                       \
2736             blend = 0;                                                        \
2737                                                                               \
2738           expand_pixel_no_dest(darken, pixel_top);                            \
2739         }                                                                     \
2740         break;                                                                \
2741       }                                                                       \
2742     }                                                                         \
2743     fill_line_color16(pixel_top, scanline, _start, _end);                     \
2744   }                                                                           \
2745 }                                                                             \
2746
2747
2748 // Renders an entire scanline from 0 to 240, based on current color mode.
// The body is a single expansion of render_layers_color_effect using the
// render_layers renderer, layer_count as the layer condition and the
// unconditional alpha/fade conditions. bldcnt is read by that macro;
// current_layer and layer_order_pos are presumably used by render_layers,
// and layer_renderers is presumably declared by
// render_scanline_layer_functions_tile() - confirm against their definitions.
// dispcnt is not referenced directly in this body.
2749
2750 void render_scanline_tile(u16 *scanline, u32 dispcnt)
2751 {
2752   u32 current_layer;
2753   u32 layer_order_pos;
2754   u32 bldcnt = io_registers[REG_BLDCNT];
2755   render_scanline_layer_functions_tile();
2756
2757   render_layers_color_effect(render_layers, layer_count,
2758    render_condition_alpha, render_condition_fade, 0, 240);
2759 }
2760
// Renders an entire bitmap mode scanline from 0 to 240 into scanline.
// The line is first filled by fill_line_bg, then every entry of layer_order
// (up to layer_count) is drawn in order: entries with bit 0x04 set go through
// render_obj_layer, all others through layer_renderers->normal_render.
// layer_renderers is presumably declared by
// render_scanline_layer_functions_bitmap() - confirm. bldcnt and dispcnt are
// not referenced directly in this body; they may be used by the macros.
2761 void render_scanline_bitmap(u16 *scanline, u32 dispcnt)
2762 {
2763   u32 bldcnt = io_registers[REG_BLDCNT];
2764   render_scanline_layer_functions_bitmap();
2765   u32 current_layer;
2766   u32 layer_order_pos;
2767
2768   fill_line_bg(normal, scanline, 0, 240);
2769
2770   for(layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++)
2771   {
2772     current_layer = layer_order[layer_order_pos];
2773     if(current_layer & 0x04)
2774     {
2775       render_obj_layer(normal, scanline, 0, 240);
2776     }
2777     else
2778     {
2779       layer_renderers->normal_render(0, 240, scanline);
2780     }
2781   }
2782 }
2783
2784 // Render layers from start to end based on if they're allowed in the
2785 // enable flags.
//
//   tile_alpha  name prefix selecting the <prefix>_render_base and
//               <prefix>_render_transparent members of layer_renderers, and
//               the mode passed to fill_line_bg
//   obj_alpha   mode passed to render_obj_layer
//   dest        destination buffer handed to every renderer
//
// Reads enable_flags, start, end, layer_order, layer_count and
// layer_renderers from the expanding scope and updates current_layer and
// layer_order_pos. Bit 0x10 of enable_flags enables OBJ layers (layer_order
// entries with bit 0x04 set); bit (1 << n) enables background layer n. The
// first enabled layer is drawn with the _render_base variant (an OBJ layer is
// instead drawn on top of fill_line_bg), later ones with _render_transparent.
// If no enabled layer is found the span is filled by fill_line_bg alone.
//
// The skip label is declared with __label__ (a GNU C extension) so that it is
// local to this block and the macro can be expanded several times within one
// function.
//
// NOTE(review): both skip loops load layer_order[layer_order_pos] before
// comparing layer_order_pos with layer_count, so layer_order[layer_count] is
// read when no remaining layer is enabled - confirm the array has room.
2786
2787 #define render_layers_conditional(tile_alpha, obj_alpha, dest)                \
2788 {                                                                             \
2789   __label__ skip;                                                             \
2790   current_layer = layer_order[layer_order_pos];                               \
2791   /* If OBJ aren't enabled skip to the first non-OBJ layer */                 \
2792   if(!(enable_flags & 0x10))                                                  \
2793   {                                                                           \
2794     while((current_layer & 0x04) || !((1 << current_layer) & enable_flags))   \
2795     {                                                                         \
2796       layer_order_pos++;                                                      \
2797       current_layer = layer_order[layer_order_pos];                           \
2798                                                                               \
2799       /* Oops, ran out of layers, render the background. */                   \
2800       if(layer_order_pos == layer_count)                                      \
2801       {                                                                       \
2802         fill_line_bg(tile_alpha, dest, start, end);                           \
2803         goto skip;                                                            \
2804       }                                                                       \
2805     }                                                                         \
2806                                                                               \
2807     /* Render the first valid layer */                                        \
2808     layer_renderers[current_layer].tile_alpha##_render_base(current_layer,    \
2809      start, end, dest);                                                       \
2810                                                                               \
2811     layer_order_pos++;                                                        \
2812                                                                               \
2813     /* Render the rest of the layers if active, skipping OBJ ones. */         \
2814     for(; layer_order_pos < layer_count; layer_order_pos++)                   \
2815     {                                                                         \
2816       current_layer = layer_order[layer_order_pos];                           \
2817       if(!(current_layer & 0x04) && ((1 << current_layer) & enable_flags))    \
2818       {                                                                       \
2819         layer_renderers[current_layer].                                       \
2820          tile_alpha##_render_transparent(current_layer, start, end, dest);    \
2821       }                                                                       \
2822     }                                                                         \
2823   }                                                                           \
2824   else                                                                        \
2825   {                                                                           \
2826     /* Find the first active layer, skip all of the inactive ones */          \
2827     while(!((current_layer & 0x04) || ((1 << current_layer) & enable_flags))) \
2828     {                                                                         \
2829       layer_order_pos++;                                                      \
2830       current_layer = layer_order[layer_order_pos];                           \
2831                                                                               \
2832       /* Oops, ran out of layers, render the background. */                   \
2833       if(layer_order_pos == layer_count)                                      \
2834       {                                                                       \
2835         fill_line_bg(tile_alpha, dest, start, end);                           \
2836         goto skip;                                                            \
2837       }                                                                       \
2838     }                                                                         \
2839                                                                               \
2840     if(current_layer & 0x04)                                                  \
2841     {                                                                         \
2842       /* If the first one is OBJ render the background then render it. */     \
2843       fill_line_bg(tile_alpha, dest, start, end);                             \
2844       render_obj_layer(obj_alpha, dest, start, end);                          \
2845     }                                                                         \
2846     else                                                                      \
2847     {                                                                         \
2848       /* Otherwise render a base layer. */                                    \
2849       layer_renderers[current_layer].                                         \
2850        tile_alpha##_render_base(current_layer, start, end, dest);             \
2851     }                                                                         \
2852                                                                               \
2853     layer_order_pos++;                                                        \
2854                                                                               \
2855     /* Render the rest of the layers. */                                      \
2856     for(; layer_order_pos < layer_count; layer_order_pos++)                   \
2857     {                                                                         \
2858       current_layer = layer_order[layer_order_pos];                           \
2859       if(current_layer & 0x04)                                                \
2860       {                                                                       \
2861         render_obj_layer(obj_alpha, dest, start, end);                        \
2862       }                                                                       \
2863       else                                                                    \
2864       {                                                                       \
2865         if(enable_flags & (1 << current_layer))                               \
2866         {                                                                     \
2867           layer_renderers[current_layer].                                     \
2868            tile_alpha##_render_transparent(current_layer, start, end, dest);  \
2869         }                                                                     \
2870       }                                                                       \
2871     }                                                                         \
2872   }                                                                           \
2873                                                                               \
2874   skip:                                                                       \
2875     ;                                                                         \
2876 }                                                                             \
2877
2878
2879 // Render all of the BG and OBJ in a tiled scanline from start to end ONLY if
2880 // enable_flag allows that layer/OBJ. Also conditionally render color effects.
//
//   start, end       span of the scanline to render
//   scanline         destination line buffer
//   enable_flags     bits 0x1F select the layers/OBJ that may be drawn (at
//                    least one must be set, and layer_count non-zero, for any
//                    layer to be rendered); bit 0x20 must be set for the
//                    alpha blend and fade paths to be considered
//   dispcnt          not referenced directly in this body
//   bldcnt           read by render_layers_color_effect to pick the effect
//   layer_renderers  renderer table indexed by render_layers_conditional
//
// layer_order_pos starts at 0 because render_layers_conditional continues
// from its current value rather than resetting it.
2881
2882 void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline,
2883  u32 enable_flags, u32 dispcnt, u32 bldcnt, tile_layer_render_struct
2884  *layer_renderers)
2885 {
2886   u32 current_layer;
2887   u32 layer_order_pos = 0;
2888
2889   render_layers_color_effect(render_layers_conditional,
2890    (layer_count && (enable_flags & 0x1F)),
2891    ((enable_flags & 0x20) && render_condition_alpha),
2892    ((enable_flags & 0x20) && render_condition_fade), start, end);
2893 }
2894
2895
2896 // Render the BG and OBJ in a bitmap scanline from start to end ONLY if
2897 // enable_flag allows that layer/OBJ. Also conditionally render color effects.
//
// The span is first filled by fill_line_bg. Then, walking layer_order up to
// layer_count, OBJ entries (bit 0x04 set) are drawn only when enable_flags
// has bit 0x10 set, and every other entry is drawn through
// layer_renderers->normal_render only when enable_flags has bit 0x04 set.
// NOTE(review): unlike the tile variant, no color effect selection is visible
// in this body and bldcnt/dispcnt are not referenced directly here; any color
// effect handling would have to come from the macros - confirm.
2898
2899 void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline,
2900  u32 enable_flags, u32 dispcnt, u32 bldcnt, bitmap_layer_render_struct
2901  *layer_renderers)
2902 {
2903   u32 current_layer;
2904   u32 layer_order_pos;
2905
2906   fill_line_bg(normal, scanline, start, end);
2907
2908   for(layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++)
2909   {
2910     current_layer = layer_order[layer_order_pos];
2911     if(current_layer & 0x04)
2912     {
2913       if(enable_flags & 0x10)
2914       {
2915         render_obj_layer(normal, scanline, start, end);
2916       }
2917     }
2918     else
2919     {
2920       if(enable_flags & 0x04)
2921         layer_renderers->normal_render(start, end, scanline);
2922     }
2923   }
2924 }
2925
2926
// Loads the horizontal bounds and enable mask of window N (0 or 1):
// x1 is the high byte of REG_WINnH, x2 the low byte, and the enable mask is
// the 6 bit field of winin at bit offset N * 8. Both bounds are clamped to
// 240. Expects winin and the window_N_* variables to exist in the expanding
// scope. The last statement has no trailing semicolon; the call site
// supplies it.
2927 #define window_x_coords(window_number)                                        \
2928   window_##window_number##_x1 =                                               \
2929    io_registers[REG_WIN##window_number##H] >> 8;                              \
2930   window_##window_number##_x2 =                                               \
2931    io_registers[REG_WIN##window_number##H] & 0xFF;                            \
2932   window_##window_number##_enable =                                           \
2933    (winin >> (window_number * 8)) & 0x3F;                                     \
2934                                                                               \
2935   if(window_##window_number##_x1 > 240)                                       \
2936     window_##window_number##_x1 = 240;                                        \
2937                                                                               \
2938   if(window_##window_number##_x2 > 240)                                       \
2939     window_##window_number##_x2 = 240                                         \
2940
// Declares window_N_x1/x2/y1/y2/enable for window N and decides whether the
// window covers the current line. y1 is the high byte of REG_WINnV and y2 the
// low byte; vcount comes from the expanding scope. When the vertical test
// passes, the horizontal bounds and enable mask are loaded through
// window_x_coords; otherwise x1 and x2 are both set to 240, the value
// render_window_clip_1 checks x1 against to treat the window as absent.
// In both branches a y2 above 227 passes the test regardless of vcount, and
// a y1 above 227 always fails it.
// Because it declares variables, expand it once per window per scope.
// NOTE(review): window_N_enable stays unassigned on the paths that set
// x1 = x2 = 240.
// NOTE(review): the y1 > y2 branch tests (vcount <= y2 || vcount > y1) while
// the other branch tests (vcount >= y1 && vcount < y2); confirm the differing
// inclusivity at the edges is intended.
2941 #define window_coords(window_number)                                          \
2942   u32 window_##window_number##_x1, window_##window_number##_x2;               \
2943   u32 window_##window_number##_y1, window_##window_number##_y2;               \
2944   u32 window_##window_number##_enable;                                        \
2945   window_##window_number##_y1 =                                               \
2946    io_registers[REG_WIN##window_number##V] >> 8;                              \
2947   window_##window_number##_y2 =                                               \
2948    io_registers[REG_WIN##window_number##V] & 0xFF;                            \
2949                                                                               \
2950   if(window_##window_number##_y1 > window_##window_number##_y2)               \
2951   {                                                                           \
2952     if((((vcount <= window_##window_number##_y2) ||                           \
2953      (vcount > window_##window_number##_y1)) ||                               \
2954      (window_##window_number##_y2 > 227)) &&                                  \
2955      (window_##window_number##_y1 <= 227))                                    \
2956     {                                                                         \
2957       window_x_coords(window_number);                                         \
2958     }                                                                         \
2959     else                                                                      \
2960     {                                                                         \
2961       window_##window_number##_x1 = 240;                                      \
2962       window_##window_number##_x2 = 240;                                      \
2963     }                                                                         \
2964   }                                                                           \
2965   else                                                                        \
2966   {                                                                           \
2967     if((((vcount >= window_##window_number##_y1) &&                           \
2968      (vcount < window_##window_number##_y2)) ||                               \
2969      (window_##window_number##_y2 > 227)) &&                                  \
2970      (window_##window_number##_y1 <= 227))                                    \
2971     {                                                                         \
2972       window_x_coords(window_number);                                         \
2973     }                                                                         \
2974     else                                                                      \
2975     {                                                                         \
2976       window_##window_number##_x1 = 240;                                      \
2977       window_##window_number##_x2 = 240;                                      \
2978     }                                                                         \
2979   }                                                                           \
2980
// Renders the span start..end with the enable flags of the given window
// (window_<window_type>_enable) by calling render_scanline_conditional_<type>,
// unless the span is empty (start == end). Expects scanline, dispcnt, bldcnt
// and layer_renderers in the expanding scope. The expansion is a bare if
// statement, and start/end are evaluated more than once.
2981 #define render_window_segment(type, start, end, window_type)                  \
2982   if(start != end)                                                            \
2983   {                                                                           \
2984     render_scanline_conditional_##type(start, end, scanline,                  \
2985      window_##window_type##_enable, dispcnt, bldcnt, layer_renderers);        \
2986   }                                                                           \
2987
// Same call as render_window_segment but without the start != end check, for
// call sites that have already established the span to render. Expands to a
// single call expression with no trailing semicolon.
2988 #define render_window_segment_unequal(type, start, end, window_type)          \
2989   render_scanline_conditional_##type(start, end, scanline,                    \
2990    window_##window_type##_enable, dispcnt, bldcnt, layer_renderers)           \
2991
// Renders the overlap of the span start..end with the span
// clip_start..clip_end, using the enable flags of window_type. Nothing is
// rendered when start == end or when the two spans do not overlap:
//   start <  clip_start: render from clip_start up to the smaller of end and
//                        clip_end, provided end > clip_start
//   start >= clip_start: render from start up to the smaller of end and
//                        clip_end, provided start < clip_end when clipped
// The arguments are evaluated several times, so pass side-effect free
// expressions.
2992 #define render_window_segment_clip(type, clip_start, clip_end, start, end,    \
2993  window_type)                                                                 \
2994 {                                                                             \
2995   if(start != end)                                                            \
2996   {                                                                           \
2997     if(start < clip_start)                                                    \
2998     {                                                                         \
2999       if(end > clip_start)                                                    \
3000       {                                                                       \
3001         if(end > clip_end)                                                    \
3002         {                                                                     \
3003           render_window_segment_unequal(type, clip_start, clip_end,           \
3004            window_type);                                                      \
3005         }                                                                     \
3006         else                                                                  \
3007         {                                                                     \
3008           render_window_segment_unequal(type, clip_start, end, window_type);  \
3009         }                                                                     \
3010       }                                                                       \
3011     }                                                                         \
3012     else                                                                      \
3013                                                                               \
3014     if(end > clip_end)                                                        \
3015     {                                                                         \
3016       if(start < clip_end)                                                    \
3017         render_window_segment_unequal(type, start, clip_end, window_type);    \
3018     }                                                                         \
3019     else                                                                      \
3020     {                                                                         \
3021       render_window_segment_unequal(type, start, end, window_type);           \
3022     }                                                                         \
3023   }                                                                           \
3024 }                                                                             \
3025
// Renders the span start..end split by window 1's horizontal bounds.
// If window_1_x1 is 240 the whole span is rendered with window_out_enable.
// Otherwise the line is cut into three pieces, each clipped to start..end:
//   x1 >  x2: 0..x2 and x1..240 use window_1_enable, x2..x1 uses
//             window_out_enable
//   x1 <= x2: x1..x2 uses window_1_enable, 0..x1 and x2..240 use
//             window_out_enable
// Expects window_1_x1, window_1_x2, window_1_enable and window_out_enable in
// the expanding scope. The expansion is a bare if/else statement.
3026 #define render_window_clip_1(type, start, end)                                \
3027   if(window_1_x1 != 240)                                                      \
3028   {                                                                           \
3029     if(window_1_x1 > window_1_x2)                                             \
3030     {                                                                         \
3031       render_window_segment_clip(type, start, end, 0, window_1_x2, 1);        \
3032       render_window_segment_clip(type, start, end, window_1_x2, window_1_x1,  \
3033        out);                                                                  \
3034       render_window_segment_clip(type, start, end, window_1_x1, 240, 1);      \
3035     }                                                                         \
3036     else                                                                      \
3037     {                                                                         \
3038       render_window_segment_clip(type, start, end, 0, window_1_x1, out);      \
3039       render_window_segment_clip(type, start, end, window_1_x1, window_1_x2,  \
3040        1);                                                                    \
3041       render_window_segment_clip(type, start, end, window_1_x2, 240, out);    \
3042     }                                                                         \
3043   }                                                                           \
3044   else                                                                        \
3045   {                                                                           \
3046     render_window_segment(type, start, end, out);                             \
3047   }                                                                           \
3048
/* render_window_clip_obj(type, start, end)
 *
 * Renders the span start..end for the OBJ window case: the span is first
 * drawn with the `out` settings through render_window_segment, then the
 * render_scanline_obj_copy_<type>_1D or _2D routine is run over the same
 * span, selected by bit 6 of dispcnt (dispcnt & 0x40).
 *
 * The definition used to carry a stray ';' directly after the parameter
 * list, which became an empty statement at the start of every expansion; it
 * has been removed.
 *
 * Expands to several bare statements ending in an unbraced if/else, so it
 * must only be used as a complete statement inside a braced block. Requires
 * dispcnt and scanline to be in scope.
 */
#define render_window_clip_obj(type, start, end)                              \
  render_window_segment(type, start, end, out);                               \
  if(dispcnt & 0x40)                                                          \
    render_scanline_obj_copy_##type##_1D(4, start, end, scanline);            \
  else                                                                        \
    render_scanline_obj_copy_##type##_2D(4, start, end, scanline)
3055
3056
/* render_window_segment_clip_obj(type, clip_start, clip_end, start, end)
 *
 * Intersects the segment start..end with the clip region
 * clip_start..clip_end and passes the overlapping part (if there is one) to
 * render_window_clip_obj. Empty segments (start == end) are ignored.
 *
 * The arguments are evaluated more than once, so they must be free of side
 * effects.
 */
#define render_window_segment_clip_obj(type, clip_start, clip_end, start,     \
 end)                                                                         \
{                                                                             \
  if(start != end)                                                            \
  {                                                                           \
    if((start < clip_start) && (end > clip_start))                            \
    {                                                                         \
      /* Segment begins left of the clip region and reaches into it. */       \
      if(end > clip_end)                                                      \
      {                                                                       \
        render_window_clip_obj(type, clip_start, clip_end);                   \
      }                                                                       \
      else                                                                    \
      {                                                                       \
        render_window_clip_obj(type, clip_start, end);                        \
      }                                                                       \
    }                                                                         \
    else if((start >= clip_start) && (end > clip_end) &&                      \
     (start < clip_end))                                                      \
    {                                                                         \
      /* Segment begins inside the clip region and runs past its end. */      \
      render_window_clip_obj(type, start, clip_end);                          \
    }                                                                         \
    else if((start >= clip_start) && (end <= clip_end))                       \
    {                                                                         \
      /* Segment lies entirely inside the clip region. */                     \
      render_window_clip_obj(type, start, end);                               \
    }                                                                         \
  }                                                                           \
}
3091
3092
/* render_window_clip_1_obj(type, start, end)
 *
 * Same splitting as render_window_clip_1, but for the case where the OBJ
 * window is enabled as well (render_window_multi is invoked with
 * back == 1_obj): pieces covered by window 1 still go through
 * render_window_segment_clip with selector 1, while the pieces outside
 * window 1 go through render_window_segment_clip_obj instead of being drawn
 * with `out` directly. If window_1_x1 equals 240 the whole span is handed to
 * render_window_clip_obj.
 *
 * Expands to bare statements; window_1_x1 and window_1_x2 must be in scope.
 */
#define render_window_clip_1_obj(type, start, end)                            \
  if(window_1_x1 != 240)                                                      \
  {                                                                           \
    if(window_1_x1 > window_1_x2)                                             \
    {                                                                         \
      render_window_segment_clip(type, start, end, 0, window_1_x2, 1);        \
      render_window_segment_clip_obj(type, start, end, window_1_x2,           \
       window_1_x1);                                                          \
      render_window_segment_clip(type, start, end, window_1_x1, 240, 1);      \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      render_window_segment_clip_obj(type, start, end, 0, window_1_x1);       \
      render_window_segment_clip(type, start, end, window_1_x1, window_1_x2,  \
       1);                                                                    \
      render_window_segment_clip_obj(type, start, end, window_1_x2, 240);     \
    }                                                                         \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    render_window_clip_obj(type, start, end);                                 \
  }                                                                           \
3115
3116
3117
/* render_window_single(type, window_number)
 *
 * Renders a full scanline when exactly one rectangular window
 * (window_number, 0 or 1) is enabled. The line is split at the window's x1
 * and x2 coordinates; the part covered by the window is rendered with the
 * window's own selector and the rest with `out`. When x1 > x2 the covered
 * part is 0..x2 plus x1..240.
 *
 * Declares a local `winin` and invokes window_coords(), so it needs a block
 * scope of its own (the builder expands it inside a braced case body).
 * window_coords() is expected to provide window_<n>_x1 / window_<n>_x2.
 */
#define render_window_single(type, window_number)                             \
  u32 winin = io_registers[REG_WININ];                                        \
  window_coords(window_number);                                               \
  if(window_##window_number##_x1 > window_##window_number##_x2)               \
  {                                                                           \
    render_window_segment(type, 0, window_##window_number##_x2,               \
     window_number);                                                          \
    render_window_segment(type, window_##window_number##_x2,                  \
     window_##window_number##_x1, out);                                       \
    render_window_segment(type, window_##window_number##_x1, 240,             \
     window_number);                                                          \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    render_window_segment(type, 0, window_##window_number##_x1, out);         \
    render_window_segment(type, window_##window_number##_x1,                  \
     window_##window_number##_x2, window_number);                             \
    render_window_segment(type, window_##window_number##_x2, 240, out);       \
  }                                                                           \
3137
/* render_window_multi(type, front, back)
 *
 * Renders a full scanline when a rectangular window (`front`) is combined
 * with a second window source (`back`). The part of the line covered by the
 * front window is rendered with the front window's selector; everything
 * else is delegated to render_window_clip_<back>, which resolves the back
 * window inside the given span. The builder uses back values 1, obj and
 * 1_obj.
 *
 * Expects window_<front>_x1 / window_<front>_x2 to be in scope already.
 */
#define render_window_multi(type, front, back)                                \
  if(window_##front##_x1 > window_##front##_x2)                               \
  {                                                                           \
    render_window_segment(type, 0, window_##front##_x2, front);               \
    render_window_clip_##back(type, window_##front##_x2,                      \
     window_##front##_x1);                                                    \
    render_window_segment(type, window_##front##_x1, 240, front);             \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    render_window_clip_##back(type, 0, window_##front##_x1);                  \
    render_window_segment(type, window_##front##_x1, window_##front##_x2,     \
     front);                                                                  \
    render_window_clip_##back(type, window_##front##_x2, 240);                \
  }                                                                           \
3153
/* render_scanline_window_builder(type)
 *
 * Generates render_scanline_window_<type>(scanline, dispcnt), the scanline
 * renderer used when at least one window is enabled. The top three bits of
 * dispcnt (dispcnt >> 13) select the combination: bit 0 of that value is
 * window 0, bit 1 is window 1 and bit 2 is the OBJ window. There is no case
 * for 0; update_scanline only calls the generated function when
 * dispcnt >> 13 is non-zero.
 *
 * The locals declared at the top (vcount, winout, bldcnt, window_out_enable)
 * and inside the cases (winin, window_obj_enable) are not all referenced
 * directly here; presumably the nested render_window_* macros use them.
 */
#define render_scanline_window_builder(type)                                  \
void render_scanline_window_##type(u16 *scanline, u32 dispcnt)                \
{                                                                             \
  u32 vcount = io_registers[REG_VCOUNT];                                      \
  u32 winout = io_registers[REG_WINOUT];                                      \
  u32 bldcnt = io_registers[REG_BLDCNT];                                      \
  u32 window_out_enable = winout & 0x3F;                                      \
                                                                              \
  render_scanline_layer_functions_##type();                                   \
                                                                              \
  switch(dispcnt >> 13)                                                       \
  {                                                                           \
    /* Just window 0 */                                                       \
    case 0x01:                                                                \
    {                                                                         \
      render_window_single(type, 0);                                          \
      break;                                                                  \
    }                                                                         \
                                                                              \
    /* Just window 1 */                                                       \
    case 0x02:                                                                \
    {                                                                         \
      render_window_single(type, 1);                                          \
      break;                                                                  \
    }                                                                         \
                                                                              \
    /* Windows 0 and 1 */                                                     \
    case 0x03:                                                                \
    {                                                                         \
      u32 winin = io_registers[REG_WININ];                                    \
      window_coords(0);                                                       \
      window_coords(1);                                                       \
      render_window_multi(type, 0, 1);                                        \
      break;                                                                  \
    }                                                                         \
                                                                              \
    /* Just OBJ windows */                                                    \
    case 0x04:                                                                \
    {                                                                         \
      u32 window_obj_enable = winout >> 8;                                    \
      render_window_clip_obj(type, 0, 240);                                   \
      break;                                                                  \
    }                                                                         \
                                                                              \
    /* Window 0 and OBJ window */                                             \
    case 0x05:                                                                \
    {                                                                         \
      u32 window_obj_enable = winout >> 8;                                    \
      u32 winin = io_registers[REG_WININ];                                    \
      window_coords(0);                                                       \
      render_window_multi(type, 0, obj);                                      \
      break;                                                                  \
    }                                                                         \
                                                                              \
    /* Window 1 and OBJ window */                                             \
    case 0x06:                                                                \
    {                                                                         \
      u32 window_obj_enable = winout >> 8;                                    \
      u32 winin = io_registers[REG_WININ];                                    \
      window_coords(1);                                                       \
      render_window_multi(type, 1, obj);                                      \
      break;                                                                  \
    }                                                                         \
                                                                              \
    /* Window 0, 1, and OBJ window */                                         \
    case 0x07:                                                                \
    {                                                                         \
      u32 window_obj_enable = winout >> 8;                                    \
      u32 winin = io_registers[REG_WININ];                                    \
      window_coords(0);                                                       \
      window_coords(1);                                                       \
      render_window_multi(type, 0, 1_obj);                                    \
      break;                                                                  \
    }                                                                         \
  }                                                                           \
}                                                                             \
3230
// Instantiate render_scanline_window_tile() and
// render_scanline_window_bitmap().
render_scanline_window_builder(tile);
render_scanline_window_builder(bitmap);

// Mask applied to (dispcnt >> 8) in update_scanline() before the layers are
// ordered, indexed by the video mode (dispcnt & 0x07).
// NOTE(review): the table has six entries but the index can be 6 or 7.
u32 active_layers[6] = { 0x1F, 0x17, 0x1C, 0x14, 0x14, 0x14 };

// Size of the emulated screen area in "small" resolution; the SDL variant of
// set_gba_resolution() rewrites these as 240/160 times video_scale.
u32 small_resolution_width = 240;
u32 small_resolution_height = 160;
// Current output resolution; written by the SDL video_resolution_*()
// functions and used by print_string_ext() for clipping.
u32 resolution_width, resolution_height;
3239
3240 void update_scanline()
3241 {
3242   u32 pitch = get_screen_pitch();
3243   u32 dispcnt = io_registers[REG_DISPCNT];
3244   u32 display_flags = (dispcnt >> 8) & 0x1F;
3245   u32 vcount = io_registers[REG_VCOUNT];
3246   u16 *screen_offset = get_screen_pixels() + (vcount * pitch);
3247   u32 video_mode = dispcnt & 0x07;
3248   u32 current_layer;
3249
3250   // If OAM has been modified since the last scanline has been updated then
3251   // reorder and reprofile the OBJ lists.
3252   if(oam_update)
3253   {
3254     order_obj(video_mode);
3255     oam_update = 0;
3256   }
3257
3258   order_layers((dispcnt >> 8) & active_layers[video_mode]);
3259
3260   if(skip_next_frame)
3261     return;
3262
3263   // If the screen is in in forced blank draw pure white.
3264   if(dispcnt & 0x80)
3265   {
3266     fill_line_color16(0xFFFF, screen_offset, 0, 240);
3267   }
3268   else
3269   {
3270     if(video_mode < 3)
3271     {
3272       if(dispcnt >> 13)
3273       {
3274         render_scanline_window_tile(screen_offset, dispcnt);
3275       }
3276       else
3277       {
3278         render_scanline_tile(screen_offset, dispcnt);
3279       }
3280     }
3281     else
3282     {
3283       if(dispcnt >> 13)
3284         render_scanline_window_bitmap(screen_offset, dispcnt);
3285       else
3286         render_scanline_bitmap(screen_offset, dispcnt);
3287     }
3288   }
3289
3290   affine_reference_x[0] += (s16)io_registers[REG_BG2PB];
3291   affine_reference_y[0] += (s16)io_registers[REG_BG2PD];
3292   affine_reference_x[1] += (s16)io_registers[REG_BG3PB];
3293   affine_reference_y[1] += (s16)io_registers[REG_BG3PD];
3294 }
3295
3296 #ifdef PSP_BUILD
3297
3298 u32 screen_flip = 0;
3299
3300 void flip_screen()
3301 {
3302   if(video_direct == 0)
3303   {
3304     u32 *old_ge_cmd_ptr = ge_cmd_ptr;
3305     sceKernelDcacheWritebackAll();
3306
3307     // Render the current screen
3308     ge_cmd_ptr = ge_cmd + 2;
3309     GE_CMD(TBP0, ((u32)screen_pixels & 0x00FFFFFF));
3310     GE_CMD(TBW0, (((u32)screen_pixels & 0xFF000000) >> 8) |
3311      GBA_SCREEN_WIDTH);
3312     ge_cmd_ptr = old_ge_cmd_ptr;
3313
3314     sceGeListEnQueue(ge_cmd, ge_cmd_ptr, gecbid, NULL);
3315
3316     // Flip to the next screen
3317     screen_flip ^= 1;
3318
3319     if(screen_flip)
3320       screen_pixels = screen_texture + (240 * 160 * 2);
3321     else
3322       screen_pixels = screen_texture;
3323   }
3324 }
3325
3326 #else
3327
/* Writes current_pixel to the two pixels ending at x2 and moves x2 two
 * pixels to the left. */
#define integer_scale_copy_2()                                                \
  current_scanline_ptr[x2--] = current_pixel;                                 \
  current_scanline_ptr[x2--] = current_pixel
3332
/* Writes current_pixel to the three pixels ending at x2 and moves x2 three
 * pixels to the left. */
#define integer_scale_copy_3()                                                \
  current_scanline_ptr[x2--] = current_pixel;                                 \
  current_scanline_ptr[x2--] = current_pixel;                                 \
  current_scanline_ptr[x2--] = current_pixel
3338
/* Writes current_pixel to the four pixels ending at x2 and moves x2 four
 * pixels to the left. */
#define integer_scale_copy_4()                                                \
  current_scanline_ptr[x2--] = current_pixel;                                 \
  current_scanline_ptr[x2--] = current_pixel;                                 \
  current_scanline_ptr[x2--] = current_pixel;                                 \
  current_scanline_ptr[x2--] = current_pixel
3345
/* integer_scale_horizontal(scale_factor)
 *
 * Stretches each of the 160 source rows horizontally, in place. Every row is
 * walked from its last source pixel (239) down to 0 while the write index x2
 * walks down from the right edge of the scaled row, so source pixels are
 * read before the widened output reaches them.
 *
 * Three extra stores that used to follow the copy macro have been removed:
 * they wrote at x2, x2 - 1 and x2 - 2 after x2 had already been decremented,
 * which overwrote source pixels that had not been read yet near the left
 * edge and wrote to indices -1..-3 (before the start of the row) when x
 * reached 0.
 *
 * Uses x, y, x2, current_pixel, current_scanline_ptr and pitch from the
 * enclosing scope.
 */
#define integer_scale_horizontal(scale_factor)                                \
  for(y = 0; y < 160; y++)                                                    \
  {                                                                           \
    for(x = 239, x2 = (240 * video_scale) - 1; x >= 0; x--)                   \
    {                                                                         \
      current_pixel = current_scanline_ptr[x];                                \
      integer_scale_copy_##scale_factor();                                    \
    }                                                                         \
    current_scanline_ptr += pitch;                                            \
  }
3359
3360 void flip_screen()
3361 {
3362   if((video_scale != 1) && (current_scale != unscaled))
3363   {
3364     s32 x, y;
3365     s32 x2, y2;
3366     u16 *screen_ptr = get_screen_pixels();
3367     u16 *current_scanline_ptr = screen_ptr;
3368     u32 pitch = get_screen_pitch();
3369     u16 current_pixel;
3370     u32 i;
3371
3372     switch(video_scale)
3373     {
3374       case 2:
3375         integer_scale_horizontal(2);
3376         break;
3377
3378       case 3:
3379         integer_scale_horizontal(3);
3380         break;
3381
3382       default:
3383       case 4:
3384         integer_scale_horizontal(4);
3385         break;
3386
3387     }
3388
3389     for(y = 159, y2 = (160 * video_scale) - 1; y >= 0; y--)
3390     {
3391       for(i = 0; i < video_scale; i++)
3392       {
3393         memcpy(screen_ptr + (y2 * pitch),
3394          screen_ptr + (y * pitch), 480 * video_scale);
3395         y2--;
3396       }
3397     }
3398   }
3399 #ifdef GP2X_BUILD
3400   {
3401     if((screen_scale == unscaled) &&
3402      (resolution_width == small_resolution_width) &&
3403      (resolution_height == small_resolution_height))
3404     {
3405       SDL_Rect srect = {0, 0, 240, 160};
3406       SDL_Rect drect = {40, 40, 240, 160};
3407       SDL_BlitSurface(screen, &srect, hw_screen, &drect);
3408     }
3409     else if((screen_scale == scaled_aspect) &&
3410      (resolution_width == small_resolution_width) &&
3411      (resolution_height == small_resolution_height))
3412     {
3413       SDL_Rect drect = {0, 10, 0, 0};
3414       SDL_BlitSurface(screen, NULL, hw_screen, &drect);
3415     }
3416     else
3417     {
3418       SDL_BlitSurface(screen, NULL, hw_screen, NULL);
3419     }
3420     /* it is unclear if this syscall takes virtual or physical addresses,
3421      * but using virtual seems to work for me. */
3422     gp2x_flush_cache(hw_screen->pixels, hw_screen->pixels + 320*240, 0);
3423   }
3424 #else
3425   SDL_Flip(screen);
3426 #endif
3427 }
3428
3429 #endif
3430
3431 u32 frame_to_render;
3432
3433 void update_screen()
3434 {
3435   if(!skip_next_frame)
3436     flip_screen();
3437 }
3438
3439 #ifdef PSP_BUILD
3440
// PSP video initialisation: sets the display mode and frame buffer, puts the
// GU into a known state, registers the GE finish callback, fills in the two
// vertices of the screen sprite and emits the GE command list that
// flip_screen() queues for every frame.
void init_video()
{
  sceDisplaySetMode(0, PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT);

  sceDisplayWaitVblankStart();
  sceDisplaySetFrameBuf((void*)psp_gu_vram_base, PSP_LINE_SIZE,
   PSP_DISPLAY_PIXEL_FORMAT_565, PSP_DISPLAY_SETBUF_NEXTFRAME);

  sceGuInit();

  // One-off GU state setup: draw and display buffer both at VRAM offset 0,
  // 16bit 5650 textures with linear filtering, no blending.
  sceGuStart(GU_DIRECT, display_list);
  sceGuDrawBuffer(GU_PSM_5650, (void*)0, PSP_LINE_SIZE);
  sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
   (void*)0, PSP_LINE_SIZE);
  sceGuClear(GU_COLOR_BUFFER_BIT);

  sceGuOffset(2048 - (PSP_SCREEN_WIDTH / 2), 2048 - (PSP_SCREEN_HEIGHT / 2));
  sceGuViewport(2048, 2048, PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT);

  sceGuScissor(0, 0, PSP_SCREEN_WIDTH + 1, PSP_SCREEN_HEIGHT + 1);
  sceGuEnable(GU_SCISSOR_TEST);
  sceGuTexMode(GU_PSM_5650, 0, 0, GU_FALSE);
  sceGuTexFunc(GU_TFX_REPLACE, GU_TCC_RGBA);
  sceGuTexFilter(GU_LINEAR, GU_LINEAR);
  sceGuEnable(GU_TEXTURE_2D);

  sceGuFrontFace(GU_CW);
  sceGuDisable(GU_BLEND);

  sceGuFinish();
  sceGuSync(0, 0);

  sceDisplayWaitVblankStart();
  sceGuDisplay(GU_TRUE);

  // Register Ge_Finish_Callback as the finish callback; the returned id is
  // passed to sceGeListEnQueue() in flip_screen().
  PspGeCallbackData gecb;
  gecb.signal_func = NULL;
  gecb.signal_arg = NULL;
  gecb.finish_func = Ge_Finish_Callback;
  gecb.finish_arg = NULL;
  gecbid = sceGeSetCallback(&gecb);

  // Two vertices of five floats each. Going by the VTYPE command below the
  // layout is presumably (u, v, x, y, z): texture coordinates spanning the
  // GBA screen, positions spanning the PSP screen. set_gba_resolution()
  // later rewrites entries 2, 3, 7 and 8.
  screen_vertex[0] = 0 + 0.5;
  screen_vertex[1] = 0 + 0.5;
  screen_vertex[2] = 0 + 0.5;
  screen_vertex[3] = 0 + 0.5;
  screen_vertex[4] = 0;
  screen_vertex[5] = GBA_SCREEN_WIDTH - 0.5;
  screen_vertex[6] = GBA_SCREEN_HEIGHT - 0.5;
  screen_vertex[7] = PSP_SCREEN_WIDTH - 0.5;
  screen_vertex[8] = PSP_SCREEN_HEIGHT - 0.5;
  screen_vertex[9] = 0;

  // The GE_CMD calls below build the command list (presumably appended at
  // ge_cmd_ptr). flip_screen() rewrites the TBP0/TBW0 entries, which are the
  // third and fourth commands emitted here.

  // Set framebuffer to PSP VRAM
  GE_CMD(FBP, ((u32)psp_gu_vram_base & 0x00FFFFFF));
  GE_CMD(FBW, (((u32)psp_gu_vram_base & 0xFF000000) >> 8) | PSP_LINE_SIZE);
  // Set texture 0 to the screen texture
  GE_CMD(TBP0, ((u32)screen_texture & 0x00FFFFFF));
  GE_CMD(TBW0, (((u32)screen_texture & 0xFF000000) >> 8) | GBA_SCREEN_WIDTH);
  // Set the texture size to 256 by 256 (2^8 by 2^8)
  GE_CMD(TSIZE0, (8 << 8) | 8);
  // Flush the texture cache
  GE_CMD(TFLUSH, 0);
  // Use 2D coordinates, no indices, no weights, 32bit float positions,
  // 32bit float texture coordinates
  GE_CMD(VTYPE, (1 << 23) | (0 << 11) | (0 << 9) |
   (3 << 7) | (0 << 5) | (0 << 2) | 3);
  // Set the base of the index list pointer to 0
  GE_CMD(BASE, 0);
  // Set the rest of index list pointer to 0 (not being used)
  GE_CMD(IADDR, 0);
  // Set the base of the screen vertex list pointer
  GE_CMD(BASE, ((u32)screen_vertex & 0xFF000000) >> 8);
  // Set the rest of the screen vertex list pointer
  GE_CMD(VADDR, ((u32)screen_vertex & 0x00FFFFFF));
  // Primitive kick: render sprite (primitive 6), 2 vertices
  GE_CMD(PRIM, (6 << 16) | 2);
  // Done with commands
  GE_CMD(FINISH, 0);
  // Raise signal interrupt
  GE_CMD(SIGNAL, 0);
  GE_CMD(NOP, 0);
  GE_CMD(NOP, 0);
}
3525
3526 #else
3527
// SDL video initialisation: starts SDL (video and joystick) and creates the
// output surface at 240x160 times video_scale. On GP2X a 320x240 (times
// video_scale) hardware surface is opened as hw_screen and `screen` becomes
// a separate surface that flip_screen() blits onto it.
// NOTE(review): none of the SDL return values are checked here.
void init_video()
{
  SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK | SDL_INIT_NOPARACHUTE);

#ifdef GP2X_BUILD
  SDL_GP2X_AllowGfxMemory(NULL, 0);

  hw_screen = SDL_SetVideoMode(320 * video_scale, 240 * video_scale,
   16, SDL_HWSURFACE);

  screen = SDL_CreateRGBSurface(SDL_HWSURFACE, 240 * video_scale,
   160 * video_scale, 16, 0xFFFF, 0xFFFF, 0xFFFF, 0);

  gp2x_load_mmuhack();
#else
  screen = SDL_SetVideoMode(240 * video_scale, 160 * video_scale, 16, 0);
#endif
  SDL_ShowCursor(0);
}
3547
3548 #endif
3549
// Scale mode requested for the GBA image (set by set_gba_resolution).
video_scale_type screen_scale = scaled_aspect;
// Scale mode currently in effect; the SDL video_resolution_*() functions
// switch it between `unscaled` and screen_scale.
video_scale_type current_scale = scaled_aspect;
// Texture filter selection; the PSP set_gba_resolution() picks linear or
// nearest filtering from it.
video_filter_type screen_filter = filter_bilinear;
3553
3554
3555 #ifdef PSP_BUILD
3556
3557 void video_resolution_large()
3558 {
3559   if(video_direct != 1)
3560   {
3561     video_direct = 1;
3562     screen_pixels = psp_gu_vram_base;
3563     screen_pitch = 512;
3564     sceGuStart(GU_DIRECT, display_list);
3565     sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3566      (void*)0, PSP_LINE_SIZE);
3567     sceGuFinish();
3568   }
3569 }
3570
3571 void set_gba_resolution(video_scale_type scale)
3572 {
3573   u32 filter_linear = 0;
3574   screen_scale = scale;
3575   switch(scale)
3576   {
3577     case unscaled:
3578       screen_vertex[2] = 120 + 0.5;
3579       screen_vertex[3] = 56 + 0.5;
3580       screen_vertex[7] = GBA_SCREEN_WIDTH + 120 - 0.5;
3581       screen_vertex[8] = GBA_SCREEN_HEIGHT + 56 - 0.5;
3582       break;
3583
3584     case scaled_aspect:
3585       screen_vertex[2] = 36 + 0.5;
3586       screen_vertex[3] = 0 + 0.5;
3587       screen_vertex[7] = 408 + 36 - 0.5;
3588       screen_vertex[8] = PSP_SCREEN_HEIGHT - 0.5;
3589       break;
3590
3591     case fullscreen:
3592       screen_vertex[2] = 0;
3593       screen_vertex[3] = 0;
3594       screen_vertex[7] = PSP_SCREEN_WIDTH;
3595       screen_vertex[8] = PSP_SCREEN_HEIGHT;
3596       break;
3597   }
3598
3599   sceGuStart(GU_DIRECT, display_list);
3600   if(screen_filter == filter_bilinear)
3601     sceGuTexFilter(GU_LINEAR, GU_LINEAR);
3602   else
3603     sceGuTexFilter(GU_NEAREST, GU_NEAREST);
3604
3605   sceGuFinish();
3606   sceGuSync(0, 0);
3607
3608   clear_screen(0x0000);
3609 }
3610
3611 void video_resolution_small()
3612 {
3613   if(video_direct != 0)
3614   {
3615     set_gba_resolution(screen_scale);
3616     video_direct = 0;
3617     screen_pixels = screen_texture;
3618     screen_flip = 0;
3619     screen_pitch = 240;
3620     sceGuStart(GU_DIRECT, display_list);
3621     sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3622      (void*)0, PSP_LINE_SIZE);
3623     sceGuFinish();
3624   }
3625 }
3626
3627 void clear_screen(u16 color)
3628 {
3629   u32 i;
3630   u16 *src_ptr = get_screen_pixels();
3631
3632   sceGuSync(0, 0);
3633
3634   for(i = 0; i < (512 * 272); i++, src_ptr++)
3635   {
3636     *src_ptr = color;
3637   }
3638
3639   // I don't know why this doesn't work.
3640 /*  color = (((color & 0x1F) * 255 / 31) << 0) |
3641    ((((color >> 5) & 0x3F) * 255 / 63) << 8) |
3642    ((((color >> 11) & 0x1F) * 255 / 31) << 16) | (0xFF << 24);
3643
3644   sceGuStart(GU_DIRECT, display_list);
3645   sceGuDrawBuffer(GU_PSM_5650, (void*)0, PSP_LINE_SIZE);
3646   //sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3647   // (void*)0, PSP_LINE_SIZE);
3648   sceGuClearColor(color);
3649   sceGuClear(GU_COLOR_BUFFER_BIT);
3650   sceGuFinish();
3651   sceGuSync(0, 0); */
3652 }
3653
3654 #else
3655
3656 void video_resolution_large()
3657 {
3658   current_scale = unscaled;
3659
3660 #ifdef GP2X_BUILD
3661   SDL_FreeSurface(screen);
3662   SDL_GP2X_AllowGfxMemory(NULL, 0);
3663     hw_screen = SDL_SetVideoMode(320, 240, 16, SDL_HWSURFACE);
3664   screen = SDL_CreateRGBSurface(SDL_HWSURFACE, 320, 240, 16, 0xFFFF,
3665    0xFFFF, 0xFFFF, 0);
3666   resolution_width = 320;
3667     resolution_height = 240;
3668   SDL_ShowCursor(0);
3669
3670   gp2x_load_mmuhack();
3671 #else
3672   screen = SDL_SetVideoMode(480, 272, 16, 0);
3673   resolution_width = 480;
3674   resolution_height = 272;
3675 #endif
3676 }
3677
3678 void video_resolution_small()
3679 {
3680   current_scale = screen_scale;
3681
3682 #ifdef GP2X_BUILD
3683   int w, h;
3684   SDL_FreeSurface(screen);
3685   SDL_GP2X_AllowGfxMemory(NULL, 0);
3686
3687   w = 320; h = 240;
3688   if (screen_scale != unscaled)
3689   {
3690     w = small_resolution_width * video_scale;
3691     h = small_resolution_height * video_scale;
3692   }
3693   if (screen_scale == scaled_aspect) h += 20;
3694   hw_screen = SDL_SetVideoMode(w, h, 16, SDL_HWSURFACE);
3695
3696   screen = SDL_CreateRGBSurface(SDL_HWSURFACE,
3697    small_resolution_width * video_scale, small_resolution_height *
3698    video_scale, 16, 0xFFFF, 0xFFFF, 0xFFFF, 0);
3699
3700   SDL_ShowCursor(0);
3701
3702   gp2x_load_mmuhack();
3703 #else
3704   screen = SDL_SetVideoMode(small_resolution_width * video_scale,
3705    small_resolution_height * video_scale, 16, 0);
3706 #endif
3707   resolution_width = small_resolution_width;
3708   resolution_height = small_resolution_height;
3709 }
3710
3711 void set_gba_resolution(video_scale_type scale)
3712 {
3713   if(screen_scale != scale)
3714   {
3715     screen_scale = scale;
3716     switch(scale)
3717     {
3718       case unscaled:
3719       case scaled_aspect:
3720       case fullscreen:
3721         small_resolution_width = 240 * video_scale;
3722         small_resolution_height = 160 * video_scale;
3723         break;
3724     }
3725   }
3726 }
3727
3728 void clear_screen(u16 color)
3729 {
3730   u16 *dest_ptr = get_screen_pixels();
3731   u32 line_skip = get_screen_pitch() - screen->w;
3732   u32 x, y;
3733
3734   for(y = 0; y < screen->h; y++)
3735   {
3736     for(x = 0; x < screen->w; x++, dest_ptr++)
3737     {
3738       *dest_ptr = color;
3739     }
3740     dest_ptr += line_skip;
3741   }
3742 }
3743
3744 #endif
3745
3746 u16 *copy_screen()
3747 {
3748   u16 *copy = malloc(240 * 160 * 2);
3749   memcpy(copy, get_screen_pixels(), 240 * 160 * 2);
3750   return copy;
3751 }
3752
3753 void blit_to_screen(u16 *src, u32 w, u32 h, u32 dest_x, u32 dest_y)
3754 {
3755   u32 pitch = get_screen_pitch();
3756   u16 *dest_ptr = get_screen_pixels() + dest_x + (dest_y * pitch);
3757
3758   u16 *src_ptr = src;
3759   u32 line_skip = pitch - w;
3760   u32 x, y;
3761
3762   for(y = 0; y < h; y++)
3763   {
3764     for(x = 0; x < w; x++, src_ptr++, dest_ptr++)
3765     {
3766       *dest_ptr = *src_ptr;
3767     }
3768     dest_ptr += line_skip;
3769   }
3770 }
3771
3772 void print_string_ext(const char *str, u16 fg_color, u16 bg_color,
3773  u32 x, u32 y, void *_dest_ptr, u32 pitch, u32 pad)
3774 {
3775   u16 *dest_ptr = (u16 *)_dest_ptr + (y * pitch) + x;
3776   u8 current_char = str[0];
3777   u32 current_row;
3778   u32 glyph_offset;
3779   u32 i = 0, i2, i3;
3780   u32 str_index = 1;
3781   u32 current_x = x;
3782
3783
3784   /* EDIT */
3785   if(y + FONT_HEIGHT > resolution_height)
3786       return;
3787
3788   while(current_char)
3789   {
3790     if(current_char == '\n')
3791     {
3792       y += FONT_HEIGHT;
3793       current_x = x;
3794       dest_ptr = get_screen_pixels() + (y * pitch) + x;
3795     }
3796     else
3797     {
3798       glyph_offset = _font_offset[current_char];
3799       current_x += FONT_WIDTH;
3800       for(i2 = 0; i2 < FONT_HEIGHT; i2++, glyph_offset++)
3801       {
3802         current_row = _font_bits[glyph_offset];
3803         for(i3 = 0; i3 < FONT_WIDTH; i3++)
3804         {
3805           if((current_row >> (15 - i3)) & 0x01)
3806             *dest_ptr = fg_color;
3807           else
3808             *dest_ptr = bg_color;
3809           dest_ptr++;
3810         }
3811         dest_ptr += (pitch - FONT_WIDTH);
3812       }
3813       dest_ptr = dest_ptr - (pitch * FONT_HEIGHT) + FONT_WIDTH;
3814     }
3815
3816     i++;
3817
3818     current_char = str[str_index];
3819
3820     if((i < pad) && (current_char == 0))
3821     {
3822       current_char = ' ';
3823     }
3824     else
3825     {
3826       str_index++;
3827     }
3828
3829     if(current_x + FONT_WIDTH > resolution_width /* EDIT */)
3830     {
3831       while (current_char && current_char != '\n')
3832       {
3833         current_char = str[str_index++];
3834       }
3835     }
3836   }
3837 }
3838
3839 void print_string(const char *str, u16 fg_color, u16 bg_color,
3840  u32 x, u32 y)
3841 {
3842   print_string_ext(str, fg_color, bg_color, x, y, get_screen_pixels(),
3843    get_screen_pitch(), 0);
3844 }
3845
3846 void print_string_pad(const char *str, u16 fg_color, u16 bg_color,
3847  u32 x, u32 y, u32 pad)
3848 {
3849   print_string_ext(str, fg_color, bg_color, x, y, get_screen_pixels(),
3850    get_screen_pitch(), pad);
3851 }
3852
3853 u32 debug_cursor_x = 0;
3854 u32 debug_cursor_y = 0;
3855
3856 #ifdef STDIO_DEBUG
3857
void debug_screen_clear()
{
  /* Intentionally empty: in STDIO_DEBUG builds debug text goes to stdout,
   * so there is no screen to clear. */
}
3861
void debug_screen_start()
{
  /* Intentionally empty: in STDIO_DEBUG builds debug text goes to stdout,
   * so no video setup is needed. */
}
3865
void debug_screen_end()
{
  /* Intentionally empty: in STDIO_DEBUG builds debug text goes to stdout,
   * so there is no video state to restore. */
}
3869
void debug_screen_update()
{
  /* Intentionally empty: in STDIO_DEBUG builds debug text goes to stdout,
   * so there is nothing to present. */
}
3873
/* STDIO_DEBUG variant: printf-style debug output sent to stdout. */
void debug_screen_printf(const char *format, ...)
{
  va_list args;

  va_start(args, format);
  vprintf(format, args);
  va_end(args);
}
3882
3883 void debug_screen_newline(u32 count)
3884 {
3885   printf("\n");
3886 }
3887
3888
3889 #else
3890
3891 void debug_screen_clear()
3892 {
3893   debug_cursor_x = 0;
3894   debug_cursor_y = 0;
3895   clear_screen(0x0000);
3896 }
3897
/* Enter the on-screen debug display: switch to the large video resolution,
 * then start from a cleared screen with the cursor at the origin. */
void debug_screen_start()
{
  video_resolution_large();
  debug_screen_clear();
}
3903
/* Leave the on-screen debug display by switching back to the small video
 * resolution. */
void debug_screen_end()
{
  video_resolution_small();
}
3908
/* Present the debug text drawn so far by flipping the screen. */
void debug_screen_update()
{
  flip_screen();
}
3913
3914 void debug_screen_printf(const char *format, ...)
3915 {
3916   char str_buffer[512];
3917   u32 str_buffer_length;
3918   va_list ap;
3919
3920   va_start(ap, format);
3921   str_buffer_length = vsnprintf(str_buffer, 512, format, ap);
3922   va_end(ap);
3923
3924   printf("printing debug string %s at %d %d\n", str_buffer,
3925    debug_cursor_x, debug_cursor_y);
3926
3927   print_string(str_buffer, 0xFFFF, 0x0000, debug_cursor_x, debug_cursor_y);
3928   debug_cursor_x += FONT_WIDTH * str_buffer_length;
3929 }
3930
3931 void debug_screen_newline(u32 count)
3932 {
3933   debug_cursor_x = 0;
3934   debug_cursor_y += FONT_HEIGHT * count;
3935 }
3936
3937 #endif
3938
/* Print one printf-style formatted debug line followed by a line break.
 *
 * The message is formatted here and handed on as a plain "%s" argument: a
 * va_list must not be passed through the `...` of debug_screen_printf as if
 * it were the original argument list. Messages longer than 511 characters
 * are truncated. */
void debug_screen_printl(const char *format, ...)
{
  char line_buffer[512];
  va_list ap;

  va_start(ap, format);
  if(vsnprintf(line_buffer, sizeof(line_buffer), format, ap) < 0)
    line_buffer[0] = '\0';   /* formatting failed: print an empty line */
  va_end(ap);

  debug_screen_printf("%s", line_buffer);
  debug_screen_newline(1);
}
3949
3950
3951 #define video_savestate_builder(type)                                         \
3952 void video_##type##_savestate(file_tag_type savestate_file)                   \
3953 {                                                                             \
3954   file_##type##_array(savestate_file, affine_reference_x);                    \
3955   file_##type##_array(savestate_file, affine_reference_y);                    \
3956 }                                                                             \
3957
3958 video_savestate_builder(read);
3959 video_savestate_builder(write_mem);
3960
3961