initial pandora port, with hardware scaling and stuff
[gpsp.git] / video.c
1 /* gameplaySP
2  *
3  * Copyright (C) 2006 Exophase <exophase@gmail.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of
8  * the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
19
20 #include "common.h"
21 #include "font.h"
22
23 #ifdef PSP_BUILD
24
25 #include <pspctrl.h>
26
27 #include <pspkernel.h>
28 #include <pspdebug.h>
29 #include <pspdisplay.h>
30
31 #include <pspgu.h>
32 #include <psppower.h>
33 #include <psprtc.h>
34
35 static float *screen_vertex = (float *)0x441FC100;
36 static u32 *ge_cmd = (u32 *)0x441FC000;
37 static u16 *psp_gu_vram_base = (u16 *)(0x44000000);
38 static u32 *ge_cmd_ptr = (u32 *)0x441FC000;
39 static u32 gecbid;
40 static u32 video_direct = 0;
41
42 static u32 __attribute__((aligned(16))) display_list[32];
43
// GBA screen dimensions.
#define GBA_SCREEN_WIDTH  240
#define GBA_SCREEN_HEIGHT 160

// PSP screen dimensions and line size.
#define PSP_SCREEN_WIDTH  480
#define PSP_SCREEN_HEIGHT 272
#define PSP_LINE_SIZE     512

#define PSP_ALL_BUTTON_MASK 0xFFFF

// GE command opcodes, listed by ascending value. GE_CMD() places the chosen
// opcode in the top byte of a command word.
#define GE_CMD_NOP    0x00
#define GE_CMD_VADDR  0x01
#define GE_CMD_IADDR  0x02
#define GE_CMD_PRIM   0x04
#define GE_CMD_SIGNAL 0x0C
#define GE_CMD_FINISH 0x0F
#define GE_CMD_BASE   0x10
#define GE_CMD_VTYPE  0x12
#define GE_CMD_FBP    0x9C
#define GE_CMD_FBW    0x9D
#define GE_CMD_TBP0   0xA0
#define GE_CMD_TBW0   0xA8
#define GE_CMD_TSIZE0 0xB8
#define GE_CMD_TFLUSH 0xCB
#define GE_CMD_CLEAR  0xD3
68
// Writes one command word at ge_cmd_ptr and advances the pointer by one
// word. The word is opcode GE_CMD_<cmd> in bits 24-31, OR'd with operand.
//
// The expansion is a single parenthesised expression, so the macro behaves
// as one statement (for example as the body of an unbraced if). The store is
// sequenced before the increment by the comma operator, matching the old
// two-statement form. The opcode is widened to u32 before the shift because
// opcodes of 0x80 and above do not fit in a signed int once shifted by 24.
#define GE_CMD(cmd, operand)                                                  \
  (*ge_cmd_ptr = (((u32)(GE_CMD_##cmd) << 24) | (operand)), ge_cmd_ptr++)
72
73 static u16 *screen_texture = (u16 *)(0x4000000 + (512 * 272 * 2));
74 static u16 *current_screen_texture = (u16 *)(0x4000000 + (512 * 272 * 2));
75 static u16 *screen_pixels = (u16 *)(0x4000000 + (512 * 272 * 2));
76 static u32 screen_pitch = 240;
77
// GE finish callback. Currently a no-op: both arguments are ignored.
static void Ge_Finish_Callback(int id, void *arg)
{
  (void)id;
  (void)arg;
}
81
// Accessors for the PSP build: both simply name the file-scope variables.
#define get_screen_pixels() screen_pixels

#define get_screen_pitch() screen_pitch
87
88 #elif defined(WIZ_BUILD)
89
90 static u16 rot_buffer[240*4];
91 static u32 rot_lines_total = 4;
92 static u32 rot_line_count = 0;
93 static char rot_msg_buff[64];
94
95 static u32 screen_offset = 0;
96 static u16 *screen_pixels = NULL;
97 const u32 screen_pitch = 320;
98
99 #define get_screen_pixels()                                                   \
100   screen_pixels                                                               \
101
102 #define get_screen_pitch()                                                    \
103   screen_pitch                                                                \
104
105 #elif defined(PND_BUILD)
106
107 static u16 *screen_pixels = NULL;
108
109 #define get_screen_pixels()                                                   \
110   screen_pixels                                                               \
111
112 #define get_screen_pitch()                                                    \
113   resolution_width                                                            \
114
115 #else
116
117 #ifdef GP2X_BUILD
118 #include "SDL_gp2x.h"
119 SDL_Surface *hw_screen;
120 #endif
121 SDL_Surface *screen;
122 const u32 video_scale = 1;
123
124 #define get_screen_pixels()                                                   \
125   ((u16 *)screen->pixels)                                                     \
126
127 #define get_screen_pitch()                                                    \
128   (screen->pitch / 2)                                                         \
129
130 #endif
131
132 void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline,
133  u32 enable_flags, u32 dispcnt, u32 bldcnt, tile_layer_render_struct
134  *layer_renderers);
135 void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline,
136  u32 enable_flags, u32 dispcnt, u32 bldcnt, bitmap_layer_render_struct
137  *layer_renderers);
138
// Expands to nothing.
#define no_op

// Full lookup: fetches the palette entry into current_pixel and then runs
// convert_palette on it. This older form is unnecessary when the palette is
// converted transparently or ABGR 1555 is used natively; the direct lookup
// below skips the conversion and is much faster.
#define tile_lookup_palette_full(pal, entry)                                  \
  current_pixel = pal[entry];                                                 \
  convert_palette(current_pixel)

// Direct lookup with no conversion. The expansion already ends in a
// semicolon.
#define tile_lookup_palette(pal, entry)                                       \
  current_pixel = pal[entry];
151
152
// Base write for the normal renderer. With RENDER_COLOR16_NORMAL defined it
// is the color16 writer; otherwise current_pixel is looked up in palette and
// the result is stored.
#ifdef RENDER_COLOR16_NORMAL

#define tile_expand_base_normal(idx)                                          \
  tile_expand_base_color16(idx)

#else

#define tile_expand_base_normal(idx)                                          \
  tile_lookup_palette(palette, current_pixel);                                \
  dest_ptr[idx] = current_pixel

#endif

// In normal mode a transparent write is the same store as a base write.
#define tile_expand_transparent_normal(idx)                                   \
  tile_expand_base_normal(idx)

// Copies the pixel at the same index from copy_ptr.
#define tile_expand_copy(idx)                                                 \
  dest_ptr[idx] = copy_ptr[idx]
171
172
// Step the destination pointer forward by count pixels.
#define advance_dest_ptr_base(count)                                          \
  dest_ptr += count

// Transparent rendering advances exactly like base rendering.
#define advance_dest_ptr_transparent(count)                                   \
  advance_dest_ptr_base(count)

// Copy rendering moves the copy source along with the destination.
#define advance_dest_ptr_copy(count)                                          \
  advance_dest_ptr_base(count);                                               \
  copy_ptr += count
182
183
// Yields bit <layer> of BLDCNT as 0 or 1.
#define color_combine_mask_a(layer)                                           \
  ((io_registers[REG_BLDCNT] >> (layer)) & 0x01)

// For color blending operations: builds a mask with bit 9 set if the layer
// is target A (BLDCNT bit layer) and bit 10 set if it is target B (BLDCNT
// bit layer + 8).
//
// The argument and the whole expansion are parenthesised so the macro can be
// combined with any neighbouring operator or passed a compound expression.
#define color_combine_mask(layer)                                             \
  ((color_combine_mask_a(layer) |                                             \
    ((io_registers[REG_BLDCNT] >> ((layer) + 7)) & 0x02)) << 9)
193
// Alpha blending renderers store the palette index (9bpp) together with the
// layer bits instead of raw RGB. A base write goes straight into the 32-bit
// destination slot.
#define tile_expand_base_alpha(idx)                                           \
  dest_ptr[idx] = current_pixel | pixel_combine

// Base write of the background value.
#define tile_expand_base_bg(idx)                                              \
  dest_ptr[idx] = bg_combine

// A layered (transparent) write shifts the existing value up by 16 bits and
// places the new pixel in the low half, so the slot holds the newest pixel
// and the one that was there before it.
#define tile_expand_transparent_alpha(idx)                                    \
  dest_ptr[idx] = (dest_ptr[idx] << 16) | current_pixel | pixel_combine

// OBJ variant: only shift when the top is not already OBJ. If bit 8 of the
// existing value is set, the upper half is kept and just the low half is
// replaced.
#define tile_expand_transparent_alpha_obj(idx)                                \
  dest = dest_ptr[idx];                                                       \
  if(!(dest & 0x00000100))                                                    \
  {                                                                           \
    dest_ptr[idx] = (dest << 16) | current_pixel | pixel_combine;             \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    dest_ptr[idx] = (dest & 0xFFFF0000) | current_pixel | pixel_combine;      \
  }
224
225
// Writers for color effects that do not need the previous layer: the new
// pixel and its combine bits overwrite the slot. The color32 names are meant
// for a 32-bit wide dest_ptr so that an alpha combine can be layered on top.
#define tile_expand_base_color16(idx)                                         \
  dest_ptr[idx] = current_pixel | pixel_combine

#define tile_expand_transparent_color16(idx)                                  \
  tile_expand_base_color16(idx)

#define tile_expand_base_color32(idx)                                         \
  tile_expand_base_color16(idx)

#define tile_expand_transparent_color32(idx)                                  \
  tile_expand_base_color16(idx)
241
242
// Operations for isolating one 8bpp pixel from the 32-bit group held in
// current_pixels.

// Lowest byte; the argument is ignored.
#define tile_8bpp_pixel_op_mask(unused)                                       \
  current_pixel = current_pixels & 0xFF

// Low byte of the group after shifting it right by bits.
#define tile_8bpp_pixel_op_shift_mask(bits)                                   \
  current_pixel = (current_pixels >> bits) & 0xFF

// Right shift only, with no masking.
#define tile_8bpp_pixel_op_shift(bits)                                        \
  current_pixel = current_pixels >> bits

// Expands to nothing; current_pixel is left as it is.
#define tile_8bpp_pixel_op_none(bits)
255
// In 8bpp mode the normal base renderer always draws the raw pixel, so color
// 0 is drawn wherever the tile holds color 0.
#define tile_8bpp_draw_base_normal(idx)                                       \
  tile_expand_base_normal(idx)

// Alpha base renderer: a zero pixel becomes the background value, anything
// else becomes the pixel plus its combine bits.
#define tile_8bpp_draw_base_alpha(idx)                                        \
  if(!current_pixel)                                                          \
  {                                                                           \
    tile_expand_base_bg(idx);                                                 \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    tile_expand_base_alpha(idx);                                              \
  }

// The color16 and color32 base renderers behave like the alpha one.
#define tile_8bpp_draw_base_color16(idx)                                      \
  tile_8bpp_draw_base_alpha(idx)

#define tile_8bpp_draw_base_color32(idx)                                      \
  tile_8bpp_draw_base_alpha(idx)

// Extracts one pixel with tile_8bpp_pixel_op_<extract>(extract_arg) and
// draws it at idx with the base renderer selected by blend.
#define tile_8bpp_draw_base(idx, extract, extract_arg, blend)                 \
  tile_8bpp_pixel_op_##extract(extract_arg);                                  \
  tile_8bpp_draw_base_##blend(idx)
283
// Transparent (layered) draw: the destination is only replaced when the
// extracted pixel is non-zero, i.e. not transparent.
#define tile_8bpp_draw_transparent(idx, extract, extract_arg, blend)          \
  tile_8bpp_pixel_op_##extract(extract_arg);                                  \
  if(current_pixel)                                                           \
  {                                                                           \
    tile_expand_transparent_##blend(idx);                                     \
  }

// Copy draw: for a non-zero pixel the destination takes the value at the
// same index in copy_ptr. blend is accepted but not used.
#define tile_8bpp_draw_copy(idx, extract, extract_arg, blend)                 \
  tile_8bpp_pixel_op_##extract(extract_arg);                                  \
  if(current_pixel)                                                           \
  {                                                                           \
    tile_expand_copy(idx);                                                    \
  }
300
// Reads the current map entry into current_tile and points tile_ptr at the
// tile it selects: the low 10 bits are the tile number, and each tile number
// advances tile_base by 64.
#define get_tile_8bpp()                                                       \
  current_tile = *map_ptr;                                                    \
  tile_ptr = tile_base + ((current_tile & 0x3FF) * 64)
306
307
// Draws half of an 8bpp tile row (four pixels) in stored order: byte 0 of
// current_pixels lands at pos + 0 and byte 3 at pos + 3.
#define tile_8bpp_draw_four_noflip(pos, mode, blend)                          \
  tile_8bpp_draw_##mode(pos + 0, mask, 0, blend);                             \
  tile_8bpp_draw_##mode(pos + 1, shift_mask, 8, blend);                       \
  tile_8bpp_draw_##mode(pos + 2, shift_mask, 16, blend);                      \
  tile_8bpp_draw_##mode(pos + 3, shift, 24, blend)

// Horizontally flipped version: byte 0 lands at pos + 3 and byte 3 at
// pos + 0.
#define tile_8bpp_draw_four_flip(pos, mode, blend)                            \
  tile_8bpp_draw_##mode(pos + 3, mask, 0, blend);                             \
  tile_8bpp_draw_##mode(pos + 2, shift_mask, 8, blend);                       \
  tile_8bpp_draw_##mode(pos + 1, shift_mask, 16, blend);                      \
  tile_8bpp_draw_##mode(pos + 0, shift, 24, blend)

// Base renderer entry point: dispatches on the flip mode with no
// transparency check.
#define tile_8bpp_draw_four_base(pos, blend, flip_mode)                       \
  tile_8bpp_draw_four_##flip_mode(pos, base, blend)
327
328
// Draws half of an 8bpp tile row for the transparent renderer. As an
// optimization the whole four-pixel group is tested against zero first,
// since in transparency-capable renderers pixels are more likely to be
// transparent than opaque.
#define tile_8bpp_draw_four_transparent(pos, blend, flip_mode)                \
  if(current_pixels)                                                          \
  {                                                                           \
    tile_8bpp_draw_four_##flip_mode(pos, transparent, blend);                 \
  }

// Same early-out for the copy renderer.
#define tile_8bpp_draw_four_copy(pos, blend, flip_mode)                       \
  if(current_pixels)                                                          \
  {                                                                           \
    tile_8bpp_draw_four_##flip_mode(pos, copy, blend);                        \
  }
345
// Helper for 8bpp tiles clipped against the edge of the screen. Draws
// partial_tile_run pixels one at a time: each pass draws the low byte of
// current_pixels, shifts the group down by one byte and advances the
// destination by one pixel.
#define partial_tile_8bpp(mode, blend)                                        \
  for(i = 0; i < partial_tile_run; i++)                                       \
  {                                                                           \
    tile_8bpp_draw_##mode(0, mask, 0, blend);                                 \
    current_pixels >>= 8;                                                     \
    advance_dest_ptr_##mode(1);                                               \
  }
355
356
// Draws an 8bpp tile row clipped against the left side of the screen.
// partial_tile_offset is how many leading pixels are clipped away and
// partial_tile_run is how many pixels to draw.
//
// With fewer than four pixels clipped, the rest of the first word is drawn
// pixel by pixel and then the second word is drawn whole. Otherwise only the
// tail of the second word is drawn.
#define partial_tile_right_noflip_8bpp(mode, blend)                           \
  if(partial_tile_offset < 4)                                                 \
  {                                                                           \
    partial_tile_run -= 4;                                                    \
    current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 8);         \
    partial_tile_8bpp(mode, blend);                                           \
    current_pixels = *((u32 *)(tile_ptr + 4));                                \
    tile_8bpp_draw_four_##mode(0, blend, noflip);                             \
    advance_dest_ptr_##mode(4);                                               \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    current_pixels = *((u32 *)(tile_ptr + 4)) >>                              \
     ((partial_tile_offset - 4) * 8);                                         \
    partial_tile_8bpp(mode, blend);                                           \
  }
377
378
// Draws an 8bpp tile row clipped against both the left and right side of
// the screen, i.e. runs of less than 8 - partial_tile_offset pixels.
//
// When the run starts in the first word and crosses into the second, it is
// split in two: the remainder of the first word, then the leftover count
// from the start of the second word.
#define partial_tile_mid_noflip_8bpp(mode, blend)                             \
  if(partial_tile_offset < 4)                                                 \
  {                                                                           \
    current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 8);         \
    if((partial_tile_offset + partial_tile_run) <= 4)                         \
    {                                                                         \
      partial_tile_8bpp(mode, blend);                                         \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      u32 old_run = partial_tile_run;                                         \
      partial_tile_run = 4 - partial_tile_offset;                             \
      partial_tile_8bpp(mode, blend);                                         \
      partial_tile_run = old_run - partial_tile_run;                          \
      current_pixels = *((u32 *)(tile_ptr + 4));                              \
      partial_tile_8bpp(mode, blend);                                         \
    }                                                                         \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    current_pixels = *((u32 *)(tile_ptr + 4)) >>                              \
     ((partial_tile_offset - 4) * 8);                                         \
    partial_tile_8bpp(mode, blend);                                           \
  }
406
407
// Draws an 8bpp tile row clipped against the right side of the screen;
// partial_tile_run is how many pixels there are to draw.
//
// If at least four pixels are wanted, the first word is drawn whole and
// tile_ptr moves on to the second word. Whatever remains of the run is then
// drawn pixel by pixel from the word at tile_ptr.
#define partial_tile_left_noflip_8bpp(mode, blend)                            \
  if(partial_tile_run >= 4)                                                   \
  {                                                                           \
    current_pixels = *((u32 *)tile_ptr);                                      \
    tile_8bpp_draw_four_##mode(0, blend, noflip);                             \
    advance_dest_ptr_##mode(4);                                               \
    tile_ptr += 4;                                                            \
    partial_tile_run -= 4;                                                    \
  }                                                                           \
                                                                              \
  current_pixels = *((u32 *)(tile_ptr));                                      \
  partial_tile_8bpp(mode, blend)
423
424
// Draws a complete (non-clipped) 8bpp tile row: the first word fills
// positions 0-3 and the second word fills positions 4-7.
#define tile_noflip_8bpp(mode, blend)                                         \
  current_pixels = *((u32 *)tile_ptr);                                        \
  tile_8bpp_draw_four_##mode(0, blend, noflip);                               \
  current_pixels = *((u32 *)(tile_ptr + 4));                                  \
  tile_8bpp_draw_four_##mode(4, blend, noflip)
432
433
// Flipped counterpart of partial_tile_8bpp. Draws partial_tile_run pixels
// one at a time: each pass draws the top byte of current_pixels, shifts the
// group up by one byte and advances the destination by one pixel.
#define partial_tile_flip_8bpp(mode, blend)                                   \
  for(i = 0; i < partial_tile_run; i++)                                       \
  {                                                                           \
    tile_8bpp_draw_##mode(0, shift, 24, blend);                               \
    current_pixels <<= 8;                                                     \
    advance_dest_ptr_##mode(1);                                               \
  }
443
// Flipped counterpart of partial_tile_right_noflip_8bpp: draws a
// horizontally flipped 8bpp tile row whose first partial_tile_offset
// (flipped) pixels are clipped away, for partial_tile_run pixels in total.
//
// A flipped row starts with the top byte of the second word. With fewer than
// four pixels clipped, the clipped pixels are shifted out of the top of the
// second word (partial_tile_offset * 8 bits), the rest of that word is drawn
// pixel by pixel, and the first word is then drawn whole. Otherwise only the
// first word is involved and (partial_tile_offset - 4) pixels are shifted
// out of it. Both shift counts stay within 0..24.
#define partial_tile_right_flip_8bpp(combine_op, alpha_op)                    \
  if(partial_tile_offset >= 4)                                                \
  {                                                                           \
    current_pixels = *((u32 *)tile_ptr) << ((partial_tile_offset - 4) * 8);   \
    partial_tile_flip_8bpp(combine_op, alpha_op);                             \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    partial_tile_run -= 4;                                                    \
    current_pixels = *((u32 *)(tile_ptr + 4)) << (partial_tile_offset * 8);   \
    partial_tile_flip_8bpp(combine_op, alpha_op);                             \
    current_pixels = *((u32 *)tile_ptr);                                      \
    tile_8bpp_draw_four_##combine_op(0, alpha_op, flip);                      \
    advance_dest_ptr_##combine_op(4);                                         \
  }
460
// Flipped counterpart of partial_tile_mid_noflip_8bpp: draws
// partial_tile_run pixels of a horizontally flipped 8bpp tile row, starting
// partial_tile_offset pixels into the flipped row.
//
// A flipped row starts with the top byte of the second word. When the run
// starts inside the second word, the skipped pixels are shifted out of its
// top (partial_tile_offset * 8 bits); if the run crosses into the first
// word, it is split into the remainder of the second word followed by the
// leftover count from the top of the first word. When the run starts inside
// the first word, (partial_tile_offset - 4) pixels are shifted out of it.
// Both shift counts stay within 0..24.
#define partial_tile_mid_flip_8bpp(combine_op, alpha_op)                      \
  if(partial_tile_offset >= 4)                                                \
  {                                                                           \
    current_pixels = *((u32 *)tile_ptr) << ((partial_tile_offset - 4) * 8);   \
    partial_tile_flip_8bpp(combine_op, alpha_op);                             \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    current_pixels = *((u32 *)(tile_ptr + 4)) << (partial_tile_offset * 8);   \
                                                                              \
    if((partial_tile_offset + partial_tile_run) > 4)                          \
    {                                                                         \
      u32 old_run = partial_tile_run;                                         \
      partial_tile_run = 4 - partial_tile_offset;                             \
      partial_tile_flip_8bpp(combine_op, alpha_op);                           \
      partial_tile_run = old_run - partial_tile_run;                          \
      current_pixels = *((u32 *)(tile_ptr));                                  \
      partial_tile_flip_8bpp(combine_op, alpha_op);                           \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      partial_tile_flip_8bpp(combine_op, alpha_op);                           \
    }                                                                         \
  }
486
// Flipped counterpart of partial_tile_left_noflip_8bpp; partial_tile_run is
// how many pixels to draw.
//
// If at least four pixels are wanted, the second word is drawn whole
// (flipped) and tile_ptr steps back by four, so the tile_ptr + 4 read below
// picks up the first word. Whatever remains of the run is then drawn pixel
// by pixel.
#define partial_tile_left_flip_8bpp(mode, blend)                              \
  if(partial_tile_run >= 4)                                                   \
  {                                                                           \
    current_pixels = *((u32 *)(tile_ptr + 4));                                \
    tile_8bpp_draw_four_##mode(0, blend, flip);                               \
    advance_dest_ptr_##mode(4);                                               \
    tile_ptr -= 4;                                                            \
    partial_tile_run -= 4;                                                    \
  }                                                                           \
                                                                              \
  current_pixels = *((u32 *)(tile_ptr + 4));                                  \
  partial_tile_flip_8bpp(mode, blend)
499
// Draws a complete horizontally flipped 8bpp tile row: the second word fills
// positions 0-3 and the first word fills positions 4-7, each drawn flipped.
#define tile_flip_8bpp(mode, blend)                                           \
  current_pixels = *((u32 *)(tile_ptr + 4));                                  \
  tile_8bpp_draw_four_##mode(0, blend, flip);                                 \
  current_pixels = *((u32 *)tile_ptr);                                        \
  tile_8bpp_draw_four_##mode(4, blend, flip)
505
506
// Operations for isolating one 4bpp pixel from the 32-bit group held in
// current_pixels.

// Lowest nibble; the argument is ignored.
#define tile_4bpp_pixel_op_mask(unused)                                       \
  current_pixel = current_pixels & 0x0F

// Low nibble of the group after shifting it right by bits.
#define tile_4bpp_pixel_op_shift_mask(bits)                                   \
  current_pixel = (current_pixels >> bits) & 0x0F

// Right shift only, with no masking.
#define tile_4bpp_pixel_op_shift(bits)                                        \
  current_pixel = current_pixels >> bits

// Expands to nothing; current_pixel is left as it is.
#define tile_4bpp_pixel_op_none(unused)
519
// Draws one 4bpp pixel as base with the normal renderer. The current palette
// is OR'd in only for non-zero pixels; a zero pixel is expanded as it is.
// The branch could be replaced by a lookup table (an identity map over 0-255
// except that multiples of 16 map to 0); which is faster should be
// benchmarked.
#define tile_4bpp_draw_base_normal(idx)                                       \
  if(!current_pixel)                                                          \
  {                                                                           \
    tile_expand_base_normal(idx);                                             \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    current_pixel |= current_palette;                                         \
    tile_expand_base_normal(idx);                                             \
  }

// Alpha base renderer: a zero pixel becomes the background value; anything
// else gets the current palette OR'd in and is written with its combine
// bits.
#define tile_4bpp_draw_base_alpha(idx)                                        \
  if(!current_pixel)                                                          \
  {                                                                           \
    tile_expand_base_bg(idx);                                                 \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    current_pixel |= current_palette;                                         \
    tile_expand_base_alpha(idx);                                              \
  }

// The color16 and color32 base renderers behave like the alpha one.
#define tile_4bpp_draw_base_color16(idx)                                      \
  tile_4bpp_draw_base_alpha(idx)

#define tile_4bpp_draw_base_color32(idx)                                      \
  tile_4bpp_draw_base_alpha(idx)

// Extracts one pixel with tile_4bpp_pixel_op_<extract>(extract_arg) and
// draws it at idx with the base renderer selected by blend.
#define tile_4bpp_draw_base(idx, extract, extract_arg, blend)                 \
  tile_4bpp_pixel_op_##extract(extract_arg);                                  \
  tile_4bpp_draw_base_##blend(idx)
559
560
// Draws one 4bpp pixel as layered: nothing happens for a zero (transparent)
// pixel; otherwise the current palette is OR'd in and the transparent writer
// selected by blend is used.
#define tile_4bpp_draw_transparent(idx, extract, extract_arg, blend)          \
  tile_4bpp_pixel_op_##extract(extract_arg);                                  \
  if(current_pixel)                                                           \
  {                                                                           \
    current_pixel |= current_palette;                                         \
    tile_expand_transparent_##blend(idx);                                     \
  }

// Copy variant: for a non-zero pixel the current palette is OR'd into
// current_pixel and the destination takes the value at the same index in
// copy_ptr. blend is accepted but not used.
#define tile_4bpp_draw_copy(idx, extract, extract_arg, blend)                 \
  tile_4bpp_pixel_op_##extract(extract_arg);                                  \
  if(current_pixel)                                                           \
  {                                                                           \
    current_pixel |= current_palette;                                         \
    tile_expand_copy(idx);                                                    \
  }
578
579 // Draws eight background pixels: stores the same value to dest_ptr[0..7].
580 // Shared by the alpha and normal renderers for a tile row whose eight
581 // pixels are all zero.
582
583 #define tile_4bpp_draw_eight_base_zero(value)                                 \
584   dest_ptr[0] = value;                                                        \
585   dest_ptr[1] = value;                                                        \
586   dest_ptr[2] = value;                                                        \
587   dest_ptr[3] = value;                                                        \
588   dest_ptr[4] = value;                                                        \
589   dest_ptr[5] = value;                                                        \
590   dest_ptr[6] = value;                                                        \
591   dest_ptr[7] = value                                                         \
592
593
594 // Draws eight background pixels for the alpha renderer, basically color zero
595 // with the background flag high (bg_combine). color16/color32 reuse it as is.
596
597 #define tile_4bpp_draw_eight_base_zero_alpha()                                \
598   tile_4bpp_draw_eight_base_zero(bg_combine)                                  \
599
600 #define tile_4bpp_draw_eight_base_zero_color16()                              \
601   tile_4bpp_draw_eight_base_zero_alpha()                                      \
602
603 #define tile_4bpp_draw_eight_base_zero_color32()                              \
604   tile_4bpp_draw_eight_base_zero_alpha()                                      \
605
606
607 // Draws eight background pixels for the normal renderer: a bunch of zeros
608 // when RENDER_COLOR16_NORMAL is defined, otherwise palette[0]. Either way
609 // current_pixel is overwritten with the fill value.
610 #ifdef RENDER_COLOR16_NORMAL
611
612 #define tile_4bpp_draw_eight_base_zero_normal()                               \
613   current_pixel = 0;                                                          \
614   tile_4bpp_draw_eight_base_zero(current_pixel)                               \
615
616 #else
617
618 #define tile_4bpp_draw_eight_base_zero_normal()                               \
619   current_pixel = palette[0];                                                 \
620   tile_4bpp_draw_eight_base_zero(current_pixel)                               \
621
622 #endif
623
624
625 // Draws eight 4bpp pixels to dest indices 0..7, passing op_param 0, 4, ..., 28
626 // (presumably the pixel's bit offset within current_pixels) to the pixel op.
627 #define tile_4bpp_draw_eight_noflip(combine_op, alpha_op)                     \
628   tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op);                          \
629   tile_4bpp_draw_##combine_op(1, shift_mask, 4, alpha_op);                    \
630   tile_4bpp_draw_##combine_op(2, shift_mask, 8, alpha_op);                    \
631   tile_4bpp_draw_##combine_op(3, shift_mask, 12, alpha_op);                   \
632   tile_4bpp_draw_##combine_op(4, shift_mask, 16, alpha_op);                   \
633   tile_4bpp_draw_##combine_op(5, shift_mask, 20, alpha_op);                   \
634   tile_4bpp_draw_##combine_op(6, shift_mask, 24, alpha_op);                   \
635   tile_4bpp_draw_##combine_op(7, shift, 28, alpha_op)                         \
636
637
638 // Draws eight 4bpp pixels in reverse order (for hflip): op_param 0 goes to
639 // dest index 7 and op_param 28 to dest index 0.
640 #define tile_4bpp_draw_eight_flip(combine_op, alpha_op)                       \
641   tile_4bpp_draw_##combine_op(7, mask, 0, alpha_op);                          \
642   tile_4bpp_draw_##combine_op(6, shift_mask, 4, alpha_op);                    \
643   tile_4bpp_draw_##combine_op(5, shift_mask, 8, alpha_op);                    \
644   tile_4bpp_draw_##combine_op(4, shift_mask, 12, alpha_op);                   \
645   tile_4bpp_draw_##combine_op(3, shift_mask, 16, alpha_op);                   \
646   tile_4bpp_draw_##combine_op(2, shift_mask, 20, alpha_op);                   \
647   tile_4bpp_draw_##combine_op(1, shift_mask, 24, alpha_op);                   \
648   tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op)                         \
649
650
651 // Draws eight 4bpp pixels in base mode. If the row word current_pixels is
652 // zero, all eight are background, so the alpha_op-specific eight-pixel
653 // background fill is used instead of drawing pixel by pixel.
654 #define tile_4bpp_draw_eight_base(alpha_op, flip_op)                          \
655   if(current_pixels != 0)                                                     \
656   {                                                                           \
657     tile_4bpp_draw_eight_##flip_op(base, alpha_op);                           \
658   }                                                                           \
659   else                                                                        \
660   {                                                                           \
661     tile_4bpp_draw_eight_base_zero_##alpha_op();                              \
662   }                                                                           \
663
664
665 // Draws eight 4bpp pixels in transparent (layered) mode, checks if all are
666 // zero (current_pixels == 0) and if so draws nothing.
667
668 #define tile_4bpp_draw_eight_transparent(alpha_op, flip_op)                   \
669   if(current_pixels != 0)                                                     \
670   {                                                                           \
671     tile_4bpp_draw_eight_##flip_op(transparent, alpha_op);                    \
672   }                                                                           \
673
674 // Copy-mode counterpart: likewise draws nothing when the row word is zero.
675 #define tile_4bpp_draw_eight_copy(alpha_op, flip_op)                          \
676   if(current_pixels != 0)                                                     \
677   {                                                                           \
678     tile_4bpp_draw_eight_##flip_op(copy, alpha_op);                           \
679   }                                                                           \
680
681 // Gets the current tile in 4bpp mode: reads the map entry at map_ptr, takes
682 // the palette from entry bits 12-15 (scaled by 16) and points tile_ptr at
683 // tile number (entry bits 0-9) * 32 bytes past tile_base.
684 #define get_tile_4bpp()                                                       \
685   current_tile = *map_ptr;                                                    \
686   current_palette = (current_tile >> 12) << 4;                                \
687   tile_ptr = tile_base + ((current_tile & 0x3FF) * 32);                       \
688
689
690 // Helper macro for drawing clipped 4bpp tiles: draws partial_tile_run pixels
691 // with the `mask` op, shifting current_pixels right 4 bits after each one.
692 #define partial_tile_4bpp(combine_op, alpha_op)                               \
693   for(i = 0; i < partial_tile_run; i++)                                       \
694   {                                                                           \
695     tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op);                        \
696     current_pixels >>= 4;                                                     \
697     advance_dest_ptr_##combine_op(1);                                         \
698   }                                                                           \
699
700
701 // Draws a 4bpp tile clipped against the left edge of the screen.
702 // partial_tile_offset is how many leading pixels are clipped off (the row
703 // word is shifted right past them), partial_tile_run is how many to draw.
704
705 #define partial_tile_right_noflip_4bpp(combine_op, alpha_op)                  \
706   current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 4);           \
707   partial_tile_4bpp(combine_op, alpha_op)                                     \
708
709
710 // Draws a 4bpp tile clipped against both edges of the screen; identical to
711 // the right variant, with partial_tile_run bounding the pixel count.
712 #define partial_tile_mid_noflip_4bpp(combine_op, alpha_op)                    \
713   partial_tile_right_noflip_4bpp(combine_op, alpha_op)                        \
714
715
716 // Draws a 4bpp tile clipped against the right edge of the screen.
717 // partial_tile_run is how many to draw; the row word is used unshifted.
718
719 #define partial_tile_left_noflip_4bpp(combine_op, alpha_op)                   \
720   current_pixels = *((u32 *)tile_ptr);                                        \
721   partial_tile_4bpp(combine_op, alpha_op)                                     \
722
723
724 // Draws a complete 4bpp tile row (not clipped) from the u32 at tile_ptr.
725 #define tile_noflip_4bpp(combine_op, alpha_op)                                \
726   current_pixels = *((u32 *)tile_ptr);                                        \
727   tile_4bpp_draw_eight_##combine_op(alpha_op, noflip)                         \
728
729
730 // Like the above, but draws flipped tiles: each pixel comes from the `shift`
731 // op with parameter 28 and current_pixels is shifted left 4 bits per step.
732 #define partial_tile_flip_4bpp(combine_op, alpha_op)                          \
733   for(i = 0; i < partial_tile_run; i++)                                       \
734   {                                                                           \
735     tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op);                      \
736     current_pixels <<= 4;                                                     \
737     advance_dest_ptr_##combine_op(1);                                         \
738   }                                                                           \
739 // Flipped right/mid/left/complete variants; right pre-shifts the word left.
740 #define partial_tile_right_flip_4bpp(combine_op, alpha_op)                    \
741   current_pixels = *((u32 *)tile_ptr) << (partial_tile_offset * 4);           \
742   partial_tile_flip_4bpp(combine_op, alpha_op)                                \
743
744 #define partial_tile_mid_flip_4bpp(combine_op, alpha_op)                      \
745   partial_tile_right_flip_4bpp(combine_op, alpha_op)                          \
746
747 #define partial_tile_left_flip_4bpp(combine_op, alpha_op)                     \
748   current_pixels = *((u32 *)tile_ptr);                                        \
749   partial_tile_flip_4bpp(combine_op, alpha_op)                                \
750
751 #define tile_flip_4bpp(combine_op, alpha_op)                                  \
752   current_pixels = *((u32 *)tile_ptr);                                        \
753   tile_4bpp_draw_eight_##combine_op(alpha_op, flip)                           \
754
755
756 // Draws a single (partial or complete) tile from the tilemap, flipping as
757 // necessary: map entry bit 0x800 adds vertical_pixel_flip to tile_ptr and
758 // bit 0x400 selects the _flip_ drawer instead of the _noflip_ one.
759 #define single_tile_map(tile_type, combine_op, color_depth, alpha_op)         \
760   get_tile_##color_depth();                                                   \
761   if(current_tile & 0x800)                                                    \
762     tile_ptr += vertical_pixel_flip;                                          \
763                                                                               \
764   if(current_tile & 0x400)                                                    \
765   {                                                                           \
766     tile_type##_flip_##color_depth(combine_op, alpha_op);                     \
767   }                                                                           \
768   else                                                                        \
769   {                                                                           \
770     tile_type##_noflip_##color_depth(combine_op, alpha_op);                   \
771   }                                                                           \
772
773
774 // Draws tile_run sequential whole tiles from the tilemap, hflips and vflips
775 // as necessary, advancing the destination 8 pixels and map_ptr per tile.
776
777 #define multiple_tile_map(combine_op, color_depth, alpha_op)                  \
778   for(i = 0; i < tile_run; i++)                                               \
779   {                                                                           \
780     single_tile_map(tile, combine_op, color_depth, alpha_op);                 \
781     advance_dest_ptr_##combine_op(8);                                         \
782     map_ptr++;                                                                \
783   }                                                                           \
784
785 // Draws a partial tile from a tilemap clipped against the left edge of the
786 // screen, then steps map_ptr to the next map entry.
787
788 #define partial_tile_right_map(combine_op, color_depth, alpha_op)             \
789   single_tile_map(partial_tile_right, combine_op, color_depth, alpha_op);     \
790   map_ptr++                                                                   \
791
792 // Draws a partial tile from a tilemap clipped against both edges of the
793 // screen. map_ptr is left unchanged.
794
795 #define partial_tile_mid_map(combine_op, color_depth, alpha_op)               \
796   single_tile_map(partial_tile_mid, combine_op, color_depth, alpha_op)        \
797
798 // Draws a partial tile from a tilemap clipped against the right edge of the
799 // screen. map_ptr is left unchanged.
800
801 #define partial_tile_left_map(combine_op, color_depth, alpha_op)              \
802   single_tile_map(partial_tile_left, combine_op, color_depth, alpha_op)       \
803
804
805 // Advances a non-flipped 4bpp obj to the next tile (tile_ptr + 32 bytes).
806
807 #define obj_advance_noflip_4bpp()                                             \
808   tile_ptr += 32                                                              \
809
810
811 // Advances a non-flipped 8bpp obj to the next tile (tile_ptr + 64 bytes).
812
813 #define obj_advance_noflip_8bpp()                                             \
814   tile_ptr += 64                                                              \
815
816
817 // Advances a flipped 4bpp obj to the next tile (tile_ptr - 32 bytes).
818
819 #define obj_advance_flip_4bpp()                                               \
820   tile_ptr -= 32                                                              \
821
822
823 // Advances a flipped 8bpp obj to the next tile (tile_ptr - 64 bytes).
824
825 #define obj_advance_flip_8bpp()                                               \
826   tile_ptr -= 64                                                              \
827
828
829
830 // Draws tile_run sequential tiles from an obj, flip_op determines if it
831 // should be flipped or not (set to flip or noflip). After each tile, tile_ptr
832 // moves to the obj's next tile and the destination advances by 8 pixels.
833 #define multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op)         \
834   for(i = 0; i < tile_run; i++)                                               \
835   {                                                                           \
836     tile_##flip_op##_##color_depth(combine_op, alpha_op);                     \
837     obj_advance_##flip_op##_##color_depth();                                  \
838     advance_dest_ptr_##combine_op(8);                                         \
839   }                                                                           \
840
841
842 // Draws an obj's tile clipped against the left side of the screen, then
843 // advances tile_ptr to the obj's next tile.
844 #define partial_tile_right_obj(combine_op, color_depth, alpha_op, flip_op)    \
845   partial_tile_right_##flip_op##_##color_depth(combine_op, alpha_op);         \
846   obj_advance_##flip_op##_##color_depth()                                     \
847
848 // Draws an obj's tile clipped against both sides of the screen (no advance)
849
850 #define partial_tile_mid_obj(combine_op, color_depth, alpha_op, flip_op)      \
851   partial_tile_mid_##flip_op##_##color_depth(combine_op, alpha_op)            \
852
853 // Draws an obj's tile clipped against the right side of the screen (no advance)
854
855 #define partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op)     \
856   partial_tile_left_##flip_op##_##color_depth(combine_op, alpha_op)           \
857
858
859 // Extra variables specific for 8bpp/4bpp tile renderers: none for 8bpp (the
860 // macro is empty), a current_palette local for 4bpp.
861 #define tile_extra_variables_8bpp()                                           \
862
863 #define tile_extra_variables_4bpp()                                           \
864   u32 current_palette                                                         \
865
866
867 // Byte lengths of complete tiles (tile_size_*) and of single tile rows
868 // (tile_width_*) in 4bpp and 8bpp.
869 #define tile_width_4bpp 4
870 #define tile_size_4bpp 32
871 #define tile_width_8bpp 8
872 #define tile_size_8bpp 64
873
874
875 // Render a single scanline of text tiles
876
877 #define tile_render(color_depth, combine_op, alpha_op)                        \
878 {                                                                             \
879   u32 vertical_pixel_offset = (vertical_offset % 8) *                         \
880    tile_width_##color_depth;                                                  \
881   u32 vertical_pixel_flip =                                                   \
882    ((tile_size_##color_depth - tile_width_##color_depth) -                    \
883    vertical_pixel_offset) - vertical_pixel_offset;                            \
884   tile_extra_variables_##color_depth();                                       \
885   u8 *tile_base = vram + (((bg_control >> 2) & 0x03) * (1024 * 16)) +         \
886    vertical_pixel_offset;                                                     \
887   u32 pixel_run = 256 - (horizontal_offset % 256);                            \
888   u32 current_tile;                                                           \
889                                                                               \
890   map_base += ((vertical_offset % 256) / 8) * 32;                             \
891   partial_tile_offset = (horizontal_offset % 8);                              \
892                                                                               \
893   if(pixel_run >= end)                                                        \
894   {                                                                           \
895     if(partial_tile_offset)                                                   \
896     {                                                                         \
897       partial_tile_run = 8 - partial_tile_offset;                             \
898       if(end < partial_tile_run)                                              \
899       {                                                                       \
900         partial_tile_run = end;                                               \
901         partial_tile_mid_map(combine_op, color_depth, alpha_op);              \
902         return;                                                               \
903       }                                                                       \
904       else                                                                    \
905       {                                                                       \
906         end -= partial_tile_run;                                              \
907         partial_tile_right_map(combine_op, color_depth, alpha_op);            \
908       }                                                                       \
909     }                                                                         \
910                                                                               \
911     tile_run = end / 8;                                                       \
912     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
913                                                                               \
914     partial_tile_run = end % 8;                                               \
915                                                                               \
916     if(partial_tile_run)                                                      \
917     {                                                                         \
918       partial_tile_left_map(combine_op, color_depth, alpha_op);               \
919     }                                                                         \
920   }                                                                           \
921   else                                                                        \
922   {                                                                           \
923     if(partial_tile_offset)                                                   \
924     {                                                                         \
925       partial_tile_run = 8 - partial_tile_offset;                             \
926       partial_tile_right_map(combine_op, color_depth, alpha_op);              \
927     }                                                                         \
928                                                                               \
929     tile_run = (pixel_run - partial_tile_run) / 8;                            \
930     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
931     map_ptr = second_ptr;                                                     \
932     end -= pixel_run;                                                         \
933     tile_run = end / 8;                                                       \
934     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
935                                                                               \
936     partial_tile_run = end % 8;                                               \
937     if(partial_tile_run)                                                      \
938     {                                                                         \
939       partial_tile_left_map(combine_op, color_depth, alpha_op);               \
940     }                                                                         \
941   }                                                                           \
942 }                                                                             \
943
944 #define render_scanline_dest_normal         u16
945 #define render_scanline_dest_alpha          u32
946 #define render_scanline_dest_alpha_obj      u32
947 #define render_scanline_dest_color16        u16
948 #define render_scanline_dest_color32        u32
949 #define render_scanline_dest_partial_alpha  u32
950 #define render_scanline_dest_copy_tile      u16
951 #define render_scanline_dest_copy_bitmap    u16
952 // The names above give the scanline pixel type used for each alpha_op.
953 // Needs pixel_combine, layer, start, end and scanline in the expansion scope.
954 // If rendering a scanline that is not a target A then there's no point in
955 // keeping what's underneath it because it can't blend with it: when bit
956 // 0x200 of pixel_combine is clear, the color32 renderer takes over instead.
957 #define render_scanline_skip_alpha(bg_type, combine_op)                       \
958   if((pixel_combine & 0x00000200) == 0)                                       \
959   {                                                                           \
960     render_scanline_##bg_type##_##combine_op##_color32(layer,                 \
961      start, end, scanline);                                                   \
962     return;                                                                   \
963   }                                                                           \
964
965 // Extra locals for base/normal: a zero pixel_combine, or the converted palette.
966 #ifdef RENDER_COLOR16_NORMAL
967
968 #define render_scanline_extra_variables_base_normal(bg_type)                  \
969   const u32 pixel_combine = 0                                                 \
970
971 #else
972
973 #define render_scanline_extra_variables_base_normal(bg_type)                  \
974   u16 *palette = palette_ram_converted                                        \
975
976 #endif
977 // Extra locals for base-mode alpha/color renderers: bg_combine is
978 // color_combine_mask(5), pixel_combine the mask of the current layer.
979 #define render_scanline_extra_variables_base_alpha(bg_type)                   \
980   u32 bg_combine = color_combine_mask(5);                                     \
981   u32 pixel_combine = color_combine_mask(layer) | (bg_combine << 16);         \
982   render_scanline_skip_alpha(bg_type, base)                                   \
983 // (alpha also keeps bg_combine in pixel_combine's upper half and may skip.)
984 #define render_scanline_extra_variables_base_color()                          \
985   u32 bg_combine = color_combine_mask(5);                                     \
986   u32 pixel_combine = color_combine_mask(layer)                               \
987
988 #define render_scanline_extra_variables_base_color16(bg_type)                 \
989   render_scanline_extra_variables_base_color()                                \
990
991 #define render_scanline_extra_variables_base_color32(bg_type)                 \
992   render_scanline_extra_variables_base_color()                                \
993
994 // Transparent-mode extra locals; none of these variants declares bg_combine.
995 #define render_scanline_extra_variables_transparent_normal(bg_type)           \
996   render_scanline_extra_variables_base_normal(bg_type)                        \
997
998 #define render_scanline_extra_variables_transparent_alpha(bg_type)            \
999   u32 pixel_combine = color_combine_mask(layer);                              \
1000   render_scanline_skip_alpha(bg_type, transparent)                            \
1001
1002 #define render_scanline_extra_variables_transparent_color()                   \
1003   u32 pixel_combine = color_combine_mask(layer)                               \
1004
1005 #define render_scanline_extra_variables_transparent_color16(bg_type)          \
1006   render_scanline_extra_variables_transparent_color()                         \
1007
1008 #define render_scanline_extra_variables_transparent_color32(bg_type)          \
1009   render_scanline_extra_variables_transparent_color()                         \
1010
1011
1012
1013
1014
1015 // Map widths and heights in pixels, indexed by the two-bit map size field
1016 // (bits 14-15 of the BG control register, as read by the text renderer).
1017 u32 map_widths[] = { 256, 512, 256, 512 };
1018 u32 map_heights[] = { 256, 256, 512, 512 };
1019
1020 // Build text scanline rendering functions.
1021
1022 #define render_scanline_text_builder(combine_op, alpha_op)                    \
1023 void render_scanline_text_##combine_op##_##alpha_op(u32 layer,                \
1024  u32 start, u32 end, void *scanline)                                          \
1025 {                                                                             \
1026   render_scanline_extra_variables_##combine_op##_##alpha_op(text);            \
1027   u32 bg_control = io_registers[REG_BG0CNT + layer];                          \
1028   u32 map_size = (bg_control >> 14) & 0x03;                                   \
1029   u32 map_width = map_widths[map_size];                                       \
1030   u32 map_height = map_heights[map_size];                                     \
1031   u32 horizontal_offset =                                                     \
1032    (io_registers[REG_BG0HOFS + (layer * 2)] + start) % 512;                   \
1033   u32 vertical_offset = (io_registers[REG_VCOUNT] +                           \
1034    io_registers[REG_BG0VOFS + (layer * 2)]) % 512;                            \
1035   u32 current_pixel;                                                          \
1036   u32 current_pixels;                                                         \
1037   u32 partial_tile_run = 0;                                                   \
1038   u32 partial_tile_offset;                                                    \
1039   u32 tile_run;                                                               \
1040   u32 i;                                                                      \
1041   render_scanline_dest_##alpha_op *dest_ptr =                                 \
1042    ((render_scanline_dest_##alpha_op *)scanline) + start;                     \
1043                                                                               \
1044   u16 *map_base = (u16 *)(vram + ((bg_control >> 8) & 0x1F) * (1024 * 2));    \
1045   u16 *map_ptr, *second_ptr;                                                  \
1046   u8 *tile_ptr;                                                               \
1047                                                                               \
1048   end -= start;                                                               \
1049                                                                               \
1050   if((map_size & 0x02) && (vertical_offset >= 256))                           \
1051   {                                                                           \
1052     map_base += ((map_width / 8) * 32) +                                      \
1053      (((vertical_offset - 256) / 8) * 32);                                    \
1054   }                                                                           \
1055   else                                                                        \
1056   {                                                                           \
1057     map_base += (((vertical_offset % 256) / 8) * 32);                         \
1058   }                                                                           \
1059                                                                               \
1060   if(map_size & 0x01)                                                         \
1061   {                                                                           \
1062     if(horizontal_offset >= 256)                                              \
1063     {                                                                         \
1064       horizontal_offset -= 256;                                               \
1065       map_ptr = map_base + (32 * 32) + (horizontal_offset / 8);               \
1066       second_ptr = map_base;                                                  \
1067     }                                                                         \
1068     else                                                                      \
1069     {                                                                         \
1070       map_ptr = map_base + (horizontal_offset / 8);                           \
1071       second_ptr = map_base + (32 * 32);                                      \
1072     }                                                                         \
1073   }                                                                           \
1074   else                                                                        \
1075   {                                                                           \
1076     horizontal_offset %= 256;                                                 \
1077     map_ptr = map_base + (horizontal_offset / 8);                             \
1078     second_ptr = map_base;                                                    \
1079   }                                                                           \
1080                                                                               \
1081   if(bg_control & 0x80)                                                       \
1082   {                                                                           \
1083     tile_render(8bpp, combine_op, alpha_op);                                  \
1084   }                                                                           \
1085   else                                                                        \
1086   {                                                                           \
1087     tile_render(4bpp, combine_op, alpha_op);                                  \
1088   }                                                                           \
1089 }                                                                             \
1090
1091 render_scanline_text_builder(base, normal);
1092 render_scanline_text_builder(transparent, normal);
1093 render_scanline_text_builder(base, color16);
1094 render_scanline_text_builder(transparent, color16);
1095 render_scanline_text_builder(base, color32);
1096 render_scanline_text_builder(transparent, color32);
1097 render_scanline_text_builder(base, alpha);
1098 render_scanline_text_builder(transparent, alpha);
1099 // Affine reference coordinates, two entries each (presumably one per
1100 // affine background -- TODO confirm against the code that writes them).
1101 s32 affine_reference_x[2];
1102 s32 affine_reference_y[2];
1103
// affine_render_bg_pixel_*: load into current_pixel the value that is drawn
// where an affine layer has no map data.
//   _normal                    entry 0 of palette_ram_converted
//   _alpha, _color16, _color32 bg_combine (defined outside this section)
//   _base(alpha_op)            dispatches to the alpha_op variant above
//   _transparent, _copy        expand to nothing
1104 #define affine_render_bg_pixel_normal()                                       \
1105   current_pixel = palette_ram_converted[0]                                    \
1106
1107 #define affine_render_bg_pixel_alpha()                                        \
1108   current_pixel = bg_combine                                                  \
1109
1110 #define affine_render_bg_pixel_color16()                                      \
1111   affine_render_bg_pixel_alpha()                                              \
1112
1113 #define affine_render_bg_pixel_color32()                                      \
1114   affine_render_bg_pixel_alpha()                                              \
1115
1116 #define affine_render_bg_pixel_base(alpha_op)                                 \
1117   affine_render_bg_pixel_##alpha_op()                                         \
1118
1119 #define affine_render_bg_pixel_transparent(alpha_op)                          \
1120
1121 #define affine_render_bg_pixel_copy(alpha_op)                                 \
1122
// affine_render_bg_*: emit one out-of-map pixel at dest_ptr. The base variant
// stores current_pixel (loaded beforehand by affine_render_bg_pixel_*); the
// transparent and copy variants expand to nothing, leaving the destination
// untouched. None of them advances dest_ptr.
1123 #define affine_render_bg_base(alpha_op)                                       \
1124   dest_ptr[0] = current_pixel
1125
1126 #define affine_render_bg_transparent(alpha_op)                                \
1127
1128 #define affine_render_bg_copy(alpha_op)                                       \
1129
// affine_render_bg_remainder_*: finish a scanline span once the source has
// left the map. The base variant reloads the background value for alpha_op
// (current_pixel was overwritten by map pixels) and writes it for every
// remaining index i up to end, advancing the destination one pixel at a time.
// The transparent and copy variants expand to nothing.
1130 #define affine_render_bg_remainder_base(alpha_op)                             \
1131   affine_render_bg_pixel_##alpha_op();                                        \
1132   for(; i < end; i++)                                                         \
1133   {                                                                           \
1134     affine_render_bg_base(alpha_op);                                          \
1135     advance_dest_ptr_base(1);                                                 \
1136   }                                                                           \
1137
1138 #define affine_render_bg_remainder_transparent(alpha_op)                      \
1139
1140 #define affine_render_bg_remainder_copy(alpha_op)                             \
1141
// Step to the next output pixel: move the source position by (dx, dy) and
// advance the destination by one pixel through advance_dest_ptr_<combine_op>,
// which is defined outside this section.
1142 #define affine_render_next(combine_op)                                        \
1143   source_x += dx;                                                             \
1144   source_y += dy;                                                             \
1145   advance_dest_ptr_##combine_op(1)                                            \
1146
// Fold the (constant) source row of a scale-only scanline into the base
// pointers: tile_base is moved to row (pixel_y % 8) of an 8-byte-per-row tile
// and map_base to map row (pixel_y / 8), where a map row is 1 << map_pitch
// entries. After this only the horizontal position is needed per pixel.
1147 #define affine_render_scale_offset()                                          \
1148   tile_base += ((pixel_y % 8) * 8);                                           \
1149   map_base += (pixel_y / 8) << map_pitch                                      \
1150
1151 #define affine_render_scale_pixel(combine_op, alpha_op)                       \
1152   map_offset = (pixel_x / 8);                                                 \
1153   if(map_offset != last_map_offset)                                           \
1154   {                                                                           \
1155     tile_ptr = tile_base + (map_base[map_offset] * 64);                       \
1156     last_map_offset = map_offset;                                             \
1157   }                                                                           \
1158   tile_ptr = tile_base + (map_base[(pixel_x / 8)] * 64);                      \
1159   current_pixel = tile_ptr[(pixel_x % 8)];                                    \
1160   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op);                          \
1161   affine_render_next(combine_op)                                              \
1162
// Scale-only affine scanline without wraparound (the builder selects this when
// dy == 0 and the wrap bit is clear). Because source_y is constant along the
// line, pixel_y is computed once. If it lies outside [0, width_height) nothing
// is drawn from the map and the whole span goes to the remainder macro.
// Otherwise the first loop emits background pixels until pixel_x enters the
// map, the second loop draws map pixels until pixel_x leaves it, and the
// remainder macro finishes whatever is left. The unsigned comparisons treat
// negative source coordinates as out of range.
// The braces open a scope with its own `i`, shadowing the builder's.
1163 #define affine_render_scale(combine_op, alpha_op)                             \
1164 {                                                                             \
1165   pixel_y = source_y >> 8;                                                    \
1166   u32 i = 0;                                                                  \
1167   affine_render_bg_pixel_##combine_op(alpha_op);                              \
1168   if((u32)pixel_y < (u32)width_height)                                        \
1169   {                                                                           \
1170     affine_render_scale_offset();                                             \
1171     for(; i < end; i++)                                                       \
1172     {                                                                         \
1173       pixel_x = source_x >> 8;                                                \
1174                                                                               \
1175       if((u32)pixel_x < (u32)width_height)                                    \
1176       {                                                                       \
1177         break;                                                                \
1178       }                                                                       \
1179                                                                               \
1180       affine_render_bg_##combine_op(alpha_op);                                \
1181       affine_render_next(combine_op);                                         \
1182     }                                                                         \
1183                                                                               \
1184     for(; i < end; i++)                                                       \
1185     {                                                                         \
1186       pixel_x = source_x >> 8;                                                \
1187                                                                               \
1188       if((u32)pixel_x >= (u32)width_height)                                   \
1189         break;                                                                \
1190                                                                               \
1191       affine_render_scale_pixel(combine_op, alpha_op);                        \
1192     }                                                                         \
1193   }                                                                           \
1194   affine_render_bg_remainder_##combine_op(alpha_op);                          \
1195 }                                                                             \
1196
// Scale-only affine scanline with wraparound (dy == 0, wrap bit set). Source
// coordinates are reduced with wrap_mask = width_height - 1, which works
// because the builder computes width_height as a power of two, so every
// pixel of the span is drawn from the map and no background fill is needed.
// NOTE(review): after masking, pixel_y is always below width_height, so the
// range check on it can never fail.
1197 #define affine_render_scale_wrap(combine_op, alpha_op)                        \
1198 {                                                                             \
1199   u32 wrap_mask = width_height - 1;                                           \
1200   pixel_y = (source_y >> 8) & wrap_mask;                                      \
1201   if((u32)pixel_y < (u32)width_height)                                        \
1202   {                                                                           \
1203     affine_render_scale_offset();                                             \
1204     for(i = 0; i < end; i++)                                                  \
1205     {                                                                         \
1206       pixel_x = (source_x >> 8) & wrap_mask;                                  \
1207       affine_render_scale_pixel(combine_op, alpha_op);                        \
1208     }                                                                         \
1209   }                                                                           \
1210 }                                                                             \
1211
1212
// Draw one pixel of an affine background on the general (dy != 0) path and
// step to the next one. Both coordinates vary per pixel, so the map index
// combines the tile column (pixel_x / 8) with the tile row (pixel_y / 8)
// scaled by the map pitch, and the byte inside the 8x8, 64-byte tile is
// selected from both low parts. tile_ptr is reloaded only when the map index
// differs from the previous pixel's (last_map_offset).
1213 #define affine_render_rotate_pixel(combine_op, alpha_op)                      \
1214   map_offset = (pixel_x / 8) + ((pixel_y / 8) << map_pitch);                  \
1215   if(map_offset != last_map_offset)                                           \
1216   {                                                                           \
1217     tile_ptr = tile_base + (map_base[map_offset] * 64);                       \
1218     last_map_offset = map_offset;                                             \
1219   }                                                                           \
1220                                                                               \
1221   current_pixel = tile_ptr[(pixel_x % 8) + ((pixel_y % 8) * 8)];              \
1222   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op);                          \
1223   affine_render_next(combine_op)                                              \
1224
// General affine scanline without wraparound (dy != 0, wrap bit clear).
// The first loop emits background pixels until both source coordinates are
// inside [0, width_height). The second loop draws map pixels; as soon as
// either coordinate leaves the map it hands the rest of the span to the
// remainder macro and stops. The unsigned comparisons treat negative source
// coordinates as out of range.
1225 #define affine_render_rotate(combine_op, alpha_op)                            \
1226 {                                                                             \
1227   affine_render_bg_pixel_##combine_op(alpha_op);                              \
1228   for(i = 0; i < end; i++)                                                    \
1229   {                                                                           \
1230     pixel_x = source_x >> 8;                                                  \
1231     pixel_y = source_y >> 8;                                                  \
1232                                                                               \
1233     if(((u32)pixel_x < (u32)width_height) &&                                  \
1234      ((u32)pixel_y < (u32)width_height))                                      \
1235     {                                                                         \
1236       break;                                                                  \
1237     }                                                                         \
1238     affine_render_bg_##combine_op(alpha_op);                                  \
1239     affine_render_next(combine_op);                                           \
1240   }                                                                           \
1241                                                                               \
1242   for(; i < end; i++)                                                         \
1243   {                                                                           \
1244     pixel_x = source_x >> 8;                                                  \
1245     pixel_y = source_y >> 8;                                                  \
1246                                                                               \
1247     if(((u32)pixel_x >= (u32)width_height) ||                                 \
1248      ((u32)pixel_y >= (u32)width_height))                                     \
1249     {                                                                         \
1250       affine_render_bg_remainder_##combine_op(alpha_op);                      \
1251       break;                                                                  \
1252     }                                                                         \
1253                                                                               \
1254     affine_render_rotate_pixel(combine_op, alpha_op);                         \
1255   }                                                                           \
1256 }                                                                             \
1257
// General affine scanline with wraparound (dy != 0, wrap bit set). Both source
// coordinates are reduced with wrap_mask = width_height - 1 (width_height is a
// power of two in the builder), so every pixel of the span comes from the map.
1258 #define affine_render_rotate_wrap(combine_op, alpha_op)                       \
1259 {                                                                             \
1260   u32 wrap_mask = width_height - 1;                                           \
1261   for(i = 0; i < end; i++)                                                    \
1262   {                                                                           \
1263     pixel_x = (source_x >> 8) & wrap_mask;                                    \
1264     pixel_y = (source_y >> 8) & wrap_mask;                                    \
1265                                                                               \
1266     affine_render_rotate_pixel(combine_op, alpha_op);                         \
1267   }                                                                           \
1268 }                                                                             \
1269
1270
1271 // Build affine background renderers.
//
// Defines render_scanline_affine_<combine_op>_<alpha_op>(layer, start, end,
// scanline), which renders pixels [start, end) of one affine layer into
// scanline (an array of render_scanline_dest_<alpha_op>).
//
// Values read from the layer's control register (REG_BG0CNT + layer):
//   bits 14-15  map_size; the map is square, width_height = 128 << map_size
//               pixels, and map_pitch = map_size + 4 is log2 of the number of
//               8-pixel tiles per map row
//   bits 8-12   map base, in 2 KB units from vram
//   bits 2-3    tile base, in 16 KB units from vram
//   bit 13      wraparound enable
// dx and dy are the signed 16-bit values at REG_BG2PA / REG_BG2PC, offset by
// (layer - 2) * 8 register slots for the second affine layer. The starting
// source position comes from affine_reference_x/y[layer - 2], advanced by
// start pixels. end is converted to a pixel count before dispatch.
//
// The switch picks one of four loops from the wrap bit and whether dy is
// zero: 0 scale, 1 rotate, 2 scale with wrap, 3 rotate with wrap.
//
// NOTE(review): vcount is not used by the code visible here; it may be
// consumed by render_scanline_extra_variables_*, which is defined elsewhere.
1272
1273 #define render_scanline_affine_builder(combine_op, alpha_op)                  \
1274 void render_scanline_affine_##combine_op##_##alpha_op(u32 layer,              \
1275  u32 start, u32 end, void *scanline)                                          \
1276 {                                                                             \
1277   render_scanline_extra_variables_##combine_op##_##alpha_op(affine);          \
1278   u32 bg_control = io_registers[REG_BG0CNT + layer];                          \
1279   u32 current_pixel;                                                          \
1280   s32 source_x, source_y;                                                     \
1281   u32 vcount = io_registers[REG_VCOUNT];                                      \
1282   u32 pixel_x, pixel_y;                                                       \
1283   u32 layer_offset = (layer - 2) * 8;                                         \
1284   s32 dx, dy;                                                                 \
1285   u32 map_size = (bg_control >> 14) & 0x03;                                   \
1286   u32 width_height = 1 << (7 + map_size);                                     \
1287   u32 map_pitch = map_size + 4;                                               \
1288   u8 *map_base = vram + (((bg_control >> 8) & 0x1F) * (1024 * 2));            \
1289   u8 *tile_base = vram + (((bg_control >> 2) & 0x03) * (1024 * 16));          \
1290   u8 *tile_ptr;                                                               \
1291   u32 map_offset, last_map_offset = (u32)-1;                                  \
1292   u32 i;                                                                      \
1293   render_scanline_dest_##alpha_op *dest_ptr =                                 \
1294    ((render_scanline_dest_##alpha_op *)scanline) + start;                     \
1295                                                                               \
1296   dx = (s16)io_registers[REG_BG2PA + layer_offset];                           \
1297   dy = (s16)io_registers[REG_BG2PC + layer_offset];                           \
1298   source_x = affine_reference_x[layer - 2] + (start * dx);                    \
1299   source_y = affine_reference_y[layer - 2] + (start * dy);                    \
1300                                                                               \
1301   end -= start;                                                               \
1302                                                                               \
1303   switch(((bg_control >> 12) & 0x02) | (dy != 0))                             \
1304   {                                                                           \
1305     case 0x00:                                                                \
1306       affine_render_scale(combine_op, alpha_op);                              \
1307       break;                                                                  \
1308                                                                               \
1309     case 0x01:                                                                \
1310       affine_render_rotate(combine_op, alpha_op);                             \
1311       break;                                                                  \
1312                                                                               \
1313     case 0x02:                                                                \
1314       affine_render_scale_wrap(combine_op, alpha_op);                         \
1315       break;                                                                  \
1316                                                                               \
1317     case 0x03:                                                                \
1318       affine_render_rotate_wrap(combine_op, alpha_op);                        \
1319       break;                                                                  \
1320   }                                                                           \
1321 }                                                                             \
1322
// Instantiate render_scanline_affine_<combine_op>_<alpha_op> for every
// (combine_op, alpha_op) pair listed by tile_layer_render_functions(affine).
1323 render_scanline_affine_builder(base, normal);
1324 render_scanline_affine_builder(transparent, normal);
1325 render_scanline_affine_builder(base, color16);
1326 render_scanline_affine_builder(transparent, color16);
1327 render_scanline_affine_builder(base, color32);
1328 render_scanline_affine_builder(transparent, color32);
1329 render_scanline_affine_builder(base, alpha);
1330 render_scanline_affine_builder(transparent, alpha);
1331
1332
// bitmap_render_pixel_<mode>: write current_pixel, as fetched from the bitmap,
// to *dest_ptr. Modes 3 and 5 pass the value through convert_palette() and
// store it directly; mode 4 goes through tile_expand_base_<alpha_op>, like the
// 8bpp tile path. convert_palette and tile_expand_base_* are defined outside
// this section. None of these advances dest_ptr.
1333 #define bitmap_render_pixel_mode3(alpha_op)                                   \
1334   convert_palette(current_pixel);                                             \
1335   *dest_ptr = current_pixel                                                   \
1336
1337 #define bitmap_render_pixel_mode4(alpha_op)                                   \
1338   tile_expand_base_##alpha_op(0)                                              \
1339
1340 #define bitmap_render_pixel_mode5(alpha_op)                                   \
1341   bitmap_render_pixel_mode3(alpha_op)                                         \
1342
1343
1344 #define bitmap_render_scale(type, alpha_op, width, height)                    \
1345   pixel_y = (source_y >> 8);                                                  \
1346   if((u32)pixel_y < (u32)height)                                              \
1347   {                                                                           \
1348     pixel_x = (source_x >> 8);                                                \
1349     src_ptr += (pixel_y * width);                                             \
1350     if(dx == 0x100)                                                           \
1351     {                                                                         \
1352       if(pixel_x < 0)                                                         \
1353       {                                                                       \
1354         end += pixel_x;                                                       \
1355         dest_ptr -= pixel_x;                                                  \
1356         pixel_x = 0;                                                          \
1357       }                                                                       \
1358       else                                                                    \
1359                                                                               \
1360       if(pixel_x > 0)                                                         \
1361       {                                                                       \
1362         src_ptr += pixel_x;                                                   \
1363       }                                                                       \
1364                                                                               \
1365       if((pixel_x + end) >= width)                                            \
1366         end = (width - pixel_x);                                              \
1367                                                                               \
1368       for(i = 0; (s32)i < (s32)end; i++)                                      \
1369       {                                                                       \
1370         current_pixel = *src_ptr;                                             \
1371         bitmap_render_pixel_##type(alpha_op);                                 \
1372         src_ptr++;                                                            \
1373         dest_ptr++;                                                           \
1374       }                                                                       \
1375     }                                                                         \
1376     else                                                                      \
1377     {                                                                         \
1378       if((u32)(source_y >> 8) < (u32)height)                                  \
1379       {                                                                       \
1380         for(i = 0; i < end; i++)                                              \
1381         {                                                                     \
1382           pixel_x = (source_x >> 8);                                          \
1383                                                                               \
1384           if((u32)pixel_x < (u32)width)                                       \
1385             break;                                                            \
1386                                                                               \
1387           source_x += dx;                                                     \
1388           dest_ptr++;                                                         \
1389         }                                                                     \
1390                                                                               \
1391         for(; i < end; i++)                                                   \
1392         {                                                                     \
1393           pixel_x = (source_x >> 8);                                          \
1394                                                                               \
1395           if((u32)pixel_x >= (u32)width)                                      \
1396             break;                                                            \
1397                                                                               \
1398           current_pixel = src_ptr[pixel_x];                                   \
1399           bitmap_render_pixel_##type(alpha_op);                               \
1400                                                                               \
1401           source_x += dx;                                                     \
1402           dest_ptr++;                                                         \
1403         }                                                                     \
1404       }                                                                       \
1405     }                                                                         \
1406   }                                                                           \
1407
// Render a bitmap-mode scanline span when dy != 0. The first loop skips output
// pixels (without writing them) until the source position is inside the
// width x height bitmap; the second draws pixels until the source position
// leaves it. Output pixels after that point are left untouched. The unsigned
// comparisons treat negative source coordinates as out of range.
1408 #define bitmap_render_rotate(type, alpha_op, width, height)                   \
1409   for(i = 0; i < end; i++)                                                    \
1410   {                                                                           \
1411     pixel_x = source_x >> 8;                                                  \
1412     pixel_y = source_y >> 8;                                                  \
1413                                                                               \
1414     if(((u32)pixel_x < (u32)width) && ((u32)pixel_y < (u32)height))           \
1415       break;                                                                  \
1416                                                                               \
1417     source_x += dx;                                                           \
1418     source_y += dy;                                                           \
1419     dest_ptr++;                                                               \
1420   }                                                                           \
1421                                                                               \
1422   for(; i < end; i++)                                                         \
1423   {                                                                           \
1424     pixel_x = (source_x >> 8);                                                \
1425     pixel_y = (source_y >> 8);                                                \
1426                                                                               \
1427     if(((u32)pixel_x >= (u32)width) || ((u32)pixel_y >= (u32)height))         \
1428       break;                                                                  \
1429                                                                               \
1430     current_pixel = src_ptr[pixel_x + (pixel_y * width)];                     \
1431      bitmap_render_pixel_##type(alpha_op);                                    \
1432                                                                               \
1433     source_x += dx;                                                           \
1434     source_y += dy;                                                           \
1435     dest_ptr++;                                                               \
1436   }                                                                           \
1437
1438
// render_scanline_vram_setup_<mode>: declare src_ptr, the start of the bitmap
// the scanline is read from, as 16-bit pixels. Mode 3 always reads from the
// start of vram. Mode 5 reads from vram + 0xA000 when bit 4 (0x10) of
// REG_DISPCNT is set and from the start of vram otherwise.
1439 #define render_scanline_vram_setup_mode3()                                    \
1440   u16 *src_ptr = (u16 *)vram                                                  \
1441
1442 #define render_scanline_vram_setup_mode5()                                    \
1443   u16 *src_ptr;                                                               \
1444   if(io_registers[REG_DISPCNT] & 0x10)                                        \
1445     src_ptr = (u16 *)(vram + 0xA000);                                         \
1446   else                                                                        \
1447     src_ptr = (u16 *)vram                                                     \
1448
1449
// render_scanline_vram_setup_mode4: declare src_ptr for the 8-bit-per-pixel
// bitmap mode, using the same REG_DISPCNT bit 4 selection between vram and
// vram + 0xA000 as mode 5. Each variant also declares one extra local,
// presumably the one tile_expand_base_normal expects in that configuration
// (TODO confirm against its definition): pixel_combine when
// RENDER_COLOR16_NORMAL is defined, palette otherwise.
1450 #ifdef RENDER_COLOR16_NORMAL
1451
1452 #define render_scanline_vram_setup_mode4()                                    \
1453   const u32 pixel_combine = 0;                                                \
1454   u8 *src_ptr;                                                                \
1455   if(io_registers[REG_DISPCNT] & 0x10)                                        \
1456     src_ptr = vram + 0xA000;                                                  \
1457   else                                                                        \
1458     src_ptr = vram                                                            \
1459
1460
1461 #else
1462
1463 #define render_scanline_vram_setup_mode4()                                    \
1464   u16 *palette = palette_ram_converted;                                       \
1465   u8 *src_ptr;                                                                \
1466   if(io_registers[REG_DISPCNT] & 0x10)                                        \
1467     src_ptr = vram + 0xA000;                                                  \
1468   else                                                                        \
1469     src_ptr = vram                                                            \
1470
1471 #endif
1472
1473
1474
1475 // Build bitmap scanline rendering functions.
//
// Defines render_scanline_bitmap_<type>_<alpha_op>(start, end, scanline),
// which renders pixels [start, end) of the bitmap layer into scanline (an
// array of render_scanline_dest_<alpha_op>). width and height are the bitmap
// dimensions in pixels for the mode.
//
// dx and dy are the signed 16-bit values of REG_BG2PA and REG_BG2PC; the
// starting source position is affine_reference_x/y[0] advanced by start
// pixels. end is converted to a pixel count, then the span is rendered by
// bitmap_render_scale when dy == 0 and by bitmap_render_rotate otherwise.
//
// NOTE(review): bg_control and vcount are not used by the code visible here.
1476
1477 #define render_scanline_bitmap_builder(type, alpha_op, width, height)         \
1478 void render_scanline_bitmap_##type##_##alpha_op(u32 start, u32 end,           \
1479  void *scanline)                                                              \
1480 {                                                                             \
1481   u32 bg_control = io_registers[REG_BG2CNT];                                  \
1482   u32 current_pixel;                                                          \
1483   s32 source_x, source_y;                                                     \
1484   u32 vcount = io_registers[REG_VCOUNT];                                      \
1485   s32 pixel_x, pixel_y;                                                       \
1486                                                                               \
1487   s32 dx = (s16)io_registers[REG_BG2PA];                                      \
1488   s32 dy = (s16)io_registers[REG_BG2PC];                                      \
1489                                                                               \
1490   u32 i;                                                                      \
1491                                                                               \
1492   render_scanline_dest_##alpha_op *dest_ptr =                                 \
1493    ((render_scanline_dest_##alpha_op *)scanline) + start;                     \
1494   render_scanline_vram_setup_##type();                                        \
1495                                                                               \
1496   end -= start;                                                               \
1497                                                                               \
1498   source_x = affine_reference_x[0] + (start * dx);                            \
1499   source_y = affine_reference_y[0] + (start * dy);                            \
1500                                                                               \
1501   if(dy == 0)                                                                 \
1502   {                                                                           \
1503     bitmap_render_scale(type, alpha_op, width, height);                       \
1504   }                                                                           \
1505   else                                                                        \
1506   {                                                                           \
1507     bitmap_render_rotate(type, alpha_op, width, height);                      \
1508   }                                                                           \
1509 }                                                                             \
1510
// Instantiate the bitmap renderers. Only the "normal" alpha_op is built.
// Modes 3 and 4 use a 240x160 bitmap, mode 5 a 160x128 one.
1511 render_scanline_bitmap_builder(mode3, normal, 240, 160);
1512 render_scanline_bitmap_builder(mode4, normal, 240, 160);
1513 render_scanline_bitmap_builder(mode5, normal, 160, 128);
1514
1515
1516 // Fill in the renderers for a layer based on the mode type.
// Expands to a brace-enclosed initializer naming the eight
// render_scanline_<type>_* functions. The initializer is positional, so the
// order here must match the member order of tile_layer_render_struct, which is
// declared outside this section.
1517
1518 #define tile_layer_render_functions(type)                                     \
1519 {                                                                             \
1520   render_scanline_##type##_base_normal,                                       \
1521   render_scanline_##type##_transparent_normal,                                \
1522   render_scanline_##type##_base_alpha,                                        \
1523   render_scanline_##type##_transparent_alpha,                                 \
1524   render_scanline_##type##_base_color16,                                      \
1525   render_scanline_##type##_transparent_color16,                               \
1526   render_scanline_##type##_base_color32,                                      \
1527   render_scanline_##type##_transparent_color32                                \
1528 }                                                                             \
1529
1530
1531 // Use if a layer is unsupported for that mode.
// NOTE(review): this lists four NULLs while tile_layer_render_functions lists
// eight entries; if both initialize the same struct, the members not listed
// here are implicitly zero-initialized. Not referenced in this section.
1532
1533 #define tile_layer_render_null()                                              \
1534 {                                                                             \
1535   NULL, NULL, NULL, NULL                                                      \
1536 }                                                                             \
1537
// Initializer for one bitmap_layer_render_struct entry: the single "normal"
// renderer built for the given bitmap mode.
1538 #define bitmap_layer_render_functions(type)                                   \
1539 {                                                                             \
1540   render_scanline_bitmap_##type##_normal                                      \
1541 }                                                                             \
1542
1543 // Structs containing functions to render the layers for each mode, for
1544 // each render type.
// The first index is the video mode (see render_scanline_layer_functions_tile,
// which indexes with dispcnt & 0x07); the second is presumably the background
// layer 0-3. Mode 0 uses text renderers for all four entries, mode 1 uses the
// affine renderers for entry 2 only, and mode 2 uses them for entries 2 and 3.
1545 tile_layer_render_struct tile_mode_renderers[3][4] =
1546 {
1547   {
1548     tile_layer_render_functions(text), tile_layer_render_functions(text),
1549     tile_layer_render_functions(text), tile_layer_render_functions(text)
1550   },
1551   {
1552     tile_layer_render_functions(text), tile_layer_render_functions(text),
1553     tile_layer_render_functions(affine), tile_layer_render_functions(text)
1554   },
1555   {
1556     tile_layer_render_functions(text), tile_layer_render_functions(text),
1557     tile_layer_render_functions(affine), tile_layer_render_functions(affine)
1558   }
1559 };
1560
// Renderers for the three bitmap modes. render_scanline_layer_functions_bitmap
// indexes this table with (dispcnt & 0x07) - 3, so entry 0 is mode 3, entry 1
// is mode 4 and entry 2 is mode 5.
1561 bitmap_layer_render_struct bitmap_mode_renderers[3] =
1562 {
1563   bitmap_layer_render_functions(mode3),
1564   bitmap_layer_render_functions(mode4),
1565   bitmap_layer_render_functions(mode5)
1566 };
1567
1568
// Declare a local `layer_renderers` pointing at the renderer row for the
// current video mode. Both macros read a variable named `dispcnt` from the
// expansion site and expand to a declaration without a trailing semicolon.
//
// Tile variant: points at tile_mode_renderers[dispcnt & 0x07], i.e. the
// four-entry row for that mode.
// NOTE(review): the table has only three rows, so this assumes the mode bits
// are 0-2 whenever the tile variant is used - confirm at the call sites.
1569 #define render_scanline_layer_functions_tile()                                \
1570   tile_layer_render_struct *layer_renderers =                                 \
1571    tile_mode_renderers[dispcnt & 0x07]                                        \
1572
// Bitmap variant: points at the bitmap_mode_renderers entry for the mode,
// mapping mode bits 3, 4 and 5 onto indices 0, 1 and 2.
// NOTE(review): assumes the mode bits are 3-5 here - confirm at the call sites.
1573 #define render_scanline_layer_functions_bitmap()                              \
1574   bitmap_layer_render_struct *layer_renderers =                               \
1575    bitmap_mode_renderers + ((dispcnt & 0x07) - 3)                             \
1576
1577
1578 // Adjust a flipped obj's starting position
1579
// These expand to an expression fragment that is appended to the tile_ptr
// computation in obj_tile_offset_1D / obj_tile_offset_2D. The noflip variant
// expands to nothing.
1580 #define obj_tile_offset_noflip(color_depth)                                   \
1581
// The flip variant adds the offset of the last tile in the obj's row
// ((obj_width - 8) / 8 tiles of tile_size_<color_depth> each).
1582 #define obj_tile_offset_flip(color_depth)                                     \
1583   + (tile_size_##color_depth * ((obj_width - 8) / 8))                         \
1584
1585
1586 // Adjust the obj's starting point if it goes too far off the left edge of
1587 // the screen.
1588
// Skip the whole tiles covered by partial_tile_offset (a pixel count read from
// the expansion site): move tile_ptr by (partial_tile_offset / 8) tiles.
// The noflip variant moves forward.
1589 #define obj_tile_right_offset_noflip(color_depth)                             \
1590   tile_ptr += (partial_tile_offset / 8) * tile_size_##color_depth             \
1591
// The flip variant moves backward by the same amount.
1592 #define obj_tile_right_offset_flip(color_depth)                               \
1593   tile_ptr -= (partial_tile_offset / 8) * tile_size_##color_depth             \
1594
1595 // Get the current row offset into an obj in 1D map space
1596
// Sets tile_ptr to: tile_base + (low 10 bits of obj_attribute_2) * 32
//  + one full row of the obj's tiles (obj_width / 8 tiles) per 8 lines of
//    vertical_offset
//  + (vertical_offset % 8) rows of tile_width_<color_depth> inside the tile
//  + the flip adjustment supplied by obj_tile_offset_<flip_op>.
1597 #define obj_tile_offset_1D(color_depth, flip_op)                              \
1598   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1599    + ((vertical_offset / 8) * (obj_width / 8) * tile_size_##color_depth)      \
1600    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1601    obj_tile_offset_##flip_op(color_depth)                                     \
1602
1603 // Get the current row offset into an obj in 2D map space
1604
// Same as the 1D variant except that each 8 lines of vertical_offset advance
// by a fixed pitch of 1024 instead of by the obj's own width in tiles.
1605 #define obj_tile_offset_2D(color_depth, flip_op)                              \
1606   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1607    + ((vertical_offset / 8) * 1024)                                           \
1608    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1609    obj_tile_offset_##flip_op(color_depth)                                     \
1610
1611
1612 // Get the palette for 4bpp obj.
1613
// Takes bits 12-15 of obj_attribute_2 and leaves them in bits 4-7 of
// current_palette, i.e. the 4-bit field multiplied by 16.
1614 #define obj_get_palette_4bpp()                                                \
1615   current_palette = (obj_attribute_2 >> 8) & 0xF0                             \
1616
// The 8bpp variant expands to nothing; current_palette is left untouched.
1617 #define obj_get_palette_8bpp()                                                \
1618
1619
1620 // Render the current row of an obj.
1621
// Draws the row of a non-affine obj into `scanline`, clipped to the pixel
// range [start, end). All working variables (obj_x, obj_width, start, end,
// scanline, dest_ptr, tile_ptr, pixel_run, tile_run, partial_tile_run,
// partial_tile_offset, ...) come from the expansion site, and the drawing
// itself is done by the partial_tile_*_obj / multiple_tile_obj macros defined
// elsewhere in this file.
//
// Three cases are handled:
//  - obj starts left of `start`: whole tiles left of the clip edge are skipped
//    via obj_tile_right_offset_<flip_op>, then a leading partial tile, the
//    full tiles and (when the obj also reaches `end`) a trailing partial tile
//    are drawn. If the visible run is shorter than the leading partial tile,
//    only a "mid" partial tile is drawn.
//  - obj starts inside the range but reaches `end`: full tiles followed by a
//    trailing partial tile.
//  - obj lies completely inside the range: obj_width / 8 full tiles.
//
// The macro contains a `continue`, so it must be expanded inside a loop.
1622 #define obj_render(combine_op, color_depth, alpha_op, map_space, flip_op)     \
1623 {                                                                             \
1624   obj_get_palette_##color_depth();                                            \
1625   obj_tile_offset_##map_space(color_depth, flip_op);                          \
1626                                                                               \
1627   if(obj_x < (s32)start)                                                      \
1628   {                                                                           \
1629     dest_ptr = scanline + start;                                              \
1630     pixel_run = obj_width - (start - obj_x);                                  \
1631     if((s32)pixel_run > 0)                                                    \
1632     {                                                                         \
1633       if((obj_x + obj_width) >= end)                                          \
1634       {                                                                       \
1635         pixel_run = end - start;                                              \
1636         partial_tile_offset = start - obj_x;                                  \
1637         obj_tile_right_offset_##flip_op(color_depth);                         \
1638         partial_tile_offset %= 8;                                             \
1639                                                                               \
1640         if(partial_tile_offset)                                               \
1641         {                                                                     \
1642           partial_tile_run = 8 - partial_tile_offset;                         \
1643           if((s32)pixel_run < (s32)partial_tile_run)                          \
1644           {                                                                   \
1645             if((s32)pixel_run > 0)                                            \
1646             {                                                                 \
1647               partial_tile_run = pixel_run;                                   \
1648               partial_tile_mid_obj(combine_op, color_depth, alpha_op,         \
1649                flip_op);                                                      \
1650             }                                                                 \
1651             continue;                                                         \
1652           }                                                                   \
1653           else                                                                \
1654           {                                                                   \
1655             pixel_run -= partial_tile_run;                                    \
1656             partial_tile_right_obj(combine_op, color_depth, alpha_op,         \
1657              flip_op);                                                        \
1658           }                                                                   \
1659         }                                                                     \
1660         tile_run = pixel_run / 8;                                             \
1661         multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);        \
1662         partial_tile_run = pixel_run % 8;                                     \
1663         if(partial_tile_run)                                                  \
1664         {                                                                     \
1665           partial_tile_left_obj(combine_op, color_depth, alpha_op,            \
1666            flip_op);                                                          \
1667         }                                                                     \
1668       }                                                                       \
1669       else                                                                    \
1670       {                                                                       \
1671         partial_tile_offset = start - obj_x;                                  \
1672         obj_tile_right_offset_##flip_op(color_depth);                         \
1673         partial_tile_offset %= 8;                                             \
1674         if(partial_tile_offset)                                               \
1675         {                                                                     \
1676           partial_tile_run = 8 - partial_tile_offset;                         \
1677           partial_tile_right_obj(combine_op, color_depth, alpha_op,           \
1678            flip_op);                                                          \
1679         }                                                                     \
1680         tile_run = pixel_run / 8;                                             \
1681         multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);        \
1682       }                                                                       \
1683     }                                                                         \
1684   }                                                                           \
1685   else                                                                        \
1686                                                                               \
1687   if((obj_x + obj_width) >= end)                                              \
1688   {                                                                           \
1689     pixel_run = end - obj_x;                                                  \
1690     if((s32)pixel_run > 0)                                                    \
1691     {                                                                         \
1692       dest_ptr = scanline + obj_x;                                            \
1693       tile_run = pixel_run / 8;                                               \
1694       multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);          \
1695       partial_tile_run = pixel_run % 8;                                       \
1696       if(partial_tile_run)                                                    \
1697       {                                                                       \
1698         partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op);    \
1699       }                                                                       \
1700     }                                                                         \
1701   }                                                                           \
1702   else                                                                        \
1703   {                                                                           \
1704     dest_ptr = scanline + obj_x;                                              \
1705     tile_run = obj_width / 8;                                                 \
1706     multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);            \
1707   }                                                                           \
1708 }                                                                             \
1709
// Get the current row offset into a scaled obj in 1D map space. Same
// computation as obj_tile_offset_1D, but the tile row pitch is taken from
// max_x instead of obj_width and there is no flip term.
1710 #define obj_scale_offset_1D(color_depth)                                      \
1711   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1712    + ((vertical_offset / 8) * (max_x / 8) * tile_size_##color_depth)          \
1713    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1714
1715 // Get the current row offset into an obj in 2D map space
1716
// Scaled-obj variant: each 8 lines of vertical_offset advance by a fixed
// pitch of 1024; there is no flip term.
1717 #define obj_scale_offset_2D(color_depth)                                      \
1718   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1719    + ((vertical_offset / 8) * 1024)                                           \
1720    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1721
// Fetch one 4bpp pixel of a scaled obj and draw it. The source byte is
// tile_ptr[tile_map_offset + ((tile_x >> 1) & 3)]; an odd tile_x selects the
// high nibble, an even tile_x the low nibble. The pixel is then handed to
// tile_4bpp_draw_<combine_op> (defined elsewhere). Expands without a trailing
// semicolon.
1722 #define obj_render_scale_pixel_4bpp(combine_op, alpha_op)                     \
1723   if(tile_x & 0x01)                                                           \
1724   {                                                                           \
1725     current_pixel = tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)] >> 4;  \
1726   }                                                                           \
1727   else                                                                        \
1728   {                                                                           \
1729     current_pixel =                                                           \
1730      tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)] & 0x0F;               \
1731   }                                                                           \
1732                                                                               \
1733   tile_4bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1734
1735
// Fetch one 8bpp pixel of a scaled obj (byte tile_x & 7 of the current tile
// row) and draw it with tile_8bpp_draw_<combine_op> (defined elsewhere).
// Unlike the 4bpp variant this expansion already ends in a semicolon, so the
// caller's own semicolon yields a harmless empty statement.
1736 #define obj_render_scale_pixel_8bpp(combine_op, alpha_op)                     \
1737   current_pixel = tile_ptr[tile_map_offset + (tile_x & 0x07)];                \
1738   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op);                          \
1739
// Render the current row of an affine obj when dy == 0 (selected by
// obj_render_affine), so every destination pixel on the row reads from the
// same source row. Coordinates are fixed point and are shifted right by 8 to
// get pixel indices.
//
// The source row (source_y >> 8) is computed once; nothing is drawn if it is
// outside [0, max_y). The first loop steps over leading destination pixels
// whose source column is outside [0, max_x); the second loop draws pixels
// until the source column leaves that range or obj_width pixels have been
// visited. The unsigned comparisons reject negative coordinates as well.
//
// Declares a block-local vertical_offset that is used by
// obj_scale_offset_<map_space>; all other variables come from the expansion
// site (obj_render_affine).
1740 #define obj_render_scale(combine_op, color_depth, alpha_op, map_space)        \
1741 {                                                                             \
1742   u32 vertical_offset;                                                        \
1743   source_y += (y_delta * dmy);                                                \
1744   vertical_offset = (source_y >> 8);                                          \
1745   if((u32)vertical_offset < (u32)max_y)                                       \
1746   {                                                                           \
1747     obj_scale_offset_##map_space(color_depth);                                \
1748     source_x += (y_delta * dmx) - (middle_x * dx);                            \
1749                                                                               \
1750     for(i = 0; i < obj_width; i++)                                            \
1751     {                                                                         \
1752       tile_x = (source_x >> 8);                                               \
1753                                                                               \
1754       if((u32)tile_x < (u32)max_x)                                            \
1755         break;                                                                \
1756                                                                               \
1757       source_x += dx;                                                         \
1758       advance_dest_ptr_##combine_op(1);                                       \
1759     }                                                                         \
1760                                                                               \
1761     for(; i < obj_width; i++)                                                 \
1762     {                                                                         \
1763       tile_x = (source_x >> 8);                                               \
1764                                                                               \
1765       if((u32)tile_x >= (u32)max_x)                                           \
1766         break;                                                                \
1767                                                                               \
1768       tile_map_offset = (tile_x >> 3) * tile_size_##color_depth;              \
1769       obj_render_scale_pixel_##color_depth(combine_op, alpha_op);             \
1770                                                                               \
1771       source_x += dx;                                                         \
1772       advance_dest_ptr_##combine_op(1);                                       \
1773     }                                                                         \
1774   }                                                                           \
1775 }                                                                             \
1776
1777
// Set obj_tile_pitch, the offset between vertically adjacent tiles of a
// rotated obj (multiplied by tile_y >> 3 in obj_render_rotate).
// 1D map space: one row of the obj's own tiles (max_x / 8 tiles).
1778 #define obj_rotate_offset_1D(color_depth)                                     \
1779   obj_tile_pitch = (max_x / 8) * tile_size_##color_depth                      \
1780
// 2D map space: a fixed pitch of 1024.
1781 #define obj_rotate_offset_2D(color_depth)                                     \
1782   obj_tile_pitch = 1024                                                       \
1783
// Fetch one 4bpp pixel of a rotated obj and draw it. The source byte is
// located by tile_map_offset (the tile), (tile_y & 7) * obj_pitch (the row
// inside the tile) and (tile_x >> 1) & 3 (the byte inside the row); an odd
// tile_x selects the high nibble, an even tile_x the low nibble. Expands
// without a trailing semicolon.
1784 #define obj_render_rotate_pixel_4bpp(combine_op, alpha_op)                    \
1785   if(tile_x & 0x01)                                                           \
1786   {                                                                           \
1787     current_pixel = tile_ptr[tile_map_offset +                                \
1788      ((tile_x >> 1) & 0x03) + ((tile_y & 0x07) * obj_pitch)] >> 4;            \
1789   }                                                                           \
1790   else                                                                        \
1791   {                                                                           \
1792     current_pixel = tile_ptr[tile_map_offset +                                \
1793      ((tile_x >> 1) & 0x03) + ((tile_y & 0x07) * obj_pitch)] & 0x0F;          \
1794   }                                                                           \
1795                                                                               \
1796   tile_4bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1797
// Fetch one 8bpp pixel of a rotated obj (tile selected by tile_map_offset,
// row by (tile_y & 7) * obj_pitch, byte by tile_x & 7) and draw it with
// tile_8bpp_draw_<combine_op>. Expands without a trailing semicolon.
1798 #define obj_render_rotate_pixel_8bpp(combine_op, alpha_op)                    \
1799   current_pixel = tile_ptr[tile_map_offset +                                  \
1800    (tile_x & 0x07) + ((tile_y & 0x07) * obj_pitch)];                          \
1801                                                                               \
1802   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1803
// Render the current row of an affine obj when dy != 0 (selected by
// obj_render_affine): both source coordinates change per destination pixel.
// Coordinates are fixed point and are shifted right by 8 to get pixel
// indices.
//
// tile_ptr is set to the obj's first tile and obj_tile_pitch to the pitch for
// the map space. The first loop steps over leading destination pixels whose
// source position is outside [0, max_x) x [0, max_y); the second loop draws
// pixels until the source position leaves that rectangle or obj_width pixels
// have been visited. The unsigned comparisons reject negative coordinates as
// well. All variables come from the expansion site (obj_render_affine).
1804 #define obj_render_rotate(combine_op, color_depth, alpha_op, map_space)       \
1805 {                                                                             \
1806   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32);                    \
1807   obj_rotate_offset_##map_space(color_depth);                                 \
1808                                                                               \
1809   source_x += (y_delta * dmx) - (middle_x * dx);                              \
1810   source_y += (y_delta * dmy) - (middle_x * dy);                              \
1811                                                                               \
1812   for(i = 0; i < obj_width; i++)                                              \
1813   {                                                                           \
1814     tile_x = (source_x >> 8);                                                 \
1815     tile_y = (source_y >> 8);                                                 \
1816                                                                               \
1817     if(((u32)tile_x < (u32)max_x) && ((u32)tile_y < (u32)max_y))              \
1818       break;                                                                  \
1819                                                                               \
1820     source_x += dx;                                                           \
1821     source_y += dy;                                                           \
1822     advance_dest_ptr_##combine_op(1);                                         \
1823   }                                                                           \
1824                                                                               \
1825   for(; i < obj_width; i++)                                                   \
1826   {                                                                           \
1827     tile_x = (source_x >> 8);                                                 \
1828     tile_y = (source_y >> 8);                                                 \
1829                                                                               \
1830     if(((u32)tile_x >= (u32)max_x) || ((u32)tile_y >= (u32)max_y))            \
1831       break;                                                                  \
1832                                                                               \
1833     tile_map_offset = ((tile_x >> 3) * tile_size_##color_depth) +             \
1834     ((tile_y >> 3) * obj_tile_pitch);                                         \
1835     obj_render_rotate_pixel_##color_depth(combine_op, alpha_op);              \
1836                                                                               \
1837     source_x += dx;                                                           \
1838     source_y += dy;                                                           \
1839     advance_dest_ptr_##combine_op(1);                                         \
1840   }                                                                           \
1841 }                                                                             \
1842
1843 // Render the current row of an affine transformed OBJ.
1844
// Steps:
//  - Select the parameter group with bits 9-13 of obj_attribute_1 and read
//    dx, dmx, dy, dmy from elements 3, 7, 11 and 15 of that 16-element group
//    in oam_ram.
//  - Capture max_x / max_y (the source bounds) and the centre point before
//    any size adjustment, and start the source position at the obj's centre.
//  - If bit 0x200 of obj_attribute_0 is set, double obj_width, obj_height and
//    the centre offsets; the source bounds keep the original size.
//    NOTE(review): presumably the hardware "double size" flag - confirm.
//  - Clip the destination span to [start, end), shifting middle_x by the
//    amount clipped on the left; an empty span leaves via `continue`, so the
//    macro must be expanded inside a loop.
//  - Render with obj_render_scale when dy == 0, otherwise obj_render_rotate.
//
// obj_width, obj_height and obj_x from the expansion site are modified.
// NOTE(review): tile_offset is not referenced by the macros visible in this
// part of the file; it may be used by draw macros defined elsewhere.
// NOTE(review): params is declared s16 *; whether oam_ram has a matching
// signed element type is not visible here.
1845 #define obj_render_affine(combine_op, color_depth, alpha_op, map_space)       \
1846 {                                                                             \
1847   s16 *params = oam_ram + (((obj_attribute_1 >> 9) & 0x1F) * 16);             \
1848   s32 dx = params[3];                                                         \
1849   s32 dmx = params[7];                                                        \
1850   s32 dy = params[11];                                                        \
1851   s32 dmy = params[15];                                                       \
1852   s32 source_x, source_y;                                                     \
1853   s32 tile_x, tile_y;                                                         \
1854   u32 tile_offset;                                                            \
1855   u32 tile_map_offset;                                                        \
1856   s32 middle_x;                                                               \
1857   s32 middle_y;                                                               \
1858   s32 max_x = obj_width;                                                      \
1859   s32 max_y = obj_height;                                                     \
1860   s32 y_delta;                                                                \
1861   u32 obj_pitch = tile_width_##color_depth;                                   \
1862   u32 obj_tile_pitch;                                                         \
1863                                                                               \
1864   middle_x = (obj_width / 2);                                                 \
1865   middle_y = (obj_height / 2);                                                \
1866                                                                               \
1867   source_x = (middle_x << 8);                                                 \
1868   source_y = (middle_y << 8);                                                 \
1869                                                                               \
1870                                                                               \
1871   if(obj_attribute_0 & 0x200)                                                 \
1872   {                                                                           \
1873     obj_width *= 2;                                                           \
1874     obj_height *= 2;                                                          \
1875     middle_x *= 2;                                                            \
1876     middle_y *= 2;                                                            \
1877   }                                                                           \
1878                                                                               \
1879   if((s32)obj_x < (s32)start)                                                 \
1880   {                                                                           \
1881     u32 x_delta = start - obj_x;                                              \
1882     middle_x -= x_delta;                                                      \
1883     obj_width -= x_delta;                                                     \
1884     obj_x = start;                                                            \
1885                                                                               \
1886     if((s32)obj_width <= 0)                                                   \
1887       continue;                                                               \
1888   }                                                                           \
1889                                                                               \
1890   if((s32)(obj_x + obj_width) >= (s32)end)                                    \
1891   {                                                                           \
1892     obj_width = end - obj_x;                                                  \
1893                                                                               \
1894     if((s32)obj_width <= 0)                                                   \
1895       continue;                                                               \
1896   }                                                                           \
1897   dest_ptr = scanline + obj_x;                                                \
1898                                                                               \
1899   y_delta = vcount - (obj_y + middle_y);                                      \
1900                                                                               \
1901   obj_get_palette_##color_depth();                                            \
1902                                                                               \
1903   if(dy == 0)                                                                 \
1904   {                                                                           \
1905     obj_render_scale(combine_op, color_depth, alpha_op, map_space);           \
1906   }                                                                           \
1907   else                                                                        \
1908   {                                                                           \
1909     obj_render_rotate(combine_op, color_depth, alpha_op, map_space);          \
1910   }                                                                           \
1911 }                                                                             \
1912
// Obj width and height in pixels, twelve entries each, indexed the same way.
// Entries 0-3 are square (8, 16, 32, 64), entries 4-7 are wider than tall and
// entries 8-11 are taller than wide.
// NOTE(review): the index is presumably (shape * 4 + size) taken from the obj
// attributes - confirm where the tables are read.
1913 u32 obj_width_table[] = { 8, 16, 32, 64, 16, 32, 32, 64, 8, 8, 16, 32 };
1914 u32 obj_height_table[] = { 8, 16, 32, 64, 8, 8, 16, 32, 16, 32, 32, 64 };
1915
// Per-scanline obj bookkeeping; these arrays are filled and read elsewhere in
// the file.
// NOTE(review): the dimensions look like [priority class][scanline][slot],
// with 160 scanlines and up to 128 objs per list, and obj_alpha_count looks
// like a per-scanline count - confirm against the code that fills them.
1916 u8 obj_priority_list[5][160][128];
1917 u32 obj_priority_count[5][160];
1918 u32 obj_alpha_count[160];
1919
1920
1921 // Build obj rendering functions
1922
// Extra local declaration for the "normal" obj renderer; expands without a
// trailing semicolon. With RENDER_COLOR16_NORMAL defined it declares a
// constant pixel_combine of (1 << 8); otherwise it declares `palette`
// pointing 256 entries into palette_ram_converted.
// NOTE(review): the +256 offset is presumably the start of the obj palette -
// confirm.
1923 #ifdef RENDER_COLOR16_NORMAL
1924
1925 #define render_scanline_obj_extra_variables_normal(bg_type)                   \
1926   const u32 pixel_combine = (1 << 8)                                          \
1927
1928 #else
1929
1930 #define render_scanline_obj_extra_variables_normal(bg_type)                   \
1931   u16 *palette = palette_ram_converted + 256                                  \
1932
1933 #endif
1934
1935
// Locals shared by the color-combining obj renderers: `dest`, and
// pixel_combine initialised to color_combine_mask(4) with bit 8 set.
// color_combine_mask is defined elsewhere. Expands without a trailing
// semicolon.
1936 #define render_scanline_obj_extra_variables_color()                           \
1937   u32 dest;                                                                   \
1938   u32 pixel_combine = color_combine_mask(4) | (1 << 8)                        \
1939
// Locals for the alpha obj renderer. After declaring the color variables it
// checks bit 0x200 of pixel_combine; when that bit is clear the work is
// handed to render_scanline_obj_color32_<map_space> and the enclosing
// function returns. The expansion therefore contains a `return` and relies on
// priority, start, end and scanline being in scope.
1940 #define render_scanline_obj_extra_variables_alpha_obj(map_space)              \
1941   render_scanline_obj_extra_variables_color();                                \
1942   if((pixel_combine & 0x00000200) == 0)                                       \
1943   {                                                                           \
1944     render_scanline_obj_color32_##map_space(priority, start, end, scanline);  \
1945     return;                                                                   \
1946   }                                                                           \
1947
// The color16 and color32 obj renderers need only the common color locals;
// the map_space argument is ignored.
1948 #define render_scanline_obj_extra_variables_color16(map_space)                \
1949   render_scanline_obj_extra_variables_color()                                 \
1950
1951 #define render_scanline_obj_extra_variables_color32(map_space)                \
1952   render_scanline_obj_extra_variables_color()                                 \
1953
// Locals for the partial-alpha obj renderer: the common color locals plus
// base_pixel_combine, a copy of the initial pixel_combine value. The
// map_space argument is ignored.
1954 #define render_scanline_obj_extra_variables_partial_alpha(map_space)          \
1955   render_scanline_obj_extra_variables_color();                                \
1956   u32 base_pixel_combine = pixel_combine                                      \
1957
// Locals for the "copy" obj renderers: snapshots of the BLDCNT and DISPCNT
// registers, obj_enable taken from REG_WINOUT shifted right by 8, the
// layer_renderers pointer for the given layer type (tile or bitmap), and a
// 240-entry 16-bit copy buffer with its start/end indices and cursor.
// Expands without a trailing semicolon.
1958 #define render_scanline_obj_extra_variables_copy(type)                        \
1959   u32 bldcnt = io_registers[REG_BLDCNT];                                      \
1960   u32 dispcnt = io_registers[REG_DISPCNT];                                    \
1961   u32 obj_enable = io_registers[REG_WINOUT] >> 8;                             \
1962   render_scanline_layer_functions_##type();                                   \
1963   u32 copy_start, copy_end;                                                   \
1964   u16 copy_buffer[240];                                                       \
1965   u16 *copy_ptr                                                               \
1966
// Tile and bitmap flavours of the copy locals; they differ only in which
// layer renderer table layer_renderers points into. The map_space argument is
// ignored.
1967 #define render_scanline_obj_extra_variables_copy_tile(map_space)              \
1968   render_scanline_obj_extra_variables_copy(tile)                              \
1969
1970 #define render_scanline_obj_extra_variables_copy_bitmap(map_space)            \
1971   render_scanline_obj_extra_variables_copy(bitmap)                            \
1972
1973
// Dispatch one obj to the matching row renderer.
//  - Bit 0x100 of obj_attribute_0 set: affine obj; bit 13 of obj_attribute_0
//    picks the 8bpp renderer, otherwise 4bpp.
//  - Otherwise: vertical_offset is the row inside the obj (vcount - obj_y),
//    mirrored to obj_height - row - 1 when bit 13 of obj_attribute_1 is set.
//    The switch value combines bit 13 of obj_attribute_0 (as 0x2, 8bpp) with
//    bit 12 of obj_attribute_1 (as 0x1, the flip variant of obj_render).
// Both paths can execute `continue` (see obj_render and obj_render_affine),
// so the macro must be expanded inside a loop.
1974 #define render_scanline_obj_main(combine_op, alpha_op, map_space)             \
1975   if(obj_attribute_0 & 0x100)                                                 \
1976   {                                                                           \
1977     if((obj_attribute_0 >> 13) & 0x01)                                        \
1978     {                                                                         \
1979       obj_render_affine(combine_op, 8bpp, alpha_op, map_space);               \
1980     }                                                                         \
1981     else                                                                      \
1982     {                                                                         \
1983       obj_render_affine(combine_op, 4bpp, alpha_op, map_space);               \
1984     }                                                                         \
1985   }                                                                           \
1986   else                                                                        \
1987   {                                                                           \
1988     vertical_offset = vcount - obj_y;                                         \
1989                                                                               \
1990     if((obj_attribute_1 >> 13) & 0x01)                                        \
1991       vertical_offset = obj_height - vertical_offset - 1;                     \
1992                                                                               \
1993     switch(((obj_attribute_0 >> 12) & 0x02) |                                 \
1994      ((obj_attribute_1 >> 12) & 0x01))                                        \
1995     {                                                                         \
1996       case 0x0:                                                               \
1997         obj_render(combine_op, 4bpp, alpha_op, map_space, noflip);            \
1998         break;                                                                \
1999                                                                               \
2000       case 0x1:                                                               \
2001         obj_render(combine_op, 4bpp, alpha_op, map_space, flip);              \
2002         break;                                                                \
2003                                                                               \
2004       case 0x2:                                                               \
2005         obj_render(combine_op, 8bpp, alpha_op, map_space, noflip);            \
2006         break;                                                                \
2007                                                                               \
2008       case 0x3:                                                               \
2009         obj_render(combine_op, 8bpp, alpha_op, map_space, flip);              \
2010         break;                                                                \
2011     }                                                                         \
2012   }                                                                           \
2013
// OBJ renderer body for builders that use one alpha operation for every OBJ:
// simply forwards to render_scanline_obj_main.
#define render_scanline_obj_no_partial_alpha(combine_op, alpha_op, map_space) \
  render_scanline_obj_main(combine_op, alpha_op, map_space)

// OBJ renderer body for the partial_alpha builders. An OBJ whose attribute 0
// bits 10-11 are non-zero is drawn with the alpha_obj operation and
// pixel_combine set to 0x300; any other OBJ is drawn with the color32
// operation and base_pixel_combine. The alpha_op argument is not used.
#define render_scanline_obj_partial_alpha(combine_op, alpha_op, map_space)    \
  if((obj_attribute_0 & 0x0C00) == 0)                                         \
  {                                                                           \
    pixel_combine = base_pixel_combine;                                       \
    render_scanline_obj_main(combine_op, color32, map_space);                 \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    pixel_combine = 0x00000300;                                               \
    render_scanline_obj_main(combine_op, alpha_obj, map_space);               \
  }
2028
// Per-OBJ prologue for the transparent renderers: nothing to prepare.
#define render_scanline_obj_prologue_transparent(alpha_op)

// Per-OBJ prologue for the copy renderers.
//
// Computes the horizontal span of the OBJ (twice obj_width when attribute 0
// bit 9 is set), clamps it to [start, end), renders that span into
// copy_buffer with render_scanline_conditional_<type> and points copy_ptr at
// its first pixel. When the clamped span is empty the enclosing loop moves
// on to the next OBJ, so this must be expanded directly inside that loop.
#define render_scanline_obj_prologue_copy_body(type)                          \
  copy_start = obj_x;                                                         \
  copy_end = obj_x +                                                          \
   ((obj_attribute_0 & 0x200) ? (obj_width * 2) : obj_width);                 \
                                                                              \
  if(copy_start < start)                                                      \
    copy_start = start;                                                       \
  if(copy_end > end)                                                          \
    copy_end = end;                                                           \
                                                                              \
  if((copy_start >= end) || (copy_end <= start))                              \
  {                                                                           \
    continue;                                                                 \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    render_scanline_conditional_##type(copy_start, copy_end, copy_buffer,     \
     obj_enable, dispcnt, bldcnt, layer_renderers);                           \
    copy_ptr = copy_buffer + copy_start;                                      \
  }

// Copy prologue using the tile conditional renderer.
#define render_scanline_obj_prologue_copy_tile()                              \
  render_scanline_obj_prologue_copy_body(tile)

// Copy prologue using the bitmap conditional renderer.
#define render_scanline_obj_prologue_copy_bitmap()                            \
  render_scanline_obj_prologue_copy_body(bitmap)

// Dispatches on alpha_op (copy_tile or copy_bitmap) to one of the two
// prologues above.
#define render_scanline_obj_prologue_copy(alpha_op)                           \
  render_scanline_obj_prologue_##alpha_op()
2062
2063
// Defines render_scanline_obj_<alpha_op>_<map_space>(priority, start, end,
// scanline).
//
// The generated function walks the OBJs recorded in
// obj_priority_list[priority] for the scanline given by REG_VCOUNT. For each
// one it loads the three OAM attributes, derives position and size, runs the
// combine_op prologue (which may skip the OBJ) and then expands the
// partial_alpha_op renderer body.
//
// Many of the locals are not used directly here; they are declared for the
// renderer macros that are expanded inside the loop, so their names and types
// must not change.
#define render_scanline_obj_builder(combine_op, alpha_op, map_space,          \
 partial_alpha_op)                                                            \
void render_scanline_obj_##alpha_op##_##map_space(u32 priority,               \
 u32 start, u32 end, render_scanline_dest_##alpha_op *scanline)               \
{                                                                             \
  render_scanline_obj_extra_variables_##alpha_op(map_space);                  \
  s32 vcount = io_registers[REG_VCOUNT];                                      \
  u32 obj_count = obj_priority_count[priority][vcount];                       \
  u8 *obj_list = obj_priority_list[priority][vcount];                         \
  u8 *tile_base = vram + 0x10000;                                             \
  u8 *tile_ptr;                                                               \
  u16 *oam_ptr;                                                               \
  render_scanline_dest_##alpha_op *dest_ptr;                                  \
  u32 obj_attribute_0, obj_attribute_1, obj_attribute_2;                      \
  s32 obj_num, i;                                                             \
  s32 obj_x, obj_y;                                                           \
  s32 obj_size;                                                               \
  s32 obj_width, obj_height;                                                  \
  u32 tile_run, pixel_run;                                                    \
  u32 partial_tile_run, partial_tile_offset;                                  \
  u32 current_pixels, current_pixel, current_palette;                         \
  u32 vertical_offset;                                                        \
                                                                              \
  /* Must stay a for loop: the prologue may issue continue */                 \
  for(obj_num = 0; obj_num < obj_count; obj_num++)                            \
  {                                                                           \
    oam_ptr = &oam_ram[obj_list[obj_num] * 4];                                \
    obj_attribute_0 = oam_ptr[0];                                             \
    obj_attribute_1 = oam_ptr[1];                                             \
    obj_attribute_2 = oam_ptr[2];                                             \
                                                                              \
    /* Table index: attribute 0 bits 14-15, then attribute 1 bits 14-15 */    \
    obj_size = ((obj_attribute_0 >> 12) & 0x0C) | (obj_attribute_1 >> 14);    \
    obj_width = obj_width_table[obj_size];                                    \
                                                                              \
    /* Sign-extend the 9-bit X coordinate held in attribute 1 */              \
    obj_x = (s32)(obj_attribute_1 << 23) >> 23;                               \
                                                                              \
    render_scanline_obj_prologue_##combine_op(alpha_op);                      \
                                                                              \
    /* Y values above 160 wrap around to negative coordinates */              \
    obj_y = obj_attribute_0 & 0xFF;                                           \
    if(obj_y > 160)                                                           \
      obj_y -= 256;                                                           \
                                                                              \
    obj_height = obj_height_table[obj_size];                                  \
    render_scanline_obj_##partial_alpha_op(combine_op, alpha_op, map_space);  \
  }                                                                           \
}
2112
2113 render_scanline_obj_builder(transparent, normal, 1D, no_partial_alpha);
2114 render_scanline_obj_builder(transparent, normal, 2D, no_partial_alpha);
2115 render_scanline_obj_builder(transparent, color16, 1D, no_partial_alpha);
2116 render_scanline_obj_builder(transparent, color16, 2D, no_partial_alpha);
2117 render_scanline_obj_builder(transparent, color32, 1D, no_partial_alpha);
2118 render_scanline_obj_builder(transparent, color32, 2D, no_partial_alpha);
2119 render_scanline_obj_builder(transparent, alpha_obj, 1D, no_partial_alpha);
2120 render_scanline_obj_builder(transparent, alpha_obj, 2D, no_partial_alpha);
2121 render_scanline_obj_builder(transparent, partial_alpha, 1D, partial_alpha);
2122 render_scanline_obj_builder(transparent, partial_alpha, 2D, partial_alpha);
2123 render_scanline_obj_builder(copy, copy_tile, 1D, no_partial_alpha);
2124 render_scanline_obj_builder(copy, copy_tile, 2D, no_partial_alpha);
2125 render_scanline_obj_builder(copy, copy_bitmap, 1D, no_partial_alpha);
2126 render_scanline_obj_builder(copy, copy_bitmap, 2D, no_partial_alpha);
2127
2128
2129
2130 void order_obj(u32 video_mode)
2131 {
2132   s32 obj_num, priority, row;
2133   s32 obj_x, obj_y;
2134   s32 obj_size, obj_mode;
2135   s32 obj_width, obj_height;
2136   u32 obj_priority;
2137   u32 obj_attribute_0, obj_attribute_1, obj_attribute_2;
2138   s32 vcount = io_registers[REG_VCOUNT];
2139   u32 partial_tile_run, partial_tile_offset;
2140   u32 pixel_run;
2141   u32 current_count;
2142   u16 *oam_ptr = oam_ram + 508;
2143   u16 *dest_ptr;
2144   u8 *tile_base = vram + 0x10000;
2145   u8 *tile_ptr;
2146
2147   for(priority = 0; priority < 5; priority++)
2148   {
2149     for(row = 0; row < 160; row++)
2150     {
2151       obj_priority_count[priority][row] = 0;
2152     }
2153   }
2154
2155   for(row = 0; row < 160; row++)
2156   {
2157     obj_alpha_count[row] = 0;
2158   }
2159
2160   for(obj_num = 127; obj_num >= 0; obj_num--, oam_ptr -= 4)
2161   {
2162     obj_attribute_0 = oam_ptr[0];
2163     obj_attribute_2 = oam_ptr[2];
2164     obj_size = obj_attribute_0 & 0xC000;
2165     obj_priority = (obj_attribute_2 >> 10) & 0x03;
2166     obj_mode = (obj_attribute_0 >> 10) & 0x03;
2167
2168     if(((obj_attribute_0 & 0x0300) != 0x0200) && (obj_size != 0xC000) &&
2169      (obj_mode != 3) && ((video_mode < 3) ||
2170      ((obj_attribute_2 & 0x3FF) >= 512)))
2171     {
2172       obj_y = obj_attribute_0 & 0xFF;
2173       if(obj_y > 160)
2174         obj_y -= 256;
2175
2176       obj_attribute_1 = oam_ptr[1];
2177       obj_size = ((obj_size >> 12) & 0x0C) | (obj_attribute_1 >> 14);
2178       obj_height = obj_height_table[obj_size];
2179       obj_width = obj_width_table[obj_size];
2180
2181       if(obj_attribute_0 & 0x200)
2182       {
2183         obj_height *= 2;
2184         obj_width *= 2;
2185       }
2186
2187       if(((obj_y + obj_height) > 0) && (obj_y < 160))
2188       {
2189         obj_x = (s32)(obj_attribute_1 << 23) >> 23;
2190
2191         if(((obj_x + obj_width) > 0) && (obj_x < 240))
2192         {
2193           if(obj_y < 0)
2194           {
2195             obj_height += obj_y;
2196             obj_y = 0;
2197           }
2198
2199           if((obj_y + obj_height) >= 160)
2200           {
2201             obj_height = 160 - obj_y;
2202           }
2203
2204           if(obj_mode == 1)
2205           {
2206             for(row = obj_y; row < obj_y + obj_height; row++)
2207             {
2208               current_count = obj_priority_count[obj_priority][row];
2209               obj_priority_list[obj_priority][row][current_count] = obj_num;
2210               obj_priority_count[obj_priority][row] = current_count + 1;
2211               obj_alpha_count[row]++;
2212             }
2213           }
2214           else
2215           {
2216             if(obj_mode == 2)
2217             {
2218               obj_priority = 4;
2219             }
2220
2221             for(row = obj_y; row < obj_y + obj_height; row++)
2222             {
2223               current_count = obj_priority_count[obj_priority][row];
2224               obj_priority_list[obj_priority][row][current_count] = obj_num;
2225               obj_priority_count[obj_priority][row] = current_count + 1;
2226             }
2227           }
2228         }
2229       }
2230     }
2231   }
2232 }
2233
2234 u32 layer_order[16];
2235 u32 layer_count;
2236
2237 u32 order_layers(u32 layer_flags)
2238 {
2239   s32 priority, layer_number;
2240   layer_count = 0;
2241
2242   for(priority = 3; priority >= 0; priority--)
2243   {
2244     for(layer_number = 3; layer_number >= 0; layer_number--)
2245     {
2246       if(((layer_flags >> layer_number) & 1) &&
2247        ((io_registers[REG_BG0CNT + layer_number] & 0x03) == priority))
2248       {
2249         layer_order[layer_count] = layer_number;
2250         layer_count++;
2251       }
2252     }
2253
2254     if((obj_priority_count[priority][io_registers[REG_VCOUNT]] > 0)
2255      && (layer_flags & 0x10))
2256     {
2257       layer_order[layer_count] = priority | 0x04;
2258       layer_count++;
2259     }
2260   }
2261 }
2262
// Stores color into dest_ptr[_start] .. dest_ptr[_end - 1]. Declares the
// loop counter i itself, so it can be expanded once per scope. The bounds are
// parenthesized so that expression arguments (for example a ternary) are
// evaluated as a whole.
#define fill_line(_start, _end)                                               \
  u32 i;                                                                      \
                                                                              \
  for(i = (_start); i < (_end); i++)                                          \
  {                                                                           \
    dest_ptr[i] = color;                                                      \
  }

// Color preparation per destination type: the normal variant replaces the
// value by its palette_ram_converted entry, the others store it unchanged.
#define fill_line_color_normal()                                              \
  color = palette_ram_converted[color]

#define fill_line_color_alpha()

#define fill_line_color_color16()

#define fill_line_color_color32()

// Defines fill_line_<type>(color, dest_ptr, start, end), which fills the
// columns start .. end - 1 of dest_ptr with the prepared color.
#define fill_line_builder(type)                                               \
void fill_line_##type(u16 color, render_scanline_dest_##type *dest_ptr,       \
 u32 start, u32 end)                                                          \
{                                                                             \
  fill_line_color_##type();                                                   \
  fill_line(start, end);                                                      \
}
2288
2289 fill_line_builder(normal);
2290 fill_line_builder(alpha);
2291 fill_line_builder(color16);
2292 fill_line_builder(color32);
2293
2294
// Alpha blend two pixels (pixel_top and pixel_bottom).
//
// pixel_bottom is fetched from palette_ram_converted using the palette index
// in bits 16-24 of pixel_pair and spread over 32 bits with the 0x07E0F81F
// mask; pixel_top is expected to be in that spread form already. The result
// is (top * blend_a + bottom * blend_b) / 16, left in pixel_top. The macro
// has no trailing semicolon; the caller supplies it.

#define blend_pixel()                                                         \
  pixel_bottom = palette_ram_converted[(pixel_pair >> 16) & 0x1FF];           \
  pixel_bottom |= pixel_bottom << 16;                                         \
  pixel_bottom &= 0x07E0F81F;                                                 \
  pixel_top = ((blend_a * pixel_top) + (blend_b * pixel_bottom)) >> 4


// Alpha blend two pixels, allowing for saturation (individual channels > 31).
// The operation is optimized towards saturation not occurring: the three
// carry bits (0x08000000, 0x00010000, 0x00000020) are tested together first,
// and only when one is set is each overflowed channel forced to all ones.

#define blend_saturate_pixel()                                                \
  blend_pixel();                                                              \
  if(pixel_top & 0x08010020)                                                  \
  {                                                                           \
    pixel_top |= (pixel_top & 0x08000000) ? 0x07E00000 : 0;                   \
    pixel_top |= (pixel_top & 0x00010000) ? 0x0000F800 : 0;                   \
    pixel_top |= (pixel_top & 0x00000020) ? 0x0000001F : 0;                   \
  }
2321
// Fade operations on a spread-out pixel_top. Both scale pixel_top by
// blend / 16; brighten additionally adds upper, which the caller computes
// beforehand. Each macro already ends in a semicolon.
#define brighten_pixel()                                                      \
  pixel_top = ((pixel_top * blend) >> 4) + upper;

#define darken_pixel()                                                        \
  pixel_top *= blend;                                                         \
  pixel_top >>= 4;
2327
// True when pixel_pair has both bit 9 (0x200) and bit 26 (0x04000000) set.
// Reads the variable pixel_pair from the expansion site.
#define effect_condition_alpha                                                \
  ((pixel_pair & 0x04000200) == 0x04000200)

// True when pixel_source has bit 9 (0x200) set. The argument is parenthesized
// so that an expression such as (a | b) is tested as a whole.
#define effect_condition_fade(pixel_source)                                   \
  (((pixel_source) & 0x00000200) == 0x00000200)
2333
// Applies <expand_type>_pixel() to pixel_top. The 16-bit color in pixel_top
// is first spread over 32 bits with the 0x07E0F81F mask, the operation is
// run, and the result is masked and folded back so the final color sits in
// the low 16 bits. pixel_source is accepted for symmetry with expand_pixel
// but is not used. No trailing semicolon; the caller supplies it.
#define expand_pixel_no_dest(expand_type, pixel_source)                       \
  pixel_top |= pixel_top << 16;                                               \
  pixel_top &= 0x07E0F81F;                                                    \
  expand_type##_pixel();                                                      \
  pixel_top &= 0x07E0F81F;                                                    \
  pixel_top |= pixel_top >> 16

// Looks up the color for the palette index in the low 9 bits of pixel_source,
// applies <expand_type>_pixel() to it and stores the result through
// screen_dest_ptr. pixel_source is parenthesized so that an expression
// argument is masked as a whole before indexing the palette.
#define expand_pixel(expand_type, pixel_source)                               \
  pixel_top = palette_ram_converted[(pixel_source) & 0x1FF];                  \
  expand_pixel_no_dest(expand_type, pixel_source);                            \
  *screen_dest_ptr = pixel_top
2344
// Converts the columns start .. end - 1 of screen_src_ptr into 16-bit colors
// at screen_dest_ptr. Each source value is read into pixel_source; when
// effect_condition holds, the pixel goes through expand_pixel with
// expand_type, otherwise it is a plain palette lookup on its low 9 bits.
// Both pointers, end and i are modified in place.
#define expand_loop(expand_type, effect_condition, pixel_source)              \
  screen_src_ptr += start;                                                    \
  screen_dest_ptr += start;                                                   \
                                                                              \
  end -= start;                                                               \
                                                                              \
  for(i = 0; i < end; i++, screen_src_ptr++, screen_dest_ptr++)               \
  {                                                                           \
    pixel_source = *screen_src_ptr;                                           \
    if(!(effect_condition))                                                   \
    {                                                                         \
      *screen_dest_ptr =                                                      \
       palette_ram_converted[pixel_source & 0x1FF];                           \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      expand_pixel(expand_type, pixel_source);                                \
    }                                                                         \
  }
2367
2368
// Like expand_loop, but with two possible effects per pixel. A pixel_pair
// without bit 9 set is a plain palette lookup; one that also satisfies
// effect_condition_alpha is expanded with alpha_expand; any other pixel with
// bit 9 set is expanded with expand_type. Both pointers, end and i are
// modified in place.
#define expand_loop_partial_alpha(alpha_expand, expand_type)                  \
  screen_src_ptr += start;                                                    \
  screen_dest_ptr += start;                                                   \
                                                                              \
  end -= start;                                                               \
                                                                              \
  for(i = 0; i < end; i++, screen_src_ptr++, screen_dest_ptr++)               \
  {                                                                           \
    pixel_pair = *screen_src_ptr;                                             \
    if(!effect_condition_fade(pixel_pair))                                    \
    {                                                                         \
      *screen_dest_ptr =                                                      \
       palette_ram_converted[pixel_pair & 0x1FF];                             \
    }                                                                         \
    else if(effect_condition_alpha)                                           \
    {                                                                         \
      expand_pixel(alpha_expand, pixel_pair);                                 \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      expand_pixel(expand_type, pixel_pair);                                  \
    }                                                                         \
  }
2398
2399
// Runs expand_loop_partial_alpha with the cheaper blend operation when the
// two blend weights cannot overflow a channel (sum of at most 16), and with
// the saturating blend otherwise.
#define expand_partial_alpha(expand_type)                                     \
  if((blend_a + blend_b) <= 16)                                               \
  {                                                                           \
    expand_loop_partial_alpha(blend, expand_type);                            \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    expand_loop_partial_alpha(blend_saturate, expand_type);                   \
  }
2409
2410
2411
2412 // Blend top two pixels of scanline with each other.
2413
#ifdef RENDER_COLOR16_NORMAL

#ifndef ARM_ARCH

// Expansion pass for scanlines drawn by the normal renderer: intentionally a
// no-op. The function used to contain a palette lookup loop placed after an
// unconditional return; that unreachable loop has been removed.
void expand_normal(u16 *screen_ptr, u32 start, u32 end)
{
  (void)screen_ptr;
  (void)start;
  (void)end;
}

#endif

#else

// Without RENDER_COLOR16_NORMAL the call expands to nothing.
#define expand_normal(screen_ptr, start, end)

#endif
2443
2444
2445 void expand_blend(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2446  u32 start, u32 end);
2447
2448 #ifndef ARM_ARCH
2449
2450 void expand_blend(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2451  u32 start, u32 end)
2452 {
2453   u32 pixel_pair;
2454   u32 pixel_top, pixel_bottom;
2455   u32 bldalpha = io_registers[REG_BLDALPHA];
2456   u32 blend_a = bldalpha & 0x1F;
2457   u32 blend_b = (bldalpha >> 8) & 0x1F;
2458   u32 i;
2459
2460   if(blend_a > 16)
2461     blend_a = 16;
2462
2463   if(blend_b > 16)
2464     blend_b = 16;
2465
2466   // The individual colors can saturate over 31, this should be taken
2467   // care of in an alternate pass as it incurs a huge additional speedhit.
2468   if((blend_a + blend_b) > 16)
2469   {
2470     expand_loop(blend_saturate, effect_condition_alpha, pixel_pair);
2471   }
2472   else
2473   {
2474     expand_loop(blend, effect_condition_alpha, pixel_pair);
2475   }
2476 }
2477
2478 #endif
2479
2480 // Blend scanline with white.
2481
2482 void expand_darken(u16 *screen_src_ptr, u16 *screen_dest_ptr,
2483  u32 start, u32 end)
2484 {
2485   u32 pixel_top;
2486   s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);
2487   u32 i;
2488
2489   if(blend < 0)
2490     blend = 0;
2491
2492   expand_loop(darken, effect_condition_fade(pixel_top), pixel_top);
2493 }
2494
2495
2496 // Blend scanline with black.
2497
2498 void expand_brighten(u16 *screen_src_ptr, u16 *screen_dest_ptr,
2499  u32 start, u32 end)
2500 {
2501   u32 pixel_top;
2502   u32 blend = io_registers[REG_BLDY] & 0x1F;
2503   u32 upper;
2504   u32 i;
2505
2506   if(blend > 16)
2507     blend = 16;
2508
2509   upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;
2510   blend = 16 - blend;
2511
2512   expand_loop(brighten, effect_condition_fade(pixel_top), pixel_top);
2513
2514 }
2515
2516
2517 // Expand scanline such that if both top and bottom pass it's alpha,
2518 // if only top passes it's as specified, and if neither pass it's normal.
2519
2520 void expand_darken_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2521  u32 start, u32 end)
2522 {
2523   s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);
2524   u32 pixel_pair;
2525   u32 pixel_top, pixel_bottom;
2526   u32 bldalpha = io_registers[REG_BLDALPHA];
2527   u32 blend_a = bldalpha & 0x1F;
2528   u32 blend_b = (bldalpha >> 8) & 0x1F;
2529   u32 i;
2530
2531   if(blend < 0)
2532     blend = 0;
2533
2534   if(blend_a > 16)
2535     blend_a = 16;
2536
2537   if(blend_b > 16)
2538     blend_b = 16;
2539
2540   expand_partial_alpha(darken);
2541 }
2542
2543
2544 void expand_brighten_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2545  u32 start, u32 end)
2546 {
2547   s32 blend = io_registers[REG_BLDY] & 0x1F;
2548   u32 pixel_pair;
2549   u32 pixel_top, pixel_bottom;
2550   u32 bldalpha = io_registers[REG_BLDALPHA];
2551   u32 blend_a = bldalpha & 0x1F;
2552   u32 blend_b = (bldalpha >> 8) & 0x1F;
2553   u32 upper;
2554   u32 i;
2555
2556   if(blend > 16)
2557     blend = 16;
2558
2559   upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;
2560   blend = 16 - blend;
2561
2562   if(blend_a > 16)
2563     blend_a = 16;
2564
2565   if(blend_b > 16)
2566     blend_b = 16;
2567
2568   expand_partial_alpha(brighten);
2569 }
2570
2571
// Render an OBJ layer from start to end, depending on the type (1D or 2D)
// stored in dispcnt.
//
// Clears bit 2 of current_layer (leaving the OBJ priority) and calls the 1D
// renderer when dispcnt bit 6 is set, the 2D renderer otherwise. The body is
// wrapped in do { } while(0) so the whole macro behaves as one statement,
// including under an unbraced if; the caller supplies the semicolon.

#define render_obj_layer(type, dest, _start, _end)                            \
  do                                                                          \
  {                                                                           \
    current_layer &= ~0x04;                                                   \
    if(dispcnt & 0x40)                                                        \
      render_scanline_obj_##type##_1D(current_layer, _start, _end, dest);     \
    else                                                                      \
      render_scanline_obj_##type##_2D(current_layer, _start, _end, dest);     \
  } while(0)


// Render a target all the way with the background color as taken from the
// palette (palette entry 0 is passed to fill_line_<type>).

#define fill_line_bg(type, dest, _start, _end)                                \
  fill_line_##type(0, dest, _start, _end)
2588
2589
// Render all layers as they appear in the layer order.
//
// layer_order[0] is drawn first: a background goes through its
// <tile_alpha>_render_base routine, while an OBJ layer is preceded by a
// fill_line_bg pass. The remaining layer_count - 1 entries are drawn on top
// in order, backgrounds through <tile_alpha>_render_transparent. Always
// covers columns 0-239 of dest.

#define render_layers(tile_alpha, obj_alpha, dest)                            \
{                                                                             \
  current_layer = layer_order[0];                                             \
  if((current_layer & 0x04) == 0)                                             \
  {                                                                           \
    /* First entry is a background: draw it with its base renderer. */        \
    layer_renderers[current_layer].tile_alpha##_render_base(current_layer,    \
     0, 240, dest);                                                           \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    /* First entry is OBJ: fill with the backdrop, then draw the OBJs. */     \
    fill_line_bg(tile_alpha, dest, 0, 240);                                   \
    render_obj_layer(obj_alpha, dest, 0, 240);                                \
  }                                                                           \
                                                                              \
  /* Draw every remaining layer over what is already there. */                \
  layer_order_pos = 1;                                                        \
  while(layer_order_pos < layer_count)                                        \
  {                                                                           \
    current_layer = layer_order[layer_order_pos];                             \
    if((current_layer & 0x04) == 0)                                           \
    {                                                                         \
      layer_renderers[current_layer].                                         \
       tile_alpha##_render_transparent(current_layer, 0, 240, dest);          \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      render_obj_layer(obj_alpha, dest, 0, 240);                              \
    }                                                                         \
    layer_order_pos++;                                                        \
  }                                                                           \
}
2623
// Non-zero when an alpha blend can have a visible result: the two 5-bit
// BLDALPHA fields selected by mask 0x1F1F are not exactly 0x001F, and both
// the 0x3F and the 0x3F00 fields of BLDCNT have at least one bit set.
// (Presumably those BLDCNT fields are the first/second blend target
// selections - confirm against the GBA hardware documentation.)
#define render_condition_alpha \
  (((io_registers[REG_BLDALPHA] & 0x1F1F) != 0x001F) && \
   (io_registers[REG_BLDCNT] & 0x3F) && \
   (io_registers[REG_BLDCNT] & 0x3F00))
2628
// Non-zero when a brightness fade can have a visible result: the low five
// bits of BLDY are not zero and the 0x3F field of BLDCNT has at least one
// bit set.
#define render_condition_fade \
  ((io_registers[REG_BLDY] & 0x1F) && (io_registers[REG_BLDCNT] & 0x3F))
2632
// Renders the span [_start, _end) of one scanline into `scanline`, applying
// the colour special effect selected by bits 6-7 of `bldcnt`.
//
//   renderer        - layer rendering macro, invoked as
//                     renderer(tile_mode, obj_mode, destination)
//   layer_condition - non-zero when there are layers to render; otherwise
//                     the span is filled from palette_ram_converted[0]
//   alpha_condition - non-zero when mode 1 (alpha blend) should be honoured
//   fade_condition  - non-zero when modes 2/3 (fade to white/black) should
//                     be honoured
//
// The expansion reads `bldcnt`, `scanline` and `io_registers` from the
// enclosing scope. Several paths end in a bare `return`, so the macro may
// only be expanded inside a function returning void.
#define render_layers_color_effect(renderer, layer_condition, \
 alpha_condition, fade_condition, _start, _end) \
{ \
  if(!(layer_condition)) \
  { \
    /* No layers to draw: fill the span from palette entry 0, faded */ \
    /* first when color_combine_mask_a(5) holds. */ \
    u32 pixel_top = palette_ram_converted[0]; \
    switch((bldcnt >> 6) & 0x03) \
    { \
      /* Fade to white */ \
      case 0x02: \
      { \
        u32 blend, upper; \
        \
        if(!(color_combine_mask_a(5))) \
          break; \
        \
        blend = io_registers[REG_BLDY] & 0x1F; \
        if(blend > 16) \
          blend = 16; \
        \
        upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F; \
        blend = 16 - blend; \
        expand_pixel_no_dest(brighten, pixel_top); \
        break; \
      } \
      \
      /* Fade to black */ \
      case 0x03: \
      { \
        s32 blend; \
        \
        if(!(color_combine_mask_a(5))) \
          break; \
        \
        blend = 16 - (io_registers[REG_BLDY] & 0x1F); \
        if(blend < 0) \
          blend = 0; \
        \
        expand_pixel_no_dest(darken, pixel_top); \
        break; \
      } \
    } \
    fill_line_color16(pixel_top, scanline, _start, _end); \
  } \
  else if(!(obj_alpha_count[io_registers[REG_VCOUNT]] > 0)) \
  { \
    /* obj_alpha_count[VCOUNT] is not positive: only the alpha blend */ \
    /* path goes through an intermediate buffer. */ \
    switch((bldcnt >> 6) & 0x03) \
    { \
      /* Alpha blend */ \
      case 0x01: \
      { \
        u32 screen_buffer[240]; \
        \
        if(!(alpha_condition)) \
          break; \
        \
        renderer(alpha, alpha_obj, screen_buffer); \
        expand_blend(screen_buffer, scanline, _start, _end); \
        return; \
      } \
      \
      /* Fade to white */ \
      case 0x02: \
      { \
        if(!(fade_condition)) \
          break; \
        \
        renderer(color16, color16, scanline); \
        expand_brighten(scanline, scanline, _start, _end); \
        return; \
      } \
      \
      /* Fade to black */ \
      case 0x03: \
      { \
        if(!(fade_condition)) \
          break; \
        \
        renderer(color16, color16, scanline); \
        expand_darken(scanline, scanline, _start, _end); \
        return; \
      } \
    } \
    \
    /* Effect mode 0, or the selected effect's condition was false. */ \
    renderer(normal, normal, scanline); \
    expand_normal(scanline, _start, _end); \
  } \
  else \
  { \
    /* obj_alpha_count[VCOUNT] is positive: every path renders into */ \
    /* screen_buffer and then expands it into scanline. */ \
    u32 screen_buffer[240]; \
    \
    switch((bldcnt >> 6) & 0x03) \
    { \
      /* Alpha blend */ \
      case 0x01: \
      { \
        if(!(alpha_condition)) \
          break; \
        \
        renderer(alpha, alpha_obj, screen_buffer); \
        expand_blend(screen_buffer, scanline, _start, _end); \
        return; \
      } \
      \
      /* Fade to white */ \
      case 0x02: \
      { \
        if(!(fade_condition)) \
          break; \
        \
        renderer(color32, partial_alpha, screen_buffer); \
        expand_brighten_partial_alpha(screen_buffer, scanline, \
         _start, _end); \
        return; \
      } \
      \
      /* Fade to black */ \
      case 0x03: \
      { \
        if(!(fade_condition)) \
          break; \
        \
        renderer(color32, partial_alpha, screen_buffer); \
        expand_darken_partial_alpha(screen_buffer, scanline, \
         _start, _end); \
        return; \
      } \
    } \
    \
    /* Effect mode 0, or the selected effect's condition was false. */ \
    renderer(color32, partial_alpha, screen_buffer); \
    expand_blend(screen_buffer, scanline, _start, _end); \
  } \
}
2775
2776
2777 // Renders an entire scanline from 0 to 240, based on current color mode.
2778
2779 void render_scanline_tile(u16 *scanline, u32 dispcnt)
2780 {
2781   u32 current_layer;
2782   u32 layer_order_pos;
2783   u32 bldcnt = io_registers[REG_BLDCNT];
2784   render_scanline_layer_functions_tile();
2785
2786   render_layers_color_effect(render_layers, layer_count,
2787    render_condition_alpha, render_condition_fade, 0, 240);
2788 }
2789
2790 void render_scanline_bitmap(u16 *scanline, u32 dispcnt)
2791 {
2792   u32 bldcnt = io_registers[REG_BLDCNT];
2793   render_scanline_layer_functions_bitmap();
2794   u32 current_layer;
2795   u32 layer_order_pos;
2796
2797   fill_line_bg(normal, scanline, 0, 240);
2798
2799   for(layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++)
2800   {
2801     current_layer = layer_order[layer_order_pos];
2802     if(current_layer & 0x04)
2803     {
2804       render_obj_layer(normal, scanline, 0, 240);
2805     }
2806     else
2807     {
2808       layer_renderers->normal_render(0, 240, scanline);
2809     }
2810   }
2811 }
2812
// Renders the layers for the span [start, end) into dest, honouring
// enable_flags: bit 4 (0x10) gates the OBJ entries and bit n gates BG
// layer n. Walks layer_order from layer_order_pos; the first drawable entry
// is rendered as the base and later ones transparently on top. If no entry
// is drawable the span is filled with fill_line_bg instead.
//
// Uses start, end, enable_flags, layer_renderers, layer_count, layer_order,
// current_layer and layer_order_pos from the expansion scope, and relies on
// the GNU C __label__ extension for its local exit label.

#define render_layers_conditional(tile_alpha, obj_alpha, dest) \
{ \
  __label__ layers_done; \
  current_layer = layer_order[layer_order_pos]; \
  \
  if(enable_flags & 0x10) \
  { \
    /* OBJ enabled: skip leading BG entries whose enable bit is clear. */ \
    while(!(current_layer & 0x04) && \
     !((1 << current_layer) & enable_flags)) \
    { \
      layer_order_pos++; \
      current_layer = layer_order[layer_order_pos]; \
      \
      /* Every entry was skipped, only the background is left. */ \
      if(layer_order_pos == layer_count) \
      { \
        fill_line_bg(tile_alpha, dest, start, end); \
        goto layers_done; \
      } \
    } \
    \
    if(!(current_layer & 0x04)) \
    { \
      /* A BG comes first: render it as the base layer. */ \
      layer_renderers[current_layer]. \
       tile_alpha##_render_base(current_layer, start, end, dest); \
    } \
    else \
    { \
      /* OBJ comes first: draw the background, then the OBJ over it. */ \
      fill_line_bg(tile_alpha, dest, start, end); \
      render_obj_layer(obj_alpha, dest, start, end); \
    } \
    \
    /* Everything after the first entry is drawn transparently. */ \
    layer_order_pos++; \
    while(layer_order_pos < layer_count) \
    { \
      current_layer = layer_order[layer_order_pos]; \
      if(!(current_layer & 0x04)) \
      { \
        if(enable_flags & (1 << current_layer)) \
        { \
          layer_renderers[current_layer]. \
           tile_alpha##_render_transparent(current_layer, start, end, dest); \
        } \
      } \
      else \
      { \
        render_obj_layer(obj_alpha, dest, start, end); \
      } \
      layer_order_pos++; \
    } \
  } \
  else \
  { \
    /* OBJ disabled: skip OBJ entries and BG entries that are not */ \
    /* enabled until the first usable BG is found. */ \
    while((current_layer & 0x04) || !((1 << current_layer) & enable_flags)) \
    { \
      layer_order_pos++; \
      current_layer = layer_order[layer_order_pos]; \
      \
      /* Every entry was skipped, only the background is left. */ \
      if(layer_order_pos == layer_count) \
      { \
        fill_line_bg(tile_alpha, dest, start, end); \
        goto layers_done; \
      } \
    } \
    \
    /* The first usable BG becomes the base layer. */ \
    layer_renderers[current_layer].tile_alpha##_render_base(current_layer, \
     start, end, dest); \
    \
    /* Remaining enabled BGs go on top; OBJ entries are ignored. */ \
    layer_order_pos++; \
    while(layer_order_pos < layer_count) \
    { \
      current_layer = layer_order[layer_order_pos]; \
      if(!(current_layer & 0x04) && ((1 << current_layer) & enable_flags)) \
      { \
        layer_renderers[current_layer]. \
         tile_alpha##_render_transparent(current_layer, start, end, dest); \
      } \
      layer_order_pos++; \
    } \
  } \
  \
  layers_done: \
    ; \
}
2906
2907
2908 // Render all of the BG and OBJ in a tiled scanline from start to end ONLY if
2909 // enable_flag allows that layer/OBJ. Also conditionally render color effects.
2910
2911 void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline,
2912  u32 enable_flags, u32 dispcnt, u32 bldcnt, tile_layer_render_struct
2913  *layer_renderers)
2914 {
2915   u32 current_layer;
2916   u32 layer_order_pos = 0;
2917
2918   render_layers_color_effect(render_layers_conditional,
2919    (layer_count && (enable_flags & 0x1F)),
2920    ((enable_flags & 0x20) && render_condition_alpha),
2921    ((enable_flags & 0x20) && render_condition_fade), start, end);
2922 }
2923
2924
2925 // Render the BG and OBJ in a bitmap scanline from start to end ONLY if
2926 // enable_flag allows that layer/OBJ. Also conditionally render color effects.
2927
2928 void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline,
2929  u32 enable_flags, u32 dispcnt, u32 bldcnt, bitmap_layer_render_struct
2930  *layer_renderers)
2931 {
2932   u32 current_layer;
2933   u32 layer_order_pos;
2934
2935   fill_line_bg(normal, scanline, start, end);
2936
2937   for(layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++)
2938   {
2939     current_layer = layer_order[layer_order_pos];
2940     if(current_layer & 0x04)
2941     {
2942       if(enable_flags & 0x10)
2943       {
2944         render_obj_layer(normal, scanline, start, end);
2945       }
2946     }
2947     else
2948     {
2949       if(enable_flags & 0x04)
2950         layer_renderers->normal_render(start, end, scanline);
2951     }
2952   }
2953 }
2954
2955
// Loads the horizontal state of window <window_number>:
//   window_N_enable - 6-bit field of `winin` at bit offset N * 8
//   window_N_x1     - REG_WINnH shifted right by 8, clamped to 240
//   window_N_x2     - low byte of REG_WINnH, clamped to 240
// The variables must already be declared at the expansion site. The last
// statement is deliberately left without a semicolon; the caller supplies
// it.
#define window_x_coords(window_number) \
  window_##window_number##_enable = \
   (winin >> ((window_number) * 8)) & 0x3F; \
  \
  window_##window_number##_x1 = \
   io_registers[REG_WIN##window_number##H] >> 8; \
  if(window_##window_number##_x1 > 240) \
    window_##window_number##_x1 = 240; \
  \
  window_##window_number##_x2 = \
   io_registers[REG_WIN##window_number##H] & 0xFF; \
  if(window_##window_number##_x2 > 240) \
    window_##window_number##_x2 = 240
2969
// Declares window_N_x1/x2/y1/y2/enable for window <window_number> and
// decides whether the window applies to the current line `vcount`.
// y1 and y2 come from REG_WINnV (y1 = value shifted right by 8, y2 = low
// byte). The window is considered when y1 <= 227 and either y2 > 227 or
// vcount falls inside the vertical range:
//   y1 >  y2 (wrapped): vcount <= y2 or vcount > y1
//   y1 <= y2          : y1 <= vcount < y2
// If it applies, window_x_coords loads x1/x2/enable; otherwise x1 and x2
// are both set to 240 (and window_N_enable is left unassigned).
// NOTE(review): the wrapped test includes line y2 and excludes line y1,
// the opposite of the non-wrapped test - confirm this is intended.
#define window_coords(window_number) \
  u32 window_##window_number##_x1, window_##window_number##_x2; \
  u32 window_##window_number##_y1, window_##window_number##_y2; \
  u32 window_##window_number##_enable; \
  \
  window_##window_number##_y1 = \
   io_registers[REG_WIN##window_number##V] >> 8; \
  window_##window_number##_y2 = \
   io_registers[REG_WIN##window_number##V] & 0xFF; \
  \
  if((window_##window_number##_y1 <= 227) && \
   ((window_##window_number##_y2 > 227) || \
    ((window_##window_number##_y1 > window_##window_number##_y2) ? \
     ((vcount <= window_##window_number##_y2) || \
      (vcount > window_##window_number##_y1)) : \
     ((vcount >= window_##window_number##_y1) && \
      (vcount < window_##window_number##_y2))))) \
  { \
    window_x_coords(window_number); \
  } \
  else \
  { \
    window_##window_number##_x1 = 240; \
    window_##window_number##_x2 = 240; \
  }
3009
// Renders the span [start, end) with the enable flags of window
// <window_type>, unless the span is empty (start == end).
#define render_window_segment(type, start, end, window_type) \
  if(start != end) \
  { \
    render_window_segment_unequal(type, start, end, window_type); \
  }
3016
// Unconditionally calls render_scanline_conditional_<type> for the span
// [start, end) with window_<window_type>_enable as the enable flags.
// Expands to a single call expression without a trailing semicolon and
// takes scanline, dispcnt, bldcnt and layer_renderers from the expansion
// scope.
#define render_window_segment_unequal(type, start, end, window_type) \
  render_scanline_conditional_##type(start, end, scanline, \
   window_##window_type##_enable, dispcnt, bldcnt, layer_renderers)
3020
// Renders the segment [start, end) of window <window_type> after clipping
// it against [clip_start, clip_end). Nothing is rendered when start == end
// or when the segment does not reach into the clip range.
#define render_window_segment_clip(type, clip_start, clip_end, start, end, \
 window_type) \
{ \
  if(start != end) \
  { \
    if(start >= clip_start) \
    { \
      /* Segment begins inside the clip range (or beyond its end). */ \
      if(end <= clip_end) \
      { \
        render_window_segment_unequal(type, start, end, window_type); \
      } \
      else if(start < clip_end) \
      { \
        render_window_segment_unequal(type, start, clip_end, window_type); \
      } \
    } \
    else if(end > clip_start) \
    { \
      /* Segment begins left of the clip range and reaches into it. */ \
      render_window_segment_unequal(type, clip_start, \
       ((end > clip_end) ? clip_end : end), window_type); \
    } \
  } \
}
3054
// Renders the span [start, end) split by the horizontal bounds of window 1.
// window_1_x1 == 240 means window 1 does not apply on this line, so the
// whole span uses the "out" enable flags. Otherwise the line is cut into
// three pieces at window_1_x1 / window_1_x2 (wrapping around the line edge
// when x1 > x2), each clipped against [start, end) and rendered with the
// window 1 or "out" flags as appropriate.
#define render_window_clip_1(type, start, end) \
  if(window_1_x1 == 240) \
  { \
    render_window_segment(type, start, end, out); \
  } \
  else if(window_1_x1 <= window_1_x2) \
  { \
    /* Window covers [x1, x2). */ \
    render_window_segment_clip(type, start, end, 0, window_1_x1, out); \
    render_window_segment_clip(type, start, end, window_1_x1, window_1_x2, \
     1); \
    render_window_segment_clip(type, start, end, window_1_x2, 240, out); \
  } \
  else \
  { \
    /* Wrapped window: covers [0, x2) and [x1, 240). */ \
    render_window_segment_clip(type, start, end, 0, window_1_x2, 1); \
    render_window_segment_clip(type, start, end, window_1_x2, window_1_x1, \
     out); \
    render_window_segment_clip(type, start, end, window_1_x1, 240, 1); \
  }
3077
/* Renders scanline columns [start, end) for the OBJ-window case: first the
 * span is drawn with the "out" (outside-window) type, then the OBJ copy pass
 * is run over the same span. DISPCNT bit 6 (0x40) selects the 1D variant of
 * the copy routine, otherwise the 2D variant is used.
 *
 * Expands to bare statements WITHOUT a trailing semicolon, so every use must
 * supply one, and the expansion must sit inside braces (it contains an
 * unbraced if/else). dispcnt and scanline must be in scope.
 *
 * The parameter list used to be followed by a stray ';', which made every
 * expansion begin with an empty statement; that has been removed. */
#define render_window_clip_obj(type, start, end)                              \
  render_window_segment(type, start, end, out);                               \
  if(dispcnt & 0x40)                                                          \
    render_scanline_obj_copy_##type##_1D(4, start, end, scanline);            \
  else                                                                        \
    render_scanline_obj_copy_##type##_2D(4, start, end, scanline)
3084
3085
/* Runs render_window_clip_obj over the intersection of [start, end) with
 * [clip_start, clip_end). Nothing is rendered when start == end or when the
 * two ranges do not overlap.
 *
 * Note that the "else" after the first branch chains directly onto the
 * following "if(end > clip_end)" (the blank continuation line between them
 * is only whitespace). Arguments are evaluated more than once, so they must
 * be free of side effects. */
#define render_window_segment_clip_obj(type, clip_start, clip_end, start,     \
 end)                                                                         \
{                                                                             \
  if(start != end)                                                            \
  {                                                                           \
    /* Segment begins left of the clip range: trim its left edge */           \
    if(start < clip_start)                                                    \
    {                                                                         \
      if(end > clip_start)                                                    \
      {                                                                       \
        if(end > clip_end)                                                    \
        {                                                                     \
          render_window_clip_obj(type, clip_start, clip_end);                 \
        }                                                                     \
        else                                                                  \
        {                                                                     \
          render_window_clip_obj(type, clip_start, end);                      \
        }                                                                     \
      }                                                                       \
    }                                                                         \
    else                                                                      \
                                                                              \
    /* Segment begins inside or right of the clip range: trim right edge */   \
    if(end > clip_end)                                                        \
    {                                                                         \
      if(start < clip_end)                                                    \
      {                                                                       \
        render_window_clip_obj(type, start, clip_end);                        \
      }                                                                       \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      render_window_clip_obj(type, start, end);                               \
    }                                                                         \
  }                                                                           \
}                                                                             \
3120
3121
/* Same splitting as render_window_clip_1, but the regions outside window 1
 * go through the OBJ-window path (render_window_segment_clip_obj /
 * render_window_clip_obj) instead of being drawn as plain "out" segments.
 * Regions inside window 1 are still drawn with window type 1.
 *
 * Handles both the wrapped case (window_1_x1 > window_1_x2) and the normal
 * case; when window_1_x1 == 240 the whole span takes the OBJ-window path.
 * Selected by render_window_multi when back == 1_obj. */
#define render_window_clip_1_obj(type, start, end)                            \
  if(window_1_x1 != 240)                                                      \
  {                                                                           \
    if(window_1_x1 > window_1_x2)                                             \
    {                                                                         \
      render_window_segment_clip(type, start, end, 0, window_1_x2, 1);        \
      render_window_segment_clip_obj(type, start, end, window_1_x2,           \
       window_1_x1);                                                          \
      render_window_segment_clip(type, start, end, window_1_x1, 240, 1);      \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      render_window_segment_clip_obj(type, start, end, 0, window_1_x1);       \
      render_window_segment_clip(type, start, end, window_1_x1, window_1_x2,  \
       1);                                                                    \
      render_window_segment_clip_obj(type, start, end, window_1_x2, 240);     \
    }                                                                         \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    render_window_clip_obj(type, start, end);                                 \
  }                                                                           \
3144
3145
3146
/* Renders a full 240-column scanline when exactly one rectangular window
 * (window_number = 0 or 1) is enabled. Columns inside the window are drawn
 * with that window's type, the rest with the "out" type. A window whose x1 is
 * greater than its x2 wraps around the screen edge, so its inside region is
 * [0, x2) plus [x1, 240).
 *
 * The expansion declares the local "winin" and invokes window_coords, so it
 * must appear at the start of its own block and at most once per scope.
 * (winin is presumably consumed by the nested macros -- confirm there.) */
#define render_window_single(type, window_number)                             \
  u32 winin = io_registers[REG_WININ];                                        \
  window_coords(window_number);                                               \
  if(window_##window_number##_x1 > window_##window_number##_x2)               \
  {                                                                           \
    render_window_segment(type, 0, window_##window_number##_x2,               \
     window_number);                                                          \
    render_window_segment(type, window_##window_number##_x2,                  \
     window_##window_number##_x1, out);                                       \
    render_window_segment(type, window_##window_number##_x1, 240,             \
     window_number);                                                          \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    render_window_segment(type, 0, window_##window_number##_x1, out);         \
    render_window_segment(type, window_##window_number##_x1,                  \
     window_##window_number##_x2, window_number);                             \
    render_window_segment(type, window_##window_number##_x2, 240, out);       \
  }                                                                           \
3166
/* Renders a full 240-column scanline with two window layers. Columns inside
 * the "front" window are drawn with the front window's type; all remaining
 * columns are handed to render_window_clip_<back>, where back is one of
 * 1, obj or 1_obj (pasted into the macro name). A front window with
 * x1 > x2 wraps around the screen edge.
 *
 * Unlike render_window_single this does not call window_coords or declare
 * winin; the expansion site must have done both already. */
#define render_window_multi(type, front, back)                                \
  if(window_##front##_x1 > window_##front##_x2)                               \
  {                                                                           \
    render_window_segment(type, 0, window_##front##_x2, front);               \
    render_window_clip_##back(type, window_##front##_x2,                      \
     window_##front##_x1);                                                    \
    render_window_segment(type, window_##front##_x1, 240, front);             \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    render_window_clip_##back(type, 0, window_##front##_x1);                  \
    render_window_segment(type, window_##front##_x1, window_##front##_x2,     \
     front);                                                                  \
    render_window_clip_##back(type, window_##front##_x2, 240);                \
  }                                                                           \
3182
/* Generates render_scanline_window_<type>(scanline, dispcnt), the windowed
 * scanline renderer for the given type (instantiated for tile and bitmap).
 *
 * The generated function dispatches on dispcnt >> 13, i.e. the three window
 * enable bits: bit 0 = window 0, bit 1 = window 1, bit 2 = OBJ window. A
 * value of 0 (no windows) matches no case and renders nothing; the caller is
 * expected to use the non-windowed renderer in that situation.
 *
 * The locals declared here (vcount, winout, bldcnt, window_out_enable and,
 * per case, winin / window_obj_enable) are not all referenced directly in
 * this macro; they are presumably picked up by name inside the nested
 * render_window_* / window_coords macros -- confirm before renaming. */
#define render_scanline_window_builder(type)                                  \
void render_scanline_window_##type(u16 *scanline, u32 dispcnt)                \
{                                                                             \
  u32 vcount = io_registers[REG_VCOUNT];                                      \
  u32 winout = io_registers[REG_WINOUT];                                      \
  u32 bldcnt = io_registers[REG_BLDCNT];                                      \
  u32 window_out_enable = winout & 0x3F;                                      \
                                                                              \
  render_scanline_layer_functions_##type();                                   \
                                                                              \
  switch(dispcnt >> 13)                                                       \
  {                                                                           \
    /* Just window 0 */                                                       \
    case 0x01:                                                                \
    {                                                                         \
      render_window_single(type, 0);                                          \
      break;                                                                  \
    }                                                                         \
                                                                              \
    /* Just window 1 */                                                       \
    case 0x02:                                                                \
    {                                                                         \
      render_window_single(type, 1);                                          \
      break;                                                                  \
    }                                                                         \
                                                                              \
    /* Windows 0 and 1 (window 0 in front) */                                 \
    case 0x03:                                                                \
    {                                                                         \
      u32 winin = io_registers[REG_WININ];                                    \
      window_coords(0);                                                       \
      window_coords(1);                                                       \
      render_window_multi(type, 0, 1);                                        \
      break;                                                                  \
    }                                                                         \
                                                                              \
    /* Just OBJ windows */                                                    \
    case 0x04:                                                                \
    {                                                                         \
      u32 window_obj_enable = winout >> 8;                                    \
      render_window_clip_obj(type, 0, 240);                                   \
      break;                                                                  \
    }                                                                         \
                                                                              \
    /* Window 0 and OBJ window */                                             \
    case 0x05:                                                                \
    {                                                                         \
      u32 window_obj_enable = winout >> 8;                                    \
      u32 winin = io_registers[REG_WININ];                                    \
      window_coords(0);                                                       \
      render_window_multi(type, 0, obj);                                      \
      break;                                                                  \
    }                                                                         \
                                                                              \
    /* Window 1 and OBJ window */                                             \
    case 0x06:                                                                \
    {                                                                         \
      u32 window_obj_enable = winout >> 8;                                    \
      u32 winin = io_registers[REG_WININ];                                    \
      window_coords(1);                                                       \
      render_window_multi(type, 1, obj);                                      \
      break;                                                                  \
    }                                                                         \
                                                                              \
    /* Window 0, 1, and OBJ window */                                         \
    case 0x07:                                                                \
    {                                                                         \
      u32 window_obj_enable = winout >> 8;                                    \
      u32 winin = io_registers[REG_WININ];                                    \
      window_coords(0);                                                       \
      window_coords(1);                                                       \
      render_window_multi(type, 0, 1_obj);                                    \
      break;                                                                  \
    }                                                                         \
  }                                                                           \
}                                                                             \
3259
// Instantiate the windowed scanline renderers:
// render_scanline_window_tile() and render_scanline_window_bitmap().
render_scanline_window_builder(tile);
render_scanline_window_builder(bitmap);

// Per video mode mask applied to DISPCNT bits 8-12 before layer ordering
// (see update_scanline). Only six entries exist, one for each of modes 0-5.
u32 active_layers[6] = { 0x1F, 0x17, 0x1C, 0x14, 0x14, 0x14 };

// Dimensions of the native GBA picture, and the resolution currently
// selected by the video_resolution_* functions.
u32 small_resolution_width = 240;
u32 small_resolution_height = 160;
u32 resolution_width, resolution_height;
3268
3269 void update_scanline()
3270 {
3271   u32 pitch = get_screen_pitch();
3272   u32 dispcnt = io_registers[REG_DISPCNT];
3273   u32 display_flags = (dispcnt >> 8) & 0x1F;
3274   u32 vcount = io_registers[REG_VCOUNT];
3275   u16 *screen_offset = get_screen_pixels() + (vcount * pitch);
3276   u32 video_mode = dispcnt & 0x07;
3277   u32 current_layer;
3278
3279   // If OAM has been modified since the last scanline has been updated then
3280   // reorder and reprofile the OBJ lists.
3281   if(oam_update)
3282   {
3283     order_obj(video_mode);
3284     oam_update = 0;
3285   }
3286
3287   order_layers((dispcnt >> 8) & active_layers[video_mode]);
3288
3289   if(skip_next_frame)
3290     return;
3291
3292 #ifdef WIZ_BUILD
3293   if (screen_scale == unscaled_rot || screen_scale == scaled_aspect_rot)
3294   {
3295     if (rot_line_count == rot_lines_total)
3296     {
3297       rot_line_count = 0;
3298       if (vcount - rot_lines_total < FONT_HEIGHT && rot_msg_buff[0])
3299       {
3300         print_string_ext(rot_msg_buff, 0xFFFF, 0x0000, 0, 0,
3301           rot_buffer, 240, 0, vcount - rot_lines_total, rot_lines_total);
3302         if (vcount >= FONT_HEIGHT)
3303           rot_msg_buff[0] = 0;
3304       }
3305       if (screen_scale == unscaled_rot)
3306         do_rotated_blit(gpsp_gp2x_screen, rot_buffer, vcount);
3307       else
3308         upscale_aspect_row(gpsp_gp2x_screen, rot_buffer, vcount/3);
3309     }
3310     screen_offset = &rot_buffer[rot_line_count++ * 240];
3311   }
3312 #endif
3313
3314   // If the screen is in in forced blank draw pure white.
3315   if(dispcnt & 0x80)
3316   {
3317     fill_line_color16(0xFFFF, screen_offset, 0, 240);
3318   }
3319   else
3320   {
3321     if(video_mode < 3)
3322     {
3323       if(dispcnt >> 13)
3324       {
3325         render_scanline_window_tile(screen_offset, dispcnt);
3326       }
3327       else
3328       {
3329         render_scanline_tile(screen_offset, dispcnt);
3330       }
3331     }
3332     else
3333     {
3334       if(dispcnt >> 13)
3335         render_scanline_window_bitmap(screen_offset, dispcnt);
3336       else
3337         render_scanline_bitmap(screen_offset, dispcnt);
3338     }
3339   }
3340
3341   affine_reference_x[0] += (s16)io_registers[REG_BG2PB];
3342   affine_reference_y[0] += (s16)io_registers[REG_BG2PD];
3343   affine_reference_x[1] += (s16)io_registers[REG_BG3PB];
3344   affine_reference_y[1] += (s16)io_registers[REG_BG3PD];
3345 }
3346
3347 #ifdef PSP_BUILD
3348
3349 u32 screen_flip = 0;
3350
3351 void flip_screen()
3352 {
3353   if(video_direct == 0)
3354   {
3355     u32 *old_ge_cmd_ptr = ge_cmd_ptr;
3356     sceKernelDcacheWritebackAll();
3357
3358     // Render the current screen
3359     ge_cmd_ptr = ge_cmd + 2;
3360     GE_CMD(TBP0, ((u32)screen_pixels & 0x00FFFFFF));
3361     GE_CMD(TBW0, (((u32)screen_pixels & 0xFF000000) >> 8) |
3362      GBA_SCREEN_WIDTH);
3363     ge_cmd_ptr = old_ge_cmd_ptr;
3364
3365     sceGeListEnQueue(ge_cmd, ge_cmd_ptr, gecbid, NULL);
3366
3367     // Flip to the next screen
3368     screen_flip ^= 1;
3369
3370     if(screen_flip)
3371       screen_pixels = screen_texture + (240 * 160 * 2);
3372     else
3373       screen_pixels = screen_texture;
3374   }
3375 }
3376
3377 #elif defined(WIZ_BUILD)
3378
3379 void flip_screen()
3380 {
3381   if((resolution_width == small_resolution_width) &&
3382    (resolution_height == small_resolution_height))
3383   {
3384     switch(screen_scale)
3385     {
3386       case scaled_aspect:
3387         upscale_aspect(gpsp_gp2x_screen, screen_pixels);
3388         break;
3389       case unscaled_rot:
3390         do_rotated_blit(gpsp_gp2x_screen, rot_buffer, 160);
3391         rot_line_count = 0;
3392         goto no_clean;
3393       case scaled_aspect_rot:
3394         rot_line_count = 0;
3395         goto no_clean;
3396     }
3397   }
3398   warm_cache_op_all(WOP_D_CLEAN);
3399
3400 no_clean:
3401   pollux_video_flip();
3402   screen_pixels = (u16 *)gpsp_gp2x_screen + screen_offset;
3403 }
3404
3405 #elif defined(PND_BUILD)
3406
// Pandora: flips the framebuffer and renders the next frame into whatever
// buffer fb_flip_screen() hands back.
void flip_screen()
{
  screen_pixels = fb_flip_screen();
}
3411
3412 #else
3413
/* integer_scale_copy_N(): stores current_pixel into the N destination
 * columns ending at x2, then moves x2 left by N. They expand to bare
 * statements without a trailing semicolon and expect current_scanline_ptr,
 * current_pixel and x2 to be in scope. */
#define integer_scale_copy_2()                                                \
  current_scanline_ptr[x2] = current_pixel;                                   \
  current_scanline_ptr[x2 - 1] = current_pixel;                               \
  x2 -= 2

#define integer_scale_copy_3()                                                \
  current_scanline_ptr[x2] = current_pixel;                                   \
  current_scanline_ptr[x2 - 1] = current_pixel;                               \
  current_scanline_ptr[x2 - 2] = current_pixel;                               \
  x2 -= 3

#define integer_scale_copy_4()                                                \
  current_scanline_ptr[x2] = current_pixel;                                   \
  current_scanline_ptr[x2 - 1] = current_pixel;                               \
  current_scanline_ptr[x2 - 2] = current_pixel;                               \
  current_scanline_ptr[x2 - 3] = current_pixel;                               \
  x2 -= 4

/* Widens 160 lines of 240 pixels in place by scale_factor (2, 3 or 4).
 * Each line is walked right to left so that the destination columns
 * [x * scale, x * scale + scale) are never below a source column that still
 * has to be read. current_scanline_ptr is advanced by pitch per line and is
 * left pointing past the last line.
 *
 * Expects x, x2, y (signed), current_pixel, current_scanline_ptr, pitch and
 * video_scale in scope; video_scale must equal scale_factor.
 *
 * Three extra stores used to follow the copy helper. They ran after x2 had
 * already been moved left, so they overwrote source pixels that had not been
 * read yet and, at x == 0, wrote to indices -1..-3 in front of the line.
 * They have been removed. */
#define integer_scale_horizontal(scale_factor)                                \
  for(y = 0; y < 160; y++)                                                    \
  {                                                                           \
    for(x = 239, x2 = (240 * video_scale) - 1; x >= 0; x--)                   \
    {                                                                         \
      current_pixel = current_scanline_ptr[x];                                \
      integer_scale_copy_##scale_factor();                                    \
    }                                                                         \
    current_scanline_ptr += pitch;                                            \
  }
3445
3446 void flip_screen()
3447 {
3448   if((video_scale != 1) && (current_scale != unscaled))
3449   {
3450     s32 x, y;
3451     s32 x2, y2;
3452     u16 *screen_ptr = get_screen_pixels();
3453     u16 *current_scanline_ptr = screen_ptr;
3454     u32 pitch = get_screen_pitch();
3455     u16 current_pixel;
3456     u32 i;
3457
3458     switch(video_scale)
3459     {
3460       case 2:
3461         integer_scale_horizontal(2);
3462         break;
3463
3464       case 3:
3465         integer_scale_horizontal(3);
3466         break;
3467
3468       default:
3469       case 4:
3470         integer_scale_horizontal(4);
3471         break;
3472
3473     }
3474
3475     for(y = 159, y2 = (160 * video_scale) - 1; y >= 0; y--)
3476     {
3477       for(i = 0; i < video_scale; i++)
3478       {
3479         memcpy(screen_ptr + (y2 * pitch),
3480          screen_ptr + (y * pitch), 480 * video_scale);
3481         y2--;
3482       }
3483     }
3484   }
3485 #ifdef GP2X_BUILD
3486   {
3487     if((resolution_width == small_resolution_width) &&
3488      (resolution_height == small_resolution_height))
3489     {
3490       switch (screen_scale)
3491       {
3492         case unscaled:
3493         {
3494           SDL_Rect srect = {0, 0, 240, 160};
3495           SDL_Rect drect = {40, 40, 240, 160};
3496           warm_cache_op_all(WOP_D_CLEAN);
3497           SDL_BlitSurface(screen, &srect, hw_screen, &drect);
3498           return;
3499         }
3500         case scaled_aspect:
3501         {
3502           SDL_Rect drect = {0, 10, 0, 0};
3503           warm_cache_op_all(WOP_D_CLEAN);
3504           SDL_BlitSurface(screen, NULL, hw_screen, &drect);
3505           return;
3506         }
3507         case scaled_aspect_sw:
3508         {
3509           upscale_aspect(hw_screen->pixels, get_screen_pixels());
3510           return;
3511         }
3512         case fullscreen:
3513           break;
3514       }
3515     }
3516     warm_cache_op_all(WOP_D_CLEAN);
3517     SDL_BlitSurface(screen, NULL, hw_screen, NULL);
3518   }
3519 #else
3520   SDL_Flip(screen);
3521 #endif
3522 }
3523
3524 #endif
3525
3526 u32 frame_to_render;
3527
3528 void update_screen()
3529 {
3530   if(!skip_next_frame)
3531     flip_screen();
3532 }
3533
3534 #ifdef PSP_BUILD
3535
// PSP: sets up the display mode and the GU state, registers the GE finish
// callback, and builds the small GE command list (written through GE_CMD)
// that draws the GBA screen texture as a single sprite. flip_screen later
// patches that list and queues it every frame.
void init_video()
{
  sceDisplaySetMode(0, PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT);

  sceDisplayWaitVblankStart();
  sceDisplaySetFrameBuf((void*)psp_gu_vram_base, PSP_LINE_SIZE,
   PSP_DISPLAY_PIXEL_FORMAT_565, PSP_DISPLAY_SETBUF_NEXTFRAME);

  sceGuInit();

  // One-off GU state setup: 16bit 565 draw/display buffer at VRAM offset 0,
  // scissor on, texturing on with bilinear filtering, blending off.
  sceGuStart(GU_DIRECT, display_list);
  sceGuDrawBuffer(GU_PSM_5650, (void*)0, PSP_LINE_SIZE);
  sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
   (void*)0, PSP_LINE_SIZE);
  sceGuClear(GU_COLOR_BUFFER_BIT);

  sceGuOffset(2048 - (PSP_SCREEN_WIDTH / 2), 2048 - (PSP_SCREEN_HEIGHT / 2));
  sceGuViewport(2048, 2048, PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT);

  sceGuScissor(0, 0, PSP_SCREEN_WIDTH + 1, PSP_SCREEN_HEIGHT + 1);
  sceGuEnable(GU_SCISSOR_TEST);
  sceGuTexMode(GU_PSM_5650, 0, 0, GU_FALSE);
  sceGuTexFunc(GU_TFX_REPLACE, GU_TCC_RGBA);
  sceGuTexFilter(GU_LINEAR, GU_LINEAR);
  sceGuEnable(GU_TEXTURE_2D);

  sceGuFrontFace(GU_CW);
  sceGuDisable(GU_BLEND);

  sceGuFinish();
  sceGuSync(0, 0);

  sceDisplayWaitVblankStart();
  sceGuDisplay(GU_TRUE);

  // Register Ge_Finish_Callback to run when a queued list finishes; the
  // returned id is what flip_screen passes to sceGeListEnQueue.
  PspGeCallbackData gecb;
  gecb.signal_func = NULL;
  gecb.signal_arg = NULL;
  gecb.finish_func = Ge_Finish_Callback;
  gecb.finish_arg = NULL;
  gecbid = sceGeSetCallback(&gecb);

  // Two vertices of five floats each. set_gba_resolution later rewrites
  // entries 2, 3, 7 and 8 with screen positions, so the layout is presumably
  // (u, v, x, y, z) per vertex -- confirm against the VTYPE setting below.
  screen_vertex[0] = 0 + 0.5;
  screen_vertex[1] = 0 + 0.5;
  screen_vertex[2] = 0 + 0.5;
  screen_vertex[3] = 0 + 0.5;
  screen_vertex[4] = 0;
  screen_vertex[5] = GBA_SCREEN_WIDTH - 0.5;
  screen_vertex[6] = GBA_SCREEN_HEIGHT - 0.5;
  screen_vertex[7] = PSP_SCREEN_WIDTH - 0.5;
  screen_vertex[8] = PSP_SCREEN_HEIGHT - 0.5;
  screen_vertex[9] = 0;

  // Set framebuffer to PSP VRAM
  GE_CMD(FBP, ((u32)psp_gu_vram_base & 0x00FFFFFF));
  GE_CMD(FBW, (((u32)psp_gu_vram_base & 0xFF000000) >> 8) | PSP_LINE_SIZE);
  // Set texture 0 to the screen texture
  // (these two commands are list entries 2 and 3, which flip_screen rewrites
  // each frame)
  GE_CMD(TBP0, ((u32)screen_texture & 0x00FFFFFF));
  GE_CMD(TBW0, (((u32)screen_texture & 0xFF000000) >> 8) | GBA_SCREEN_WIDTH);
  // Set the texture size to 256 by 256 (2^8 by 2^8)
  GE_CMD(TSIZE0, (8 << 8) | 8);
  // Flush the texture cache
  GE_CMD(TFLUSH, 0);
  // Use 2D coordinates, no indices, no weights, 32bit float positions,
  // 32bit float texture coordinates
  GE_CMD(VTYPE, (1 << 23) | (0 << 11) | (0 << 9) |
   (3 << 7) | (0 << 5) | (0 << 2) | 3);
  // Set the base of the index list pointer to 0
  GE_CMD(BASE, 0);
  // Set the rest of index list pointer to 0 (not being used)
  GE_CMD(IADDR, 0);
  // Set the base of the screen vertex list pointer
  GE_CMD(BASE, ((u32)screen_vertex & 0xFF000000) >> 8);
  // Set the rest of the screen vertex list pointer
  GE_CMD(VADDR, ((u32)screen_vertex & 0x00FFFFFF));
  // Primitive kick: render sprite (primitive 6), 2 vertices
  GE_CMD(PRIM, (6 << 16) | 2);
  // Done with commands
  GE_CMD(FINISH, 0);
  // Raise signal interrupt
  GE_CMD(SIGNAL, 0);
  GE_CMD(NOP, 0);
  GE_CMD(NOP, 0);
}
3620
3621 #elif defined(WIZ_BUILD) || defined(PND_BUILD)
3622
// WIZ / Pandora: nothing to do here; this function is intentionally empty
// on these builds.
void init_video()
{
}
3626
3627 #else
3628
// SDL builds: initializes SDL video and joystick support and creates the
// output surface(s), sized by video_scale.
//
// On GP2X a 320x240 (times video_scale) hardware screen is opened and the
// emulator renders into a separate 240x160 (times video_scale) surface that
// flip_screen blits onto it. Elsewhere the SDL video surface itself is the
// render target.
//
// NOTE(review): the results of SDL_Init, SDL_SetVideoMode and
// SDL_CreateRGBSurface are not checked.
void init_video()
{
  SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK | SDL_INIT_NOPARACHUTE);

#ifdef GP2X_BUILD
  SDL_GP2X_AllowGfxMemory(NULL, 0);

  hw_screen = SDL_SetVideoMode(320 * video_scale, 240 * video_scale,
   16, SDL_HWSURFACE);

  screen = SDL_CreateRGBSurface(SDL_HWSURFACE, 240 * video_scale,
   160 * video_scale, 16, 0xFFFF, 0xFFFF, 0xFFFF, 0);

  warm_change_cb_upper(WCB_C_BIT|WCB_B_BIT, 1);
#else
  screen = SDL_SetVideoMode(240 * video_scale, 160 * video_scale, 16, 0);
#endif
  SDL_ShowCursor(0);
}
3648
3649 #endif
3650
// Scaling mode requested for the GBA picture (set via set_gba_resolution).
video_scale_type screen_scale = scaled_aspect;
// Scaling mode consulted by the SDL flip_screen when deciding whether to
// integer-scale in software.
video_scale_type current_scale = scaled_aspect;
// Texture filter applied when the picture is scaled by hardware.
video_filter_type screen_filter = filter_bilinear;
3654
3655
3656 #ifdef PSP_BUILD
3657
3658 void video_resolution_large()
3659 {
3660   if(video_direct != 1)
3661   {
3662     video_direct = 1;
3663     screen_pixels = psp_gu_vram_base;
3664     screen_pitch = 512;
3665     sceGuStart(GU_DIRECT, display_list);
3666     sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3667      (void*)0, PSP_LINE_SIZE);
3668     sceGuFinish();
3669   }
3670 }
3671
3672 void set_gba_resolution(video_scale_type scale)
3673 {
3674   u32 filter_linear = 0;
3675   screen_scale = scale;
3676   switch(scale)
3677   {
3678     case unscaled:
3679       screen_vertex[2] = 120 + 0.5;
3680       screen_vertex[3] = 56 + 0.5;
3681       screen_vertex[7] = GBA_SCREEN_WIDTH + 120 - 0.5;
3682       screen_vertex[8] = GBA_SCREEN_HEIGHT + 56 - 0.5;
3683       break;
3684
3685     case scaled_aspect:
3686       screen_vertex[2] = 36 + 0.5;
3687       screen_vertex[3] = 0 + 0.5;
3688       screen_vertex[7] = 408 + 36 - 0.5;
3689       screen_vertex[8] = PSP_SCREEN_HEIGHT - 0.5;
3690       break;
3691
3692     case fullscreen:
3693       screen_vertex[2] = 0;
3694       screen_vertex[3] = 0;
3695       screen_vertex[7] = PSP_SCREEN_WIDTH;
3696       screen_vertex[8] = PSP_SCREEN_HEIGHT;
3697       break;
3698   }
3699
3700   sceGuStart(GU_DIRECT, display_list);
3701   if(screen_filter == filter_bilinear)
3702     sceGuTexFilter(GU_LINEAR, GU_LINEAR);
3703   else
3704     sceGuTexFilter(GU_NEAREST, GU_NEAREST);
3705
3706   sceGuFinish();
3707   sceGuSync(0, 0);
3708
3709   clear_screen(0x0000);
3710 }
3711
3712 void video_resolution_small()
3713 {
3714   if(video_direct != 0)
3715   {
3716     set_gba_resolution(screen_scale);
3717     video_direct = 0;
3718     screen_pixels = screen_texture;
3719     screen_flip = 0;
3720     screen_pitch = 240;
3721     sceGuStart(GU_DIRECT, display_list);
3722     sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3723      (void*)0, PSP_LINE_SIZE);
3724     sceGuFinish();
3725   }
3726 }
3727
3728 void clear_screen(u16 color)
3729 {
3730   u32 i;
3731   u16 *src_ptr = get_screen_pixels();
3732
3733   sceGuSync(0, 0);
3734
3735   for(i = 0; i < (512 * 272); i++, src_ptr++)
3736   {
3737     *src_ptr = color;
3738   }
3739
3740   // I don't know why this doesn't work.
3741 /*  color = (((color & 0x1F) * 255 / 31) << 0) |
3742    ((((color >> 5) & 0x3F) * 255 / 63) << 8) |
3743    ((((color >> 11) & 0x1F) * 255 / 31) << 16) | (0xFF << 24);
3744
3745   sceGuStart(GU_DIRECT, display_list);
3746   sceGuDrawBuffer(GU_PSM_5650, (void*)0, PSP_LINE_SIZE);
3747   //sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3748   // (void*)0, PSP_LINE_SIZE);
3749   sceGuClearColor(color);
3750   sceGuClear(GU_COLOR_BUFFER_BIT);
3751   sceGuFinish();
3752   sceGuSync(0, 0); */
3753 }
3754
3755 #elif defined(WIZ_BUILD)
3756
3757 void video_resolution_large()
3758 {
3759   screen_offset = 0;
3760   resolution_width = 320;
3761   resolution_height = 240;
3762
3763   fb_use_buffers(1);
3764   flip_screen();
3765   clear_screen(0);
3766   wiz_lcd_set_portrait(0);
3767 }
3768
3769 void video_resolution_small()
3770 {
3771   fb_use_buffers(4);
3772
3773   switch (screen_scale)
3774   {
3775     case unscaled:
3776       screen_offset = 320*40 + 40;
3777       wiz_lcd_set_portrait(0);
3778       break;
3779     case scaled_aspect:
3780       screen_offset = 320*(80 - 14) + 80;
3781       wiz_lcd_set_portrait(0);
3782       break;
3783     case unscaled_rot:
3784       wiz_lcd_set_portrait(1);
3785       rot_lines_total = 4;
3786       rot_line_count = 0;
3787       break;
3788     case scaled_aspect_rot:
3789       wiz_lcd_set_portrait(1);
3790       rot_lines_total = 3;
3791       rot_line_count = 0;
3792       break;
3793   }
3794
3795   flip_screen();
3796   clear_screen(0);
3797
3798   resolution_width = 240;
3799   resolution_height = 160;
3800 }
3801
3802 void set_gba_resolution(video_scale_type scale)
3803 {
3804   screen_scale = scale;
3805 }
3806
3807 void clear_screen(u16 color)
3808 {
3809   u32 col = ((u32)color << 16) | color;
3810   u32 *p = gpsp_gp2x_screen;
3811   int c = 320*240/2;
3812   while (c-- > 0)
3813     *p++ = col;
3814 }
3815
3816 #elif defined(PND_BUILD)
3817
3818 void video_resolution_large()
3819 {
3820   resolution_width = 400;
3821   resolution_height = 272;
3822
3823   fb_set_mode(400, 272, 1, 15, screen_filter);
3824   flip_screen();
3825   clear_screen(0);
3826 }
3827
3828 void video_resolution_small()
3829 {
3830   resolution_width = 240;
3831   resolution_height = 160;
3832
3833   fb_set_mode(240, 160, 4, screen_scale, screen_filter);
3834   flip_screen();
3835   clear_screen(0);
3836 }
3837
3838 void set_gba_resolution(video_scale_type scale)
3839 {
3840   screen_scale = scale;
3841 }
3842
3843 void clear_screen(u16 color)
3844 {
3845   u32 col = ((u32)color << 16) | color;
3846   u32 *p = (u32 *)get_screen_pixels();
3847   int c = resolution_width * resolution_height / 2;
3848   while (c-- > 0)
3849     *p++ = col;
3850 }
3851
3852 #else
3853
3854 void video_resolution_large()
3855 {
3856   current_scale = unscaled;
3857
3858 #ifdef GP2X_BUILD
3859   SDL_FreeSurface(screen);
3860   SDL_GP2X_AllowGfxMemory(NULL, 0);
3861     hw_screen = SDL_SetVideoMode(320, 240, 16, SDL_HWSURFACE);
3862   screen = SDL_CreateRGBSurface(SDL_HWSURFACE, 320, 240, 16, 0xFFFF,
3863    0xFFFF, 0xFFFF, 0);
3864   resolution_width = 320;
3865     resolution_height = 240;
3866   SDL_ShowCursor(0);
3867
3868   warm_change_cb_upper(WCB_C_BIT|WCB_B_BIT, 1);
3869 #else
3870   screen = SDL_SetVideoMode(480, 272, 16, 0);
3871   resolution_width = 480;
3872   resolution_height = 272;
3873 #endif
3874 }
3875
3876 void video_resolution_small()
3877 {
3878   current_scale = screen_scale;
3879
3880 #ifdef GP2X_BUILD
3881   int w, h;
3882   SDL_FreeSurface(screen);
3883   SDL_GP2X_AllowGfxMemory(NULL, 0);
3884
3885   w = 320; h = 240;
3886   if (screen_scale == scaled_aspect || screen_scale == fullscreen)
3887   {
3888     w = small_resolution_width * video_scale;
3889     h = small_resolution_height * video_scale;
3890   }
3891   if (screen_scale == scaled_aspect) h += 20;
3892   hw_screen = SDL_SetVideoMode(w, h, 16, SDL_HWSURFACE);
3893
3894   w = small_resolution_width * video_scale;
3895   if (screen_scale == scaled_aspect_sw)
3896     w = 320;
3897   screen = SDL_CreateRGBSurface(SDL_HWSURFACE,
3898    w, small_resolution_height * video_scale,
3899    16, 0xFFFF, 0xFFFF, 0xFFFF, 0);
3900
3901   SDL_ShowCursor(0);
3902
3903   warm_change_cb_upper(WCB_C_BIT|WCB_B_BIT, 1);
3904 #else
3905   screen = SDL_SetVideoMode(small_resolution_width * video_scale,
3906    small_resolution_height * video_scale, 16, 0);
3907 #endif
3908   resolution_width = small_resolution_width;
3909   resolution_height = small_resolution_height;
3910 }
3911
3912 void set_gba_resolution(video_scale_type scale)
3913 {
3914   if(screen_scale != scale)
3915   {
3916     screen_scale = scale;
3917     switch(scale)
3918     {
3919       case unscaled:
3920       case scaled_aspect:
3921       case fullscreen:
3922         small_resolution_width = 240 * video_scale;
3923         small_resolution_height = 160 * video_scale;
3924         break;
3925     }
3926   }
3927 }
3928
3929 void clear_screen(u16 color)
3930 {
3931   u16 *dest_ptr = get_screen_pixels();
3932   u32 line_skip = get_screen_pitch() - screen->w;
3933   u32 x, y;
3934
3935   for(y = 0; y < screen->h; y++)
3936   {
3937     for(x = 0; x < screen->w; x++, dest_ptr++)
3938     {
3939       *dest_ptr = color;
3940     }
3941     dest_ptr += line_skip;
3942   }
3943 }
3944
3945 #endif
3946
3947 u16 *copy_screen()
3948 {
3949   u16 *copy = malloc(240 * 160 * 2);
3950   memcpy(copy, get_screen_pixels(), 240 * 160 * 2);
3951   return copy;
3952 }
3953
3954 void blit_to_screen(u16 *src, u32 w, u32 h, u32 dest_x, u32 dest_y)
3955 {
3956   u32 pitch = get_screen_pitch();
3957   u16 *dest_ptr = get_screen_pixels() + dest_x + (dest_y * pitch);
3958
3959   s32 w1 = dest_x + w > pitch ? pitch - dest_x : w;
3960   u16 *src_ptr = src;
3961   s32 x, y;
3962
3963   for(y = 0; y < h; y++)
3964   {
3965     for(x = 0; x < w1; x++)
3966     {
3967       dest_ptr[x] = src_ptr[x];
3968     }
3969     src_ptr += w;
3970     dest_ptr += pitch;
3971   }
3972 }
3973
3974 void print_string_ext(const char *str, u16 fg_color, u16 bg_color,
3975  u32 x, u32 y, void *_dest_ptr, u32 pitch, u32 pad, u32 h_offset, u32 height)
3976 {
3977   u16 *dest_ptr = (u16 *)_dest_ptr + (y * pitch) + x;
3978   u8 current_char = str[0];
3979   u32 current_row;
3980   u32 glyph_offset;
3981   u32 i = 0, i2, i3, h;
3982   u32 str_index = 1;
3983   u32 current_x = x;
3984
3985   if(y + height > resolution_height)
3986       return;
3987
3988   while(current_char)
3989   {
3990     if(current_char == '\n')
3991     {
3992       y += FONT_HEIGHT;
3993       current_x = x;
3994       dest_ptr = get_screen_pixels() + (y * pitch) + x;
3995     }
3996     else
3997     {
3998       glyph_offset = _font_offset[current_char];
3999       current_x += FONT_WIDTH;
4000       glyph_offset += h_offset;
4001       for(i2 = h_offset, h = 0; i2 < FONT_HEIGHT && h < height; i2++, h++, glyph_offset++)
4002       {
4003         current_row = _font_bits[glyph_offset];
4004         for(i3 = 0; i3 < FONT_WIDTH; i3++)
4005         {
4006           if((current_row >> (15 - i3)) & 0x01)
4007             *dest_ptr = fg_color;
4008           else
4009             *dest_ptr = bg_color;
4010           dest_ptr++;
4011         }
4012         dest_ptr += (pitch - FONT_WIDTH);
4013       }
4014       dest_ptr = dest_ptr - (pitch * h) + FONT_WIDTH;
4015     }
4016
4017     i++;
4018
4019     current_char = str[str_index];
4020
4021     if((i < pad) && (current_char == 0))
4022     {
4023       current_char = ' ';
4024     }
4025     else
4026     {
4027       str_index++;
4028     }
4029
4030     if(current_x + FONT_WIDTH > resolution_width /* EDIT */)
4031     {
4032       while (current_char && current_char != '\n')
4033       {
4034         current_char = str[str_index++];
4035       }
4036     }
4037   }
4038 }
4039
4040 void print_string(const char *str, u16 fg_color, u16 bg_color,
4041  u32 x, u32 y)
4042 {
4043 #ifdef WIZ_BUILD
4044   if ((screen_scale == unscaled_rot || screen_scale == scaled_aspect_rot) &&
4045    (resolution_width == small_resolution_width) &&
4046    (resolution_height == small_resolution_height))
4047   {
4048     snprintf(rot_msg_buff, sizeof(rot_msg_buff), "%s", str);
4049     return;
4050   }
4051 #endif
4052   print_string_ext(str, fg_color, bg_color, x, y, get_screen_pixels(),
4053    get_screen_pitch(), 0, 0, FONT_HEIGHT);
4054 }
4055
4056 void print_string_pad(const char *str, u16 fg_color, u16 bg_color,
4057  u32 x, u32 y, u32 pad)
4058 {
4059   print_string_ext(str, fg_color, bg_color, x, y, get_screen_pixels(),
4060    get_screen_pitch(), pad, 0, FONT_HEIGHT);
4061 }
4062
// Pixel position at which the on-screen debug_screen_printf() draws its next
// string; debug_screen_newline() and debug_screen_clear() reset/advance it.
u32 debug_cursor_x = 0;
u32 debug_cursor_y = 0;
4065
4066 #ifdef STDIO_DEBUG
4067
void debug_screen_clear()
{
  // STDIO_DEBUG build: debug text goes to stdout, nothing to clear.
}
4071
void debug_screen_start()
{
  // STDIO_DEBUG build: no video mode change is made for debug output.
}
4075
void debug_screen_end()
{
  // STDIO_DEBUG build: nothing to restore.
}
4079
void debug_screen_update()
{
  // STDIO_DEBUG build: there is no debug screen to flip.
}
4083
// STDIO_DEBUG build: printf-style debug output written to stdout.
void debug_screen_printf(const char *format, ...)
{
  va_list args;

  va_start(args, format);
  vfprintf(stdout, format, args);
  va_end(args);
}
4092
4093 void debug_screen_newline(u32 count)
4094 {
4095   printf("\n");
4096 }
4097
4098
4099 #else
4100
4101 void debug_screen_clear()
4102 {
4103   debug_cursor_x = 0;
4104   debug_cursor_y = 0;
4105   clear_screen(0x0000);
4106 }
4107
// Enters the on-screen debug display: switches to the large video
// resolution, then resets the cursor and clears the screen.
void debug_screen_start()
{
  video_resolution_large();

  debug_screen_clear();
}
4113
// Leaves the on-screen debug display by switching back to the small video
// resolution.
void debug_screen_end()
{
  video_resolution_small();
}
4118
// Presents whatever has been drawn to the debug screen by flipping the
// screen.
void debug_screen_update()
{
  flip_screen();
}
4123
4124 void debug_screen_printf(const char *format, ...)
4125 {
4126   char str_buffer[512];
4127   u32 str_buffer_length;
4128   va_list ap;
4129
4130   va_start(ap, format);
4131   str_buffer_length = vsnprintf(str_buffer, 512, format, ap);
4132   va_end(ap);
4133
4134   printf("printing debug string %s at %d %d\n", str_buffer,
4135    debug_cursor_x, debug_cursor_y);
4136
4137   print_string(str_buffer, 0xFFFF, 0x0000, debug_cursor_x, debug_cursor_y);
4138   debug_cursor_x += FONT_WIDTH * str_buffer_length;
4139 }
4140
4141 void debug_screen_newline(u32 count)
4142 {
4143   debug_cursor_x = 0;
4144   debug_cursor_y += FONT_HEIGHT * count;
4145 }
4146
4147 #endif
4148
// Prints a printf-style debug message followed by one newline.
// The message is formatted here (at most 511 characters are kept) and handed
// to debug_screen_printf() as a plain "%s" argument: a va_list cannot be
// forwarded to a variadic function, which would read it as an ordinary
// argument and print garbage.
void debug_screen_printl(const char *format, ...)
{
  char line_buffer[512];
  va_list ap;

  va_start(ap, format);
  if(vsnprintf(line_buffer, sizeof(line_buffer), format, ap) < 0)
    line_buffer[0] = 0;   // formatting failed: print an empty line
  va_end(ap);

  debug_screen_printf("%s", line_buffer);
  debug_screen_newline(1);
}
4159
4160
4161 #define video_savestate_builder(type)                                         \
4162 void video_##type##_savestate(file_tag_type savestate_file)                   \
4163 {                                                                             \
4164   file_##type##_array(savestate_file, affine_reference_x);                    \
4165   file_##type##_array(savestate_file, affine_reference_y);                    \
4166 }                                                                             \
4167
4168 video_savestate_builder(read);
4169 video_savestate_builder(write_mem);
4170
4171