wiz port wip
[gpsp.git] / video.c
1 /* gameplaySP
2  *
3  * Copyright (C) 2006 Exophase <exophase@gmail.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of
8  * the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
19
20 #include "common.h"
21 #include "font.h"
22
23 #ifdef PSP_BUILD
24
25 #include <pspctrl.h>
26
27 #include <pspkernel.h>
28 #include <pspdebug.h>
29 #include <pspdisplay.h>
30
31 #include <pspgu.h>
32 #include <psppower.h>
33 #include <psprtc.h>
34
// PSP-only state, all pointing at hard-coded addresses. ge_cmd and
// ge_cmd_ptr both start at 0x441FC000; GE_CMD() writes command words through
// ge_cmd_ptr and advances it. screen_vertex starts 0x100 bytes after that.
// NOTE(review): these look like uncached VRAM addresses - confirm against
// the PSP memory map.
35 static float *screen_vertex = (float *)0x441FC100;
36 static u32 *ge_cmd = (u32 *)0x441FC000;
37 static u16 *psp_gu_vram_base = (u16 *)(0x44000000);
38 static u32 *ge_cmd_ptr = (u32 *)0x441FC000;
39 static u32 gecbid;
40 static u32 video_direct = 0;
41
// 32-word display list, aligned to 16 bytes.
42 static u32 __attribute__((aligned(16))) display_list[32];
43
// Size of the GBA frame in pixels.
44 #define GBA_SCREEN_WIDTH 240
45 #define GBA_SCREEN_HEIGHT 160
46
// Size of the PSP screen in pixels. PSP_LINE_SIZE is presumably the
// framebuffer stride in pixels - confirm against its users.
47 #define PSP_SCREEN_WIDTH 480
48 #define PSP_SCREEN_HEIGHT 272
49 #define PSP_LINE_SIZE 512
50
51 #define PSP_ALL_BUTTON_MASK 0xFFFF
52
// Command identifiers for GE_CMD(), which shifts the chosen identifier into
// bits 24-31 of the command word.
53 #define GE_CMD_FBP    0x9C
54 #define GE_CMD_FBW    0x9D
55 #define GE_CMD_TBP0   0xA0
56 #define GE_CMD_TBW0   0xA8
57 #define GE_CMD_TSIZE0 0xB8
58 #define GE_CMD_TFLUSH 0xCB
59 #define GE_CMD_CLEAR  0xD3
60 #define GE_CMD_VTYPE  0x12
61 #define GE_CMD_BASE   0x10
62 #define GE_CMD_VADDR  0x01
63 #define GE_CMD_IADDR  0x02
64 #define GE_CMD_PRIM   0x04
65 #define GE_CMD_FINISH 0x0F
66 #define GE_CMD_SIGNAL 0x0C
67 #define GE_CMD_NOP    0x00
68
// Appends one command word at ge_cmd_ptr and advances ge_cmd_ptr by one
// word. The word holds the GE_CMD_<cmd> identifier in bits 24-31, OR'd with
// operand; operand is used as given and is not masked.
//
//   cmd     - suffix of a GE_CMD_* constant (FBP, TFLUSH, ...)
//   operand - value OR'd into the command word
//
// The body is wrapped in do { } while(0) so both statements stay together
// when the macro is the body of an unbraced if/else. The identifier is
// converted to u32 before shifting because identifiers >= 0x80 would
// otherwise be shifted into the sign bit of an int.
#define GE_CMD(cmd, operand)                                                  \
  do                                                                          \
  {                                                                           \
    *ge_cmd_ptr = (((u32)(GE_CMD_##cmd) << 24) | (operand));                  \
    ge_cmd_ptr++;                                                             \
  } while(0)
72
// All three pointers start at the same address: 0x4000000 plus
// 512 * 272 * 2 bytes. 512 and 272 match PSP_LINE_SIZE and
// PSP_SCREEN_HEIGHT, so this is presumably the area just past one 16-bit
// framebuffer - confirm.
// NOTE(review): the base here is 0x4000000 while psp_gu_vram_base uses
// 0x44000000; verify that the difference is intentional.
73 static u16 *screen_texture = (u16 *)(0x4000000 + (512 * 272 * 2));
74 static u16 *current_screen_texture = (u16 *)(0x4000000 + (512 * 272 * 2));
75 static u16 *screen_pixels = (u16 *)(0x4000000 + (512 * 272 * 2));
// Pitch returned by get_screen_pitch() on PSP builds; starts at 240, the
// same value as GBA_SCREEN_WIDTH.
76 static u32 screen_pitch = 240;
77
// Callback that takes an id and a user argument and does nothing.
// Presumably registered as the GE "finish" callback elsewhere in the file -
// confirm.
78 static void Ge_Finish_Callback(int id, void *arg)
79 {
80 }
81
// PSP build: the current pixel buffer is the screen_pixels variable.
82 #define get_screen_pixels()                                                   \
83   screen_pixels                                                               \
84
// PSP build: the pitch is the screen_pitch variable.
85 #define get_screen_pitch()                                                    \
86   screen_pitch                                                                \
87
88 #else
89
// Non-PSP build. On GP2X builds an extra hw_screen surface is declared. On
// Wiz builds plain SDL is included and SDL_GP2X_AllowGfxMemory is replaced
// by an empty stub; its empty parameter list means calls are accepted with
// any arguments.
90 #ifdef GP2X_BUILD
91  #ifdef WIZ_BUILD
92   static void SDL_GP2X_AllowGfxMemory() {}
93   #include <SDL.h>
94  #else
95   #include "SDL_gp2x.h"
96  #endif
97  SDL_Surface *hw_screen;
98 #endif
99 SDL_Surface *screen;
100 const u32 video_scale = 1;
101 extern void gp2x_flush_cache(void *beginning_addr, void *end_addr, int flags);
102
// Non-PSP build: the pixel buffer is the SDL surface's pixels, viewed as
// 16-bit values.
103 #define get_screen_pixels()                                                   \
104   ((u16 *)screen->pixels)                                                     \
105
// Non-PSP build: the surface pitch divided by two, i.e. converted from bytes
// to 16-bit pixels.
106 #define get_screen_pitch()                                                    \
107   (screen->pitch / 2)                                                         \
108
109 #endif
110
// Forward declarations of the two conditional scanline renderers, one taking
// tile layer renderers and one taking bitmap layer renderers. Both receive a
// start/end range, the destination scanline, and enable_flags, dispcnt and
// bldcnt values. Their definitions are not in this part of the file.
111 void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline,
112  u32 enable_flags, u32 dispcnt, u32 bldcnt, tile_layer_render_struct
113  *layer_renderers);
114 void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline,
115  u32 enable_flags, u32 dispcnt, u32 bldcnt, bitmap_layer_render_struct
116  *layer_renderers);
117
// Expands to nothing.
118 #define no_op                                                                 \
119
120 // This old version is not necessary if the palette is either being converted
121 // transparently or the ABGR 1555 format is being used natively. The direct
122 // version (without conversion) is much faster.
123
// Loads palette[source] into current_pixel, then runs convert_palette on it.
124 #define tile_lookup_palette_full(palette, source)                             \
125   current_pixel = palette[source];                                            \
126   convert_palette(current_pixel)                                              \
127
// Direct version: loads palette[source] into current_pixel with no
// conversion. Note that the expansion already ends in a semicolon.
128 #define tile_lookup_palette(palette, source)                                  \
129   current_pixel = palette[source];                                            \
130
131
// Pixel writers for the "normal" renderer. With RENDER_COLOR16_NORMAL
// defined, the normal base writer is the color16 writer. Otherwise
// current_pixel is replaced by its entry in palette and stored at
// dest_ptr[index].
132 #ifdef RENDER_COLOR16_NORMAL
133
134 #define tile_expand_base_normal(index)                                        \
135   tile_expand_base_color16(index)                                             \
136
137 #else
138
139 #define tile_expand_base_normal(index)                                        \
140   tile_lookup_palette(palette, current_pixel);                                \
141   dest_ptr[index] = current_pixel                                             \
142
143 #endif
144
// The transparent normal writer is the same as the base normal writer.
145 #define tile_expand_transparent_normal(index)                                 \
146   tile_expand_base_normal(index)                                              \
147
// Copy writer: takes the pixel from copy_ptr at the same index instead of
// from current_pixel.
148 #define tile_expand_copy(index)                                               \
149   dest_ptr[index] = copy_ptr[index]                                           \
150
151
// Advances dest_ptr by delta elements.
152 #define advance_dest_ptr_base(delta)                                          \
153   dest_ptr += delta                                                           \
154
// The transparent variant advances exactly like the base variant.
155 #define advance_dest_ptr_transparent(delta)                                   \
156   advance_dest_ptr_base(delta)                                                \
157
// The copy variant advances copy_ptr together with dest_ptr so that both
// stay at the same index.
158 #define advance_dest_ptr_copy(delta)                                          \
159   advance_dest_ptr_base(delta);                                               \
160   copy_ptr += delta                                                           \
161
162
// Yields 1 if bit `layer` of REG_BLDCNT is set (the layer is a target A of
// the blend), otherwise 0.

#define color_combine_mask_a(layer)                                           \
  ((io_registers[REG_BLDCNT] >> (layer)) & 0x01)

// For color blending operations: builds a mask that has bit 10 set if the
// layer is target B (bit layer + 8 of REG_BLDCNT) and bit 9 set if the layer
// is target A (bit layer of REG_BLDCNT).
//
// The argument and the whole expansion are parenthesized so the result does
// not depend on the operators around the macro call or inside its argument.

#define color_combine_mask(layer)                                             \
  ((color_combine_mask_a(layer) |                                             \
   ((io_registers[REG_BLDCNT] >> ((layer) + 7)) & 0x02)) << 9)
172
173 // For alpha blending renderers, draw the palette index (9bpp) and
174 // layer bits rather than the raw RGB. For the base this should write to
175 // the 32bit location directly.
176
177 #define tile_expand_base_alpha(index)                                         \
178   dest_ptr[index] = current_pixel | pixel_combine                             \
179
// Base write for a transparent pixel: stores bg_combine alone.
180 #define tile_expand_base_bg(index)                                            \
181   dest_ptr[index] = bg_combine                                                \
182
183
184 // For layered (transparent) writes this should shift the "stack" and write
185 // to the bottom. This will preserve the topmost pixel and the most recent
186 // one.
187
188 #define tile_expand_transparent_alpha(index)                                  \
189   dest_ptr[index] = (dest_ptr[index] << 16) | current_pixel | pixel_combine   \
190
191
192 // OBJ should only shift if the top isn't already OBJ. Bit 8 (0x100) of the
// existing entry is the test: when it is set, only the low 16 bits are
// replaced and the high 16 bits are kept; otherwise the entry is shifted up
// by 16 as in tile_expand_transparent_alpha. Needs a `dest` temporary in
// scope.
193 #define tile_expand_transparent_alpha_obj(index)                              \
194   dest = dest_ptr[index];                                                     \
195   if(dest & 0x00000100)                                                       \
196   {                                                                           \
197     dest_ptr[index] = (dest & 0xFFFF0000) | current_pixel | pixel_combine;    \
198   }                                                                           \
199   else                                                                        \
200   {                                                                           \
201     dest_ptr[index] = (dest << 16) | current_pixel | pixel_combine;           \
202   }                                                                           \
203
204
205 // For color effects that don't need to preserve the previous layer.
206 // The color32 version should be used with 32bit wide dest_ptr so as to be
207 // compatible with alpha combine on top of it.
// All four variants expand to the same store of current_pixel | pixel_combine
// into dest_ptr[index]; they differ only in name.
208
209 #define tile_expand_base_color16(index)                                       \
210   dest_ptr[index] = current_pixel | pixel_combine                             \
211
212 #define tile_expand_transparent_color16(index)                                \
213   tile_expand_base_color16(index)                                             \
214
215 #define tile_expand_base_color32(index)                                       \
216   tile_expand_base_color16(index)                                             \
217
218 #define tile_expand_transparent_color32(index)                                \
219   tile_expand_base_color16(index)                                             \
220
221
222 // Operations for isolating 8bpp pixels within 32bpp pixel blocks.
// Each one sets current_pixel from current_pixels: "mask" takes the low
// byte, "shift_mask" takes the byte starting at bit `shift`, "shift" shifts
// right without masking (used with 24 for the top byte), and "none" leaves
// current_pixel untouched.
223
224 #define tile_8bpp_pixel_op_mask(op_param)                                     \
225   current_pixel = current_pixels & 0xFF                                       \
226
227 #define tile_8bpp_pixel_op_shift_mask(shift)                                  \
228   current_pixel = (current_pixels >> shift) & 0xFF                            \
229
230 #define tile_8bpp_pixel_op_shift(shift)                                       \
231   current_pixel = current_pixels >> shift                                     \
232
233 #define tile_8bpp_pixel_op_none(shift)                                        \
234
235 // Base should always draw raw in 8bpp mode; color 0 will be drawn where
236 // color 0 is.
237
238 #define tile_8bpp_draw_base_normal(index)                                     \
239   tile_expand_base_normal(index)                                              \
240
// Alpha base draw: a non-zero pixel is written through
// tile_expand_base_alpha, a zero pixel through tile_expand_base_bg.
241 #define tile_8bpp_draw_base_alpha(index)                                      \
242   if(current_pixel)                                                           \
243   {                                                                           \
244     tile_expand_base_alpha(index);                                            \
245   }                                                                           \
246   else                                                                        \
247   {                                                                           \
248     tile_expand_base_bg(index);                                               \
249   }                                                                           \
250
251
// The color16 and color32 base draws are the alpha base draw.
252 #define tile_8bpp_draw_base_color16(index)                                    \
253   tile_8bpp_draw_base_alpha(index)                                            \
254
255 #define tile_8bpp_draw_base_color32(index)                                    \
256   tile_8bpp_draw_base_alpha(index)                                            \
257
258
// Extracts one pixel with tile_8bpp_pixel_op_<op>(op_param), then draws it
// at index with tile_8bpp_draw_base_<alpha_op>.
259 #define tile_8bpp_draw_base(index, op, op_param, alpha_op)                    \
260   tile_8bpp_pixel_op_##op(op_param);                                          \
261   tile_8bpp_draw_base_##alpha_op(index)                                       \
262
263 // Transparent (layered) writes should only replace what is there if the
264 // pixel is not transparent (zero)
265
266 #define tile_8bpp_draw_transparent(index, op, op_param, alpha_op)             \
267   tile_8bpp_pixel_op_##op(op_param);                                          \
268   if(current_pixel)                                                           \
269   {                                                                           \
270     tile_expand_transparent_##alpha_op(index);                                \
271   }                                                                           \
272
// Copy variant: where the extracted pixel is non-zero, the destination gets
// copy_ptr[index] (via tile_expand_copy) rather than the tile pixel itself.
// alpha_op is accepted but not used.
273 #define tile_8bpp_draw_copy(index, op, op_param, alpha_op)                    \
274   tile_8bpp_pixel_op_##op(op_param);                                          \
275   if(current_pixel)                                                           \
276   {                                                                           \
277     tile_expand_copy(index);                                                  \
278   }                                                                           \
279
280 // Get the current tile from the map in 8bpp mode: reads the map entry at
// map_ptr into current_tile and points tile_ptr at tile_base plus 64 times
// the low 10 bits of the entry.
281
282 #define get_tile_8bpp()                                                       \
283   current_tile = *map_ptr;                                                    \
284   tile_ptr = tile_base + ((current_tile & 0x3FF) * 64)                        \
285
286
287 // Draw half of a tile in 8bpp mode, for base renderer. The four bytes of
// current_pixels go to index + 0 .. index + 3, lowest byte first.
288
289 #define tile_8bpp_draw_four_noflip(index, combine_op, alpha_op)               \
290   tile_8bpp_draw_##combine_op(index + 0, mask, 0, alpha_op);                  \
291   tile_8bpp_draw_##combine_op(index + 1, shift_mask, 8, alpha_op);            \
292   tile_8bpp_draw_##combine_op(index + 2, shift_mask, 16, alpha_op);           \
293   tile_8bpp_draw_##combine_op(index + 3, shift, 24, alpha_op)                 \
294
295
296 // Like the above, but draws the half-tile horizontally flipped: the lowest
// byte goes to index + 3 and the highest to index + 0.
297
298 #define tile_8bpp_draw_four_flip(index, combine_op, alpha_op)                 \
299   tile_8bpp_draw_##combine_op(index + 3, mask, 0, alpha_op);                  \
300   tile_8bpp_draw_##combine_op(index + 2, shift_mask, 8, alpha_op);            \
301   tile_8bpp_draw_##combine_op(index + 1, shift_mask, 16, alpha_op);           \
302   tile_8bpp_draw_##combine_op(index + 0, shift, 24, alpha_op)                 \
303
// Base renderer entry: always draws all four pixels, in the order selected
// by flip_op (noflip or flip).
304 #define tile_8bpp_draw_four_base(index, alpha_op, flip_op)                    \
305   tile_8bpp_draw_four_##flip_op(index, base, alpha_op)                        \
306
307
308 // Draw half of a tile in 8bpp mode, for transparent renderer; as an
309 // optimization the entire thing is checked against zero (in transparent
310 // capable renders it is more likely for the pixels to be transparent than
311 // opaque)
312
313 #define tile_8bpp_draw_four_transparent(index, alpha_op, flip_op)             \
314   if(current_pixels != 0)                                                     \
315   {                                                                           \
316     tile_8bpp_draw_four_##flip_op(index, transparent, alpha_op);              \
317   }                                                                           \
318
// Same all-zero shortcut for the copy renderer.
319 #define tile_8bpp_draw_four_copy(index, alpha_op, flip_op)                    \
320   if(current_pixels != 0)                                                     \
321   {                                                                           \
322     tile_8bpp_draw_four_##flip_op(index, copy, alpha_op);                     \
323   }                                                                           \
324
325 // Helper macro for drawing 8bpp tiles clipped against the edge of the screen.
// Draws partial_tile_run pixels one at a time: each pass draws the low byte
// of current_pixels at dest_ptr[0], shifts current_pixels right by 8 and
// advances the destination by one. Uses `i` as the loop counter.
326
327 #define partial_tile_8bpp(combine_op, alpha_op)                               \
328   for(i = 0; i < partial_tile_run; i++)                                       \
329   {                                                                           \
330     tile_8bpp_draw_##combine_op(0, mask, 0, alpha_op);                        \
331     current_pixels >>= 8;                                                     \
332     advance_dest_ptr_##combine_op(1);                                         \
333   }                                                                           \
334
335
336 // Draws 8bpp tiles clipped against the left side of the screen,
337 // partial_tile_offset indicates how much clipped in it is, partial_tile_run
338 // indicates how much it should draw.
// With an offset of 4 or more only the second word of the row (tile_ptr + 4)
// is used, with the skipped pixels shifted out. Otherwise the rest of the
// first word is drawn pixel by pixel (partial_tile_run is reduced by 4
// first) and the second word is drawn as a full group of four.
339
340 #define partial_tile_right_noflip_8bpp(combine_op, alpha_op)                  \
341   if(partial_tile_offset >= 4)                                                \
342   {                                                                           \
343     current_pixels = *((u32 *)(tile_ptr + 4)) >>                              \
344      ((partial_tile_offset - 4) * 8);                                         \
345     partial_tile_8bpp(combine_op, alpha_op);                                  \
346   }                                                                           \
347   else                                                                        \
348   {                                                                           \
349     partial_tile_run -= 4;                                                    \
350     current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 8);         \
351     partial_tile_8bpp(combine_op, alpha_op);                                  \
352     current_pixels = *((u32 *)(tile_ptr + 4));                                \
353     tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip);                    \
354     advance_dest_ptr_##combine_op(4);                                         \
355   }                                                                           \
356
357
358 // Draws 8bpp tiles clipped against both the left and right side of the
359 // screen, IE, runs of less than 8 - partial_tile_offset.
// When the run starts in the first word and crosses into the second
// (offset + run > 4), it is split: 4 - offset pixels come from the first
// word and the remaining old_run - (4 - offset) pixels from the second.
360
361 #define partial_tile_mid_noflip_8bpp(combine_op, alpha_op)                    \
362   if(partial_tile_offset >= 4)                                                \
363   {                                                                           \
364     current_pixels = *((u32 *)(tile_ptr + 4)) >>                              \
365      ((partial_tile_offset - 4) * 8);                                         \
366     partial_tile_8bpp(combine_op, alpha_op);                                  \
367   }                                                                           \
368   else                                                                        \
369   {                                                                           \
370     current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 8);         \
371     if((partial_tile_offset + partial_tile_run) > 4)                          \
372     {                                                                         \
373       u32 old_run = partial_tile_run;                                         \
374       partial_tile_run = 4 - partial_tile_offset;                             \
375       partial_tile_8bpp(combine_op, alpha_op);                                \
376       partial_tile_run = old_run - partial_tile_run;                          \
377       current_pixels = *((u32 *)(tile_ptr + 4));                              \
378       partial_tile_8bpp(combine_op, alpha_op);                                \
379     }                                                                         \
380     else                                                                      \
381     {                                                                         \
382       partial_tile_8bpp(combine_op, alpha_op);                                \
383     }                                                                         \
384   }                                                                           \
385
386
387 // Draws 8bpp tiles clipped against the right side of the screen,
388 // partial_tile_run indicates how much there is to draw.
// A run of 4 or more first draws the whole first word and moves tile_ptr on
// by 4; whatever is left of the run is then drawn pixel by pixel from the
// word at tile_ptr. Note that tile_ptr and partial_tile_run are modified.
389
390 #define partial_tile_left_noflip_8bpp(combine_op, alpha_op)                   \
391   if(partial_tile_run >= 4)                                                   \
392   {                                                                           \
393     current_pixels = *((u32 *)tile_ptr);                                      \
394     tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip);                    \
395     advance_dest_ptr_##combine_op(4);                                         \
396     tile_ptr += 4;                                                            \
397     partial_tile_run -= 4;                                                    \
398   }                                                                           \
399                                                                               \
400   current_pixels = *((u32 *)(tile_ptr));                                      \
401   partial_tile_8bpp(combine_op, alpha_op)                                     \
402
403
404 // Draws a non-clipped (complete) 8bpp tile: the word at tile_ptr fills
// dest_ptr[0..3] and the word at tile_ptr + 4 fills dest_ptr[4..7]. dest_ptr
// itself is not advanced here.
405
406 #define tile_noflip_8bpp(combine_op, alpha_op)                                \
407   current_pixels = *((u32 *)tile_ptr);                                        \
408   tile_8bpp_draw_four_##combine_op(0, alpha_op, noflip);                      \
409   current_pixels = *((u32 *)(tile_ptr + 4));                                  \
410   tile_8bpp_draw_four_##combine_op(4, alpha_op, noflip)                       \
411
412
// Like the noflip 8bpp tile macros, but for horizontally flipped tiles. A
// flipped row is read backwards: the word at tile_ptr + 4 supplies the first
// four pixels drawn and the word at tile_ptr the last four, each word being
// consumed from its highest byte down.

// Draws partial_tile_run pixels one at a time: each pass draws the top byte
// of current_pixels at dest_ptr[0], shifts current_pixels left by 8 and
// advances the destination by one. Uses `i` as the loop counter.

#define partial_tile_flip_8bpp(combine_op, alpha_op)                          \
  for(i = 0; i < partial_tile_run; i++)                                       \
  {                                                                           \
    tile_8bpp_draw_##combine_op(0, shift, 24, alpha_op);                      \
    current_pixels <<= 8;                                                     \
    advance_dest_ptr_##combine_op(1);                                         \
  }

// Flipped tile clipped against the left side of the screen;
// partial_tile_offset is the number of pixels skipped and partial_tile_run
// the number drawn. With an offset below 4 the skipped pixels are shifted
// out of the word at tile_ptr + 4 by partial_tile_offset * 8 bits. The
// shift count used to be (partial_tile_offset - 4) * 8 in that branch, which
// is out of range for a 32-bit shift.

#define partial_tile_right_flip_8bpp(combine_op, alpha_op)                    \
  if(partial_tile_offset >= 4)                                                \
  {                                                                           \
    current_pixels = *((u32 *)tile_ptr) << ((partial_tile_offset - 4) * 8);   \
    partial_tile_flip_8bpp(combine_op, alpha_op);                             \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    partial_tile_run -= 4;                                                    \
    current_pixels = *((u32 *)(tile_ptr + 4)) << (partial_tile_offset * 8);   \
    partial_tile_flip_8bpp(combine_op, alpha_op);                             \
    current_pixels = *((u32 *)tile_ptr);                                      \
    tile_8bpp_draw_four_##combine_op(0, alpha_op, flip);                      \
    advance_dest_ptr_##combine_op(4);                                         \
  }

// Flipped tile clipped on both sides. A run that starts in the word at
// tile_ptr + 4 and continues into the word at tile_ptr is split in two:
// 4 - partial_tile_offset pixels from the former, the rest from the latter.
// The same shift-count fix as above applies to the offset-below-4 branch.

#define partial_tile_mid_flip_8bpp(combine_op, alpha_op)                      \
  if(partial_tile_offset >= 4)                                                \
  {                                                                           \
    current_pixels = *((u32 *)tile_ptr) << ((partial_tile_offset - 4) * 8);   \
    partial_tile_flip_8bpp(combine_op, alpha_op);                             \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    current_pixels = *((u32 *)(tile_ptr + 4)) << (partial_tile_offset * 8);   \
                                                                              \
    if((partial_tile_offset + partial_tile_run) > 4)                          \
    {                                                                         \
      u32 old_run = partial_tile_run;                                         \
      partial_tile_run = 4 - partial_tile_offset;                             \
      partial_tile_flip_8bpp(combine_op, alpha_op);                           \
      partial_tile_run = old_run - partial_tile_run;                          \
      current_pixels = *((u32 *)(tile_ptr));                                  \
      partial_tile_flip_8bpp(combine_op, alpha_op);                           \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      partial_tile_flip_8bpp(combine_op, alpha_op);                           \
    }                                                                         \
  }

// Flipped tile clipped against the right side of the screen. A run of 4 or
// more first draws the whole word at tile_ptr + 4 and moves tile_ptr back by
// 4, so that the following read of tile_ptr + 4 picks up the other word; the
// rest of the run is drawn pixel by pixel. Modifies tile_ptr and
// partial_tile_run.

#define partial_tile_left_flip_8bpp(combine_op, alpha_op)                     \
  if(partial_tile_run >= 4)                                                   \
  {                                                                           \
    current_pixels = *((u32 *)(tile_ptr + 4));                                \
    tile_8bpp_draw_four_##combine_op(0, alpha_op, flip);                      \
    advance_dest_ptr_##combine_op(4);                                         \
    tile_ptr -= 4;                                                            \
    partial_tile_run -= 4;                                                    \
  }                                                                           \
                                                                              \
  current_pixels = *((u32 *)(tile_ptr + 4));                                  \
  partial_tile_flip_8bpp(combine_op, alpha_op)

// Complete flipped tile: the word at tile_ptr + 4 fills dest_ptr[0..3] and
// the word at tile_ptr fills dest_ptr[4..7]. dest_ptr is not advanced here.

#define tile_flip_8bpp(combine_op, alpha_op)                                  \
  current_pixels = *((u32 *)(tile_ptr + 4));                                  \
  tile_8bpp_draw_four_##combine_op(0, alpha_op, flip);                        \
  current_pixels = *((u32 *)tile_ptr);                                        \
  tile_8bpp_draw_four_##combine_op(4, alpha_op, flip)
484
485
486 // Operations for isolating 4bpp tiles in a 32bit block
// Each one sets current_pixel from current_pixels: "mask" takes the low
// nibble, "shift_mask" takes the nibble starting at bit `shift`, "shift"
// shifts right without masking, and "none" leaves current_pixel untouched.
487
488 #define tile_4bpp_pixel_op_mask(op_param)                                     \
489   current_pixel = current_pixels & 0x0F                                       \
490
491 #define tile_4bpp_pixel_op_shift_mask(shift)                                  \
492   current_pixel = (current_pixels >> shift) & 0x0F                            \
493
494 #define tile_4bpp_pixel_op_shift(shift)                                       \
495   current_pixel = current_pixels >> shift                                     \
496
497 #define tile_4bpp_pixel_op_none(op_param)                                     \
498
499 // Draws a single 4bpp pixel as base, normal renderer; checks to see if the
500 // pixel is zero because if so the current palette should not be applied.
501 // These ifs can be replaced with a lookup table, may or may not be superior
502 // this way, should be benchmarked. The lookup table would be from 0-255
503 // identity map except for multiples of 16, which would map to 0.
// Both branches below end in the same tile_expand_base_normal call; the only
// difference is that a non-zero pixel has current_palette OR'd in first.
504
505 #define tile_4bpp_draw_base_normal(index)                                     \
506   if(current_pixel)                                                           \
507   {                                                                           \
508     current_pixel |= current_palette;                                         \
509     tile_expand_base_normal(index);                                           \
510   }                                                                           \
511   else                                                                        \
512   {                                                                           \
513     tile_expand_base_normal(index);                                           \
514   }                                                                           \
515
516
// Alpha base draw: a non-zero pixel gets current_palette OR'd in and is
// written through tile_expand_base_alpha; a zero pixel is written through
// tile_expand_base_bg.
517 #define tile_4bpp_draw_base_alpha(index)                                      \
518   if(current_pixel)                                                           \
519   {                                                                           \
520     current_pixel |= current_palette;                                         \
521     tile_expand_base_alpha(index);                                            \
522   }                                                                           \
523   else                                                                        \
524   {                                                                           \
525     tile_expand_base_bg(index);                                               \
526   }                                                                           \
527
// The color16 and color32 base draws are the alpha base draw.
528 #define tile_4bpp_draw_base_color16(index)                                    \
529   tile_4bpp_draw_base_alpha(index)                                            \
530
531 #define tile_4bpp_draw_base_color32(index)                                    \
532   tile_4bpp_draw_base_alpha(index)                                            \
533
534
// Extracts one pixel with tile_4bpp_pixel_op_<op>(op_param), then draws it
// at index with tile_4bpp_draw_base_<alpha_op>.
535 #define tile_4bpp_draw_base(index, op, op_param, alpha_op)                    \
536   tile_4bpp_pixel_op_##op(op_param);                                          \
537   tile_4bpp_draw_base_##alpha_op(index)                                       \
538
539
540 // Draws a single 4bpp pixel as layered, if not transparent. A non-zero
// pixel has current_palette OR'd in before it is written; a zero pixel
// leaves the destination untouched.
541
542 #define tile_4bpp_draw_transparent(index, op, op_param, alpha_op)             \
543   tile_4bpp_pixel_op_##op(op_param);                                          \
544   if(current_pixel)                                                           \
545   {                                                                           \
546     current_pixel |= current_palette;                                         \
547     tile_expand_transparent_##alpha_op(index);                                \
548   }                                                                           \
549
// Copy variant: where the extracted pixel is non-zero the destination gets
// copy_ptr[index] (via tile_expand_copy). alpha_op is accepted but not used.
550 #define tile_4bpp_draw_copy(index, op, op_param, alpha_op)                    \
551   tile_4bpp_pixel_op_##op(op_param);                                          \
552   if(current_pixel)                                                           \
553   {                                                                           \
554     current_pixel |= current_palette;                                         \
555     tile_expand_copy(index);                                                  \
556   }                                                                           \
557
558
559 // Stores value into dest_ptr[0] through dest_ptr[7]; value is expanded (and so
560 // evaluated) eight times. Shared by the alpha and normal base-zero macros.
561
562 #define tile_4bpp_draw_eight_base_zero(value)                                 \
563   dest_ptr[0] = value;                                                        \
564   dest_ptr[1] = value;                                                        \
565   dest_ptr[2] = value;                                                        \
566   dest_ptr[3] = value;                                                        \
567   dest_ptr[4] = value;                                                        \
568   dest_ptr[5] = value;                                                        \
569   dest_ptr[6] = value;                                                        \
570   dest_ptr[7] = value                                                         \
571
572
573 // Draws eight background pixels for the alpha renderer: every pixel becomes
574 // bg_combine, basically color zero with the background flag high. The color16
575 // and color32 variants forward to the alpha one.
576 #define tile_4bpp_draw_eight_base_zero_alpha()                                \
577   tile_4bpp_draw_eight_base_zero(bg_combine)                                  \
578
579 #define tile_4bpp_draw_eight_base_zero_color16()                              \
580   tile_4bpp_draw_eight_base_zero_alpha()                                      \
581
582 #define tile_4bpp_draw_eight_base_zero_color32()                              \
583   tile_4bpp_draw_eight_base_zero_alpha()                                      \
584
585
586 // Draws eight background pixels for the normal renderer. When
587 // RENDER_COLOR16_NORMAL is defined the fill value is 0, otherwise palette[0];
588 // either way the value is also left in current_pixel.
589 #ifdef RENDER_COLOR16_NORMAL
590
591 #define tile_4bpp_draw_eight_base_zero_normal()                               \
592   current_pixel = 0;                                                          \
593   tile_4bpp_draw_eight_base_zero(current_pixel)                               \
594
595 #else
596
597 #define tile_4bpp_draw_eight_base_zero_normal()                               \
598   current_pixel = palette[0];                                                 \
599   tile_4bpp_draw_eight_base_zero(current_pixel)                               \
600
601 #endif
602
603
604 // Draws eight 4bpp pixels in order 0..7: index k uses op parameter 4 * k. The
605 // first uses the mask op, the last the shift op, the rest shift_mask.
606 #define tile_4bpp_draw_eight_noflip(combine_op, alpha_op)                     \
607   tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op);                          \
608   tile_4bpp_draw_##combine_op(1, shift_mask, 4, alpha_op);                    \
609   tile_4bpp_draw_##combine_op(2, shift_mask, 8, alpha_op);                    \
610   tile_4bpp_draw_##combine_op(3, shift_mask, 12, alpha_op);                   \
611   tile_4bpp_draw_##combine_op(4, shift_mask, 16, alpha_op);                   \
612   tile_4bpp_draw_##combine_op(5, shift_mask, 20, alpha_op);                   \
613   tile_4bpp_draw_##combine_op(6, shift_mask, 24, alpha_op);                   \
614   tile_4bpp_draw_##combine_op(7, shift, 28, alpha_op)                         \
615
616
617 // Draws eight 4bpp pixels in reverse order (for hflip): the pixel op with
618 // parameter 4 * k is drawn at index 7 - k.
619 #define tile_4bpp_draw_eight_flip(combine_op, alpha_op)                       \
620   tile_4bpp_draw_##combine_op(7, mask, 0, alpha_op);                          \
621   tile_4bpp_draw_##combine_op(6, shift_mask, 4, alpha_op);                    \
622   tile_4bpp_draw_##combine_op(5, shift_mask, 8, alpha_op);                    \
623   tile_4bpp_draw_##combine_op(4, shift_mask, 12, alpha_op);                   \
624   tile_4bpp_draw_##combine_op(3, shift_mask, 16, alpha_op);                   \
625   tile_4bpp_draw_##combine_op(2, shift_mask, 20, alpha_op);                   \
626   tile_4bpp_draw_##combine_op(1, shift_mask, 24, alpha_op);                   \
627   tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op)                         \
628
629
630 // Draws eight 4bpp pixels in base mode. If current_pixels is zero the whole
631 // row is background, so the matching base_zero fill is used instead.
632
633 #define tile_4bpp_draw_eight_base(alpha_op, flip_op)                          \
634   if(current_pixels != 0)                                                     \
635   {                                                                           \
636     tile_4bpp_draw_eight_##flip_op(base, alpha_op);                           \
637   }                                                                           \
638   else                                                                        \
639   {                                                                           \
640     tile_4bpp_draw_eight_base_zero_##alpha_op();                              \
641   }                                                                           \
642
643
644 // Draws eight 4bpp pixels in transparent (layered) mode. Nothing is drawn
645 // when current_pixels is zero.
646
647 #define tile_4bpp_draw_eight_transparent(alpha_op, flip_op)                   \
648   if(current_pixels != 0)                                                     \
649   {                                                                           \
650     tile_4bpp_draw_eight_##flip_op(transparent, alpha_op);                    \
651   }                                                                           \
652
653 // Copy-mode counterpart: draws only when current_pixels is nonzero.
654 #define tile_4bpp_draw_eight_copy(alpha_op, flip_op)                          \
655   if(current_pixels != 0)                                                     \
656   {                                                                           \
657     tile_4bpp_draw_eight_##flip_op(copy, alpha_op);                           \
658   }                                                                           \
659
660 // Gets the current tile in 4bpp mode: loads the map entry at map_ptr, derives
661 // current_palette from bits 12 and up (shifted left by 4), and points
662 // tile_ptr at tile_base + 32 * (low ten bits of the entry).
663 #define get_tile_4bpp()                                                       \
664   current_tile = *map_ptr;                                                    \
665   current_palette = (current_tile >> 12) << 4;                                \
666   tile_ptr = tile_base + ((current_tile & 0x3FF) * 32);                       \
667
668
669 // Helper macro for drawing clipped 4bpp tiles. Repeats partial_tile_run times:
670 // draw with the mask op, shift current_pixels right by 4, advance dest by one.
671 #define partial_tile_4bpp(combine_op, alpha_op)                               \
672   for(i = 0; i < partial_tile_run; i++)                                       \
673   {                                                                           \
674     tile_4bpp_draw_##combine_op(0, mask, 0, alpha_op);                        \
675     current_pixels >>= 4;                                                     \
676     advance_dest_ptr_##combine_op(1);                                         \
677   }                                                                           \
678
679
680 // Draws a 4bpp tile clipped against the left edge of the screen.
681 // The first partial_tile_offset pixels are shifted out of the row word, then
682 // partial_tile_run pixels are drawn.
683
684 #define partial_tile_right_noflip_4bpp(combine_op, alpha_op)                  \
685   current_pixels = *((u32 *)tile_ptr) >> (partial_tile_offset * 4);           \
686   partial_tile_4bpp(combine_op, alpha_op)                                     \
687
688
689 // Draws a 4bpp tile clipped against both edges of the screen; this simply
690 // forwards to the right-clipped (offset + run) case.
691 #define partial_tile_mid_noflip_4bpp(combine_op, alpha_op)                    \
692   partial_tile_right_noflip_4bpp(combine_op, alpha_op)                        \
693
694
695 // Draws a 4bpp tile clipped against the right edge of the screen.
696 // partial_tile_run is how many to draw, starting from the row's first pixel.
697
698 #define partial_tile_left_noflip_4bpp(combine_op, alpha_op)                   \
699   current_pixels = *((u32 *)tile_ptr);    /* the 8-pixel row as one u32 */    \
700   partial_tile_4bpp(combine_op, alpha_op)                                     \
701
702
703 // Draws a complete, unclipped 4bpp tile row read from tile_ptr as one u32.
704 #define tile_noflip_4bpp(combine_op, alpha_op)                                \
705   current_pixels = *((u32 *)tile_ptr);                                        \
706   tile_4bpp_draw_eight_##combine_op(alpha_op, noflip)                         \
707
708
709 // Like the above, but draws flipped tiles: each pixel is taken with the shift
710 // op (parameter 28) and current_pixels is then shifted left by 4.
711 #define partial_tile_flip_4bpp(combine_op, alpha_op)                          \
712   for(i = 0; i < partial_tile_run; i++)                                       \
713   {                                                                           \
714     tile_4bpp_draw_##combine_op(0, shift, 28, alpha_op);                      \
715     current_pixels <<= 4;                                                     \
716     advance_dest_ptr_##combine_op(1);                                         \
717   }                                                                           \
718
719 #define partial_tile_right_flip_4bpp(combine_op, alpha_op)                    \
720   current_pixels = *((u32 *)tile_ptr) << (partial_tile_offset * 4);           \
721   partial_tile_flip_4bpp(combine_op, alpha_op)                                \
722
723 #define partial_tile_mid_flip_4bpp(combine_op, alpha_op)                      \
724   partial_tile_right_flip_4bpp(combine_op, alpha_op)  /* same as right */     \
725
726 #define partial_tile_left_flip_4bpp(combine_op, alpha_op)                     \
727   current_pixels = *((u32 *)tile_ptr);    /* no pixels skipped up front */    \
728   partial_tile_flip_4bpp(combine_op, alpha_op)                                \
729
730 #define tile_flip_4bpp(combine_op, alpha_op)                                  \
731   current_pixels = *((u32 *)tile_ptr);                                        \
732   tile_4bpp_draw_eight_##combine_op(alpha_op, flip)                           \
733
734
735 // Draws a single (partial or complete) tile from the tilemap, flipping
736 // as necessary: map entry bit 0x800 adds vertical_pixel_flip to tile_ptr,
737 // bit 0x400 selects the horizontally flipped drawer.
738 #define single_tile_map(tile_type, combine_op, color_depth, alpha_op)         \
739   get_tile_##color_depth();                                                   \
740   if(current_tile & 0x800)                                                    \
741     tile_ptr += vertical_pixel_flip;                                          \
742                                                                               \
743   if(current_tile & 0x400)                                                    \
744   {                                                                           \
745     tile_type##_flip_##color_depth(combine_op, alpha_op);                     \
746   }                                                                           \
747   else                                                                        \
748   {                                                                           \
749     tile_type##_noflip_##color_depth(combine_op, alpha_op);                   \
750   }                                                                           \
751
752
753 // Draws tile_run sequential tiles from the tilemap, hflips and vflips as
754 // necessary. After each tile it calls advance_dest_ptr_<combine_op>(8) and
755 // steps map_ptr to the next entry.
756 #define multiple_tile_map(combine_op, color_depth, alpha_op)                  \
757   for(i = 0; i < tile_run; i++)                                               \
758   {                                                                           \
759     single_tile_map(tile, combine_op, color_depth, alpha_op);                 \
760     advance_dest_ptr_##combine_op(8);                                         \
761     map_ptr++;                                                                \
762   }                                                                           \
763
764 // Draws a partial tile from a tilemap clipped against the left edge of the
765 // screen, then steps map_ptr to the next map entry.
766
767 #define partial_tile_right_map(combine_op, color_depth, alpha_op)             \
768   single_tile_map(partial_tile_right, combine_op, color_depth, alpha_op);     \
769   map_ptr++                                                                   \
770
771 // Draws a partial tile from a tilemap clipped against both edges of the
772 // screen; map_ptr is left where it is.
773
774 #define partial_tile_mid_map(combine_op, color_depth, alpha_op)               \
775   single_tile_map(partial_tile_mid, combine_op, color_depth, alpha_op)        \
776
777 // Draws a partial tile from a tilemap clipped against the right edge of the
778 // screen; map_ptr is left where it is.
779
780 #define partial_tile_left_map(combine_op, color_depth, alpha_op)              \
781   single_tile_map(partial_tile_left, combine_op, color_depth, alpha_op)       \
782
783
784 // Advances a non-flipped 4bpp obj to the next tile (tile_ptr moves up by 32).
785
786 #define obj_advance_noflip_4bpp()                                             \
787   tile_ptr += 32                                                              \
788
789
790 // Advances a non-flipped 8bpp obj to the next tile (tile_ptr moves up by 64).
791
792 #define obj_advance_noflip_8bpp()                                             \
793   tile_ptr += 64                                                              \
794
795
796 // Advances a flipped 4bpp obj to the next tile (tile_ptr moves down by 32).
797
798 #define obj_advance_flip_4bpp()                                               \
799   tile_ptr -= 32                                                              \
800
801
802 // Advances a flipped 8bpp obj to the next tile (tile_ptr moves down by 64).
803
804 #define obj_advance_flip_8bpp()                                               \
805   tile_ptr -= 64                                                              \
806
807
808
809 // Draws tile_run sequential tiles from an obj. flip_op (flip or noflip) picks
810 // both the row drawer and the direction tile_ptr moves between tiles.
811
812 #define multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op)         \
813   for(i = 0; i < tile_run; i++)                                               \
814   {                                                                           \
815     tile_##flip_op##_##color_depth(combine_op, alpha_op);                     \
816     obj_advance_##flip_op##_##color_depth();                                  \
817     advance_dest_ptr_##combine_op(8);                                         \
818   }                                                                           \
819
820
821 // Draws an obj's tile clipped against the left side of the screen, then
822 // advances tile_ptr to the obj's next tile.
823 #define partial_tile_right_obj(combine_op, color_depth, alpha_op, flip_op)    \
824   partial_tile_right_##flip_op##_##color_depth(combine_op, alpha_op);         \
825   obj_advance_##flip_op##_##color_depth()                                     \
826
827 // Draws an obj's tile clipped against both sides of the screen
828 // (tile_ptr is not advanced afterwards).
829 #define partial_tile_mid_obj(combine_op, color_depth, alpha_op, flip_op)      \
830   partial_tile_mid_##flip_op##_##color_depth(combine_op, alpha_op)            \
831
832 // Draws an obj's tile clipped against the right side of the screen
833 // (tile_ptr is not advanced afterwards).
834 #define partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op)     \
835   partial_tile_left_##flip_op##_##color_depth(combine_op, alpha_op)           \
836
837
838 // Extra variables specific for 8bpp/4bpp tile renderers: the 8bpp form
839 // declares nothing, the 4bpp form declares current_palette.
840 #define tile_extra_variables_8bpp()                                           \
841
842 #define tile_extra_variables_4bpp()                                           \
843   u32 current_palette                                                         \
844
845
846 // Byte lengths of complete tiles and tile rows in 4bpp and 8bpp
847 // (tile_width_* is one row, tile_size_* is eight rows).
848 #define tile_width_4bpp 4
849 #define tile_size_4bpp 32
850 #define tile_width_8bpp 8
851 #define tile_size_8bpp 64
852
853
854 // Render a single scanline of text tiles. pixel_run is the distance to the
855 // next 256-pixel map boundary; past it, drawing continues from second_ptr.
856 #define tile_render(color_depth, combine_op, alpha_op)                        \
857 {                                                                             \
858   u32 vertical_pixel_offset = (vertical_offset % 8) *                         \
859    tile_width_##color_depth;                                                  \
860   u32 vertical_pixel_flip =                                                   \
861    ((tile_size_##color_depth - tile_width_##color_depth) -                    \
862    vertical_pixel_offset) - vertical_pixel_offset;                            \
863   tile_extra_variables_##color_depth();                                       \
864   u8 *tile_base = vram + (((bg_control >> 2) & 0x03) * (1024 * 16)) +         \
865    vertical_pixel_offset;                                                     \
866   u32 pixel_run = 256 - (horizontal_offset % 256);                            \
867   u32 current_tile;                                                           \
868   /* NOTE(review): map_base is not read again in this macro - confirm. */     \
869   map_base += ((vertical_offset % 256) / 8) * 32;                             \
870   partial_tile_offset = (horizontal_offset % 8);                              \
871   /* Case 1: the span ends at or before the 256-pixel map boundary. */        \
872   if(pixel_run >= end)                                                        \
873   {                                                                           \
874     if(partial_tile_offset)                                                   \
875     {                                                                         \
876       partial_tile_run = 8 - partial_tile_offset;                             \
877       if(end < partial_tile_run)                                              \
878       {                                                                       \
879         partial_tile_run = end;                                               \
880         partial_tile_mid_map(combine_op, color_depth, alpha_op);              \
881         return;                    /* exits the function using this macro */  \
882       }                                                                       \
883       else                                                                    \
884       {                                                                       \
885         end -= partial_tile_run;                                              \
886         partial_tile_right_map(combine_op, color_depth, alpha_op);            \
887       }                                                                       \
888     }                                                                         \
889     /* Whole tiles next, then whatever is left over at the right edge. */     \
890     tile_run = end / 8;                                                       \
891     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
892                                                                               \
893     partial_tile_run = end % 8;                                               \
894                                                                               \
895     if(partial_tile_run)                                                      \
896     {                                                                         \
897       partial_tile_left_map(combine_op, color_depth, alpha_op);               \
898     }                                                                         \
899   }                                                                           \
900   else                                                                        \
901   {                                                                           \
902     if(partial_tile_offset)                                                   \
903     {                                                                         \
904       partial_tile_run = 8 - partial_tile_offset;                             \
905       partial_tile_right_map(combine_op, color_depth, alpha_op);              \
906     }                                                                         \
907     /* Case 2: draw up to the boundary, then carry on from second_ptr. */     \
908     tile_run = (pixel_run - partial_tile_run) / 8;                            \
909     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
910     map_ptr = second_ptr;                                                     \
911     end -= pixel_run;                                                         \
912     tile_run = end / 8;                                                       \
913     multiple_tile_map(combine_op, color_depth, alpha_op);                     \
914                                                                               \
915     partial_tile_run = end % 8;                                               \
916     if(partial_tile_run)                                                      \
917     {                                                                         \
918       partial_tile_left_map(combine_op, color_depth, alpha_op);               \
919     }                                                                         \
920   }                                                                           \
921 }                                                                             \
922
923 #define render_scanline_dest_normal         u16  // dest pixel type per alpha_op
924 #define render_scanline_dest_alpha          u32
925 #define render_scanline_dest_alpha_obj      u32
926 #define render_scanline_dest_color16        u16
927 #define render_scanline_dest_color32        u32
928 #define render_scanline_dest_partial_alpha  u32
929 #define render_scanline_dest_copy_tile      u16
930 #define render_scanline_dest_copy_bitmap    u16
931
932
933 // If rendering a scanline that is not a target A then there's no point in
934 // keeping what's underneath it because it can't blend with it.
935 // So when bit 0x200 of pixel_combine is clear, the color32 renderer runs instead.
936 #define render_scanline_skip_alpha(bg_type, combine_op)                       \
937   if((pixel_combine & 0x00000200) == 0)                                       \
938   {                                                                           \
939     render_scanline_##bg_type##_##combine_op##_color32(layer,                 \
940      start, end, scanline);                                                   \
941     return;                          /* leaves the enclosing renderer */      \
942   }                                                                           \
943
944 // Base/normal locals: a zero pixel_combine, or the converted palette pointer.
945 #ifdef RENDER_COLOR16_NORMAL
946
947 #define render_scanline_extra_variables_base_normal(bg_type)                  \
948   const u32 pixel_combine = 0                                                 \
949
950 #else
951
952 #define render_scanline_extra_variables_base_normal(bg_type)                  \
953   u16 *palette = palette_ram_converted                                        \
954
955 #endif
956 // Base-mode locals. bg_combine is color_combine_mask(5); the alpha variant
957 // also packs it into the upper half of pixel_combine and may return early.
958 #define render_scanline_extra_variables_base_alpha(bg_type)                   \
959   u32 bg_combine = color_combine_mask(5);                                     \
960   u32 pixel_combine = color_combine_mask(layer) | (bg_combine << 16);         \
961   render_scanline_skip_alpha(bg_type, base)                                   \
962
963 #define render_scanline_extra_variables_base_color()                          \
964   u32 bg_combine = color_combine_mask(5);                                     \
965   u32 pixel_combine = color_combine_mask(layer)                               \
966
967 #define render_scanline_extra_variables_base_color16(bg_type)                 \
968   render_scanline_extra_variables_base_color()                                \
969
970 #define render_scanline_extra_variables_base_color32(bg_type)                 \
971   render_scanline_extra_variables_base_color()                                \
972
973 // Transparent-mode locals: like base mode, but no bg_combine is declared.
974 #define render_scanline_extra_variables_transparent_normal(bg_type)           \
975   render_scanline_extra_variables_base_normal(bg_type)                        \
976
977 #define render_scanline_extra_variables_transparent_alpha(bg_type)            \
978   u32 pixel_combine = color_combine_mask(layer);                              \
979   render_scanline_skip_alpha(bg_type, transparent)                            \
980
981 #define render_scanline_extra_variables_transparent_color()                   \
982   u32 pixel_combine = color_combine_mask(layer)                               \
983
984 #define render_scanline_extra_variables_transparent_color16(bg_type)          \
985   render_scanline_extra_variables_transparent_color()                         \
986
987 #define render_scanline_extra_variables_transparent_color32(bg_type)          \
988   render_scanline_extra_variables_transparent_color()                         \
989
990
991
992
993
994 // Text map widths and heights in pixels, indexed by the two map-size bits
995 // (bits 14-15 of the BG control value read in render_scanline_text_builder).
996 u32 map_widths[] = { 256, 512, 256, 512 };
997 u32 map_heights[] = { 256, 256, 512, 512 };
998
999 // Builds render_scanline_text_<combine_op>_<alpha_op>(layer, start, end,
1000 // scanline), which draws pixels [start, end) of one text BG layer's line.
1001 #define render_scanline_text_builder(combine_op, alpha_op)                    \
1002 void render_scanline_text_##combine_op##_##alpha_op(u32 layer,                \
1003  u32 start, u32 end, void *scanline)                                          \
1004 {                                                                             \
1005   render_scanline_extra_variables_##combine_op##_##alpha_op(text);            \
1006   u32 bg_control = io_registers[REG_BG0CNT + layer];                          \
1007   u32 map_size = (bg_control >> 14) & 0x03;                                   \
1008   u32 map_width = map_widths[map_size];                                       \
1009   u32 map_height = map_heights[map_size];  /* NOTE(review): seems unused */   \
1010   u32 horizontal_offset =                                                     \
1011    (io_registers[REG_BG0HOFS + (layer * 2)] + start) % 512;                   \
1012   u32 vertical_offset = (io_registers[REG_VCOUNT] +                           \
1013    io_registers[REG_BG0VOFS + (layer * 2)]) % 512;                            \
1014   u32 current_pixel;                                                          \
1015   u32 current_pixels;                                                         \
1016   u32 partial_tile_run = 0;                                                   \
1017   u32 partial_tile_offset;                                                    \
1018   u32 tile_run;                                                               \
1019   u32 i;                                                                      \
1020   render_scanline_dest_##alpha_op *dest_ptr =                                 \
1021    ((render_scanline_dest_##alpha_op *)scanline) + start;                     \
1022   /* Map base: bits 8-12 of bg_control select a 2KB step into vram. */        \
1023   u16 *map_base = (u16 *)(vram + ((bg_control >> 8) & 0x1F) * (1024 * 2));    \
1024   u16 *map_ptr, *second_ptr;                                                  \
1025   u8 *tile_ptr;                                                               \
1026   /* From here on, end is a pixel count rather than a position. */            \
1027   end -= start;                                                               \
1028   /* Pick the map row; tall maps keep rows >= 256 in a later block. */        \
1029   if((map_size & 0x02) && (vertical_offset >= 256))                           \
1030   {                                                                           \
1031     map_base += ((map_width / 8) * 32) +                                      \
1032      (((vertical_offset - 256) / 8) * 32);                                    \
1033   }                                                                           \
1034   else                                                                        \
1035   {                                                                           \
1036     map_base += (((vertical_offset % 256) / 8) * 32);                         \
1037   }                                                                           \
1038   /* Wide maps wrap between two 32x32 blocks, narrow ones onto themselves. */ \
1039   if(map_size & 0x01)                                                         \
1040   {                                                                           \
1041     if(horizontal_offset >= 256)                                              \
1042     {                                                                         \
1043       horizontal_offset -= 256;                                               \
1044       map_ptr = map_base + (32 * 32) + (horizontal_offset / 8);               \
1045       second_ptr = map_base;                                                  \
1046     }                                                                         \
1047     else                                                                      \
1048     {                                                                         \
1049       map_ptr = map_base + (horizontal_offset / 8);                           \
1050       second_ptr = map_base + (32 * 32);                                      \
1051     }                                                                         \
1052   }                                                                           \
1053   else                                                                        \
1054   {                                                                           \
1055     horizontal_offset %= 256;                                                 \
1056     map_ptr = map_base + (horizontal_offset / 8);                             \
1057     second_ptr = map_base;                                                    \
1058   }                                                                           \
1059   /* Bit 7 of bg_control selects the 8bpp tile renderer over 4bpp. */         \
1060   if(bg_control & 0x80)                                                       \
1061   {                                                                           \
1062     tile_render(8bpp, combine_op, alpha_op);                                  \
1063   }                                                                           \
1064   else                                                                        \
1065   {                                                                           \
1066     tile_render(4bpp, combine_op, alpha_op);                                  \
1067   }                                                                           \
1068 }                                                                             \
1069
1070 render_scanline_text_builder(base, normal);  // one function per mode pair
1071 render_scanline_text_builder(transparent, normal);
1072 render_scanline_text_builder(base, color16);
1073 render_scanline_text_builder(transparent, color16);
1074 render_scanline_text_builder(base, color32);
1075 render_scanline_text_builder(transparent, color32);
1076 render_scanline_text_builder(base, alpha);
1077 render_scanline_text_builder(transparent, alpha);
1078
1079 // Two-entry affine reference coordinates; presumably one per affine BG - confirm.
1080 s32 affine_reference_x[2];
1081 s32 affine_reference_y[2];
1082 // Affine background pixel: palette_ram_converted[0] (normal) or bg_combine.
1083 #define affine_render_bg_pixel_normal()                                       \
1084   current_pixel = palette_ram_converted[0]                                    \
1085
1086 #define affine_render_bg_pixel_alpha()                                        \
1087   current_pixel = bg_combine                                                  \
1088
1089 #define affine_render_bg_pixel_color16()                                      \
1090   affine_render_bg_pixel_alpha()                                              \
1091
1092 #define affine_render_bg_pixel_color32()                                      \
1093   affine_render_bg_pixel_alpha()                                              \
1094
1095 #define affine_render_bg_pixel_base(alpha_op)                                 \
1096   affine_render_bg_pixel_##alpha_op()                                         \
1097
1098 #define affine_render_bg_pixel_transparent(alpha_op)  /* empty expansion */   \
1099
1100 #define affine_render_bg_pixel_copy(alpha_op)         /* empty expansion */   \
1101
1102 #define affine_render_bg_base(alpha_op)     /* only base mode writes it */    \
1103   dest_ptr[0] = current_pixel
1104
1105 #define affine_render_bg_transparent(alpha_op)        /* empty expansion */   \
1106
1107 #define affine_render_bg_copy(alpha_op)               /* empty expansion */   \
1108
1109 #define affine_render_bg_remainder_base(alpha_op)                             \
1110   affine_render_bg_pixel_##alpha_op();                                        \
1111   for(; i < end; i++)                                                         \
1112   {                                                                           \
1113     affine_render_bg_base(alpha_op);                                          \
1114     advance_dest_ptr_base(1);                                                 \
1115   }                                                                           \
1116
1117 #define affine_render_bg_remainder_transparent(alpha_op)                      \
1118
1119 #define affine_render_bg_remainder_copy(alpha_op)                             \
1120
// Step to the next output pixel: advance the source coordinates by (dx, dy)
// and move the destination forward by one entry using the combine_op
// specific advance_dest_ptr_<combine_op> macro (defined outside this section).
1121 #define affine_render_next(combine_op)                                        \
1122   source_x += dx;                                                             \
1123   source_y += dy;                                                             \
1124   advance_dest_ptr_##combine_op(1)                                            \
1125
// Fold the (constant) row into the base pointers for the scale renderers:
// tile_base is advanced by the row inside the 8x8 tile (8 entries per row)
// and map_base by the tile row, (pixel_y / 8) << map_pitch. After this,
// affine_render_scale_pixel only needs pixel_x.
1126 #define affine_render_scale_offset()                                          \
1127   tile_base += ((pixel_y % 8) * 8);                                           \
1128   map_base += (pixel_y / 8) << map_pitch                                      \
1129
// Draw one map pixel for the non-rotated (scale) affine renderers.
//
// Expects pixel_x to already be a valid column, and tile_base/map_base to
// have been offset to the current row by affine_render_scale_offset().
//
// The map entry is only re-read when pixel_x moves into a different tile
// column; last_map_offset must start out as a value no column index can
// take, so that the first pixel always loads tile_ptr. (The previous version
// also recomputed tile_ptr unconditionally after the cached lookup, which
// made the cache pointless without changing the result.)
//
// After the pixel is drawn through tile_8bpp_draw_<combine_op>, the source
// coordinates and destination pointer are advanced by affine_render_next().
#define affine_render_scale_pixel(combine_op, alpha_op)                       \
  map_offset = (pixel_x / 8);                                                 \
  if(map_offset != last_map_offset)                                           \
  {                                                                           \
    tile_ptr = tile_base + (map_base[map_offset] * 64);                       \
    last_map_offset = map_offset;                                             \
  }                                                                           \
  current_pixel = tile_ptr[(pixel_x % 8)];                                    \
  tile_8bpp_draw_##combine_op(0, none, 0, alpha_op);                          \
  affine_render_next(combine_op)
1141
// Render a span of a non-wrapping affine background when there is no
// vertical step (the builder selects this when dy == 0 and the wrap bit is
// clear). pixel_y is computed once for the whole span. If it lies inside the
// map, the first loop emits fill pixels until pixel_x enters the map and the
// second loop draws map pixels until pixel_x leaves it. Whatever remains of
// the span, including the entire span when pixel_y is out of range, is
// handed to affine_render_bg_remainder_<combine_op>.
// NOTE(review): the "u32 i" declared here shadows the "i" declared by
// render_scanline_affine_builder.
1142 #define affine_render_scale(combine_op, alpha_op)                             \
1143 {                                                                             \
1144   pixel_y = source_y >> 8;                                                    \
1145   u32 i = 0;                                                                  \
1146   affine_render_bg_pixel_##combine_op(alpha_op);                              \
1147   if((u32)pixel_y < (u32)width_height)                                        \
1148   {                                                                           \
1149     affine_render_scale_offset();                                             \
1150     for(; i < end; i++)                                                       \
1151     {                                                                         \
1152       pixel_x = source_x >> 8;                                                \
1153                                                                               \
1154       if((u32)pixel_x < (u32)width_height)                                    \
1155       {                                                                       \
1156         break;                                                                \
1157       }                                                                       \
1158                                                                               \
1159       affine_render_bg_##combine_op(alpha_op);                                \
1160       affine_render_next(combine_op);                                         \
1161     }                                                                         \
1162                                                                               \
1163     for(; i < end; i++)                                                       \
1164     {                                                                         \
1165       pixel_x = source_x >> 8;                                                \
1166                                                                               \
1167       if((u32)pixel_x >= (u32)width_height)                                   \
1168         break;                                                                \
1169                                                                               \
1170       affine_render_scale_pixel(combine_op, alpha_op);                        \
1171     }                                                                         \
1172   }                                                                           \
1173   affine_render_bg_remainder_##combine_op(alpha_op);                          \
1174 }                                                                             \
1175
// Wrapping counterpart of affine_render_scale: both coordinates are masked
// with (width_height - 1) instead of being range checked, so every pixel of
// the span is drawn from the map and no fill pixels are emitted.
// NOTE(review): because pixel_y has just been masked with wrap_mask, the
// range check against width_height below is always true.
1176 #define affine_render_scale_wrap(combine_op, alpha_op)                        \
1177 {                                                                             \
1178   u32 wrap_mask = width_height - 1;                                           \
1179   pixel_y = (source_y >> 8) & wrap_mask;                                      \
1180   if((u32)pixel_y < (u32)width_height)                                        \
1181   {                                                                           \
1182     affine_render_scale_offset();                                             \
1183     for(i = 0; i < end; i++)                                                  \
1184     {                                                                         \
1185       pixel_x = (source_x >> 8) & wrap_mask;                                  \
1186       affine_render_scale_pixel(combine_op, alpha_op);                        \
1187     }                                                                         \
1188   }                                                                           \
1189 }                                                                             \
1190
1191
// Draw one map pixel when both coordinates change per step. The map offset
// combines the tile column (pixel_x / 8) with the tile row shifted by
// map_pitch; the tile pointer is only reloaded when that offset differs from
// last_map_offset. The pixel is then fetched from the 8x8 tile, drawn via
// tile_8bpp_draw_<combine_op>, and the source/destination are advanced.
1192 #define affine_render_rotate_pixel(combine_op, alpha_op)                      \
1193   map_offset = (pixel_x / 8) + ((pixel_y / 8) << map_pitch);                  \
1194   if(map_offset != last_map_offset)                                           \
1195   {                                                                           \
1196     tile_ptr = tile_base + (map_base[map_offset] * 64);                       \
1197     last_map_offset = map_offset;                                             \
1198   }                                                                           \
1199                                                                               \
1200   current_pixel = tile_ptr[(pixel_x % 8) + ((pixel_y % 8) * 8)];              \
1201   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op);                          \
1202   affine_render_next(combine_op)                                              \
1203
// Render a span of a non-wrapping affine background when dy != 0. The first
// loop emits fill pixels until both coordinates are inside the map. The
// second loop draws map pixels; as soon as either coordinate leaves the map
// it hands the rest of the span to affine_render_bg_remainder_<combine_op>
// and stops.
1204 #define affine_render_rotate(combine_op, alpha_op)                            \
1205 {                                                                             \
1206   affine_render_bg_pixel_##combine_op(alpha_op);                              \
1207   for(i = 0; i < end; i++)                                                    \
1208   {                                                                           \
1209     pixel_x = source_x >> 8;                                                  \
1210     pixel_y = source_y >> 8;                                                  \
1211                                                                               \
1212     if(((u32)pixel_x < (u32)width_height) &&                                  \
1213      ((u32)pixel_y < (u32)width_height))                                      \
1214     {                                                                         \
1215       break;                                                                  \
1216     }                                                                         \
1217     affine_render_bg_##combine_op(alpha_op);                                  \
1218     affine_render_next(combine_op);                                           \
1219   }                                                                           \
1220                                                                               \
1221   for(; i < end; i++)                                                         \
1222   {                                                                           \
1223     pixel_x = source_x >> 8;                                                  \
1224     pixel_y = source_y >> 8;                                                  \
1225                                                                               \
1226     if(((u32)pixel_x >= (u32)width_height) ||                                 \
1227      ((u32)pixel_y >= (u32)width_height))                                     \
1228     {                                                                         \
1229       affine_render_bg_remainder_##combine_op(alpha_op);                      \
1230       break;                                                                  \
1231     }                                                                         \
1232                                                                               \
1233     affine_render_rotate_pixel(combine_op, alpha_op);                         \
1234   }                                                                           \
1235 }                                                                             \
1236
// Wrapping counterpart of affine_render_rotate: both coordinates are masked
// with (width_height - 1) on every step, so every pixel of the span is drawn
// from the map and no range checks or fill pixels are needed.
1237 #define affine_render_rotate_wrap(combine_op, alpha_op)                       \
1238 {                                                                             \
1239   u32 wrap_mask = width_height - 1;                                           \
1240   for(i = 0; i < end; i++)                                                    \
1241   {                                                                           \
1242     pixel_x = (source_x >> 8) & wrap_mask;                                    \
1243     pixel_y = (source_y >> 8) & wrap_mask;                                    \
1244                                                                               \
1245     affine_render_rotate_pixel(combine_op, alpha_op);                         \
1246   }                                                                           \
1247 }                                                                             \
1248
1249
1250 // Build affine background renderers.
// Each expansion defines
//   render_scanline_affine_<combine_op>_<alpha_op>(layer, start, end, scanline)
// which renders entries [start, end) of scanline for the given layer.
//  - layer is used as (layer - 2) to pick the register block and the
//    affine_reference_x/y entry.
//  - dx and dy are read as signed 16-bit values from REG_BG2PA / REG_BG2PC
//    (plus the per-layer offset); the source coordinates start at the
//    reference point advanced by start steps.
//  - Bits 14-15 of the layer's control value give map_size; the map is
//    width_height = 128 << map_size pixels square and map_pitch is the shift
//    that converts a tile row into a map offset.
//  - Bit 13 of the control value selects the *_wrap renderers, and dy != 0
//    selects the rotate renderers over the scale ones.
// NOTE(review): vcount is not referenced by any macro visible in this
// section; it may be used by macros defined elsewhere.
1251
1252 #define render_scanline_affine_builder(combine_op, alpha_op)                  \
1253 void render_scanline_affine_##combine_op##_##alpha_op(u32 layer,              \
1254  u32 start, u32 end, void *scanline)                                          \
1255 {                                                                             \
1256   render_scanline_extra_variables_##combine_op##_##alpha_op(affine);          \
1257   u32 bg_control = io_registers[REG_BG0CNT + layer];                          \
1258   u32 current_pixel;                                                          \
1259   s32 source_x, source_y;                                                     \
1260   u32 vcount = io_registers[REG_VCOUNT];                                      \
1261   u32 pixel_x, pixel_y;                                                       \
1262   u32 layer_offset = (layer - 2) * 8;                                         \
1263   s32 dx, dy;                                                                 \
1264   u32 map_size = (bg_control >> 14) & 0x03;                                   \
1265   u32 width_height = 1 << (7 + map_size);                                     \
1266   u32 map_pitch = map_size + 4;                                               \
1267   u8 *map_base = vram + (((bg_control >> 8) & 0x1F) * (1024 * 2));            \
1268   u8 *tile_base = vram + (((bg_control >> 2) & 0x03) * (1024 * 16));          \
1269   u8 *tile_ptr;                                                               \
1270   u32 map_offset, last_map_offset = (u32)-1;                                  \
1271   u32 i;                                                                      \
1272   render_scanline_dest_##alpha_op *dest_ptr =                                 \
1273    ((render_scanline_dest_##alpha_op *)scanline) + start;                     \
1274                                                                               \
1275   dx = (s16)io_registers[REG_BG2PA + layer_offset];                           \
1276   dy = (s16)io_registers[REG_BG2PC + layer_offset];                           \
1277   source_x = affine_reference_x[layer - 2] + (start * dx);                    \
1278   source_y = affine_reference_y[layer - 2] + (start * dy);                    \
1279                                                                               \
1280   end -= start;                                                               \
1281                                                                               \
1282   switch(((bg_control >> 12) & 0x02) | (dy != 0))                             \
1283   {                                                                           \
1284     case 0x00:                                                                \
1285       affine_render_scale(combine_op, alpha_op);                              \
1286       break;                                                                  \
1287                                                                               \
1288     case 0x01:                                                                \
1289       affine_render_rotate(combine_op, alpha_op);                             \
1290       break;                                                                  \
1291                                                                               \
1292     case 0x02:                                                                \
1293       affine_render_scale_wrap(combine_op, alpha_op);                         \
1294       break;                                                                  \
1295                                                                               \
1296     case 0x03:                                                                \
1297       affine_render_rotate_wrap(combine_op, alpha_op);                        \
1298       break;                                                                  \
1299   }                                                                           \
1300 }                                                                             \
1301
// Instantiate the affine scanline renderers for every combine_op
// (base, transparent) and alpha_op (normal, color16, color32, alpha) pairing.
// These are the functions named by tile_layer_render_functions(affine).
1302 render_scanline_affine_builder(base, normal);
1303 render_scanline_affine_builder(transparent, normal);
1304 render_scanline_affine_builder(base, color16);
1305 render_scanline_affine_builder(transparent, color16);
1306 render_scanline_affine_builder(base, color32);
1307 render_scanline_affine_builder(transparent, color32);
1308 render_scanline_affine_builder(base, alpha);
1309 render_scanline_affine_builder(transparent, alpha);
1310
1311
// Per-mode pixel output for the bitmap renderers. current_pixel holds the
// value just read from src_ptr.
// mode3: pass the value through convert_palette() and store it at dest_ptr.
1312 #define bitmap_render_pixel_mode3(alpha_op)                                   \
1313   convert_palette(current_pixel);                                             \
1314   *dest_ptr = current_pixel                                                   \
1315
// mode4: delegate to tile_expand_base_<alpha_op> (defined outside this
// section).
1316 #define bitmap_render_pixel_mode4(alpha_op)                                   \
1317   tile_expand_base_##alpha_op(0)                                              \
1318
// mode5: same handling as mode3.
1319 #define bitmap_render_pixel_mode5(alpha_op)                                   \
1320   bitmap_render_pixel_mode3(alpha_op)                                         \
1321
1322
// Render a span of a bitmap background when there is no vertical step (the
// builder uses this when dy == 0).
//
// type selects bitmap_render_pixel_<type>; width/height are the bitmap
// dimensions in pixels. The macro expects these locals in scope: source_x,
// source_y, dx, pixel_x, pixel_y (signed), end, i (u32), current_pixel,
// src_ptr and dest_ptr. It modifies src_ptr, dest_ptr, end and source_x.
//
// Nothing is drawn when the row (source_y >> 8) is outside the bitmap.
//
// dx == 0x100 (1:1) fast path: the span is clipped against the bitmap and
// copied with plain pointer increments. Clipping on the left can push "end"
// below zero when the whole span is left of the bitmap, and end is unsigned,
// so the right-edge clamp and the loop condition both compare as signed.
// (Previously the clamp compared unsigned, turned the wrapped-around end
// into "width" and wrote a full row past the intended span.)
//
// Any other dx: skip destination pixels until pixel_x enters the bitmap,
// then draw until it leaves.
#define bitmap_render_scale(type, alpha_op, width, height)                    \
  pixel_y = (source_y >> 8);                                                  \
  if((u32)pixel_y < (u32)(height))                                            \
  {                                                                           \
    pixel_x = (source_x >> 8);                                                \
    src_ptr += (pixel_y * (width));                                           \
    if(dx == 0x100)                                                           \
    {                                                                         \
      if(pixel_x < 0)                                                         \
      {                                                                       \
        end += pixel_x;                                                       \
        dest_ptr -= pixel_x;                                                  \
        pixel_x = 0;                                                          \
      }                                                                       \
      else if(pixel_x > 0)                                                    \
      {                                                                       \
        src_ptr += pixel_x;                                                   \
      }                                                                       \
                                                                              \
      if((pixel_x + (s32)end) >= (s32)(width))                                \
        end = ((width) - pixel_x);                                            \
                                                                              \
      for(i = 0; (s32)i < (s32)end; i++)                                      \
      {                                                                       \
        current_pixel = *src_ptr;                                             \
        bitmap_render_pixel_##type(alpha_op);                                 \
        src_ptr++;                                                            \
        dest_ptr++;                                                           \
      }                                                                       \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      for(i = 0; i < end; i++)                                                \
      {                                                                       \
        pixel_x = (source_x >> 8);                                            \
                                                                              \
        if((u32)pixel_x < (u32)(width))                                       \
          break;                                                              \
                                                                              \
        source_x += dx;                                                       \
        dest_ptr++;                                                           \
      }                                                                       \
                                                                              \
      for(; i < end; i++)                                                     \
      {                                                                       \
        pixel_x = (source_x >> 8);                                            \
                                                                              \
        if((u32)pixel_x >= (u32)(width))                                      \
          break;                                                              \
                                                                              \
        current_pixel = src_ptr[pixel_x];                                     \
        bitmap_render_pixel_##type(alpha_op);                                 \
                                                                              \
        source_x += dx;                                                       \
        dest_ptr++;                                                           \
      }                                                                       \
    }                                                                         \
  }
1386
// Render a span of a bitmap background when dy != 0. The first loop steps
// the source coordinates and destination pointer, without drawing, until
// both coordinates are inside the width x height bitmap. The second loop
// draws pixels through bitmap_render_pixel_<type> and stops at the first
// coordinate that falls outside; the rest of the span is left untouched.
1387 #define bitmap_render_rotate(type, alpha_op, width, height)                   \
1388   for(i = 0; i < end; i++)                                                    \
1389   {                                                                           \
1390     pixel_x = source_x >> 8;                                                  \
1391     pixel_y = source_y >> 8;                                                  \
1392                                                                               \
1393     if(((u32)pixel_x < (u32)width) && ((u32)pixel_y < (u32)height))           \
1394       break;                                                                  \
1395                                                                               \
1396     source_x += dx;                                                           \
1397     source_y += dy;                                                           \
1398     dest_ptr++;                                                               \
1399   }                                                                           \
1400                                                                               \
1401   for(; i < end; i++)                                                         \
1402   {                                                                           \
1403     pixel_x = (source_x >> 8);                                                \
1404     pixel_y = (source_y >> 8);                                                \
1405                                                                               \
1406     if(((u32)pixel_x >= (u32)width) || ((u32)pixel_y >= (u32)height))         \
1407       break;                                                                  \
1408                                                                               \
1409     current_pixel = src_ptr[pixel_x + (pixel_y * width)];                     \
1410      bitmap_render_pixel_##type(alpha_op);                                    \
1411                                                                               \
1412     source_x += dx;                                                           \
1413     source_y += dy;                                                           \
1414     dest_ptr++;                                                               \
1415   }                                                                           \
1416
1417
// Source pointer setup for the 16-bit bitmap modes.
// mode3: always reads from the start of vram.
1418 #define render_scanline_vram_setup_mode3()                                    \
1419   u16 *src_ptr = (u16 *)vram                                                  \
1420
// mode5: reads from vram + 0xA000 when bit 4 (0x10) of REG_DISPCNT is set,
// otherwise from the start of vram.
1421 #define render_scanline_vram_setup_mode5()                                    \
1422   u16 *src_ptr;                                                               \
1423   if(io_registers[REG_DISPCNT] & 0x10)                                        \
1424     src_ptr = (u16 *)(vram + 0xA000);                                         \
1425   else                                                                        \
1426     src_ptr = (u16 *)vram                                                     \
1427
1428
// Source pointer setup for mode4, which reads 8-bit entries. As with mode5,
// bit 4 (0x10) of REG_DISPCNT selects vram + 0xA000 over the start of vram.
// The two variants differ only in the extra local they declare: with
// RENDER_COLOR16_NORMAL defined it is a constant pixel_combine of 0,
// otherwise it is a palette pointer to palette_ram_converted. Presumably
// these are consumed by tile_expand_base_<alpha_op> - confirm against its
// definition.
1429 #ifdef RENDER_COLOR16_NORMAL
1430
1431 #define render_scanline_vram_setup_mode4()                                    \
1432   const u32 pixel_combine = 0;                                                \
1433   u8 *src_ptr;                                                                \
1434   if(io_registers[REG_DISPCNT] & 0x10)                                        \
1435     src_ptr = vram + 0xA000;                                                  \
1436   else                                                                        \
1437     src_ptr = vram                                                            \
1438
1439
1440 #else
1441
1442 #define render_scanline_vram_setup_mode4()                                    \
1443   u16 *palette = palette_ram_converted;                                       \
1444   u8 *src_ptr;                                                                \
1445   if(io_registers[REG_DISPCNT] & 0x10)                                        \
1446     src_ptr = vram + 0xA000;                                                  \
1447   else                                                                        \
1448     src_ptr = vram                                                            \
1449
1450 #endif
1451
1452
1453
1454 // Build bitmap scanline rendering functions.
// Each expansion defines
//   render_scanline_bitmap_<type>_<alpha_op>(start, end, scanline)
// which renders entries [start, end) of scanline from a width x height
// bitmap. dx and dy are read as signed 16-bit values from REG_BG2PA and
// REG_BG2PC, and the source coordinates start at affine_reference_x/y[0]
// advanced by start steps. dy == 0 uses bitmap_render_scale, anything else
// uses bitmap_render_rotate.
// NOTE(review): bg_control and vcount are not referenced by any macro
// visible in this section.
1455
1456 #define render_scanline_bitmap_builder(type, alpha_op, width, height)         \
1457 void render_scanline_bitmap_##type##_##alpha_op(u32 start, u32 end,           \
1458  void *scanline)                                                              \
1459 {                                                                             \
1460   u32 bg_control = io_registers[REG_BG2CNT];                                  \
1461   u32 current_pixel;                                                          \
1462   s32 source_x, source_y;                                                     \
1463   u32 vcount = io_registers[REG_VCOUNT];                                      \
1464   s32 pixel_x, pixel_y;                                                       \
1465                                                                               \
1466   s32 dx = (s16)io_registers[REG_BG2PA];                                      \
1467   s32 dy = (s16)io_registers[REG_BG2PC];                                      \
1468                                                                               \
1469   u32 i;                                                                      \
1470                                                                               \
1471   render_scanline_dest_##alpha_op *dest_ptr =                                 \
1472    ((render_scanline_dest_##alpha_op *)scanline) + start;                     \
1473   render_scanline_vram_setup_##type();                                        \
1474                                                                               \
1475   end -= start;                                                               \
1476                                                                               \
1477   source_x = affine_reference_x[0] + (start * dx);                            \
1478   source_y = affine_reference_y[0] + (start * dy);                            \
1479                                                                               \
1480   if(dy == 0)                                                                 \
1481   {                                                                           \
1482     bitmap_render_scale(type, alpha_op, width, height);                       \
1483   }                                                                           \
1484   else                                                                        \
1485   {                                                                           \
1486     bitmap_render_rotate(type, alpha_op, width, height);                      \
1487   }                                                                           \
1488 }                                                                             \
1489
// Instantiate the bitmap renderers (normal alpha_op only): mode3 and mode4
// use a 240x160 bitmap, mode5 a 160x128 one.
1490 render_scanline_bitmap_builder(mode3, normal, 240, 160);
1491 render_scanline_bitmap_builder(mode4, normal, 240, 160);
1492 render_scanline_bitmap_builder(mode5, normal, 160, 128);
1493
1494
1495 // Fill in the renderers for a layer based on the mode type.
// Expands to a brace-enclosed initializer naming the eight
// render_scanline_<type>_* functions, ordered as base then transparent for
// each of normal, alpha, color16 and color32.
1496
1497 #define tile_layer_render_functions(type)                                     \
1498 {                                                                             \
1499   render_scanline_##type##_base_normal,                                       \
1500   render_scanline_##type##_transparent_normal,                                \
1501   render_scanline_##type##_base_alpha,                                        \
1502   render_scanline_##type##_transparent_alpha,                                 \
1503   render_scanline_##type##_base_color16,                                      \
1504   render_scanline_##type##_transparent_color16,                               \
1505   render_scanline_##type##_base_color32,                                      \
1506   render_scanline_##type##_transparent_color32                                \
1507 }                                                                             \
1508
1509
1510 // Use if a layer is unsupported for that mode.
// NOTE(review): this lists four NULLs while tile_layer_render_functions
// lists eight entries; if both initialize the same struct, the remaining
// members are implicitly zero-initialized. The macro is not used within
// this section.
1511
1512 #define tile_layer_render_null()                                              \
1513 {                                                                             \
1514   NULL, NULL, NULL, NULL                                                      \
1515 }                                                                             \
1516
// Initializer for one bitmap_layer_render_struct entry: the single
// render_scanline_bitmap_<type>_normal function.
1517 #define bitmap_layer_render_functions(type)                                   \
1518 {                                                                             \
1519   render_scanline_bitmap_##type##_normal                                      \
1520 }                                                                             \
1521
1522 // Structs containing functions to render the layers for each mode, for
1523 // each render type.
// The first index is the low three bits of dispcnt (see
// render_scanline_layer_functions_tile); the second index has four entries,
// presumably one per background layer - confirm against the caller.
// Row 0 uses text renderers throughout, row 1 uses affine for entry 2 only,
// and row 2 uses affine for entries 2 and 3.
1524 tile_layer_render_struct tile_mode_renderers[3][4] =
1525 {
1526   {
1527     tile_layer_render_functions(text), tile_layer_render_functions(text),
1528     tile_layer_render_functions(text), tile_layer_render_functions(text)
1529   },
1530   {
1531     tile_layer_render_functions(text), tile_layer_render_functions(text),
1532     tile_layer_render_functions(affine), tile_layer_render_functions(text)
1533   },
1534   {
1535     tile_layer_render_functions(text), tile_layer_render_functions(text),
1536     tile_layer_render_functions(affine), tile_layer_render_functions(affine)
1537   }
1538 };
1539
// Bitmap renderers for mode3, mode4 and mode5, in that order. The lookup in
// render_scanline_layer_functions_bitmap indexes this with
// (dispcnt & 0x07) - 3.
1540 bitmap_layer_render_struct bitmap_mode_renderers[3] =
1541 {
1542   bitmap_layer_render_functions(mode3),
1543   bitmap_layer_render_functions(mode4),
1544   bitmap_layer_render_functions(mode5)
1545 };
1546
1547
// Declares a local `layer_renderers` pointing at the tile_mode_renderers row
// selected by the low three bits of `dispcnt`, which must already be in
// scope. No trailing semicolon is supplied; the caller adds it.
// NOTE(review): tile_mode_renderers only has rows 0-2 and the index is not
// range checked here - callers presumably only expand this in tile modes.
1548 #define render_scanline_layer_functions_tile()                                \
1549   tile_layer_render_struct *layer_renderers =                                 \
1550    tile_mode_renderers[dispcnt & 0x07]                                        \
1551
// Declares a local `layer_renderers` pointing at the bitmap_mode_renderers
// entry for the current mode: (low three bits of `dispcnt`) - 3. `dispcnt`
// must already be in scope and the caller supplies the trailing semicolon.
// NOTE(review): the table has three entries and the index is not range
// checked - callers presumably only expand this in modes 3-5.
1552 #define render_scanline_layer_functions_bitmap()                              \
1553   bitmap_layer_render_struct *layer_renderers =                               \
1554    bitmap_mode_renderers + ((dispcnt & 0x07) - 3)                             \
1555
1556
1557 // Adjust a flipped obj's starting position
// These macros are pasted as the last term of the tile_ptr expression built by
// obj_tile_offset_1D / obj_tile_offset_2D. The noflip variant expands to
// nothing, so an unflipped obj starts at its first tile.
1558
1559 #define obj_tile_offset_noflip(color_depth)                                   \
1560
// Flip variant of the term appended by obj_tile_offset_1D / 2D: adds
// ((obj_width - 8) / 8) tiles of tile_size_<color_depth> each, i.e. moves the
// start to the last 8 pixel wide tile of the row. Begins with a `+` because it
// continues the caller's expression.
1561 #define obj_tile_offset_flip(color_depth)                                     \
1562   + (tile_size_##color_depth * ((obj_width - 8) / 8))                         \
1563
1564
1565 // Adjust the obj's starting point if it goes too far off the left edge of
1566 // the screen.
1567
// Unflipped case: advances tile_ptr by the number of whole tiles contained in
// partial_tile_offset (partial_tile_offset / 8), each tile_size_<color_depth>
// apart. obj_render sets partial_tile_offset to start - obj_x before
// expanding this.
1568 #define obj_tile_right_offset_noflip(color_depth)                             \
1569   tile_ptr += (partial_tile_offset / 8) * tile_size_##color_depth             \
1570
// Flipped case: same whole-tile distance as the noflip variant, but tile_ptr
// moves backwards, since a flipped obj starts at its last tile (see
// obj_tile_offset_flip).
1571 #define obj_tile_right_offset_flip(color_depth)                               \
1572   tile_ptr -= (partial_tile_offset / 8) * tile_size_##color_depth             \
1573
1574 // Get the current row offset into an obj in 1D map space
//
// Sets tile_ptr to tile_base plus:
//  - the tile number (low 10 bits of obj_attribute_2) times 32,
//  - one full row of tiles, (obj_width / 8) * tile_size_<color_depth>, for
//    every 8 lines of vertical_offset,
//  - tile_width_<color_depth> for each remaining line inside the tile,
//  - the flip adjustment from obj_tile_offset_<flip_op>.
1575
1576 #define obj_tile_offset_1D(color_depth, flip_op)                              \
1577   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1578    + ((vertical_offset / 8) * (obj_width / 8) * tile_size_##color_depth)      \
1579    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1580    obj_tile_offset_##flip_op(color_depth)                                     \
1581
1582 // Get the current row offset into an obj in 2D map space
//
// Same as obj_tile_offset_1D except that consecutive rows of tiles are a
// fixed 1024 apart, independent of obj_width and colour depth.
1583
1584 #define obj_tile_offset_2D(color_depth, flip_op)                              \
1585   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1586    + ((vertical_offset / 8) * 1024)                                           \
1587    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1588    obj_tile_offset_##flip_op(color_depth)                                     \
1589
1590
1591 // Get the palette for 4bpp obj.
//
// Takes bits 12-15 of obj_attribute_2 and leaves them in bits 4-7 of
// current_palette, i.e. the palette number already multiplied by 16.
1592
1593 #define obj_get_palette_4bpp()                                                \
1594   current_palette = (obj_attribute_2 >> 8) & 0xF0                             \
1595
// 8bpp counterpart of obj_get_palette_4bpp: expands to nothing, so
// current_palette is left untouched. It exists so that
// obj_get_palette_##color_depth() resolves for both depths.
1596 #define obj_get_palette_8bpp()                                                \
1597
1598
1599 // Render the current row of an obj.
//
// Parameters are pasted into helper macro names: color_depth is 4bpp or 8bpp,
// map_space is 1D or 2D, flip_op is noflip or flip. combine_op and alpha_op
// are forwarded untouched to the partial_tile_*_obj / multiple_tile_obj
// helpers, which are defined outside this part of the file.
//
// Expects these names in the expansion scope: obj_x, obj_width, start, end,
// scanline, dest_ptr, pixel_run, tile_run, partial_tile_run and
// partial_tile_offset, plus whatever the palette / offset macros use. The
// body contains `continue`, so it must be expanded inside a loop.
//
// Three clipping cases are handled in order:
//  - The obj starts left of `start`: drawing begins at scanline + start. The
//    skipped distance (start - obj_x) moves tile_ptr by whole tiles through
//    obj_tile_right_offset_<flip_op>, and its remainder modulo 8 becomes a
//    leading partial tile. If the obj also reaches `end`, the run is limited
//    to end - start; a run shorter than the leading partial tile is drawn
//    with partial_tile_mid_obj and the macro then issues `continue`.
//    Otherwise whole tiles follow, plus a trailing partial tile if needed.
//  - The obj starts at or after `start` but reaches `end`: end - obj_x pixels
//    are drawn as whole tiles plus an optional trailing partial tile.
//  - The obj fits completely: obj_width / 8 whole tiles are drawn.
1600
1601 #define obj_render(combine_op, color_depth, alpha_op, map_space, flip_op)     \
1602 {                                                                             \
1603   obj_get_palette_##color_depth();                                            \
1604   obj_tile_offset_##map_space(color_depth, flip_op);                          \
1605                                                                               \
1606   if(obj_x < (s32)start)                                                      \
1607   {                                                                           \
1608     dest_ptr = scanline + start;                                              \
1609     pixel_run = obj_width - (start - obj_x);                                  \
1610     if((s32)pixel_run > 0)                                                    \
1611     {                                                                         \
1612       if((obj_x + obj_width) >= end)                                          \
1613       {                                                                       \
1614         pixel_run = end - start;                                              \
1615         partial_tile_offset = start - obj_x;                                  \
1616         obj_tile_right_offset_##flip_op(color_depth);                         \
1617         partial_tile_offset %= 8;                                             \
1618                                                                               \
1619         if(partial_tile_offset)                                               \
1620         {                                                                     \
1621           partial_tile_run = 8 - partial_tile_offset;                         \
1622           if((s32)pixel_run < (s32)partial_tile_run)                          \
1623           {                                                                   \
1624             if((s32)pixel_run > 0)                                            \
1625             {                                                                 \
1626               partial_tile_run = pixel_run;                                   \
1627               partial_tile_mid_obj(combine_op, color_depth, alpha_op,         \
1628                flip_op);                                                      \
1629             }                                                                 \
1630             continue;                                                         \
1631           }                                                                   \
1632           else                                                                \
1633           {                                                                   \
1634             pixel_run -= partial_tile_run;                                    \
1635             partial_tile_right_obj(combine_op, color_depth, alpha_op,         \
1636              flip_op);                                                        \
1637           }                                                                   \
1638         }                                                                     \
1639         tile_run = pixel_run / 8;                                             \
1640         multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);        \
1641         partial_tile_run = pixel_run % 8;                                     \
1642         if(partial_tile_run)                                                  \
1643         {                                                                     \
1644           partial_tile_left_obj(combine_op, color_depth, alpha_op,            \
1645            flip_op);                                                          \
1646         }                                                                     \
1647       }                                                                       \
1648       else                                                                    \
1649       {                                                                       \
1650         partial_tile_offset = start - obj_x;                                  \
1651         obj_tile_right_offset_##flip_op(color_depth);                         \
1652         partial_tile_offset %= 8;                                             \
1653         if(partial_tile_offset)                                               \
1654         {                                                                     \
1655           partial_tile_run = 8 - partial_tile_offset;                         \
1656           partial_tile_right_obj(combine_op, color_depth, alpha_op,           \
1657            flip_op);                                                          \
1658         }                                                                     \
1659         tile_run = pixel_run / 8;                                             \
1660         multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);        \
1661       }                                                                       \
1662     }                                                                         \
1663   }                                                                           \
1664   else                                                                        \
1665                                                                               \
1666   if((obj_x + obj_width) >= end)                                              \
1667   {                                                                           \
1668     pixel_run = end - obj_x;                                                  \
1669     if((s32)pixel_run > 0)                                                    \
1670     {                                                                         \
1671       dest_ptr = scanline + obj_x;                                            \
1672       tile_run = pixel_run / 8;                                               \
1673       multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);          \
1674       partial_tile_run = pixel_run % 8;                                       \
1675       if(partial_tile_run)                                                    \
1676       {                                                                       \
1677         partial_tile_left_obj(combine_op, color_depth, alpha_op, flip_op);    \
1678       }                                                                       \
1679     }                                                                         \
1680   }                                                                           \
1681   else                                                                        \
1682   {                                                                           \
1683     dest_ptr = scanline + obj_x;                                              \
1684     tile_run = obj_width / 8;                                                 \
1685     multiple_tile_obj(combine_op, color_depth, alpha_op, flip_op);            \
1686   }                                                                           \
1687 }                                                                             \
1688
// Get the current row offset into a scaled obj in 1D map space.
// Same layout as obj_tile_offset_1D, but without a flip term and with the row
// length taken from max_x (the unclipped obj width saved by
// obj_render_affine) instead of obj_width.
1689 #define obj_scale_offset_1D(color_depth)                                      \
1690   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1691    + ((vertical_offset / 8) * (max_x / 8) * tile_size_##color_depth)          \
1692    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1693
1694 // Get the current row offset into a scaled obj in 2D map space
//
// Same as obj_scale_offset_1D except that consecutive rows of tiles are a
// fixed 1024 apart.
1695
1696 #define obj_scale_offset_2D(color_depth)                                      \
1697   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32)                     \
1698    + ((vertical_offset / 8) * 1024)                                           \
1699    + ((vertical_offset % 8) * tile_width_##color_depth)                       \
1700
// Fetch and draw one 4bpp pixel of a scaled obj row.
// The source element is tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)],
// which holds two pixels: odd tile_x takes the high nibble, even tile_x the
// low nibble. The result lands in current_pixel and is handed to
// tile_4bpp_draw_<combine_op>. No trailing semicolon; the caller adds it.
1701 #define obj_render_scale_pixel_4bpp(combine_op, alpha_op)                     \
1702   if(tile_x & 0x01)                                                           \
1703   {                                                                           \
1704     current_pixel = tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)] >> 4;  \
1705   }                                                                           \
1706   else                                                                        \
1707   {                                                                           \
1708     current_pixel =                                                           \
1709      tile_ptr[tile_map_offset + ((tile_x >> 1) & 0x03)] & 0x0F;               \
1710   }                                                                           \
1711                                                                               \
1712   tile_4bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1713
1714
1715 #define obj_render_scale_pixel_8bpp(combine_op, alpha_op)                     \
1716   current_pixel = tile_ptr[tile_map_offset + (tile_x & 0x07)];                \
1717   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op);                          \
1718
// Render one row of an affine obj whose dy coefficient is zero (selected by
// obj_render_affine), so every pixel of the row reads the same source line.
//
// Uses the locals declared by obj_render_affine (source_x, source_y, dx, dmx,
// dmy, middle_x, y_delta, max_x, max_y, tile_x, tile_map_offset) and a loop
// counter `i` from the enclosing scope. Source coordinates carry 8 fractional
// bits; `>> 8` yields the integer texel position.
//
// Steps:
//  - source_y is advanced by y_delta * dmy; the row is skipped unless the
//    resulting line is below max_y (the unsigned compare also rejects
//    negative values).
//  - source_x is moved to the left edge of the drawn span.
//  - The first loop steps over destination pixels whose source x is outside
//    the obj; the second loop draws until the source x leaves the obj again
//    or obj_width pixels have been visited.
1719 #define obj_render_scale(combine_op, color_depth, alpha_op, map_space)        \
1720 {                                                                             \
1721   u32 vertical_offset;                                                        \
1722   source_y += (y_delta * dmy);                                                \
1723   vertical_offset = (source_y >> 8);                                          \
1724   if((u32)vertical_offset < (u32)max_y)                                       \
1725   {                                                                           \
1726     obj_scale_offset_##map_space(color_depth);                                \
1727     source_x += (y_delta * dmx) - (middle_x * dx);                            \
1728                                                                               \
1729     for(i = 0; i < obj_width; i++)                                            \
1730     {                                                                         \
1731       tile_x = (source_x >> 8);                                               \
1732                                                                               \
1733       if((u32)tile_x < (u32)max_x)                                            \
1734         break;                                                                \
1735                                                                               \
1736       source_x += dx;                                                         \
1737       advance_dest_ptr_##combine_op(1);                                       \
1738     }                                                                         \
1739                                                                               \
1740     for(; i < obj_width; i++)                                                 \
1741     {                                                                         \
1742       tile_x = (source_x >> 8);                                               \
1743                                                                               \
1744       if((u32)tile_x >= (u32)max_x)                                           \
1745         break;                                                                \
1746                                                                               \
1747       tile_map_offset = (tile_x >> 3) * tile_size_##color_depth;              \
1748       obj_render_scale_pixel_##color_depth(combine_op, alpha_op);             \
1749                                                                               \
1750       source_x += dx;                                                         \
1751       advance_dest_ptr_##combine_op(1);                                       \
1752     }                                                                         \
1753   }                                                                           \
1754 }                                                                             \
1755
1756
// Distance between consecutive rows of tiles for a rotated obj in 1D map
// space: one full row of the obj, (max_x / 8) tiles of
// tile_size_<color_depth> each. Stored in obj_tile_pitch.
1757 #define obj_rotate_offset_1D(color_depth)                                     \
1758   obj_tile_pitch = (max_x / 8) * tile_size_##color_depth                      \
1759
// Distance between consecutive rows of tiles for a rotated obj in 2D map
// space: a fixed 1024, whatever the obj size or colour depth (color_depth is
// unused).
1760 #define obj_rotate_offset_2D(color_depth)                                     \
1761   obj_tile_pitch = 1024                                                       \
1762
// Fetch and draw one 4bpp pixel of a rotated obj.
// The source element sits at tile_map_offset (start of the 8x8 tile) plus the
// column pair ((tile_x >> 1) & 0x03) plus the line inside the tile
// ((tile_y & 0x07) * obj_pitch). Odd tile_x takes the high nibble, even
// tile_x the low nibble. The result goes to current_pixel and then to
// tile_4bpp_draw_<combine_op>. No trailing semicolon; the caller adds it.
1763 #define obj_render_rotate_pixel_4bpp(combine_op, alpha_op)                    \
1764   if(tile_x & 0x01)                                                           \
1765   {                                                                           \
1766     current_pixel = tile_ptr[tile_map_offset +                                \
1767      ((tile_x >> 1) & 0x03) + ((tile_y & 0x07) * obj_pitch)] >> 4;            \
1768   }                                                                           \
1769   else                                                                        \
1770   {                                                                           \
1771     current_pixel = tile_ptr[tile_map_offset +                                \
1772      ((tile_x >> 1) & 0x03) + ((tile_y & 0x07) * obj_pitch)] & 0x0F;          \
1773   }                                                                           \
1774                                                                               \
1775   tile_4bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1776
// Fetch and draw one 8bpp pixel of a rotated obj.
// Reads the element at tile_map_offset + column (tile_x & 0x07) + line
// ((tile_y & 0x07) * obj_pitch) into current_pixel and hands it to
// tile_8bpp_draw_<combine_op>. No trailing semicolon; the caller adds it.
1777 #define obj_render_rotate_pixel_8bpp(combine_op, alpha_op)                    \
1778   current_pixel = tile_ptr[tile_map_offset +                                  \
1779    (tile_x & 0x07) + ((tile_y & 0x07) * obj_pitch)];                          \
1780                                                                               \
1781   tile_8bpp_draw_##combine_op(0, none, 0, alpha_op)                           \
1782
// Render one row of an affine obj in the general case (dy != 0, selected by
// obj_render_affine): both source coordinates change from pixel to pixel.
//
// Uses the locals declared by obj_render_affine and a loop counter `i` from
// the enclosing scope. tile_ptr is set to the obj's first tile and
// obj_tile_pitch to the distance between rows of tiles. source_x / source_y
// are then moved to the left edge of the drawn span for this scanline.
//
// The first loop steps over destination pixels until the source position
// falls inside [0, max_x) x [0, max_y); the unsigned compares also reject
// negative coordinates. The second loop draws pixels until the source
// position leaves that rectangle or obj_width pixels have been visited.
1783 #define obj_render_rotate(combine_op, color_depth, alpha_op, map_space)       \
1784 {                                                                             \
1785   tile_ptr = tile_base + ((obj_attribute_2 & 0x3FF) * 32);                    \
1786   obj_rotate_offset_##map_space(color_depth);                                 \
1787                                                                               \
1788   source_x += (y_delta * dmx) - (middle_x * dx);                              \
1789   source_y += (y_delta * dmy) - (middle_x * dy);                              \
1790                                                                               \
1791   for(i = 0; i < obj_width; i++)                                              \
1792   {                                                                           \
1793     tile_x = (source_x >> 8);                                                 \
1794     tile_y = (source_y >> 8);                                                 \
1795                                                                               \
1796     if(((u32)tile_x < (u32)max_x) && ((u32)tile_y < (u32)max_y))              \
1797       break;                                                                  \
1798                                                                               \
1799     source_x += dx;                                                           \
1800     source_y += dy;                                                           \
1801     advance_dest_ptr_##combine_op(1);                                         \
1802   }                                                                           \
1803                                                                               \
1804   for(; i < obj_width; i++)                                                   \
1805   {                                                                           \
1806     tile_x = (source_x >> 8);                                                 \
1807     tile_y = (source_y >> 8);                                                 \
1808                                                                               \
1809     if(((u32)tile_x >= (u32)max_x) || ((u32)tile_y >= (u32)max_y))            \
1810       break;                                                                  \
1811                                                                               \
1812     tile_map_offset = ((tile_x >> 3) * tile_size_##color_depth) +             \
1813     ((tile_y >> 3) * obj_tile_pitch);                                         \
1814     obj_render_rotate_pixel_##color_depth(combine_op, alpha_op);              \
1815                                                                               \
1816     source_x += dx;                                                           \
1817     source_y += dy;                                                           \
1818     advance_dest_ptr_##combine_op(1);                                         \
1819   }                                                                           \
1820 }                                                                             \
1821
1822 // Render the current row of an affine transformed OBJ.
//
// combine_op and alpha_op are forwarded to the pixel draw macros; color_depth
// is 4bpp or 8bpp and map_space is 1D or 2D.
//
// The parameter group is picked by bits 9-13 of obj_attribute_1 and starts at
// oam_ram + group * 16; dx, dmx, dy and dmy are read from elements 3, 7, 11
// and 15 of that group.
//
// max_x / max_y keep the obj's original size and act as the source bounds;
// source_x / source_y start at the obj centre shifted left by 8. When bit
// 0x200 of obj_attribute_0 is set, obj_width, obj_height, middle_x and
// middle_y are doubled while the source bounds stay unchanged.
//
// The drawn span is then clipped to [start, end). Clipping on the left also
// reduces middle_x by the same amount so the source coordinate still lines
// up. `continue` is issued when nothing is left to draw, so this must be
// expanded inside a loop. obj_x, obj_width and obj_height are modified in
// place.
//
// Finally dy == 0 selects obj_render_scale and anything else goes through
// obj_render_rotate; both rely on the locals declared here. tile_offset is
// not referenced by any macro visible in this part of the file.
1823
1824 #define obj_render_affine(combine_op, color_depth, alpha_op, map_space)       \
1825 {                                                                             \
1826   s16 *params = oam_ram + (((obj_attribute_1 >> 9) & 0x1F) * 16);             \
1827   s32 dx = params[3];                                                         \
1828   s32 dmx = params[7];                                                        \
1829   s32 dy = params[11];                                                        \
1830   s32 dmy = params[15];                                                       \
1831   s32 source_x, source_y;                                                     \
1832   s32 tile_x, tile_y;                                                         \
1833   u32 tile_offset;                                                            \
1834   u32 tile_map_offset;                                                        \
1835   s32 middle_x;                                                               \
1836   s32 middle_y;                                                               \
1837   s32 max_x = obj_width;                                                      \
1838   s32 max_y = obj_height;                                                     \
1839   s32 y_delta;                                                                \
1840   u32 obj_pitch = tile_width_##color_depth;                                   \
1841   u32 obj_tile_pitch;                                                         \
1842                                                                               \
1843   middle_x = (obj_width / 2);                                                 \
1844   middle_y = (obj_height / 2);                                                \
1845                                                                               \
1846   source_x = (middle_x << 8);                                                 \
1847   source_y = (middle_y << 8);                                                 \
1848                                                                               \
1849                                                                               \
1850   if(obj_attribute_0 & 0x200)                                                 \
1851   {                                                                           \
1852     obj_width *= 2;                                                           \
1853     obj_height *= 2;                                                          \
1854     middle_x *= 2;                                                            \
1855     middle_y *= 2;                                                            \
1856   }                                                                           \
1857                                                                               \
1858   if((s32)obj_x < (s32)start)                                                 \
1859   {                                                                           \
1860     u32 x_delta = start - obj_x;                                              \
1861     middle_x -= x_delta;                                                      \
1862     obj_width -= x_delta;                                                     \
1863     obj_x = start;                                                            \
1864                                                                               \
1865     if((s32)obj_width <= 0)                                                   \
1866       continue;                                                               \
1867   }                                                                           \
1868                                                                               \
1869   if((s32)(obj_x + obj_width) >= (s32)end)                                    \
1870   {                                                                           \
1871     obj_width = end - obj_x;                                                  \
1872                                                                               \
1873     if((s32)obj_width <= 0)                                                   \
1874       continue;                                                               \
1875   }                                                                           \
1876   dest_ptr = scanline + obj_x;                                                \
1877                                                                               \
1878   y_delta = vcount - (obj_y + middle_y);                                      \
1879                                                                               \
1880   obj_get_palette_##color_depth();                                            \
1881                                                                               \
1882   if(dy == 0)                                                                 \
1883   {                                                                           \
1884     obj_render_scale(combine_op, color_depth, alpha_op, map_space);           \
1885   }                                                                           \
1886   else                                                                        \
1887   {                                                                           \
1888     obj_render_rotate(combine_op, color_depth, alpha_op, map_space);          \
1889   }                                                                           \
1890 }                                                                             \
1891
1892 u32 obj_width_table[] = { 8, 16, 32, 64, 16, 32, 32, 64, 8, 8, 16, 32 };
1891
1892 u32 obj_width_table[] = { 8, 16, 32, 64, 16, 32, 32, 64, 8, 8, 16, 32 };
// Obj heights in pixels, paired entry by entry with obj_width_table above:
// entries 0-3 are the square sizes (8x8 up to 64x64), entries 4-7 are wider
// than tall (16x8, 32x8, 32x16, 64x32) and entries 8-11 are taller than wide
// (8x16, 8x32, 16x32, 32x64). The lookup site is not visible here; the index
// is presumably (shape * 4) + size taken from the obj attributes - confirm.
1893 u32 obj_height_table[] = { 8, 16, 32, 64, 8, 8, 16, 32, 16, 32, 32, 64 };
1894
// Per-scanline obj bookkeeping tables; each has one row per scanline (160).
// The code that fills and reads them is not visible in this part of the
// file. Presumably obj_priority_list holds up to 128 obj indices for each of
// 5 priority buckets per line, obj_priority_count holds how many are stored,
// and obj_alpha_count counts alpha-blended objs per line - TODO confirm.
1895 u8 obj_priority_list[5][160][128];
1896 u32 obj_priority_count[5][160];
1897 u32 obj_alpha_count[160];
1898
1899
1900 // Build obj rendering functions
//
// render_scanline_obj_extra_variables_normal declares the extra local that
// the "normal" obj renderer needs. With RENDER_COLOR16_NORMAL defined that is
// a constant pixel_combine of (1 << 8); otherwise it is a `palette` pointer
// set to palette_ram_converted + 256. The bg_type argument is unused in both
// variants, and the caller supplies the trailing semicolon.
1901
1902 #ifdef RENDER_COLOR16_NORMAL
1903
1904 #define render_scanline_obj_extra_variables_normal(bg_type)                   \
1905   const u32 pixel_combine = (1 << 8)                                          \
1906
1907 #else
1908
1909 #define render_scanline_obj_extra_variables_normal(bg_type)                   \
1910   u16 *palette = palette_ram_converted + 256                                  \
1911
1912 #endif
1913
1914
// Locals shared by the colour-effect obj renderers: a scratch `dest` and
// pixel_combine, initialised to color_combine_mask(4) with bit 8 set.
// color_combine_mask is defined outside this part of the file. The caller
// supplies the trailing semicolon.
1915 #define render_scanline_obj_extra_variables_color()                           \
1916   u32 dest;                                                                   \
1917   u32 pixel_combine = color_combine_mask(4) | (1 << 8)                        \
1918
// Locals for the alpha_obj renderer, plus an early out: when bit 0x200 of
// pixel_combine is clear, the work is delegated to
// render_scanline_obj_color32_<map_space>(priority, start, end, scanline) and
// the enclosing function returns. The bare `return;` means this can only be
// expanded inside a function returning void, with priority, start, end and
// scanline in scope.
1919 #define render_scanline_obj_extra_variables_alpha_obj(map_space)              \
1920   render_scanline_obj_extra_variables_color();                                \
1921   if((pixel_combine & 0x00000200) == 0)                                       \
1922   {                                                                           \
1923     render_scanline_obj_color32_##map_space(priority, start, end, scanline);  \
1924     return;                                                                   \
1925   }                                                                           \
1926
// Locals for the color16 renderer: just the shared colour locals. map_space
// is unused.
1927 #define render_scanline_obj_extra_variables_color16(map_space)                \
1928   render_scanline_obj_extra_variables_color()                                 \
1929
// Locals for the color32 renderer: just the shared colour locals. map_space
// is unused.
1930 #define render_scanline_obj_extra_variables_color32(map_space)                \
1931   render_scanline_obj_extra_variables_color()                                 \
1932
// Locals for the partial_alpha renderer: the shared colour locals plus
// base_pixel_combine, a copy of the initial pixel_combine that
// render_scanline_obj_partial_alpha restores for non-blended objs.
// map_space is unused.
1933 #define render_scanline_obj_extra_variables_partial_alpha(map_space)          \
1934   render_scanline_obj_extra_variables_color();                                \
1935   u32 base_pixel_combine = pixel_combine                                      \
1936
// Locals for the "copy" obj renderers. Reads BLDCNT and DISPCNT from
// io_registers, takes obj_enable from WINOUT shifted right by 8, declares
// layer_renderers for the given type (tile or bitmap), and sets up a 240
// entry copy_buffer with its copy_start / copy_end / copy_ptr cursors. How
// these are used is defined outside this part of the file. The caller
// supplies the trailing semicolon.
1937 #define render_scanline_obj_extra_variables_copy(type)                        \
1938   u32 bldcnt = io_registers[REG_BLDCNT];                                      \
1939   u32 dispcnt = io_registers[REG_DISPCNT];                                    \
1940   u32 obj_enable = io_registers[REG_WINOUT] >> 8;                             \
1941   render_scanline_layer_functions_##type();                                   \
1942   u32 copy_start, copy_end;                                                   \
1943   u16 copy_buffer[240];                                                       \
1944   u16 *copy_ptr                                                               \
1945
// Copy-renderer locals for tile modes (layer_renderers comes from
// tile_mode_renderers). map_space is unused.
1946 #define render_scanline_obj_extra_variables_copy_tile(map_space)              \
1947   render_scanline_obj_extra_variables_copy(tile)                              \
1948
// Copy-renderer locals for bitmap modes (layer_renderers comes from
// bitmap_mode_renderers). map_space is unused.
1949 #define render_scanline_obj_extra_variables_copy_bitmap(map_space)            \
1950   render_scanline_obj_extra_variables_copy(bitmap)                            \
1951
1952
// Dispatch one obj to the matching row renderer.
//
// Bit 0x100 of obj_attribute_0 selects the affine path; there bit 13 of
// obj_attribute_0 picks 8bpp over 4bpp.
//
// Otherwise the obj is drawn unrotated: vertical_offset is the line inside
// the obj (vcount - obj_y), mirrored when bit 13 of obj_attribute_1 is set.
// The switch value combines bit 13 of obj_attribute_0 (colour depth, as bit
// 1) with bit 12 of obj_attribute_1 (horizontal flip, as bit 0):
//   0 = 4bpp noflip, 1 = 4bpp flip, 2 = 8bpp noflip, 3 = 8bpp flip.
//
// Both obj_render and obj_render_affine may issue `continue`, so this must be
// expanded inside the loop that walks the objs.
1953 #define render_scanline_obj_main(combine_op, alpha_op, map_space)             \
1954   if(obj_attribute_0 & 0x100)                                                 \
1955   {                                                                           \
1956     if((obj_attribute_0 >> 13) & 0x01)                                        \
1957     {                                                                         \
1958       obj_render_affine(combine_op, 8bpp, alpha_op, map_space);               \
1959     }                                                                         \
1960     else                                                                      \
1961     {                                                                         \
1962       obj_render_affine(combine_op, 4bpp, alpha_op, map_space);               \
1963     }                                                                         \
1964   }                                                                           \
1965   else                                                                        \
1966   {                                                                           \
1967     vertical_offset = vcount - obj_y;                                         \
1968                                                                               \
1969     if((obj_attribute_1 >> 13) & 0x01)                                        \
1970       vertical_offset = obj_height - vertical_offset - 1;                     \
1971                                                                               \
1972     switch(((obj_attribute_0 >> 12) & 0x02) |                                 \
1973      ((obj_attribute_1 >> 12) & 0x01))                                        \
1974     {                                                                         \
1975       case 0x0:                                                               \
1976         obj_render(combine_op, 4bpp, alpha_op, map_space, noflip);            \
1977         break;                                                                \
1978                                                                               \
1979       case 0x1:                                                               \
1980         obj_render(combine_op, 4bpp, alpha_op, map_space, flip);              \
1981         break;                                                                \
1982                                                                               \
1983       case 0x2:                                                               \
1984         obj_render(combine_op, 8bpp, alpha_op, map_space, noflip);            \
1985         break;                                                                \
1986                                                                               \
1987       case 0x3:                                                               \
1988         obj_render(combine_op, 8bpp, alpha_op, map_space, flip);              \
1989         break;                                                                \
1990     }                                                                         \
1991   }                                                                           \
1992
// Per-object body used by builders that have no per-object alpha selection:
// forwards straight to render_scanline_obj_main with the builder's alpha_op.
#define render_scanline_obj_no_partial_alpha(combine_op, alpha_op, map_space) \
  render_scanline_obj_main(combine_op, alpha_op, map_space)

// Per-object body for the partial alpha builder. Bits 10-11 of attribute 0
// pick the path: when they are zero the object keeps base_pixel_combine and
// is rendered through the color32 path, otherwise pixel_combine is forced to
// 0x300 and the object goes through the alpha_obj path. The alpha_op argument
// is accepted for signature parity only.
#define render_scanline_obj_partial_alpha(combine_op, alpha_op, map_space)    \
  if(((obj_attribute_0 >> 10) & 0x03) == 0)                                   \
  {                                                                           \
    pixel_combine = base_pixel_combine;                                       \
    render_scanline_obj_main(combine_op, color32, map_space);                 \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    pixel_combine = 0x00000300;                                               \
    render_scanline_obj_main(combine_op, alpha_obj, map_space);               \
  }
2007
// Per-object prologue for the "transparent" combine op: nothing to set up.
#define render_scanline_obj_prologue_transparent(alpha_op)

// Per-object prologue for the "copy" combine op. Works out the horizontal
// span [copy_start, copy_end) covered by the object (twice obj_width when
// bit 9 of attribute 0 is set), clips it to [start, end) and, if anything is
// left, renders that span into copy_buffer and points copy_ptr at its first
// pixel. An object with an empty span is skipped with `continue`, so this
// macro must be expanded directly inside the per-object loop.
#define render_scanline_obj_prologue_copy_body(type)                          \
  copy_start = obj_x;                                                         \
  copy_end = obj_x +                                                          \
   ((obj_attribute_0 & 0x200) ? (obj_width * 2) : obj_width);                 \
                                                                              \
  if(copy_start < start)                                                      \
    copy_start = start;                                                       \
  if(copy_end > end)                                                          \
    copy_end = end;                                                           \
                                                                              \
  if((copy_start >= end) || (copy_end <= start))                              \
    continue;                                                                 \
                                                                              \
  render_scanline_conditional_##type(copy_start, copy_end, copy_buffer,       \
   obj_enable, dispcnt, bldcnt, layer_renderers);                             \
  copy_ptr = copy_buffer + copy_start;

// Copy prologue specialised for tile backgrounds.
#define render_scanline_obj_prologue_copy_tile()                              \
  render_scanline_obj_prologue_copy_body(tile)

// Copy prologue specialised for bitmap backgrounds.
#define render_scanline_obj_prologue_copy_bitmap()                            \
  render_scanline_obj_prologue_copy_body(bitmap)

// Dispatches to the copy_tile / copy_bitmap prologue named by alpha_op.
#define render_scanline_obj_prologue_copy(alpha_op)                           \
  render_scanline_obj_prologue_##alpha_op()
2041
2042
// Generates render_scanline_obj_<alpha_op>_<map_space>(priority, start, end,
// scanline): walks the OBJ list that order_obj() built for this priority on
// the current scanline (REG_VCOUNT), decodes position and size from the OAM
// attributes of each entry and hands the object to the per-object renderer
// selected by partial_alpha_op. Most locals are declared here only so that
// the nested render macros can use them by name.
#define render_scanline_obj_builder(combine_op, alpha_op, map_space,          \
 partial_alpha_op)                                                            \
void render_scanline_obj_##alpha_op##_##map_space(u32 priority,               \
 u32 start, u32 end, render_scanline_dest_##alpha_op *scanline)               \
{                                                                             \
  render_scanline_obj_extra_variables_##alpha_op(map_space);                  \
  s32 vcount = io_registers[REG_VCOUNT];                                      \
  u32 obj_count = obj_priority_count[priority][vcount];                       \
  u8 *obj_list = obj_priority_list[priority][vcount];                         \
  u8 *tile_base = vram + 0x10000;                                             \
  u8 *tile_ptr;                                                               \
  u16 *oam_ptr;                                                               \
  render_scanline_dest_##alpha_op *dest_ptr;                                  \
  u32 obj_attribute_0, obj_attribute_1, obj_attribute_2;                      \
  s32 obj_num, i;                                                             \
  s32 obj_x, obj_y;                                                           \
  s32 obj_size;                                                               \
  s32 obj_width, obj_height;                                                  \
  u32 tile_run, pixel_run;                                                    \
  u32 partial_tile_run, partial_tile_offset;                                  \
  u32 current_pixels, current_pixel, current_palette;                         \
  u32 vertical_offset;                                                        \
                                                                              \
  for(obj_num = 0; obj_num < obj_count; obj_num++)                            \
  {                                                                           \
    /* Each OAM entry is four halfwords wide. */                              \
    oam_ptr = &oam_ram[obj_list[obj_num] * 4];                                \
    obj_attribute_0 = oam_ptr[0];                                             \
    obj_attribute_1 = oam_ptr[1];                                             \
    obj_attribute_2 = oam_ptr[2];                                             \
                                                                              \
    /* Table index: attribute 0 bits 14-15 in bits 2-3, attribute 1 */        \
    /* bits 14-15 in bits 0-1. */                                             \
    obj_size = ((obj_attribute_0 >> 12) & 0x0C) | (obj_attribute_1 >> 14);    \
    obj_width = obj_width_table[obj_size];                                    \
                                                                              \
    /* Sign extend the 9 bit X coordinate. */                                 \
    obj_x = (s32)(obj_attribute_1 << 23) >> 23;                               \
                                                                              \
    /* May skip this object with continue (copy combine op). */               \
    render_scanline_obj_prologue_##combine_op(alpha_op);                      \
                                                                              \
    obj_height = obj_height_table[obj_size];                                  \
                                                                              \
    /* Y values above 160 wrap around to negative coordinates. */             \
    obj_y = obj_attribute_0 & 0xFF;                                           \
    if(obj_y > 160)                                                           \
      obj_y -= 256;                                                           \
                                                                              \
    render_scanline_obj_##partial_alpha_op(combine_op, alpha_op, map_space);  \
  }                                                                           \
}
2091
2092 render_scanline_obj_builder(transparent, normal, 1D, no_partial_alpha);
2093 render_scanline_obj_builder(transparent, normal, 2D, no_partial_alpha);
2094 render_scanline_obj_builder(transparent, color16, 1D, no_partial_alpha);
2095 render_scanline_obj_builder(transparent, color16, 2D, no_partial_alpha);
2096 render_scanline_obj_builder(transparent, color32, 1D, no_partial_alpha);
2097 render_scanline_obj_builder(transparent, color32, 2D, no_partial_alpha);
2098 render_scanline_obj_builder(transparent, alpha_obj, 1D, no_partial_alpha);
2099 render_scanline_obj_builder(transparent, alpha_obj, 2D, no_partial_alpha);
2100 render_scanline_obj_builder(transparent, partial_alpha, 1D, partial_alpha);
2101 render_scanline_obj_builder(transparent, partial_alpha, 2D, partial_alpha);
2102 render_scanline_obj_builder(copy, copy_tile, 1D, no_partial_alpha);
2103 render_scanline_obj_builder(copy, copy_tile, 2D, no_partial_alpha);
2104 render_scanline_obj_builder(copy, copy_bitmap, 1D, no_partial_alpha);
2105 render_scanline_obj_builder(copy, copy_bitmap, 2D, no_partial_alpha);
2106
2107
2108
2109 void order_obj(u32 video_mode)
2110 {
2111   s32 obj_num, priority, row;
2112   s32 obj_x, obj_y;
2113   s32 obj_size, obj_mode;
2114   s32 obj_width, obj_height;
2115   u32 obj_priority;
2116   u32 obj_attribute_0, obj_attribute_1, obj_attribute_2;
2117   s32 vcount = io_registers[REG_VCOUNT];
2118   u32 partial_tile_run, partial_tile_offset;
2119   u32 pixel_run;
2120   u32 current_count;
2121   u16 *oam_ptr = oam_ram + 508;
2122   u16 *dest_ptr;
2123   u8 *tile_base = vram + 0x10000;
2124   u8 *tile_ptr;
2125
2126   for(priority = 0; priority < 5; priority++)
2127   {
2128     for(row = 0; row < 160; row++)
2129     {
2130       obj_priority_count[priority][row] = 0;
2131     }
2132   }
2133
2134   for(row = 0; row < 160; row++)
2135   {
2136     obj_alpha_count[row] = 0;
2137   }
2138
2139   for(obj_num = 127; obj_num >= 0; obj_num--, oam_ptr -= 4)
2140   {
2141     obj_attribute_0 = oam_ptr[0];
2142     obj_attribute_2 = oam_ptr[2];
2143     obj_size = obj_attribute_0 & 0xC000;
2144     obj_priority = (obj_attribute_2 >> 10) & 0x03;
2145     obj_mode = (obj_attribute_0 >> 10) & 0x03;
2146
2147     if(((obj_attribute_0 & 0x0300) != 0x0200) && (obj_size != 0xC000) &&
2148      (obj_mode != 3) && ((video_mode < 3) ||
2149      ((obj_attribute_2 & 0x3FF) >= 512)))
2150     {
2151       obj_y = obj_attribute_0 & 0xFF;
2152       if(obj_y > 160)
2153         obj_y -= 256;
2154
2155       obj_attribute_1 = oam_ptr[1];
2156       obj_size = ((obj_size >> 12) & 0x0C) | (obj_attribute_1 >> 14);
2157       obj_height = obj_height_table[obj_size];
2158       obj_width = obj_width_table[obj_size];
2159
2160       if(obj_attribute_0 & 0x200)
2161       {
2162         obj_height *= 2;
2163         obj_width *= 2;
2164       }
2165
2166       if(((obj_y + obj_height) > 0) && (obj_y < 160))
2167       {
2168         obj_x = (s32)(obj_attribute_1 << 23) >> 23;
2169
2170         if(((obj_x + obj_width) > 0) && (obj_x < 240))
2171         {
2172           if(obj_y < 0)
2173           {
2174             obj_height += obj_y;
2175             obj_y = 0;
2176           }
2177
2178           if((obj_y + obj_height) >= 160)
2179           {
2180             obj_height = 160 - obj_y;
2181           }
2182
2183           if(obj_mode == 1)
2184           {
2185             for(row = obj_y; row < obj_y + obj_height; row++)
2186             {
2187               current_count = obj_priority_count[obj_priority][row];
2188               obj_priority_list[obj_priority][row][current_count] = obj_num;
2189               obj_priority_count[obj_priority][row] = current_count + 1;
2190               obj_alpha_count[row]++;
2191             }
2192           }
2193           else
2194           {
2195             if(obj_mode == 2)
2196             {
2197               obj_priority = 4;
2198             }
2199
2200             for(row = obj_y; row < obj_y + obj_height; row++)
2201             {
2202               current_count = obj_priority_count[obj_priority][row];
2203               obj_priority_list[obj_priority][row][current_count] = obj_num;
2204               obj_priority_count[obj_priority][row] = current_count + 1;
2205             }
2206           }
2207         }
2208       }
2209     }
2210   }
2211 }
2212
2213 u32 layer_order[16];
2214 u32 layer_count;
2215
2216 u32 order_layers(u32 layer_flags)
2217 {
2218   s32 priority, layer_number;
2219   layer_count = 0;
2220
2221   for(priority = 3; priority >= 0; priority--)
2222   {
2223     for(layer_number = 3; layer_number >= 0; layer_number--)
2224     {
2225       if(((layer_flags >> layer_number) & 1) &&
2226        ((io_registers[REG_BG0CNT + layer_number] & 0x03) == priority))
2227       {
2228         layer_order[layer_count] = layer_number;
2229         layer_count++;
2230       }
2231     }
2232
2233     if((obj_priority_count[priority][io_registers[REG_VCOUNT]] > 0)
2234      && (layer_flags & 0x10))
2235     {
2236       layer_order[layer_count] = priority | 0x04;
2237       layer_count++;
2238     }
2239   }
2240 }
2241
// Store `color` into dest_ptr[_start] .. dest_ptr[_end - 1]. Declares its own
// loop counter `i`, so it can be expanded only once per scope. The bounds are
// parenthesized so that expression arguments (for example a conditional
// expression) are evaluated as a unit.
#define fill_line(_start, _end)                                               \
  u32 i;                                                                      \
                                                                              \
  for(i = (_start); i < (_end); i++)                                          \
  {                                                                           \
    dest_ptr[i] = color;                                                      \
  }

// Per-destination-type colour preparation. Only the "normal" destination
// translates the palette index through palette_ram_converted; the other
// destinations store the value they are given unchanged.
#define fill_line_color_normal()                                              \
  color = palette_ram_converted[color]

#define fill_line_color_alpha()

#define fill_line_color_color16()

#define fill_line_color_color32()

// Generates fill_line_<type>(color, dest_ptr, start, end), which fills
// dest_ptr[start .. end - 1] with the (possibly converted) colour.
#define fill_line_builder(type)                                               \
void fill_line_##type(u16 color, render_scanline_dest_##type *dest_ptr,       \
 u32 start, u32 end)                                                          \
{                                                                             \
  fill_line_color_##type();                                                   \
  fill_line(start, end);                                                      \
}
2267
2268 fill_line_builder(normal);
2269 fill_line_builder(alpha);
2270 fill_line_builder(color16);
2271 fill_line_builder(color32);
2272
2273
// Alpha blend two pixels. pixel_top must already be spread into the
// 0x07E0F81F layout; the bottom pixel is looked up from the upper half of
// pixel_pair and spread the same way. The result (still spread) is
// (top * blend_a + bottom * blend_b) / 16 and is left in pixel_top.

#define blend_pixel()                                                         \
  pixel_bottom = palette_ram_converted[(pixel_pair >> 16) & 0x1FF];           \
  pixel_bottom |= pixel_bottom << 16;                                         \
  pixel_bottom &= 0x07E0F81F;                                                 \
  pixel_top *= blend_a;                                                       \
  pixel_top += pixel_bottom * blend_b;                                        \
  pixel_top >>= 4


// Alpha blend two pixels, allowing for saturation (a channel overflowing
// into the bit above its field). The operation is optimized towards
// saturation not occurring: the three overflow bits are tested together
// first, and only then is each overflowing channel filled with ones.

#define blend_saturate_pixel()                                                \
  blend_pixel();                                                              \
  if(pixel_top & 0x08010020)                                                  \
  {                                                                           \
    if(pixel_top & 0x08000000)                                                \
      pixel_top |= 0x07E00000;                                                \
                                                                              \
    if(pixel_top & 0x00010000)                                                \
      pixel_top |= 0x0000F800;                                                \
                                                                              \
    if(pixel_top & 0x00000020)                                                \
      pixel_top |= 0x0000001F;                                                \
  }
2300
// Fade the spread pixel in pixel_top towards white. The caller supplies
// `upper` (the white share) and `blend` (the remaining weight of the pixel,
// in sixteenths).
#define brighten_pixel()                                                      \
  pixel_top = upper + ((pixel_top * blend) >> 4);

// Scale the spread pixel in pixel_top by blend / 16, i.e. towards black.
#define darken_pixel()                                                        \
  pixel_top = (pixel_top * blend) >> 4;

// True when bits 0x04000200 are all set in pixel_pair.
#define effect_condition_alpha                                                \
  ((pixel_pair & 0x04000200) == 0x04000200)

// True when bit 0x200 is set in pixel_source. The argument is parenthesized
// so that expression arguments are tested as a whole.
#define effect_condition_fade(pixel_source)                                   \
  (((pixel_source) & 0x00000200) == 0x00000200)

// Apply <expand_type>_pixel() to the 16 bit colour in pixel_top: spread it
// into the 0x07E0F81F layout, run the effect, mask away bits that spilled
// out of the channel fields and fold the two halves back together (the
// caller's store to a 16 bit destination drops the upper half). pixel_source
// is unused here.
#define expand_pixel_no_dest(expand_type, pixel_source)                       \
  pixel_top = (pixel_top | (pixel_top << 16)) & 0x07E0F81F;                   \
  expand_type##_pixel();                                                      \
  pixel_top &= 0x07E0F81F;                                                    \
  pixel_top = (pixel_top >> 16) | pixel_top

// Look up the colour for the low 9 bits of pixel_source, apply the effect
// and store the result through screen_dest_ptr. pixel_source is
// parenthesized so the 0x1FF mask always applies to the whole argument and
// the palette index stays within 0-511.
#define expand_pixel(expand_type, pixel_source)                               \
  pixel_top = palette_ram_converted[(pixel_source) & 0x1FF];                  \
  expand_pixel_no_dest(expand_type, pixel_source);                            \
  *screen_dest_ptr = pixel_top
2323
// Convert pixels start .. end - 1 of the source line to 16 bit colour in the
// destination line. Pixels for which effect_condition holds go through
// expand_pixel(expand_type, ...); all others are a plain palette lookup of
// their low 9 bits. Note that the macro advances both pointers and turns
// `end` into a pixel count.
#define expand_loop(expand_type, effect_condition, pixel_source)              \
  screen_src_ptr += start;                                                    \
  screen_dest_ptr += start;                                                   \
  end -= start;                                                               \
                                                                              \
  for(i = 0; i < end; i++, screen_src_ptr++, screen_dest_ptr++)               \
  {                                                                           \
    pixel_source = *screen_src_ptr;                                           \
    if(!(effect_condition))                                                   \
    {                                                                         \
      *screen_dest_ptr = palette_ram_converted[pixel_source & 0x1FF];         \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      expand_pixel(expand_type, pixel_source);                                \
    }                                                                         \
  }
2346
2347
// Like expand_loop, but with two effects to choose from per pixel pair:
//  - fade bit (0x200) clear:            plain palette lookup
//  - fade bit set and alpha condition:  expand_pixel(alpha_expand, ...)
//  - fade bit set only:                 expand_pixel(expand_type, ...)
// Advances both pointers and turns `end` into a pixel count.
#define expand_loop_partial_alpha(alpha_expand, expand_type)                  \
  screen_src_ptr += start;                                                    \
  screen_dest_ptr += start;                                                   \
  end -= start;                                                               \
                                                                              \
  for(i = 0; i < end; i++, screen_src_ptr++, screen_dest_ptr++)               \
  {                                                                           \
    pixel_pair = *screen_src_ptr;                                             \
    if(!effect_condition_fade(pixel_pair))                                    \
    {                                                                         \
      *screen_dest_ptr = palette_ram_converted[pixel_pair & 0x1FF];           \
    }                                                                         \
    else if(effect_condition_alpha)                                           \
    {                                                                         \
      expand_pixel(alpha_expand, pixel_pair);                                 \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      expand_pixel(expand_type, pixel_pair);                                  \
    }                                                                         \
  }
2377
2378
// Run the partial alpha loop with the cheap blend when the two coefficients
// cannot push a channel past its maximum (sum of at most 16), and with the
// saturating blend otherwise.
#define expand_partial_alpha(expand_type)                                     \
  if((blend_a + blend_b) <= 16)                                               \
  {                                                                           \
    expand_loop_partial_alpha(blend, expand_type);                            \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    expand_loop_partial_alpha(blend_saturate, expand_type);                   \
  }
2388
2389
2390
// Scanline expansion routines. expand_normal comes first; expand_blend, further
// below, blends the top two pixels of a scanline with each other.
2392
#ifdef RENDER_COLOR16_NORMAL

#ifndef GP2X_BUILD

// Intentionally does nothing. The function used to return unconditionally
// before an in-place palette lookup loop
// (*screen_ptr = palette_ram_converted[*screen_ptr] for start .. end - 1);
// that unreachable loop and its locals have been removed. The entry point is
// kept so existing callers still link.
void expand_normal(u16 *screen_ptr, u32 start, u32 end)
{
  (void)screen_ptr;
  (void)start;
  (void)end;
}

#endif

#else

// Without RENDER_COLOR16_NORMAL the call expands to nothing.
#define expand_normal(screen_ptr, start, end)

#endif
2422
2423
2424 #ifndef GP2X_BUILD
2425
2426 void expand_blend(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2427  u32 start, u32 end)
2428 {
2429   u32 pixel_pair;
2430   u32 pixel_top, pixel_bottom;
2431   u32 bldalpha = io_registers[REG_BLDALPHA];
2432   u32 blend_a = bldalpha & 0x1F;
2433   u32 blend_b = (bldalpha >> 8) & 0x1F;
2434   u32 i;
2435
2436   if(blend_a > 16)
2437     blend_a = 16;
2438
2439   if(blend_b > 16)
2440     blend_b = 16;
2441
2442   // The individual colors can saturate over 31, this should be taken
2443   // care of in an alternate pass as it incurs a huge additional speedhit.
2444   if((blend_a + blend_b) > 16)
2445   {
2446     expand_loop(blend_saturate, effect_condition_alpha, pixel_pair);
2447   }
2448   else
2449   {
2450     expand_loop(blend, effect_condition_alpha, pixel_pair);
2451   }
2452 }
2453
2454 #endif
2455
2456 // Blend scanline with white.
2457
2458 void expand_darken(u16 *screen_src_ptr, u16 *screen_dest_ptr,
2459  u32 start, u32 end)
2460 {
2461   u32 pixel_top;
2462   s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);
2463   u32 i;
2464
2465   if(blend < 0)
2466     blend = 0;
2467
2468   expand_loop(darken, effect_condition_fade(pixel_top), pixel_top);
2469 }
2470
2471
2472 // Blend scanline with black.
2473
2474 void expand_brighten(u16 *screen_src_ptr, u16 *screen_dest_ptr,
2475  u32 start, u32 end)
2476 {
2477   u32 pixel_top;
2478   u32 blend = io_registers[REG_BLDY] & 0x1F;
2479   u32 upper;
2480   u32 i;
2481
2482   if(blend > 16)
2483     blend = 16;
2484
2485   upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;
2486   blend = 16 - blend;
2487
2488   expand_loop(brighten, effect_condition_fade(pixel_top), pixel_top);
2489
2490 }
2491
2492
2493 // Expand scanline such that if both top and bottom pass it's alpha,
2494 // if only top passes it's as specified, and if neither pass it's normal.
2495
2496 void expand_darken_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2497  u32 start, u32 end)
2498 {
2499   s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);
2500   u32 pixel_pair;
2501   u32 pixel_top, pixel_bottom;
2502   u32 bldalpha = io_registers[REG_BLDALPHA];
2503   u32 blend_a = bldalpha & 0x1F;
2504   u32 blend_b = (bldalpha >> 8) & 0x1F;
2505   u32 i;
2506
2507   if(blend < 0)
2508     blend = 0;
2509
2510   if(blend_a > 16)
2511     blend_a = 16;
2512
2513   if(blend_b > 16)
2514     blend_b = 16;
2515
2516   expand_partial_alpha(darken);
2517 }
2518
2519
2520 void expand_brighten_partial_alpha(u32 *screen_src_ptr, u16 *screen_dest_ptr,
2521  u32 start, u32 end)
2522 {
2523   s32 blend = io_registers[REG_BLDY] & 0x1F;
2524   u32 pixel_pair;
2525   u32 pixel_top, pixel_bottom;
2526   u32 bldalpha = io_registers[REG_BLDALPHA];
2527   u32 blend_a = bldalpha & 0x1F;
2528   u32 blend_b = (bldalpha >> 8) & 0x1F;
2529   u32 upper;
2530   u32 i;
2531
2532   if(blend > 16)
2533     blend = 16;
2534
2535   upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;
2536   blend = 16 - blend;
2537
2538   if(blend_a > 16)
2539     blend_a = 16;
2540
2541   if(blend_b > 16)
2542     blend_b = 16;
2543
2544   expand_partial_alpha(brighten);
2545 }
2546
2547
// Render an OBJ layer from _start to _end. Bit 2 (the OBJ marker) is stripped
// from current_layer to leave the priority, then the 1D or 2D renderer is
// picked from bit 6 of dispcnt (set: 1D, clear: 2D).

#define render_obj_layer(type, dest, _start, _end)                            \
  current_layer &= ~0x04;                                                     \
  if(!(dispcnt & 0x40))                                                       \
    render_scanline_obj_##type##_2D(current_layer, _start, _end, dest);       \
  else                                                                        \
    render_scanline_obj_##type##_1D(current_layer, _start, _end, dest)


// Fill a target from _start to _end with the background color, i.e. palette
// entry 0, via the fill_line_<type> helper.

#define fill_line_bg(type, dest, _start, _end)                                \
  fill_line_##type(0, dest, _start, _end)
2564
2565
// Render all layers as they appear in the layer order. The first entry is
// drawn as the opaque base of the line, every later entry is drawn on top of
// it. NOTE(review): layer_order[0] is read unconditionally - presumably
// callers only expand this when layer_count is non-zero; confirm.

#define render_layers(tile_alpha, obj_alpha, dest)                            \
{                                                                             \
  current_layer = layer_order[0];                                             \
  if(!(current_layer & 0x04))                                                 \
  {                                                                           \
    /* A BG layer comes first: render it as the base layer. */                \
    layer_renderers[current_layer].tile_alpha##_render_base(current_layer,    \
     0, 240, dest);                                                           \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    /* OBJ comes first: lay down the background color, then the OBJs. */      \
    fill_line_bg(tile_alpha, dest, 0, 240);                                   \
    render_obj_layer(obj_alpha, dest, 0, 240);                                \
  }                                                                           \
                                                                              \
  /* Render the rest of the layers on top. */                                 \
  layer_order_pos = 1;                                                        \
  while(layer_order_pos < layer_count)                                        \
  {                                                                           \
    current_layer = layer_order[layer_order_pos];                             \
    if(!(current_layer & 0x04))                                               \
    {                                                                         \
      layer_renderers[current_layer].                                         \
       tile_alpha##_render_transparent(current_layer, 0, 240, dest);          \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      render_obj_layer(obj_alpha, dest, 0, 240);                              \
    }                                                                         \
    layer_order_pos++;                                                        \
  }                                                                           \
}
2599
// Alpha blending is worth rendering unless the BLDALPHA coefficients are
// exactly 0x001F after masking, or BLDCNT selects no layer in its low six
// bits, or none in bits 8-13.
#define render_condition_alpha                                                \
  (!(((io_registers[REG_BLDALPHA] & 0x1F1F) == 0x001F) ||                     \
     ((io_registers[REG_BLDCNT] & 0x3F) == 0) ||                              \
     ((io_registers[REG_BLDCNT] & 0x3F00) == 0)))

// A fade is worth rendering unless BLDY is zero or BLDCNT selects no layer
// in its low six bits.
#define render_condition_fade                                                 \
  (!(((io_registers[REG_BLDY] & 0x1F) == 0) ||                                \
     ((io_registers[REG_BLDCNT] & 0x3F) == 0)))
2608
/* render_layers_color_effect(renderer, layer_condition, alpha_condition,
 *  fade_condition, _start, _end)
 *
 * Statement macro that draws the span [_start, _end) of one scanline and
 * picks the drawing path from bits 6-7 of `bldcnt` (1 = alpha blend,
 * 2 = fade to white, 3 = fade to black; any other value draws unblended).
 *
 *  renderer         macro invoked as renderer(tile_mode, obj_mode, dest) to
 *                   draw the layers into dest.
 *  layer_condition  expression; when false only palette_ram_converted[0]
 *                   (optionally faded) is written to the span.
 *  alpha_condition  expression gating the alpha blend path.
 *  fade_condition   expression gating both fade paths.
 *  _start, _end     span bounds forwarded to the expand_ / fill_ helpers.
 *
 * The expansion uses `bldcnt`, `scanline`, `io_registers`,
 * `obj_alpha_count` and `palette_ram_converted` from the surrounding scope.
 * It contains bare `return;` statements, so it can only be expanded inside
 * a function returning void, and nothing placed after it runs on the paths
 * that take an effect.
 */
2609 #define render_layers_color_effect(renderer, layer_condition,                 \
2610  alpha_condition, fade_condition, _start, _end)                               \
2611 {                                                                             \
2612   if(layer_condition)                                                         \
2613   {                                                                           \
    /* Non-zero obj_alpha_count entry for the current VCOUNT: every path */   \
    /* below goes through the u32 screen_buffer before reaching scanline. */  \
2614     if(obj_alpha_count[io_registers[REG_VCOUNT]] > 0)                         \
2615     {                                                                         \
2616       /* Render based on special effects mode. */                             \
2617       u32 screen_buffer[240];                                                 \
2618       switch((bldcnt >> 6) & 0x03)                                            \
2619       {                                                                       \
2620         /* Alpha blend */                                                     \
2621         case 0x01:                                                            \
2622         {                                                                     \
2623           if(alpha_condition)                                                 \
2624           {                                                                   \
2625             renderer(alpha, alpha_obj, screen_buffer);                        \
2626             expand_blend(screen_buffer, scanline, _start, _end);              \
2627             return;                                                           \
2628           }                                                                   \
2629           break;                                                              \
2630         }                                                                     \
2631                                                                               \
2632         /* Fade to white */                                                   \
2633         case 0x02:                                                            \
2634         {                                                                     \
2635           if(fade_condition)                                                  \
2636           {                                                                   \
2637             renderer(color32, partial_alpha, screen_buffer);                  \
2638             expand_brighten_partial_alpha(screen_buffer, scanline,            \
2639              _start, _end);                                                   \
2640             return;                                                           \
2641           }                                                                   \
2642           break;                                                              \
2643         }                                                                     \
2644                                                                               \
2645         /* Fade to black */                                                   \
2646         case 0x03:                                                            \
2647         {                                                                     \
2648           if(fade_condition)                                                  \
2649           {                                                                   \
2650             renderer(color32, partial_alpha, screen_buffer);                  \
2651             expand_darken_partial_alpha(screen_buffer, scanline,              \
2652              _start, _end);                                                   \
2653             return;                                                           \
2654           }                                                                   \
2655           break;                                                              \
2656         }                                                                     \
2657       }                                                                       \
2658                                                                               \
      /* Reached for mode 0, or when the selected mode's condition was */     \
      /* false: draw without a screen-wide effect, still via the buffer. */   \
2659       renderer(color32, partial_alpha, screen_buffer);                        \
2660       expand_blend(screen_buffer, scanline, _start, _end);                    \
2661     }                                                                         \
2662     else                                                                      \
2663     {                                                                         \
2664       /* Render based on special effects mode. */                             \
2665       switch((bldcnt >> 6) & 0x03)                                            \
2666       {                                                                       \
2667         /* Alpha blend */                                                     \
2668         case 0x01:                                                            \
2669         {                                                                     \
2670           if(alpha_condition)                                                 \
2671           {                                                                   \
2672             u32 screen_buffer[240];                                           \
2673             renderer(alpha, alpha_obj, screen_buffer);                        \
2674             expand_blend(screen_buffer, scanline, _start, _end);              \
2675             return;                                                           \
2676           }                                                                   \
2677           break;                                                              \
2678         }                                                                     \
2679                                                                               \
2680         /* Fade to white */                                                   \
2681         case 0x02:                                                            \
2682         {                                                                     \
2683           if(fade_condition)                                                  \
2684           {                                                                   \
2685             renderer(color16, color16, scanline);                             \
2686             expand_brighten(scanline, scanline, _start, _end);                \
2687             return;                                                           \
2688           }                                                                   \
2689           break;                                                              \
2690         }                                                                     \
2691                                                                               \
2692         /* Fade to black */                                                   \
2693         case 0x03:                                                            \
2694         {                                                                     \
2695           if(fade_condition)                                                  \
2696           {                                                                   \
2697             renderer(color16, color16, scanline);                             \
2698             expand_darken(scanline, scanline, _start, _end);                  \
2699             return;                                                           \
2700           }                                                                   \
2701           break;                                                              \
2702         }                                                                     \
2703       }                                                                       \
2704                                                                               \
      /* No effect taken: draw straight into scanline. */                     \
2705       renderer(normal, normal, scanline);                                     \
2706       expand_normal(scanline, _start, _end);                                  \
2707     }                                                                         \
2708   }                                                                           \
2709   else                                                                        \
2710   {                                                                           \
    /* layer_condition is false: the span is filled with palette entry 0, */  \
    /* run through a fade first when color_combine_mask_a(5) is set. */       \
2711     u32 pixel_top = palette_ram_converted[0];                                 \
2712     switch((bldcnt >> 6) & 0x03)                                              \
2713     {                                                                         \
2714       /* Fade to white */                                                     \
2715       case 0x02:                                                              \
2716       {                                                                       \
2717         if(color_combine_mask_a(5))                                           \
2718         {                                                                     \
          /* `blend` and `upper` are not referenced again by name here; */    \
          /* presumably expand_pixel_no_dest reads them - TODO confirm. */    \
2719           u32 blend = io_registers[REG_BLDY] & 0x1F;                          \
2720           u32 upper;                                                          \
2721                                                                               \
2722           if(blend > 16)                                                      \
2723             blend = 16;                                                       \
2724                                                                               \
2725           upper = ((0x07E0F81F * blend) >> 4) & 0x07E0F81F;                   \
2726           blend = 16 - blend;                                                 \
2727                                                                               \
2728           expand_pixel_no_dest(brighten, pixel_top);                          \
2729         }                                                                     \
2730         break;                                                                \
2731       }                                                                       \
2732                                                                               \
2733       /* Fade to black */                                                     \
2734       case 0x03:                                                              \
2735       {                                                                       \
2736         if(color_combine_mask_a(5))                                           \
2737         {                                                                     \
          /* Signed so that BLDY values above 16 clamp to a factor of 0. */   \
2738           s32 blend = 16 - (io_registers[REG_BLDY] & 0x1F);                   \
2739                                                                               \
2740           if(blend < 0)                                                       \
2741             blend = 0;                                                        \
2742                                                                               \
2743           expand_pixel_no_dest(darken, pixel_top);                            \
2744         }                                                                     \
2745         break;                                                                \
2746       }                                                                       \
2747     }                                                                         \
2748     fill_line_color16(pixel_top, scanline, _start, _end);                     \
2749   }                                                                           \
2750 }                                                                             \
2751
2752
2753 // Renders an entire scanline from 0 to 240, based on current color mode.
2754
/* Draws pixels 0..240 of one tiled-mode scanline into `scanline`.
 *
 *  scanline  destination line buffer of 16-bit pixels.
 *  dispcnt   display control value; not referenced by name in this body,
 *            presumably read by the macros expanded below - TODO confirm.
 *
 * All of the work happens inside render_layers_color_effect(), which may
 * leave this function through its own `return;` statements.
 */
2755 void render_scanline_tile(u16 *scanline, u32 dispcnt)
2756 {
  /* Scratch variables assigned by the render_layers macro; they are left
     uninitialised here. */
2757   u32 current_layer;
2758   u32 layer_order_pos;
  /* Snapshot of BLDCNT; render_layers_color_effect switches on its bits 6-7. */
2759   u32 bldcnt = io_registers[REG_BLDCNT];
  /* NOTE(review): presumably declares `layer_renderers` for the macros
     below - confirm against the macro definition. */
2760   render_scanline_layer_functions_tile();
2761
2762   render_layers_color_effect(render_layers, layer_count,
2763    render_condition_alpha, render_condition_fade, 0, 240);
2764 }
2765
2766 void render_scanline_bitmap(u16 *scanline, u32 dispcnt)
2767 {
2768   u32 bldcnt = io_registers[REG_BLDCNT];
2769   render_scanline_layer_functions_bitmap();
2770   u32 current_layer;
2771   u32 layer_order_pos;
2772
2773   fill_line_bg(normal, scanline, 0, 240);
2774
2775   for(layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++)
2776   {
2777     current_layer = layer_order[layer_order_pos];
2778     if(current_layer & 0x04)
2779     {
2780       render_obj_layer(normal, scanline, 0, 240);
2781     }
2782     else
2783     {
2784       layer_renderers->normal_render(0, 240, scanline);
2785     }
2786   }
2787 }
2788
2789 // Render layers from start to end based on if they're allowed in the
2790 // enable flags.
2791
/* render_layers_conditional(tile_alpha, obj_alpha, dest)
 *
 * Statement macro that draws the entries of layer_order[] into dest, but
 * only those permitted by enable_flags: bit 0x10 permits OBJ entries
 * (entries with bit 0x04 set) and bit (1 << n) permits background n.
 *
 *  tile_alpha  prefix pasted onto _render_base / _render_transparent to
 *              select the background callbacks; also passed to fill_line_bg.
 *  obj_alpha   mode passed to render_obj_layer.
 *  dest        destination line buffer.
 *
 * The first permitted entry is drawn as the base (an OBJ entry gets a
 * backdrop fill underneath it); the remaining permitted entries are drawn
 * transparently on top. If no entry is permitted only the backdrop fill is
 * drawn.
 *
 * Uses current_layer, layer_order_pos, layer_order, layer_count,
 * enable_flags, start, end and layer_renderers from the expansion site.
 * layer_order_pos must be a valid index on entry, because it is used to
 * index layer_order[] before any check. `__label__` is a GCC local label,
 * which lets the macro be expanded more than once in a function.
 *
 * While skipping, the end-of-list test is made before layer_order[] is
 * indexed, so the slot at index layer_count is never read.
 */
#define render_layers_conditional(tile_alpha, obj_alpha, dest)                \
{                                                                             \
  __label__ skip;                                                             \
  current_layer = layer_order[layer_order_pos];                               \
  /* If OBJ aren't enabled skip to the first non-OBJ layer */                 \
  if(!(enable_flags & 0x10))                                                  \
  {                                                                           \
    while((current_layer & 0x04) || !((1 << current_layer) & enable_flags))   \
    {                                                                         \
      layer_order_pos++;                                                      \
                                                                              \
      /* Oops, ran out of layers, render the background. */                   \
      if(layer_order_pos == layer_count)                                      \
      {                                                                       \
        fill_line_bg(tile_alpha, dest, start, end);                           \
        goto skip;                                                            \
      }                                                                       \
                                                                              \
      current_layer = layer_order[layer_order_pos];                           \
    }                                                                         \
                                                                              \
    /* Render the first valid layer */                                        \
    layer_renderers[current_layer].tile_alpha##_render_base(current_layer,    \
     start, end, dest);                                                       \
                                                                              \
    layer_order_pos++;                                                        \
                                                                              \
    /* Render the rest of the layers if active, skipping OBJ ones. */         \
    for(; layer_order_pos < layer_count; layer_order_pos++)                   \
    {                                                                         \
      current_layer = layer_order[layer_order_pos];                           \
      if(!(current_layer & 0x04) && ((1 << current_layer) & enable_flags))    \
      {                                                                       \
        layer_renderers[current_layer].                                       \
         tile_alpha##_render_transparent(current_layer, start, end, dest);    \
      }                                                                       \
    }                                                                         \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    /* Find the first active layer, skip all of the inactive ones */          \
    while(!((current_layer & 0x04) || ((1 << current_layer) & enable_flags))) \
    {                                                                         \
      layer_order_pos++;                                                      \
                                                                              \
      /* Oops, ran out of layers, render the background. */                   \
      if(layer_order_pos == layer_count)                                      \
      {                                                                       \
        fill_line_bg(tile_alpha, dest, start, end);                           \
        goto skip;                                                            \
      }                                                                       \
                                                                              \
      current_layer = layer_order[layer_order_pos];                           \
    }                                                                         \
                                                                              \
    if(current_layer & 0x04)                                                  \
    {                                                                         \
      /* If the first one is OBJ render the background then render it. */     \
      fill_line_bg(tile_alpha, dest, start, end);                             \
      render_obj_layer(obj_alpha, dest, start, end);                          \
    }                                                                         \
    else                                                                      \
    {                                                                         \
      /* Otherwise render a base layer. */                                    \
      layer_renderers[current_layer].                                         \
       tile_alpha##_render_base(current_layer, start, end, dest);             \
    }                                                                         \
                                                                              \
    layer_order_pos++;                                                        \
                                                                              \
    /* Render the rest of the layers. */                                      \
    for(; layer_order_pos < layer_count; layer_order_pos++)                   \
    {                                                                         \
      current_layer = layer_order[layer_order_pos];                           \
      if(current_layer & 0x04)                                                \
      {                                                                       \
        render_obj_layer(obj_alpha, dest, start, end);                        \
      }                                                                       \
      else                                                                    \
      {                                                                       \
        if(enable_flags & (1 << current_layer))                               \
        {                                                                     \
          layer_renderers[current_layer].                                     \
           tile_alpha##_render_transparent(current_layer, start, end, dest);  \
        }                                                                     \
      }                                                                       \
    }                                                                         \
  }                                                                           \
                                                                              \
  skip:                                                                       \
    ;                                                                         \
}
2882
2883
2884 // Render all of the BG and OBJ in a tiled scanline from start to end ONLY if
2885 // enable_flag allows that layer/OBJ. Also conditionally render color effects.
2886
/* Draws the span [start, end) of a tiled-mode scanline, restricted by
 * enable_flags.
 *
 *  start, end       span bounds, forwarded to the macros below.
 *  scanline         destination line buffer of 16-bit pixels.
 *  enable_flags     bits 0-4 (mask 0x1F) gate whether any layer is drawn at
 *                   all; bit 5 (0x20) gates the alpha and fade conditions.
 *  dispcnt          not referenced by name in this body; presumably read by
 *                   the macros expanded below - TODO confirm.
 *  bldcnt           blend control value; render_layers_color_effect
 *                   switches on its bits 6-7.
 *  layer_renderers  table of per-layer render callbacks indexed by
 *                   render_layers_conditional.
 */
2887 void render_scanline_conditional_tile(u32 start, u32 end, u16 *scanline,
2888  u32 enable_flags, u32 dispcnt, u32 bldcnt, tile_layer_render_struct
2889  *layer_renderers)
2890 {
2891   u32 current_layer;
  /* Must start at 0: render_layers_conditional indexes layer_order[] with
     it before doing anything else. */
2892   u32 layer_order_pos = 0;
2893
  /* May leave this function through the macro's own `return;` statements. */
2894   render_layers_color_effect(render_layers_conditional,
2895    (layer_count && (enable_flags & 0x1F)),
2896    ((enable_flags & 0x20) && render_condition_alpha),
2897    ((enable_flags & 0x20) && render_condition_fade), start, end);
2898 }
2899
2900
2901 // Render the BG and OBJ in a bitmap scanline from start to end ONLY if
2902 // enable_flag allows that layer/OBJ. Also conditionally render color effects.
2903
2904 void render_scanline_conditional_bitmap(u32 start, u32 end, u16 *scanline,
2905  u32 enable_flags, u32 dispcnt, u32 bldcnt, bitmap_layer_render_struct
2906  *layer_renderers)
2907 {
2908   u32 current_layer;
2909   u32 layer_order_pos;
2910
2911   fill_line_bg(normal, scanline, start, end);
2912
2913   for(layer_order_pos = 0; layer_order_pos < layer_count; layer_order_pos++)
2914   {
2915     current_layer = layer_order[layer_order_pos];
2916     if(current_layer & 0x04)
2917     {
2918       if(enable_flags & 0x10)
2919       {
2920         render_obj_layer(normal, scanline, start, end);
2921       }
2922     }
2923     else
2924     {
2925       if(enable_flags & 0x04)
2926         layer_renderers->normal_render(start, end, scanline);
2927     }
2928   }
2929 }
2930
2931
/* window_x_coords(win)
 *
 * Loads the horizontal bounds and the enable bits of window `win` into the
 * variables window_<win>_x1, window_<win>_x2 and window_<win>_enable, which
 * must already exist at the expansion site.
 *
 *  x1      high byte of REG_WIN<win>H, capped at 240.
 *  x2      low byte of REG_WIN<win>H, capped at 240.
 *  enable  six bits of `winin`, taken from bit position win * 8.
 *
 * The final statement has no terminating semicolon; the caller supplies it.
 */
#define window_x_coords(win)                                                  \
  window_##win##_enable = (winin >> (win * 8)) & 0x3F;                        \
                                                                              \
  window_##win##_x1 = io_registers[REG_WIN##win##H] >> 8;                     \
  window_##win##_x1 =                                                         \
   (window_##win##_x1 > 240) ? 240 : window_##win##_x1;                       \
                                                                              \
  window_##win##_x2 = io_registers[REG_WIN##win##H] & 0xFF;                   \
  window_##win##_x2 =                                                         \
   (window_##win##_x2 > 240) ? 240 : window_##win##_x2
2945
/* window_coords(win)
 *
 * Declares window_<win>_x1/_x2/_y1/_y2/_enable and fills them in for the
 * line `vcount` (taken from the expansion site). Because it declares
 * variables it can be expanded only once per scope for a given window.
 *
 * y1 is the high byte and y2 the low byte of REG_WIN<win>V. The window
 * counts as covering the current line when y1 <= 227 and either
 *   - y2 > 227, or
 *   - y1 >  y2 and (vcount <= y2 or vcount > y1), or
 *   - y1 <= y2 and (y1 <= vcount < y2).
 * In that case window_x_coords(win) loads x1, x2 and enable; otherwise x1
 * and x2 are both set to 240 and enable is left unassigned.
 *
 * NOTE(review): the y1 > y2 case compares with `<= y2` / `> y1` while the
 * other case uses `>= y1` / `< y2` - confirm the asymmetry is intended.
 */
#define window_coords(win)                                                    \
  u32 window_##win##_x1, window_##win##_x2;                                   \
  u32 window_##win##_y1, window_##win##_y2;                                   \
  u32 window_##win##_enable;                                                  \
  window_##win##_y1 = io_registers[REG_WIN##win##V] >> 8;                     \
  window_##win##_y2 = io_registers[REG_WIN##win##V] & 0xFF;                   \
                                                                              \
  if((window_##win##_y1 <= 227) &&                                            \
   ((window_##win##_y2 > 227) ||                                              \
    ((window_##win##_y1 > window_##win##_y2) ?                                \
     ((vcount <= window_##win##_y2) || (vcount > window_##win##_y1)) :        \
     ((vcount >= window_##win##_y1) && (vcount < window_##win##_y2)))))       \
  {                                                                           \
    window_x_coords(win);                                                     \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    window_##win##_x1 = 240;                                                  \
    window_##win##_x2 = 240;                                                  \
  }
2985
/* render_window_segment(type, seg_start, seg_end, window_type)
 *
 * Calls render_scanline_conditional_<type>() for the span
 * [seg_start, seg_end) using window_<window_type>_enable as the enable
 * flags, unless the two bounds are equal. `scanline`, `dispcnt`, `bldcnt`
 * and `layer_renderers` are taken from the expansion site.
 *
 * Expands to a bare `if` statement, so it must be used as a statement
 * inside braces.
 */
#define render_window_segment(type, seg_start, seg_end, window_type)          \
  if(!(seg_start == seg_end))                                                 \
  {                                                                           \
    render_scanline_conditional_##type(seg_start, seg_end, scanline,          \
     window_##window_type##_enable, dispcnt, bldcnt, layer_renderers);        \
  }
2992
/* render_window_segment_unequal(type, seg_start, seg_end, window_type)
 *
 * Unconditional form of render_window_segment: always calls
 * render_scanline_conditional_<type>() for [seg_start, seg_end) with
 * window_<window_type>_enable, without comparing the bounds first.
 * Expands to a single call expression with no trailing semicolon.
 */
#define render_window_segment_unequal(type, seg_start, seg_end, window_type)  \
  render_scanline_conditional_##type(seg_start, seg_end, scanline,            \
   window_##window_type##_enable, dispcnt, bldcnt, layer_renderers)
2996
/* render_window_segment_clip(type, clip_start, clip_end, start, end,
 *  window_type)
 *
 * Renders the part of the segment [start, end) that lies inside
 * [clip_start, clip_end), using window_<window_type>_enable. Nothing is
 * rendered when start == end or when the segment does not reach into the
 * clip range.
 *
 *  start <  clip_start : renders from clip_start up to the smaller of end
 *                        and clip_end, provided end > clip_start.
 *  start >= clip_start : renders [start, end) when end <= clip_end,
 *                        otherwise [start, clip_end) provided
 *                        start < clip_end.
 */
#define render_window_segment_clip(type, clip_start, clip_end, start, end,    \
 window_type)                                                                 \
{                                                                             \
  if(start != end)                                                            \
  {                                                                           \
    if(start < clip_start)                                                    \
    {                                                                         \
      /* Segment begins left of the clip range. */                            \
      if(end > clip_start)                                                    \
      {                                                                       \
        render_window_segment_unequal(type, clip_start,                       \
         (end > clip_end) ? clip_end : end, window_type);                     \
      }                                                                       \
    }                                                                         \
    else if(end <= clip_end)                                                  \
    {                                                                         \
      /* Segment lies at or after clip_start and ends within the range. */    \
      render_window_segment_unequal(type, start, end, window_type);           \
    }                                                                         \
    else if(start < clip_end)                                                 \
    {                                                                         \
      /* Segment runs past the right edge of the clip range. */               \
      render_window_segment_unequal(type, start, clip_end, window_type);      \
    }                                                                         \
  }                                                                           \
}
3030
/* render_window_clip_1(type, start, end)
 *
 * Renders the span [start, end), split at the horizontal bounds of
 * window 1 (window_1_x1 / window_1_x2 from the expansion site).
 *
 *  window_1_x1 == 240 : the whole span is rendered with the `out` enable
 *                       flags.
 *  x1 >  x2           : [0, x2) and [x1, 240) use the window 1 flags and
 *                       [x2, x1) uses the `out` flags.
 *  x1 <= x2           : [x1, x2) uses the window 1 flags and the two
 *                       pieces either side use the `out` flags.
 * Each piece is clipped to [start, end) by render_window_segment_clip.
 *
 * Expands to a bare if/else chain, so it must be used as a statement
 * inside braces.
 */
#define render_window_clip_1(type, start, end)                                \
  if(window_1_x1 == 240)                                                      \
  {                                                                           \
    render_window_segment(type, start, end, out);                             \
  }                                                                           \
  else if(window_1_x1 > window_1_x2)                                          \
  {                                                                           \
    render_window_segment_clip(type, start, end, 0, window_1_x2, 1);          \
    render_window_segment_clip(type, start, end, window_1_x2, window_1_x1,    \
     out);                                                                    \
    render_window_segment_clip(type, start, end, window_1_x1, 240, 1);        \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    render_window_segment_clip(type, start, end, 0, window_1_x1, out);        \
    render_window_segment_clip(type, start, end, window_1_x1, window_1_x2,    \
     1);                                                                      \
    render_window_segment_clip(type, start, end, window_1_x2, 240, out);      \
  }
3053
/* Renders the span [start, end) for the region governed by the OBJ window:
 * first the span is drawn with the "out" (outside window) settings, then the
 * OBJ copy pass for the given renderer type is run over the same span.
 * Bit 0x40 of dispcnt selects the _1D variant of the copy routine, otherwise
 * the _2D variant is used.
 *
 * The parameter list must not be followed by a ';' (it would become part of
 * the expansion), and both branches are braced and terminated so the macro
 * no longer depends on the caller's trailing ';' to finish the else arm. */
#define render_window_clip_obj(type, start, end)                              \
  render_window_segment(type, start, end, out);                               \
  if(dispcnt & 0x40)                                                          \
  {                                                                           \
    render_scanline_obj_copy_##type##_1D(4, start, end, scanline);            \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    render_scanline_obj_copy_##type##_2D(4, start, end, scanline);            \
  }
3060
3061
/* Intersects the segment [start, end) with the clip range
 * [clip_start, clip_end) and, if anything is left, renders the intersection
 * through render_window_clip_obj.  Empty segments (start == end) and
 * segments lying completely outside the clip range render nothing. */
#define render_window_segment_clip_obj(type, clip_start, clip_end, start,     \
 end)                                                                         \
{                                                                             \
  if(start != end)                                                            \
  {                                                                           \
    if(start >= clip_start)                                                   \
    {                                                                         \
      /* Segment begins inside (or right of) the clip range */               \
      if(end <= clip_end)                                                     \
      {                                                                       \
        render_window_clip_obj(type, start, end);                             \
      }                                                                       \
      else                                                                    \
                                                                              \
      if(start < clip_end)                                                    \
      {                                                                       \
        render_window_clip_obj(type, start, clip_end);                        \
      }                                                                       \
    }                                                                         \
    else                                                                      \
                                                                              \
    if(end > clip_start)                                                      \
    {                                                                         \
      /* Segment begins left of the clip range but reaches into it */        \
      if(end <= clip_end)                                                     \
      {                                                                       \
        render_window_clip_obj(type, clip_start, end);                        \
      }                                                                       \
      else                                                                    \
      {                                                                       \
        render_window_clip_obj(type, clip_start, clip_end);                   \
      }                                                                       \
    }                                                                         \
  }                                                                           \
}
3096
3097
/* Renders the span [start, end) taking window 1 and the OBJ window into
 * account.  Pieces of the scanline inside window 1 are drawn with window
 * id 1; pieces outside window 1 go through the OBJ-window path
 * (render_window_segment_clip_obj).  When window_1_x1 is 240 the whole span
 * goes through the OBJ-window path.  x1 > x2 means window 1 wraps around the
 * screen edge, which swaps which pieces count as inside. */
#define render_window_clip_1_obj(type, start, end)                            \
  if(window_1_x1 == 240)                                                      \
  {                                                                           \
    render_window_clip_obj(type, start, end);                                 \
  }                                                                           \
  else                                                                        \
                                                                              \
  if(window_1_x1 <= window_1_x2)                                              \
  {                                                                           \
    /* Normal window: OBJ/outside | window 1 | OBJ/outside */                \
    render_window_segment_clip_obj(type, start, end, 0, window_1_x1);         \
    render_window_segment_clip(type, start, end, window_1_x1, window_1_x2,    \
     1);                                                                      \
    render_window_segment_clip_obj(type, start, end, window_1_x2, 240);       \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    /* Wrapped window: window 1 | OBJ/outside | window 1 */                  \
    render_window_segment_clip(type, start, end, 0, window_1_x2, 1);          \
    render_window_segment_clip_obj(type, start, end, window_1_x2,             \
     window_1_x1);                                                            \
    render_window_segment_clip(type, start, end, window_1_x1, 240, 1);        \
  }
3120
3121
3122
/* Renders a full scanline when exactly one rectangular window
 * (window_number, 0 or 1) is enabled.  Reads WININ into a local named
 * winin, runs window_coords for the window, then draws three segments:
 * the part inside the window with the window's own settings and the rest
 * with the "out" settings.  x1 > x2 means the window wraps around the
 * screen edge, so the middle segment is the outside one.
 * Declares a local, so it must be expanded inside its own block. */
#define render_window_single(type, window_number)                             \
  u32 winin = io_registers[REG_WININ];                                        \
  window_coords(window_number);                                               \
  if(window_##window_number##_x1 <= window_##window_number##_x2)              \
  {                                                                           \
    /* Normal window: outside | inside | outside */                          \
    render_window_segment(type, 0, window_##window_number##_x1, out);         \
    render_window_segment(type, window_##window_number##_x1,                  \
     window_##window_number##_x2, window_number);                             \
    render_window_segment(type, window_##window_number##_x2, 240, out);       \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    /* Wrapped window: inside | outside | inside */                          \
    render_window_segment(type, 0, window_##window_number##_x2,               \
     window_number);                                                          \
    render_window_segment(type, window_##window_number##_x2,                  \
     window_##window_number##_x1, out);                                       \
    render_window_segment(type, window_##window_number##_x1, 240,             \
     window_number);                                                          \
  }
3142
/* Renders a full scanline when a "front" rectangular window is combined
 * with a lower-priority "back" region.  The part of the line inside the
 * front window is drawn with the front window's settings; the remainder is
 * delegated to render_window_clip_<back> (back is 1, obj or 1_obj).
 * x1 > x2 means the front window wraps around the screen edge. */
#define render_window_multi(type, front, back)                                \
  if(window_##front##_x1 <= window_##front##_x2)                              \
  {                                                                           \
    /* Normal front window: back | front | back */                           \
    render_window_clip_##back(type, 0, window_##front##_x1);                  \
    render_window_segment(type, window_##front##_x1, window_##front##_x2,     \
     front);                                                                  \
    render_window_clip_##back(type, window_##front##_x2, 240);                \
  }                                                                           \
  else                                                                        \
  {                                                                           \
    /* Wrapped front window: front | back | front */                         \
    render_window_segment(type, 0, window_##front##_x2, front);               \
    render_window_clip_##back(type, window_##front##_x2,                      \
     window_##front##_x1);                                                    \
    render_window_segment(type, window_##front##_x1, 240, front);             \
  }
3158
3159 #define render_scanline_window_builder(type)                                  \
3160 void render_scanline_window_##type(u16 *scanline, u32 dispcnt)                \
3161 {                                                                             \
3162   u32 vcount = io_registers[REG_VCOUNT];                                      \
3163   u32 winout = io_registers[REG_WINOUT];                                      \
3164   u32 bldcnt = io_registers[REG_BLDCNT];                                      \
3165   u32 window_out_enable = winout & 0x3F;                                      \
3166                                                                               \
3167   render_scanline_layer_functions_##type();                                   \
3168                                                                               \
3169   switch(dispcnt >> 13)                                                       \
3170   {                                                                           \
3171     /* Just window 0 */                                                       \
3172     case 0x01:                                                                \
3173     {                                                                         \
3174       render_window_single(type, 0);                                          \
3175       break;                                                                  \
3176     }                                                                         \
3177                                                                               \
3178     /* Just window 1 */                                                       \
3179     case 0x02:                                                                \
3180     {                                                                         \
3181       render_window_single(type, 1);                                          \
3182       break;                                                                  \
3183     }                                                                         \
3184                                                                               \
3185     /* Windows 1 and 2 */                                                     \
3186     case 0x03:                                                                \
3187     {                                                                         \
3188       u32 winin = io_registers[REG_WININ];                                    \
3189       window_coords(0);                                                       \
3190       window_coords(1);                                                       \
3191       render_window_multi(type, 0, 1);                                        \
3192       break;                                                                  \
3193     }                                                                         \
3194                                                                               \
3195     /* Just OBJ windows */                                                    \
3196     case 0x04:                                                                \
3197     {                                                                         \
3198       u32 window_obj_enable = winout >> 8;                                    \
3199       render_window_clip_obj(type, 0, 240);                                   \
3200       break;                                                                  \
3201     }                                                                         \
3202                                                                               \
3203     /* Window 0 and OBJ window */                                             \
3204     case 0x05:                                                                \
3205     {                                                                         \
3206       u32 window_obj_enable = winout >> 8;                                    \
3207       u32 winin = io_registers[REG_WININ];                                    \
3208       window_coords(0);                                                       \
3209       render_window_multi(type, 0, obj);                                      \
3210       break;                                                                  \
3211     }                                                                         \
3212                                                                               \
3213     /* Window 1 and OBJ window */                                             \
3214     case 0x06:                                                                \
3215     {                                                                         \
3216       u32 window_obj_enable = winout >> 8;                                    \
3217       u32 winin = io_registers[REG_WININ];                                    \
3218       window_coords(1);                                                       \
3219       render_window_multi(type, 1, obj);                                      \
3220       break;                                                                  \
3221     }                                                                         \
3222                                                                               \
3223     /* Window 0, 1, and OBJ window */                                         \
3224     case 0x07:                                                                \
3225     {                                                                         \
3226       u32 window_obj_enable = winout >> 8;                                    \
3227       u32 winin = io_registers[REG_WININ];                                    \
3228       window_coords(0);                                                       \
3229       window_coords(1);                                                       \
3230       render_window_multi(type, 0, 1_obj);                                    \
3231       break;                                                                  \
3232     }                                                                         \
3233   }                                                                           \
3234 }                                                                             \
3235
3236 render_scanline_window_builder(tile);
3237 render_scanline_window_builder(bitmap);
3238
3239 u32 active_layers[6] = { 0x1F, 0x17, 0x1C, 0x14, 0x14, 0x14 };
3240
3241 u32 small_resolution_width = 240;
3242 u32 small_resolution_height = 160;
3243 u32 resolution_width, resolution_height;
3244
3245 void update_scanline()
3246 {
3247   u32 pitch = get_screen_pitch();
3248   u32 dispcnt = io_registers[REG_DISPCNT];
3249   u32 display_flags = (dispcnt >> 8) & 0x1F;
3250   u32 vcount = io_registers[REG_VCOUNT];
3251   u16 *screen_offset = get_screen_pixels() + (vcount * pitch);
3252   u32 video_mode = dispcnt & 0x07;
3253   u32 current_layer;
3254
3255   // If OAM has been modified since the last scanline has been updated then
3256   // reorder and reprofile the OBJ lists.
3257   if(oam_update)
3258   {
3259     order_obj(video_mode);
3260     oam_update = 0;
3261   }
3262
3263   order_layers((dispcnt >> 8) & active_layers[video_mode]);
3264
3265   if(skip_next_frame)
3266     return;
3267
3268   // If the screen is in in forced blank draw pure white.
3269   if(dispcnt & 0x80)
3270   {
3271     fill_line_color16(0xFFFF, screen_offset, 0, 240);
3272   }
3273   else
3274   {
3275     if(video_mode < 3)
3276     {
3277       if(dispcnt >> 13)
3278       {
3279         render_scanline_window_tile(screen_offset, dispcnt);
3280       }
3281       else
3282       {
3283         render_scanline_tile(screen_offset, dispcnt);
3284       }
3285     }
3286     else
3287     {
3288       if(dispcnt >> 13)
3289         render_scanline_window_bitmap(screen_offset, dispcnt);
3290       else
3291         render_scanline_bitmap(screen_offset, dispcnt);
3292     }
3293   }
3294
3295   affine_reference_x[0] += (s16)io_registers[REG_BG2PB];
3296   affine_reference_y[0] += (s16)io_registers[REG_BG2PD];
3297   affine_reference_x[1] += (s16)io_registers[REG_BG3PB];
3298   affine_reference_y[1] += (s16)io_registers[REG_BG3PD];
3299 }
3300
3301 #ifdef PSP_BUILD
3302
3303 u32 screen_flip = 0;
3304
3305 void flip_screen()
3306 {
3307   if(video_direct == 0)
3308   {
3309     u32 *old_ge_cmd_ptr = ge_cmd_ptr;
3310     sceKernelDcacheWritebackAll();
3311
3312     // Render the current screen
3313     ge_cmd_ptr = ge_cmd + 2;
3314     GE_CMD(TBP0, ((u32)screen_pixels & 0x00FFFFFF));
3315     GE_CMD(TBW0, (((u32)screen_pixels & 0xFF000000) >> 8) |
3316      GBA_SCREEN_WIDTH);
3317     ge_cmd_ptr = old_ge_cmd_ptr;
3318
3319     sceGeListEnQueue(ge_cmd, ge_cmd_ptr, gecbid, NULL);
3320
3321     // Flip to the next screen
3322     screen_flip ^= 1;
3323
3324     if(screen_flip)
3325       screen_pixels = screen_texture + (240 * 160 * 2);
3326     else
3327       screen_pixels = screen_texture;
3328   }
3329 }
3330
3331 #else
3332
/* integer_scale_copy_N(): stores current_pixel into the N destination
 * pixels ending at index x2 of the current scanline, then moves x2 left by
 * N.  Relies on the locals current_scanline_ptr, current_pixel and x2 of
 * the expanding function; the caller supplies the final ';'. */
#define integer_scale_copy_2()                                                \
  current_scanline_ptr[x2] = current_pixel;                                   \
  current_scanline_ptr[x2 - 1] = current_pixel;                               \
  x2 -= 2                                                                     \

#define integer_scale_copy_3()                                                \
  current_scanline_ptr[x2] = current_pixel;                                   \
  current_scanline_ptr[x2 - 1] = current_pixel;                               \
  current_scanline_ptr[x2 - 2] = current_pixel;                               \
  x2 -= 3                                                                     \

#define integer_scale_copy_4()                                                \
  current_scanline_ptr[x2] = current_pixel;                                   \
  current_scanline_ptr[x2 - 1] = current_pixel;                               \
  current_scanline_ptr[x2 - 2] = current_pixel;                               \
  current_scanline_ptr[x2 - 3] = current_pixel;                               \
  x2 -= 4                                                                     \

/* Stretches the first 240 pixels of each of 160 scanlines in place to
 * 240 * video_scale pixels.  Each line is processed right to left so that a
 * source pixel is always read before its slot can be overwritten by the
 * wider output.  x and x2 must be signed: the loop ends when x drops below
 * zero.  Uses the locals x, y, x2, current_pixel, current_scanline_ptr and
 * pitch of the expanding function.
 *
 * Nothing may be stored after integer_scale_copy_N() has moved x2: at that
 * point x2 addresses pixels that have not been read yet (and, for x == 0,
 * memory before the start of the line). */
#define integer_scale_horizontal(scale_factor)                                \
  for(y = 0; y < 160; y++)                                                    \
  {                                                                           \
    for(x = 239, x2 = (240 * video_scale) - 1; x >= 0; x--)                   \
    {                                                                         \
      current_pixel = current_scanline_ptr[x];                                \
      integer_scale_copy_##scale_factor();                                    \
    }                                                                         \
    current_scanline_ptr += pitch;                                            \
  }
3364
3365 void flip_screen()
3366 {
3367   if((video_scale != 1) && (current_scale != unscaled))
3368   {
3369     s32 x, y;
3370     s32 x2, y2;
3371     u16 *screen_ptr = get_screen_pixels();
3372     u16 *current_scanline_ptr = screen_ptr;
3373     u32 pitch = get_screen_pitch();
3374     u16 current_pixel;
3375     u32 i;
3376
3377     switch(video_scale)
3378     {
3379       case 2:
3380         integer_scale_horizontal(2);
3381         break;
3382
3383       case 3:
3384         integer_scale_horizontal(3);
3385         break;
3386
3387       default:
3388       case 4:
3389         integer_scale_horizontal(4);
3390         break;
3391
3392     }
3393
3394     for(y = 159, y2 = (160 * video_scale) - 1; y >= 0; y--)
3395     {
3396       for(i = 0; i < video_scale; i++)
3397       {
3398         memcpy(screen_ptr + (y2 * pitch),
3399          screen_ptr + (y * pitch), 480 * video_scale);
3400         y2--;
3401       }
3402     }
3403   }
3404 #ifdef GP2X_BUILD
3405   {
3406     if((screen_scale == unscaled) &&
3407      (resolution_width == small_resolution_width) &&
3408      (resolution_height == small_resolution_height))
3409     {
3410       SDL_Rect srect = {0, 0, 240, 160};
3411       SDL_Rect drect = {40, 40, 240, 160};
3412       SDL_BlitSurface(screen, &srect, hw_screen, &drect);
3413     }
3414     else if((screen_scale == scaled_aspect) &&
3415      (resolution_width == small_resolution_width) &&
3416      (resolution_height == small_resolution_height))
3417     {
3418       SDL_Rect drect = {0, 10, 0, 0};
3419       SDL_BlitSurface(screen, NULL, hw_screen, &drect);
3420     }
3421     else
3422     {
3423       SDL_BlitSurface(screen, NULL, hw_screen, NULL);
3424     }
3425     /* it is unclear if this syscall takes virtual or physical addresses,
3426      * but using virtual seems to work for me. */
3427     gp2x_flush_cache(hw_screen->pixels, hw_screen->pixels + 320*240, 0);
3428   }
3429 #else
3430   SDL_Flip(screen);
3431 #endif
3432 }
3433
3434 #endif
3435
3436 u32 frame_to_render;
3437
3438 void update_screen()
3439 {
3440   if(!skip_next_frame)
3441     flip_screen();
3442 }
3443
3444 #ifdef PSP_BUILD
3445
3446 void init_video()
3447 {
3448   sceDisplaySetMode(0, PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT);
3449
3450   sceDisplayWaitVblankStart();
3451   sceDisplaySetFrameBuf((void*)psp_gu_vram_base, PSP_LINE_SIZE,
3452    PSP_DISPLAY_PIXEL_FORMAT_565, PSP_DISPLAY_SETBUF_NEXTFRAME);
3453
3454   sceGuInit();
3455
3456   sceGuStart(GU_DIRECT, display_list);
3457   sceGuDrawBuffer(GU_PSM_5650, (void*)0, PSP_LINE_SIZE);
3458   sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3459    (void*)0, PSP_LINE_SIZE);
3460   sceGuClear(GU_COLOR_BUFFER_BIT);
3461
3462   sceGuOffset(2048 - (PSP_SCREEN_WIDTH / 2), 2048 - (PSP_SCREEN_HEIGHT / 2));
3463   sceGuViewport(2048, 2048, PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT);
3464
3465   sceGuScissor(0, 0, PSP_SCREEN_WIDTH + 1, PSP_SCREEN_HEIGHT + 1);
3466   sceGuEnable(GU_SCISSOR_TEST);
3467   sceGuTexMode(GU_PSM_5650, 0, 0, GU_FALSE);
3468   sceGuTexFunc(GU_TFX_REPLACE, GU_TCC_RGBA);
3469   sceGuTexFilter(GU_LINEAR, GU_LINEAR);
3470   sceGuEnable(GU_TEXTURE_2D);
3471
3472   sceGuFrontFace(GU_CW);
3473   sceGuDisable(GU_BLEND);
3474
3475   sceGuFinish();
3476   sceGuSync(0, 0);
3477
3478   sceDisplayWaitVblankStart();
3479   sceGuDisplay(GU_TRUE);
3480
3481   PspGeCallbackData gecb;
3482   gecb.signal_func = NULL;
3483   gecb.signal_arg = NULL;
3484   gecb.finish_func = Ge_Finish_Callback;
3485   gecb.finish_arg = NULL;
3486   gecbid = sceGeSetCallback(&gecb);
3487
3488   screen_vertex[0] = 0 + 0.5;
3489   screen_vertex[1] = 0 + 0.5;
3490   screen_vertex[2] = 0 + 0.5;
3491   screen_vertex[3] = 0 + 0.5;
3492   screen_vertex[4] = 0;
3493   screen_vertex[5] = GBA_SCREEN_WIDTH - 0.5;
3494   screen_vertex[6] = GBA_SCREEN_HEIGHT - 0.5;
3495   screen_vertex[7] = PSP_SCREEN_WIDTH - 0.5;
3496   screen_vertex[8] = PSP_SCREEN_HEIGHT - 0.5;
3497   screen_vertex[9] = 0;
3498
3499   // Set framebuffer to PSP VRAM
3500   GE_CMD(FBP, ((u32)psp_gu_vram_base & 0x00FFFFFF));
3501   GE_CMD(FBW, (((u32)psp_gu_vram_base & 0xFF000000) >> 8) | PSP_LINE_SIZE);
3502   // Set texture 0 to the screen texture
3503   GE_CMD(TBP0, ((u32)screen_texture & 0x00FFFFFF));
3504   GE_CMD(TBW0, (((u32)screen_texture & 0xFF000000) >> 8) | GBA_SCREEN_WIDTH);
3505   // Set the texture size to 256 by 256 (2^8 by 2^8)
3506   GE_CMD(TSIZE0, (8 << 8) | 8);
3507   // Flush the texture cache
3508   GE_CMD(TFLUSH, 0);
3509   // Use 2D coordinates, no indeces, no weights, 32bit float positions,
3510   // 32bit float texture coordinates
3511   GE_CMD(VTYPE, (1 << 23) | (0 << 11) | (0 << 9) |
3512    (3 << 7) | (0 << 5) | (0 << 2) | 3);
3513   // Set the base of the index list pointer to 0
3514   GE_CMD(BASE, 0);
3515   // Set the rest of index list pointer to 0 (not being used)
3516   GE_CMD(IADDR, 0);
3517   // Set the base of the screen vertex list pointer
3518   GE_CMD(BASE, ((u32)screen_vertex & 0xFF000000) >> 8);
3519   // Set the rest of the screen vertex list pointer
3520   GE_CMD(VADDR, ((u32)screen_vertex & 0x00FFFFFF));
3521   // Primitive kick: render sprite (primitive 6), 2 vertices
3522   GE_CMD(PRIM, (6 << 16) | 2);
3523   // Done with commands
3524   GE_CMD(FINISH, 0);
3525   // Raise signal interrupt
3526   GE_CMD(SIGNAL, 0);
3527   GE_CMD(NOP, 0);
3528   GE_CMD(NOP, 0);
3529 }
3530
3531 #else
3532
3533 void init_video()
3534 {
3535   SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK | SDL_INIT_NOPARACHUTE);
3536
3537 #ifdef GP2X_BUILD
3538   SDL_GP2X_AllowGfxMemory(NULL, 0);
3539
3540   hw_screen = SDL_SetVideoMode(320 * video_scale, 240 * video_scale,
3541    16, SDL_HWSURFACE);
3542
3543   screen = SDL_CreateRGBSurface(SDL_HWSURFACE, 240 * video_scale,
3544    160 * video_scale, 16, 0xFFFF, 0xFFFF, 0xFFFF, 0);
3545
3546   gp2x_load_mmuhack();
3547 #else
3548   screen = SDL_SetVideoMode(240 * video_scale, 160 * video_scale, 16, 0);
3549 #endif
3550   SDL_ShowCursor(0);
3551 }
3552
3553 #endif
3554
3555 video_scale_type screen_scale = scaled_aspect;
3556 video_scale_type current_scale = scaled_aspect;
3557 video_filter_type screen_filter = filter_bilinear;
3558
3559
3560 #ifdef PSP_BUILD
3561
3562 void video_resolution_large()
3563 {
3564   if(video_direct != 1)
3565   {
3566     video_direct = 1;
3567     screen_pixels = psp_gu_vram_base;
3568     screen_pitch = 512;
3569     sceGuStart(GU_DIRECT, display_list);
3570     sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3571      (void*)0, PSP_LINE_SIZE);
3572     sceGuFinish();
3573   }
3574 }
3575
3576 void set_gba_resolution(video_scale_type scale)
3577 {
3578   u32 filter_linear = 0;
3579   screen_scale = scale;
3580   switch(scale)
3581   {
3582     case unscaled:
3583       screen_vertex[2] = 120 + 0.5;
3584       screen_vertex[3] = 56 + 0.5;
3585       screen_vertex[7] = GBA_SCREEN_WIDTH + 120 - 0.5;
3586       screen_vertex[8] = GBA_SCREEN_HEIGHT + 56 - 0.5;
3587       break;
3588
3589     case scaled_aspect:
3590       screen_vertex[2] = 36 + 0.5;
3591       screen_vertex[3] = 0 + 0.5;
3592       screen_vertex[7] = 408 + 36 - 0.5;
3593       screen_vertex[8] = PSP_SCREEN_HEIGHT - 0.5;
3594       break;
3595
3596     case fullscreen:
3597       screen_vertex[2] = 0;
3598       screen_vertex[3] = 0;
3599       screen_vertex[7] = PSP_SCREEN_WIDTH;
3600       screen_vertex[8] = PSP_SCREEN_HEIGHT;
3601       break;
3602   }
3603
3604   sceGuStart(GU_DIRECT, display_list);
3605   if(screen_filter == filter_bilinear)
3606     sceGuTexFilter(GU_LINEAR, GU_LINEAR);
3607   else
3608     sceGuTexFilter(GU_NEAREST, GU_NEAREST);
3609
3610   sceGuFinish();
3611   sceGuSync(0, 0);
3612
3613   clear_screen(0x0000);
3614 }
3615
3616 void video_resolution_small()
3617 {
3618   if(video_direct != 0)
3619   {
3620     set_gba_resolution(screen_scale);
3621     video_direct = 0;
3622     screen_pixels = screen_texture;
3623     screen_flip = 0;
3624     screen_pitch = 240;
3625     sceGuStart(GU_DIRECT, display_list);
3626     sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3627      (void*)0, PSP_LINE_SIZE);
3628     sceGuFinish();
3629   }
3630 }
3631
3632 void clear_screen(u16 color)
3633 {
3634   u32 i;
3635   u16 *src_ptr = get_screen_pixels();
3636
3637   sceGuSync(0, 0);
3638
3639   for(i = 0; i < (512 * 272); i++, src_ptr++)
3640   {
3641     *src_ptr = color;
3642   }
3643
3644   // I don't know why this doesn't work.
3645 /*  color = (((color & 0x1F) * 255 / 31) << 0) |
3646    ((((color >> 5) & 0x3F) * 255 / 63) << 8) |
3647    ((((color >> 11) & 0x1F) * 255 / 31) << 16) | (0xFF << 24);
3648
3649   sceGuStart(GU_DIRECT, display_list);
3650   sceGuDrawBuffer(GU_PSM_5650, (void*)0, PSP_LINE_SIZE);
3651   //sceGuDispBuffer(PSP_SCREEN_WIDTH, PSP_SCREEN_HEIGHT,
3652   // (void*)0, PSP_LINE_SIZE);
3653   sceGuClearColor(color);
3654   sceGuClear(GU_COLOR_BUFFER_BIT);
3655   sceGuFinish();
3656   sceGuSync(0, 0); */
3657 }
3658
3659 #else
3660
3661 void video_resolution_large()
3662 {
3663   current_scale = unscaled;
3664
3665 #ifdef GP2X_BUILD
3666   SDL_FreeSurface(screen);
3667   SDL_GP2X_AllowGfxMemory(NULL, 0);
3668     hw_screen = SDL_SetVideoMode(320, 240, 16, SDL_HWSURFACE);
3669   screen = SDL_CreateRGBSurface(SDL_HWSURFACE, 320, 240, 16, 0xFFFF,
3670    0xFFFF, 0xFFFF, 0);
3671   resolution_width = 320;
3672     resolution_height = 240;
3673   SDL_ShowCursor(0);
3674
3675   gp2x_load_mmuhack();
3676 #else
3677   screen = SDL_SetVideoMode(480, 272, 16, 0);
3678   resolution_width = 480;
3679   resolution_height = 272;
3680 #endif
3681 }
3682
3683 void video_resolution_small()
3684 {
3685   current_scale = screen_scale;
3686
3687 #ifdef GP2X_BUILD
3688   int w, h;
3689   SDL_FreeSurface(screen);
3690   SDL_GP2X_AllowGfxMemory(NULL, 0);
3691
3692   w = 320; h = 240;
3693   if (screen_scale != unscaled)
3694   {
3695     w = small_resolution_width * video_scale;
3696     h = small_resolution_height * video_scale;
3697   }
3698   if (screen_scale == scaled_aspect) h += 20;
3699   hw_screen = SDL_SetVideoMode(w, h, 16, SDL_HWSURFACE);
3700
3701   screen = SDL_CreateRGBSurface(SDL_HWSURFACE,
3702    small_resolution_width * video_scale, small_resolution_height *
3703    video_scale, 16, 0xFFFF, 0xFFFF, 0xFFFF, 0);
3704
3705   SDL_ShowCursor(0);
3706
3707   gp2x_load_mmuhack();
3708 #else
3709   screen = SDL_SetVideoMode(small_resolution_width * video_scale,
3710    small_resolution_height * video_scale, 16, 0);
3711 #endif
3712   resolution_width = small_resolution_width;
3713   resolution_height = small_resolution_height;
3714 }
3715
3716 void set_gba_resolution(video_scale_type scale)
3717 {
3718   if(screen_scale != scale)
3719   {
3720     screen_scale = scale;
3721     switch(scale)
3722     {
3723       case unscaled:
3724       case scaled_aspect:
3725       case fullscreen:
3726         small_resolution_width = 240 * video_scale;
3727         small_resolution_height = 160 * video_scale;
3728         break;
3729     }
3730   }
3731 }
3732
3733 void clear_screen(u16 color)
3734 {
3735   u16 *dest_ptr = get_screen_pixels();
3736   u32 line_skip = get_screen_pitch() - screen->w;
3737   u32 x, y;
3738
3739   for(y = 0; y < screen->h; y++)
3740   {
3741     for(x = 0; x < screen->w; x++, dest_ptr++)
3742     {
3743       *dest_ptr = color;
3744     }
3745     dest_ptr += line_skip;
3746   }
3747 }
3748
3749 #endif
3750
3751 u16 *copy_screen()
3752 {
3753   u16 *copy = malloc(240 * 160 * 2);
3754   memcpy(copy, get_screen_pixels(), 240 * 160 * 2);
3755   return copy;
3756 }
3757
3758 void blit_to_screen(u16 *src, u32 w, u32 h, u32 dest_x, u32 dest_y)
3759 {
3760   u32 pitch = get_screen_pitch();
3761   u16 *dest_ptr = get_screen_pixels() + dest_x + (dest_y * pitch);
3762
3763   u16 *src_ptr = src;
3764   u32 line_skip = pitch - w;
3765   u32 x, y;
3766
3767   for(y = 0; y < h; y++)
3768   {
3769     for(x = 0; x < w; x++, src_ptr++, dest_ptr++)
3770     {
3771       *dest_ptr = *src_ptr;
3772     }
3773     dest_ptr += line_skip;
3774   }
3775 }
3776
3777 void print_string_ext(const char *str, u16 fg_color, u16 bg_color,
3778  u32 x, u32 y, void *_dest_ptr, u32 pitch, u32 pad)
3779 {
3780   u16 *dest_ptr = (u16 *)_dest_ptr + (y * pitch) + x;
3781   u8 current_char = str[0];
3782   u32 current_row;
3783   u32 glyph_offset;
3784   u32 i = 0, i2, i3;
3785   u32 str_index = 1;
3786   u32 current_x = x;
3787
3788
3789   /* EDIT */
3790   if(y + FONT_HEIGHT > resolution_height)
3791       return;
3792
3793   while(current_char)
3794   {
3795     if(current_char == '\n')
3796     {
3797       y += FONT_HEIGHT;
3798       current_x = x;
3799       dest_ptr = get_screen_pixels() + (y * pitch) + x;
3800     }
3801     else
3802     {
3803       glyph_offset = _font_offset[current_char];
3804       current_x += FONT_WIDTH;
3805       for(i2 = 0; i2 < FONT_HEIGHT; i2++, glyph_offset++)
3806       {
3807         current_row = _font_bits[glyph_offset];
3808         for(i3 = 0; i3 < FONT_WIDTH; i3++)
3809         {
3810           if((current_row >> (15 - i3)) & 0x01)
3811             *dest_ptr = fg_color;
3812           else
3813             *dest_ptr = bg_color;
3814           dest_ptr++;
3815         }
3816         dest_ptr += (pitch - FONT_WIDTH);
3817       }
3818       dest_ptr = dest_ptr - (pitch * FONT_HEIGHT) + FONT_WIDTH;
3819     }
3820
3821     i++;
3822
3823     current_char = str[str_index];
3824
3825     if((i < pad) && (current_char == 0))
3826     {
3827       current_char = ' ';
3828     }
3829     else
3830     {
3831       str_index++;
3832     }
3833
3834     if(current_x + FONT_WIDTH > resolution_width /* EDIT */)
3835     {
3836       while (current_char && current_char != '\n')
3837       {
3838         current_char = str[str_index++];
3839       }
3840     }
3841   }
3842 }
3843
3844 void print_string(const char *str, u16 fg_color, u16 bg_color,
3845  u32 x, u32 y)
3846 {
3847   print_string_ext(str, fg_color, bg_color, x, y, get_screen_pixels(),
3848    get_screen_pitch(), 0);
3849 }
3850
3851 void print_string_pad(const char *str, u16 fg_color, u16 bg_color,
3852  u32 x, u32 y, u32 pad)
3853 {
3854   print_string_ext(str, fg_color, bg_color, x, y, get_screen_pixels(),
3855    get_screen_pitch(), pad);
3856 }
3857
3858 u32 debug_cursor_x = 0;
3859 u32 debug_cursor_y = 0;
3860
3861 #ifdef STDIO_DEBUG
3862
/* STDIO_DEBUG variant: debug output goes to stdout, so there is no screen
   state to clear. */
void debug_screen_clear()
{
  /* intentionally empty */
}
3866
/* STDIO_DEBUG variant: nothing has to be set up before printing. */
void debug_screen_start()
{
  /* intentionally empty */
}
3870
/* STDIO_DEBUG variant: nothing has to be restored after printing. */
void debug_screen_end()
{
  /* intentionally empty */
}
3874
/* STDIO_DEBUG variant: there is no frame to present. */
void debug_screen_update()
{
  /* intentionally empty */
}
3878
/*
 * STDIO_DEBUG variant: printf-style debug output written to stdout.
 *
 * format: printf format string, followed by its arguments.
 */
void debug_screen_printf(const char *format, ...)
{
  va_list args;

  va_start(args, format);
  vfprintf(stdout, format, args);
  va_end(args);
}
3887
3888 void debug_screen_newline(u32 count)
3889 {
3890   printf("\n");
3891 }
3892
3893
3894 #else
3895
3896 void debug_screen_clear()
3897 {
3898   debug_cursor_x = 0;
3899   debug_cursor_y = 0;
3900   clear_screen(0x0000);
3901 }
3902
/* Begin an on-screen debug session: call video_resolution_large() and then
   clear the debug screen. */
void debug_screen_start()
{
  video_resolution_large();
  debug_screen_clear();
}
3908
/* End an on-screen debug session by calling video_resolution_small(). */
void debug_screen_end()
{
  video_resolution_small();
}
3913
/* Present what has been drawn on the debug screen by calling
   flip_screen(). */
void debug_screen_update()
{
  flip_screen();
}
3918
3919 void debug_screen_printf(const char *format, ...)
3920 {
3921   char str_buffer[512];
3922   u32 str_buffer_length;
3923   va_list ap;
3924
3925   va_start(ap, format);
3926   str_buffer_length = vsnprintf(str_buffer, 512, format, ap);
3927   va_end(ap);
3928
3929   printf("printing debug string %s at %d %d\n", str_buffer,
3930    debug_cursor_x, debug_cursor_y);
3931
3932   print_string(str_buffer, 0xFFFF, 0x0000, debug_cursor_x, debug_cursor_y);
3933   debug_cursor_x += FONT_WIDTH * str_buffer_length;
3934 }
3935
3936 void debug_screen_newline(u32 count)
3937 {
3938   debug_cursor_x = 0;
3939   debug_cursor_y += FONT_HEIGHT * count;
3940 }
3941
3942 #endif
3943
/*
 * Print one formatted line of debug output: the formatted text followed by
 * a single debug_screen_newline(1).
 *
 * format: printf format string, followed by its arguments. Output longer
 *         than the 511 characters that fit the internal buffer is
 *         truncated.
 */
void debug_screen_printl(const char *format, ...)
{
  char line_buffer[512];
  va_list ap;

  /* debug_screen_printf() is variadic and has no va_list counterpart, so a
     va_list must not be handed to it as if it were the argument list.
     Expand the arguments here and pass the finished text through "%s". */
  va_start(ap, format);
  if(vsnprintf(line_buffer, sizeof(line_buffer), format, ap) < 0)
    line_buffer[0] = '\0';
  va_end(ap);

  debug_screen_printf("%s", line_buffer);
  debug_screen_newline(1);
}
3954
3955
3956 #define video_savestate_builder(type)                                         \
3957 void video_##type##_savestate(file_tag_type savestate_file)                   \
3958 {                                                                             \
3959   file_##type##_array(savestate_file, affine_reference_x);                    \
3960   file_##type##_array(savestate_file, affine_reference_y);                    \
3961 }                                                                             \
3962
3963 video_savestate_builder(read);
3964 video_savestate_builder(write_mem);
3965
3966