SDL-1.2.14
[sdl_omap.git] / src / video / ps2gs / SDL_gsyuv.c
1 /*
2     SDL - Simple DirectMedia Layer
3     Copyright (C) 1997-2009 Sam Lantinga
4
5     This library is free software; you can redistribute it and/or
6     modify it under the terms of the GNU Lesser General Public
7     License as published by the Free Software Foundation; either
8     version 2.1 of the License, or (at your option) any later version.
9
10     This library is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13     Lesser General Public License for more details.
14
15     You should have received a copy of the GNU Lesser General Public
16     License along with this library; if not, write to the Free Software
17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
19     Sam Lantinga
20     slouken@libsdl.org
21 */
22 #include "SDL_config.h"
23
24 /* This is the Playstation 2 implementation of YUV video overlays */
25
26 #include <fcntl.h>
27 #include <unistd.h>
28 #include <sys/ioctl.h>
29 #include <sys/mman.h>
30 #include <asm/page.h>           /* For definition of PAGE_SIZE */
31
32 #include "SDL_video.h"
33 #include "SDL_gsyuv_c.h"
34 #include "../SDL_yuvfuncs.h"
35
/* Upper bound on the number of 16x16 pixel blocks converted at once */
#define MAX_MACROBLOCKS (1 << 10)       /* 1024 macroblocks at once */
38
39 /* The functions used to manipulate video overlays */
40 static struct private_yuvhwfuncs gs_yuvfuncs = {
41         GS_LockYUVOverlay,
42         GS_UnlockYUVOverlay,
43         GS_DisplayYUVOverlay,
44         GS_FreeYUVOverlay
45 };
46
47 struct private_yuvhwdata {
48         int ipu_fd;
49         Uint8 *pixels;
50         int macroblocks;
51         int dma_len;
52         caddr_t dma_mem;
53         caddr_t ipu_imem;
54         caddr_t ipu_omem;
55         caddr_t dma_tags;
56         unsigned long long *stretch_x1y1;
57         unsigned long long *stretch_x2y2;
58         struct ps2_plist plist;
59
60         /* These are just so we don't have to allocate them separately */
61         Uint16 pitches[3];
62         Uint8 *planes[3];
63 };
64
/* Return the smallest shift for which (1 << shift) >= value, i.e. the
   base-2 logarithm of value rounded up.  Values of 1 or less yield 0.

   The count is derived from the bit length of (value - 1) using unsigned
   arithmetic, so it never evaluates a signed shift that would overflow
   (which the naive "1 << shift < value" loop does for value > 2^30).
 */
static int power_of_2(int value)
{
        unsigned int remaining;
        int shift = 0;

        if ( value <= 1 ) {
                return(0);
        }
        for ( remaining = (unsigned int)value - 1; remaining; remaining >>= 1 ) {
                ++shift;
        }
        return(shift);
}
74
75 SDL_Overlay *GS_CreateYUVOverlay(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
76 {
77         SDL_Overlay *overlay;
78         struct private_yuvhwdata *hwdata;
79         int map_offset;
80         unsigned long long *tags;
81         caddr_t base;
82         int bpp;
83         int fbp, fbw, psm;
84         int x, y, w, h;
85         int pnum;
86         struct ps2_packet *packet;
87         struct ps2_packet tex_packet;
88
89         /* We can only decode blocks of 16x16 pixels */
90         if ( (width & 15) || (height & 15) ) {
91                 SDL_SetError("Overlay width/height must be multiples of 16");
92                 return(NULL);
93         }
94         /* Make sure the image isn't too large for a single DMA transfer */
95         if ( ((width/16) * (height/16)) > MAX_MACROBLOCKS ) {
96                 SDL_SetError("Overlay too large (maximum size: %d pixels)",
97                              MAX_MACROBLOCKS * 16 * 16);
98                 return(NULL);
99         }
100
101         /* Double-check the requested format.  For simplicity, we'll only
102            support planar YUV formats.
103          */
104         switch (format) {
105             case SDL_YV12_OVERLAY:
106             case SDL_IYUV_OVERLAY:
107                 /* Supported planar YUV format */
108                 break;
109             default:
110                 SDL_SetError("Unsupported YUV format");
111                 return(NULL);
112         }
113
114         /* Create the overlay structure */
115         overlay = (SDL_Overlay *)SDL_malloc(sizeof *overlay);
116         if ( overlay == NULL ) {
117                 SDL_OutOfMemory();
118                 return(NULL);
119         }
120         SDL_memset(overlay, 0, (sizeof *overlay));
121
122         /* Fill in the basic members */
123         overlay->format = format;
124         overlay->w = width;
125         overlay->h = height;
126
127         /* Set up the YUV surface function structure */
128         overlay->hwfuncs = &gs_yuvfuncs;
129         overlay->hw_overlay = 1;
130
131         /* Create the pixel data */
132         hwdata = (struct private_yuvhwdata *)SDL_malloc(sizeof *hwdata);
133         overlay->hwdata = hwdata;
134         if ( hwdata == NULL ) {
135                 SDL_FreeYUVOverlay(overlay);
136                 SDL_OutOfMemory();
137                 return(NULL);
138         }
139         hwdata->ipu_fd = -1;
140         hwdata->pixels = (Uint8 *)SDL_malloc(width*height*2);
141         if ( hwdata->pixels == NULL ) {
142                 SDL_FreeYUVOverlay(overlay);
143                 SDL_OutOfMemory();
144                 return(NULL);
145         }
146         hwdata->macroblocks = (width/16) * (height/16);
147
148         /* Find the pitch and offset values for the overlay */
149         overlay->pitches = hwdata->pitches;
150         overlay->pixels = hwdata->planes;
151         switch (format) {
152             case SDL_YV12_OVERLAY:
153             case SDL_IYUV_OVERLAY:
154                 overlay->pitches[0] = overlay->w;
155                 overlay->pitches[1] = overlay->pitches[0] / 2;
156                 overlay->pitches[2] = overlay->pitches[0] / 2;
157                 overlay->pixels[0] = hwdata->pixels;
158                 overlay->pixels[1] = overlay->pixels[0] +
159                                      overlay->pitches[0] * overlay->h;
160                 overlay->pixels[2] = overlay->pixels[1] +
161                                      overlay->pitches[1] * overlay->h / 2;
162                 overlay->planes = 3;
163                 break;
164             default:
165                 /* We should never get here (caught above) */
166                 break;
167         }
168
169         /* Theoretically we could support several concurrent decode
170            streams queueing up on the same file descriptor, but for
171            simplicity we'll support only one.  Opening the IPU more
172            than once will fail with EBUSY.
173         */
174         hwdata->ipu_fd = open("/dev/ps2ipu", O_RDWR);
175         if ( hwdata->ipu_fd < 0 ) {
176                 SDL_FreeYUVOverlay(overlay);
177                 SDL_SetError("Playstation 2 IPU busy");
178                 return(NULL);
179         }
180
181         /* Allocate a DMA area for pixel conversion */
182         bpp = this->screen->format->BytesPerPixel;
183         map_offset = (mapped_len + (sysconf(_SC_PAGESIZE) - 1)) & ~(sysconf(_SC_PAGESIZE) - 1);
184         hwdata->dma_len = hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8) +
185                           width * height * bpp +
186                           hwdata->macroblocks * (16 * sizeof(long long)) +
187                           12 * sizeof(long long);
188         hwdata->dma_mem = mmap(0, hwdata->dma_len, PROT_READ|PROT_WRITE,
189                                MAP_SHARED, memory_fd, map_offset);
190         if ( hwdata->dma_mem == MAP_FAILED ) {
191                 hwdata->ipu_imem = (caddr_t)0;
192                 SDL_FreeYUVOverlay(overlay);
193                 SDL_SetError("Unable to map %d bytes for DMA", hwdata->dma_len);
194                 return(NULL);
195         }
196         hwdata->ipu_imem = hwdata->dma_mem;
197         hwdata->ipu_omem = hwdata->ipu_imem +
198                            hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8);
199         hwdata->dma_tags = hwdata->ipu_omem + width * height * bpp;
200
201         /* Allocate memory for the DMA packets */
202         hwdata->plist.num = hwdata->macroblocks * 4 + 1;
203         hwdata->plist.packet = (struct ps2_packet *)SDL_malloc(
204                                hwdata->plist.num*sizeof(struct ps2_packet));
205         if ( ! hwdata->plist.packet ) {
206                 SDL_FreeYUVOverlay(overlay);
207                 SDL_OutOfMemory();
208                 return(NULL);
209         }
210         pnum = 0;
211         packet = hwdata->plist.packet;
212
213         /* Set up the tags to send the image to the screen */
214         tags = (unsigned long long *)hwdata->dma_tags;
215         base = hwdata->ipu_omem;
216         fbp = screen_image.fbp;
217         fbw = screen_image.fbw;
218         psm = screen_image.psm;
219         y = screen_image.y + screen_image.h;    /* Offscreen video memory */
220         for ( h=height/16; h; --h ) {
221                 x = 0;                  /* Visible video memory */
222                 for ( w=width/16; w; --w ) {
223                         /* The head tag */
224                         packet[pnum].ptr = &tags[0];
225                         packet[pnum].len = 10 * sizeof(*tags);
226                         ++pnum;
227                         tags[0] = 4 | (1LL << 60);      /* GIFtag */
228                         tags[1] = 0x0e;                 /* A+D */
229                         tags[2] = ((unsigned long long)fbp << 32) |
230                                   ((unsigned long long)fbw << 48) |
231                                   ((unsigned long long)psm << 56);
232                         tags[3] = PS2_GS_BITBLTBUF;
233                         tags[4] = ((unsigned long long)x << 32) |
234                                   ((unsigned long long)y << 48);
235                         tags[5] = PS2_GS_TRXPOS;
236                         tags[6] = (unsigned long long)16 |
237                                   ((unsigned long long)16 << 32);
238                         tags[7] = PS2_GS_TRXREG;
239                         tags[8] = 0;
240                         tags[9] = PS2_GS_TRXDIR;
241                         /* Now the actual image data */
242                         packet[pnum].ptr = &tags[10];
243                         packet[pnum].len = 2 * sizeof(*tags);
244                         ++pnum;
245                         tags[10] = ((16*16*bpp) >> 4) | (2LL << 58);
246                         tags[11] = 0;
247                         packet[pnum].ptr = (void *)base;
248                         packet[pnum].len = 16 * 16 * bpp;
249                         ++pnum;
250                         packet[pnum].ptr = &tags[12];
251                         packet[pnum].len = 2 * sizeof(*tags);
252                         ++pnum;
253                         tags[12] = (0 >> 4) | (1 << 15) | (2LL << 58);
254                         tags[13] = 0;
255
256                         tags += 16;
257                         base += 16 * 16 * bpp;
258
259                         x += 16;
260                 }
261                 y += 16;
262         }
263
264         /* Set up the texture memory area for the video */
265         tex_packet.ptr = tags;
266         tex_packet.len = 8 * sizeof(*tags);
267         tags[0] = 3 | (1LL << 60);      /* GIFtag */
268         tags[1] = 0x0e;                 /* A+D */
269         tags[2] = ((screen_image.y + screen_image.h) * screen_image.w) / 64 +
270                   ((unsigned long long)fbw << 14) +
271                   ((unsigned long long)psm << 20) +
272                   ((unsigned long long)power_of_2(width) << 26) +
273                   ((unsigned long long)power_of_2(height) << 30) +
274                   ((unsigned long long)1 << 34) +
275                   ((unsigned long long)1 << 35);
276         tags[3] = PS2_GS_TEX0_1;
277         tags[4] = (1 << 5) + (1 << 6);
278         tags[5] = PS2_GS_TEX1_1;
279         tags[6] = 0;
280         tags[7] = PS2_GS_TEXFLUSH;
281         ioctl(console_fd, PS2IOC_SEND, &tex_packet);
282
283         /* Set up the tags for scaling the image */
284         packet[pnum].ptr = tags;
285         packet[pnum].len = 12 * sizeof(*tags);
286         ++pnum;
287         tags[0] = 5 | (1LL << 60);      /* GIFtag */
288         tags[1] = 0x0e;                 /* A+D */
289         tags[2] = 6 + (1 << 4) + (1 << 8);
290         tags[3] = PS2_GS_PRIM;
291         tags[4] = ((unsigned long long)0 * 16) +
292                    (((unsigned long long)0 * 16) << 16);
293         tags[5] = PS2_GS_UV;
294         tags[6] = 0; /* X1, Y1 */
295         tags[7] = PS2_GS_XYZ2;
296         hwdata->stretch_x1y1 = &tags[6];
297         tags[8] = ((unsigned long long)overlay->w * 16) +
298                    (((unsigned long long)overlay->h * 16) << 16);
299         tags[9] = PS2_GS_UV;
300         tags[10] = 0; /* X2, Y2 */
301         tags[11] = PS2_GS_XYZ2;
302         hwdata->stretch_x2y2 = &tags[10];
303
304         /* We're all done.. */
305         return(overlay);
306 }
307
308 int GS_LockYUVOverlay(_THIS, SDL_Overlay *overlay)
309 {
310         return(0);
311 }
312
313 void GS_UnlockYUVOverlay(_THIS, SDL_Overlay *overlay)
314 {
315         return;
316 }
317
318 int GS_DisplayYUVOverlay(_THIS, SDL_Overlay *overlay, SDL_Rect *src, SDL_Rect *dst)
319 {
320         struct private_yuvhwdata *hwdata;
321         __u32 cmd;
322         struct ps2_packet packet;
323         int h, w, i;
324         Uint32 *lum, *Cr, *Cb;
325         int lum_pitch;
326         int crb_pitch;
327         Uint32 *lum_src, *Cr_src, *Cb_src;
328         Uint32 *srcp, *dstp;
329         unsigned int x, y;
330         SDL_Surface *screen;
331
332         /* Find out where the various portions of the image are */
333         hwdata = overlay->hwdata;
334         switch (overlay->format) {
335             case SDL_YV12_OVERLAY:
336                 lum = (Uint32 *)overlay->pixels[0];
337                 Cr =  (Uint32 *)overlay->pixels[1];
338                 Cb =  (Uint32 *)overlay->pixels[2];
339                 break;
340             case SDL_IYUV_OVERLAY:
341                 lum = (Uint32 *)overlay->pixels[0];
342                 Cr =  (Uint32 *)overlay->pixels[2];
343                 Cb =  (Uint32 *)overlay->pixels[1];
344             default:
345                 SDL_SetError("Unsupported YUV format in blit (?)");
346                 return(-1);
347         }
348         dstp = (Uint32 *)hwdata->ipu_imem;
349         lum_pitch = overlay->w/4;
350         crb_pitch = (overlay->w/2)/4;
351
352         /* Copy blocks of 16x16 pixels to the DMA area */
353         for ( h=overlay->h/16; h; --h ) {
354                 lum_src = lum;
355                 Cr_src = Cr;
356                 Cb_src = Cb;
357                 for ( w=overlay->w/16; w; --w ) {
358                         srcp = lum_src;
359                         for ( i=0; i<16; ++i ) {
360                                 dstp[0] = srcp[0];
361                                 dstp[1] = srcp[1];
362                                 dstp[2] = srcp[2];
363                                 dstp[3] = srcp[3];
364                                 srcp += lum_pitch;
365                                 dstp += 4;
366                         }
367                         srcp = Cb_src;
368                         for ( i=0; i<8; ++i ) {
369                                 dstp[0] = srcp[0];
370                                 dstp[1] = srcp[1];
371                                 srcp += crb_pitch;
372                                 dstp += 2;
373                         }
374                         srcp = Cr_src;
375                         for ( i=0; i<8; ++i ) {
376                                 dstp[0] = srcp[0];
377                                 dstp[1] = srcp[1];
378                                 srcp += crb_pitch;
379                                 dstp += 2;
380                         }
381                         lum_src += 16 / 4;
382                         Cb_src += 8 / 4;
383                         Cr_src += 8 / 4;
384                 }
385                 lum += lum_pitch * 16;
386                 Cr += crb_pitch * 8;
387                 Cb += crb_pitch * 8;
388         }
389
390         /* Send the macroblock data to the IPU */
391 #ifdef DEBUG_YUV
392         fprintf(stderr, "Sending data to IPU..\n");
393 #endif
394         packet.ptr = hwdata->ipu_imem;
395         packet.len = hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8);
396         ioctl(hwdata->ipu_fd, PS2IOC_SENDA, &packet);
397
398         /* Trigger the DMA to the IPU for conversion */
399 #ifdef DEBUG_YUV
400         fprintf(stderr, "Trigging conversion command\n");
401 #endif
402         cmd = (7 << 28) + hwdata->macroblocks;
403         if ( screen_image.psm == PS2_GS_PSMCT16 ) {
404                 cmd += (1 << 27) +      /* Output RGB 555 */
405                        (1 << 26);       /* Dither output */
406         }
407         ioctl(hwdata->ipu_fd, PS2IOC_SIPUCMD, &cmd);
408
409         /* Retrieve the converted image from the IPU */
410 #ifdef DEBUG_YUV
411         fprintf(stderr, "Retrieving data from IPU..\n");
412 #endif
413         packet.ptr = hwdata->ipu_omem;
414         packet.len = overlay->w * overlay->h *
415                      this->screen->format->BytesPerPixel;
416         ioctl(hwdata->ipu_fd, PS2IOC_RECV, &packet);
417
418 #ifdef DEBUG_YUV
419         fprintf(stderr, "Copying image to screen..\n");
420 #endif
421         /* Wait for previous DMA to complete */
422         ioctl(console_fd, PS2IOC_SENDQCT, 1);
423
424         /* Send the current image to the screen and scale it */
425         screen = this->screen;
426         x = (unsigned int)dst->x;
427         y = (unsigned int)dst->y;
428         if ( screen->offset ) {
429                 x += (screen->offset % screen->pitch) /
430                      screen->format->BytesPerPixel;
431                 y += (screen->offset / screen->pitch);
432         }
433         y += screen_image.y;
434         *hwdata->stretch_x1y1 = (x * 16) + ((y * 16) << 16);
435         x += (unsigned int)dst->w;
436         y += (unsigned int)dst->h;
437         *hwdata->stretch_x2y2 = (x * 16) + ((y * 16) << 16);
438         return ioctl(console_fd, PS2IOC_SENDL, &hwdata->plist);
439 }
440
441 void GS_FreeYUVOverlay(_THIS, SDL_Overlay *overlay)
442 {
443         struct private_yuvhwdata *hwdata;
444
445         hwdata = overlay->hwdata;
446         if ( hwdata ) {
447                 if ( hwdata->ipu_fd >= 0 ) {
448                         close(hwdata->ipu_fd);
449                 }
450                 if ( hwdata->dma_mem ) {
451                         munmap(hwdata->dma_mem, hwdata->dma_len);
452                 }
453                 if ( hwdata->plist.packet ) {
454                         SDL_free(hwdata->plist.packet);
455                 }
456                 if ( hwdata->pixels ) {
457                         SDL_free(hwdata->pixels);
458                 }
459                 SDL_free(hwdata);
460         }
461 }