git.kernelconcepts.de Git - karo-tx-linux.git/blob - drivers/media/platform/coda/coda-bit.c
[media] coda: adjust sequence offset after unexpected decoded frame
[karo-tx-linux.git] / drivers / media / platform / coda / coda-bit.c
1 /*
2  * Coda multi-standard codec IP - BIT processor functions
3  *
4  * Copyright (C) 2012 Vista Silicon S.L.
5  *    Javier Martin, <javier.martin@vista-silicon.com>
6  *    Xavier Duret
7  * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  */
14
15 #include <linux/clk.h>
16 #include <linux/irqreturn.h>
17 #include <linux/kernel.h>
18 #include <linux/platform_device.h>
19 #include <linux/reset.h>
20 #include <linux/slab.h>
21 #include <linux/videodev2.h>
22
23 #include <media/v4l2-common.h>
24 #include <media/v4l2-ctrls.h>
25 #include <media/v4l2-fh.h>
26 #include <media/v4l2-mem2mem.h>
27 #include <media/videobuf2-core.h>
28 #include <media/videobuf2-dma-contig.h>
29 #include <media/videobuf2-vmalloc.h>
30
31 #include "coda.h"
32
33 #define CODA7_PS_BUF_SIZE       0x28000
34 #define CODA9_PS_SAVE_SIZE      (512 * 1024)
35
36 #define CODA_DEFAULT_GAMMA      4096
37 #define CODA9_DEFAULT_GAMMA     24576   /* 0.75 * 32768 */
38
39 static inline int coda_is_initialized(struct coda_dev *dev)
40 {
41         return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0;
42 }
43
44 static inline unsigned long coda_isbusy(struct coda_dev *dev)
45 {
46         return coda_read(dev, CODA_REG_BIT_BUSY);
47 }
48
49 static int coda_wait_timeout(struct coda_dev *dev)
50 {
51         unsigned long timeout = jiffies + msecs_to_jiffies(1000);
52
53         while (coda_isbusy(dev)) {
54                 if (time_after(jiffies, timeout))
55                         return -ETIMEDOUT;
56         }
57         return 0;
58 }
59
60 static void coda_command_async(struct coda_ctx *ctx, int cmd)
61 {
62         struct coda_dev *dev = ctx->dev;
63
64         if (dev->devtype->product == CODA_960 ||
65             dev->devtype->product == CODA_7541) {
66                 /* Restore context related registers to CODA */
67                 coda_write(dev, ctx->bit_stream_param,
68                                 CODA_REG_BIT_BIT_STREAM_PARAM);
69                 coda_write(dev, ctx->frm_dis_flg,
70                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
71                 coda_write(dev, ctx->frame_mem_ctrl,
72                                 CODA_REG_BIT_FRAME_MEM_CTRL);
73                 coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR);
74         }
75
76         if (dev->devtype->product == CODA_960) {
77                 coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR);
78                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
79         }
80
81         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
82
83         coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX);
84         coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD);
85         coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD);
86
87         coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND);
88 }
89
90 static int coda_command_sync(struct coda_ctx *ctx, int cmd)
91 {
92         struct coda_dev *dev = ctx->dev;
93
94         coda_command_async(ctx, cmd);
95         return coda_wait_timeout(dev);
96 }
97
98 int coda_hw_reset(struct coda_ctx *ctx)
99 {
100         struct coda_dev *dev = ctx->dev;
101         unsigned long timeout;
102         unsigned int idx;
103         int ret;
104
105         if (!dev->rstc)
106                 return -ENOENT;
107
108         idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX);
109
110         if (dev->devtype->product == CODA_960) {
111                 timeout = jiffies + msecs_to_jiffies(100);
112                 coda_write(dev, 0x11, CODA9_GDI_BUS_CTRL);
113                 while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) {
114                         if (time_after(jiffies, timeout))
115                                 return -ETIME;
116                         cpu_relax();
117                 }
118         }
119
120         ret = reset_control_reset(dev->rstc);
121         if (ret < 0)
122                 return ret;
123
124         if (dev->devtype->product == CODA_960)
125                 coda_write(dev, 0x00, CODA9_GDI_BUS_CTRL);
126         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
127         coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN);
128         ret = coda_wait_timeout(dev);
129         coda_write(dev, idx, CODA_REG_BIT_RUN_INDEX);
130
131         return ret;
132 }
133
134 static void coda_kfifo_sync_from_device(struct coda_ctx *ctx)
135 {
136         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
137         struct coda_dev *dev = ctx->dev;
138         u32 rd_ptr;
139
140         rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
141         kfifo->out = (kfifo->in & ~kfifo->mask) |
142                       (rd_ptr - ctx->bitstream.paddr);
143         if (kfifo->out > kfifo->in)
144                 kfifo->out -= kfifo->mask + 1;
145 }
146
147 static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx)
148 {
149         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
150         struct coda_dev *dev = ctx->dev;
151         u32 rd_ptr, wr_ptr;
152
153         rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask);
154         coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
155         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
156         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
157 }
158
159 static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx)
160 {
161         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
162         struct coda_dev *dev = ctx->dev;
163         u32 wr_ptr;
164
165         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
166         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
167 }
168
169 static int coda_bitstream_queue(struct coda_ctx *ctx,
170                                 struct vb2_buffer *src_buf)
171 {
172         u32 src_size = vb2_get_plane_payload(src_buf, 0);
173         u32 n;
174
175         n = kfifo_in(&ctx->bitstream_fifo, vb2_plane_vaddr(src_buf, 0),
176                      src_size);
177         if (n < src_size)
178                 return -ENOSPC;
179
180         dma_sync_single_for_device(&ctx->dev->plat_dev->dev,
181                                    ctx->bitstream.paddr, ctx->bitstream.size,
182                                    DMA_TO_DEVICE);
183
184         src_buf->v4l2_buf.sequence = ctx->qsequence++;
185
186         return 0;
187 }
188
189 static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
190                                      struct vb2_buffer *src_buf)
191 {
192         int ret;
193
194         if (coda_get_bitstream_payload(ctx) +
195             vb2_get_plane_payload(src_buf, 0) + 512 >= ctx->bitstream.size)
196                 return false;
197
198         if (vb2_plane_vaddr(src_buf, 0) == NULL) {
199                 v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n");
200                 return true;
201         }
202
203         ret = coda_bitstream_queue(ctx, src_buf);
204         if (ret < 0) {
205                 v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n");
206                 return false;
207         }
208         /* Sync read pointer to device */
209         if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
210                 coda_kfifo_sync_to_device_write(ctx);
211
212         ctx->hold = false;
213
214         return true;
215 }
216
217 void coda_fill_bitstream(struct coda_ctx *ctx)
218 {
219         struct vb2_buffer *src_buf;
220         struct coda_buffer_meta *meta;
221         u32 start;
222
223         while (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) > 0) {
224                 /*
225                  * Only queue a single JPEG into the bitstream buffer, except
226                  * to increase payload over 512 bytes or if in hold state.
227                  */
228                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
229                     (coda_get_bitstream_payload(ctx) >= 512) && !ctx->hold)
230                         break;
231
232                 src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
233
234                 /* Drop frames that do not start/end with a SOI/EOI markers */
235                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
236                     !coda_jpeg_check_buffer(ctx, src_buf)) {
237                         v4l2_err(&ctx->dev->v4l2_dev,
238                                  "dropping invalid JPEG frame\n");
239                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
240                         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
241                         continue;
242                 }
243
244                 /* Buffer start position */
245                 start = ctx->bitstream_fifo.kfifo.in &
246                         ctx->bitstream_fifo.kfifo.mask;
247
248                 if (coda_bitstream_try_queue(ctx, src_buf)) {
249                         /*
250                          * Source buffer is queued in the bitstream ringbuffer;
251                          * queue the timestamp and mark source buffer as done
252                          */
253                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
254
255                         meta = kmalloc(sizeof(*meta), GFP_KERNEL);
256                         if (meta) {
257                                 meta->sequence = src_buf->v4l2_buf.sequence;
258                                 meta->timecode = src_buf->v4l2_buf.timecode;
259                                 meta->timestamp = src_buf->v4l2_buf.timestamp;
260                                 meta->start = start;
261                                 meta->end = ctx->bitstream_fifo.kfifo.in &
262                                             ctx->bitstream_fifo.kfifo.mask;
263                                 list_add_tail(&meta->list,
264                                               &ctx->buffer_meta_list);
265                         }
266
267                         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
268                 } else {
269                         break;
270                 }
271         }
272 }
273
274 void coda_bit_stream_end_flag(struct coda_ctx *ctx)
275 {
276         struct coda_dev *dev = ctx->dev;
277
278         ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
279
280         /* If this context is currently running, update the hardware flag */
281         if ((dev->devtype->product == CODA_960) &&
282             coda_isbusy(dev) &&
283             (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) {
284                 coda_write(dev, ctx->bit_stream_param,
285                            CODA_REG_BIT_BIT_STREAM_PARAM);
286         }
287 }
288
289 static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value)
290 {
291         struct coda_dev *dev = ctx->dev;
292         u32 *p = ctx->parabuf.vaddr;
293
294         if (dev->devtype->product == CODA_DX6)
295                 p[index] = value;
296         else
297                 p[index ^ 1] = value;
298 }
299
300 static void coda_free_framebuffers(struct coda_ctx *ctx)
301 {
302         int i;
303
304         for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++)
305                 coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i]);
306 }
307
308 static int coda_alloc_framebuffers(struct coda_ctx *ctx,
309                                    struct coda_q_data *q_data, u32 fourcc)
310 {
311         struct coda_dev *dev = ctx->dev;
312         int width, height;
313         dma_addr_t paddr;
314         int ysize;
315         int ret;
316         int i;
317
318         if (ctx->codec && (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
319              ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264)) {
320                 width = round_up(q_data->width, 16);
321                 height = round_up(q_data->height, 16);
322         } else {
323                 width = round_up(q_data->width, 8);
324                 height = q_data->height;
325         }
326         ysize = width * height;
327
328         /* Allocate frame buffers */
329         for (i = 0; i < ctx->num_internal_frames; i++) {
330                 size_t size;
331                 char *name;
332
333                 size = ysize + ysize / 2;
334                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
335                     dev->devtype->product != CODA_DX6)
336                         size += ysize / 4;
337                 name = kasprintf(GFP_KERNEL, "fb%d", i);
338                 ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i],
339                                              size, name);
340                 kfree(name);
341                 if (ret < 0) {
342                         coda_free_framebuffers(ctx);
343                         return ret;
344                 }
345         }
346
347         /* Register frame buffers in the parameter buffer */
348         for (i = 0; i < ctx->num_internal_frames; i++) {
349                 paddr = ctx->internal_frames[i].paddr;
350                 /* Start addresses of Y, Cb, Cr planes */
351                 coda_parabuf_write(ctx, i * 3 + 0, paddr);
352                 coda_parabuf_write(ctx, i * 3 + 1, paddr + ysize);
353                 coda_parabuf_write(ctx, i * 3 + 2, paddr + ysize + ysize / 4);
354
355                 /* mvcol buffer for h.264 */
356                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
357                     dev->devtype->product != CODA_DX6)
358                         coda_parabuf_write(ctx, 96 + i,
359                                            ctx->internal_frames[i].paddr +
360                                            ysize + ysize/4 + ysize/4);
361         }
362
363         /* mvcol buffer for mpeg4 */
364         if ((dev->devtype->product != CODA_DX6) &&
365             (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4))
366                 coda_parabuf_write(ctx, 97, ctx->internal_frames[i].paddr +
367                                             ysize + ysize/4 + ysize/4);
368
369         return 0;
370 }
371
372 static void coda_free_context_buffers(struct coda_ctx *ctx)
373 {
374         struct coda_dev *dev = ctx->dev;
375
376         coda_free_aux_buf(dev, &ctx->slicebuf);
377         coda_free_aux_buf(dev, &ctx->psbuf);
378         if (dev->devtype->product != CODA_DX6)
379                 coda_free_aux_buf(dev, &ctx->workbuf);
380 }
381
382 static int coda_alloc_context_buffers(struct coda_ctx *ctx,
383                                       struct coda_q_data *q_data)
384 {
385         struct coda_dev *dev = ctx->dev;
386         size_t size;
387         int ret;
388
389         if (dev->devtype->product == CODA_DX6)
390                 return 0;
391
392         if (ctx->psbuf.vaddr) {
393                 v4l2_err(&dev->v4l2_dev, "psmembuf still allocated\n");
394                 return -EBUSY;
395         }
396         if (ctx->slicebuf.vaddr) {
397                 v4l2_err(&dev->v4l2_dev, "slicebuf still allocated\n");
398                 return -EBUSY;
399         }
400         if (ctx->workbuf.vaddr) {
401                 v4l2_err(&dev->v4l2_dev, "context buffer still allocated\n");
402                 ret = -EBUSY;
403                 return -ENOMEM;
404         }
405
406         if (q_data->fourcc == V4L2_PIX_FMT_H264) {
407                 /* worst case slice size */
408                 size = (DIV_ROUND_UP(q_data->width, 16) *
409                         DIV_ROUND_UP(q_data->height, 16)) * 3200 / 8 + 512;
410                 ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size,
411                                              "slicebuf");
412                 if (ret < 0) {
413                         v4l2_err(&dev->v4l2_dev,
414                                  "failed to allocate %d byte slice buffer",
415                                  ctx->slicebuf.size);
416                         return ret;
417                 }
418         }
419
420         if (dev->devtype->product == CODA_7541) {
421                 ret = coda_alloc_context_buf(ctx, &ctx->psbuf,
422                                              CODA7_PS_BUF_SIZE, "psbuf");
423                 if (ret < 0) {
424                         v4l2_err(&dev->v4l2_dev,
425                                  "failed to allocate psmem buffer");
426                         goto err;
427                 }
428         }
429
430         size = dev->devtype->workbuf_size;
431         if (dev->devtype->product == CODA_960 &&
432             q_data->fourcc == V4L2_PIX_FMT_H264)
433                 size += CODA9_PS_SAVE_SIZE;
434         ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size, "workbuf");
435         if (ret < 0) {
436                 v4l2_err(&dev->v4l2_dev,
437                          "failed to allocate %d byte context buffer",
438                          ctx->workbuf.size);
439                 goto err;
440         }
441
442         return 0;
443
444 err:
445         coda_free_context_buffers(ctx);
446         return ret;
447 }
448
/*
 * Run the ENCODE_HEADER command and copy the generated header out of the
 * buffer.
 *
 * @ctx:         codec context
 * @buf:         buffer whose plane 0 receives the header from the device
 * @header_code: value written to CODA_CMD_ENC_HEADER_CODE
 * @header:      destination the header bytes are copied to
 * @size:        set to the number of header bytes copied
 *
 * Returns 0 on success or the error from coda_command_sync().
 *
 * NOTE(review): the copy into @header is not bounded by any destination
 * size visible here - confirm that callers provide enough room.
 */
static int coda_encode_header(struct coda_ctx *ctx, struct vb2_buffer *buf,
                              int header_code, u8 *header, int *size)
{
        struct coda_dev *dev = ctx->dev;
        size_t bufsize;
        int ret;
        int i;

        /*
         * On CODA960 the header length is found by scanning for the last
         * nonzero byte below, so clear the first 64 bytes beforehand.
         */
        if (dev->devtype->product == CODA_960)
                memset(vb2_plane_vaddr(buf, 0), 0, 64);

        coda_write(dev, vb2_dma_contig_plane_dma_addr(buf, 0),
                   CODA_CMD_ENC_HEADER_BB_START);
        bufsize = vb2_plane_size(buf, 0);
        /* On CODA960 the size register is written in units of 1024 bytes */
        if (dev->devtype->product == CODA_960)
                bufsize /= 1024;
        coda_write(dev, bufsize, CODA_CMD_ENC_HEADER_BB_SIZE);
        coda_write(dev, header_code, CODA_CMD_ENC_HEADER_CODE);
        ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER);
        if (ret < 0) {
                v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n");
                return ret;
        }

        if (dev->devtype->product == CODA_960) {
                /*
                 * Header size is the index of the last nonzero byte within
                 * the first 64 bytes, plus one (at least 1).
                 */
                for (i = 63; i > 0; i--)
                        if (((char *)vb2_plane_vaddr(buf, 0))[i] != 0)
                                break;
                *size = i + 1;
        } else {
                /* Header size is write pointer minus buffer start register */
                *size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) -
                        coda_read(dev, CODA_CMD_ENC_HEADER_BB_START);
        }
        memcpy(header, vb2_plane_vaddr(buf, 0), *size);

        return 0;
}
486
487 static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size)
488 {
489         phys_addr_t ret;
490
491         size = round_up(size, 1024);
492         if (size > iram->remaining)
493                 return 0;
494         iram->remaining -= size;
495
496         ret = iram->next_paddr;
497         iram->next_paddr += size;
498
499         return ret;
500 }
501
/*
 * Distribute the device IRAM between the codec's internal buffers for
 * this context and record the result in ctx->iram_info.
 *
 * ctx->iram_info is reset first. Nothing more is done if the device has
 * no IRAM mapping or is a CODADX6. Buffers are allocated in a fixed
 * priority order; as soon as one allocation fails the remaining ones are
 * skipped, and only the enable bits of the buffers allocated up to that
 * point are set in axi_sram_use.
 */
static void coda_setup_iram(struct coda_ctx *ctx)
{
        struct coda_iram_info *iram_info = &ctx->iram_info;
        struct coda_dev *dev = ctx->dev;
        int w64, w128;
        int mb_width;
        int dbk_bits;
        int bit_bits;
        int ip_bits;

        memset(iram_info, 0, sizeof(*iram_info));
        iram_info->next_paddr = dev->iram.paddr;
        iram_info->remaining = dev->iram.size;

        if (!dev->iram.vaddr)
                return;

        /* Select the product specific enable bits for each buffer kind */
        switch (dev->devtype->product) {
        case CODA_7541:
                dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE;
                bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
                ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
                break;
        case CODA_960:
                dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE;
                bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
                ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
                break;
        default: /* CODA_DX6 */
                return;
        }

        if (ctx->inst_type == CODA_INST_ENCODER) {
                struct coda_q_data *q_data_src;

                /* Buffer sizes scale with the source width in macroblocks */
                q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
                mb_width = DIV_ROUND_UP(q_data_src->width, 16);
                w128 = mb_width * 128;
                w64 = mb_width * 64;

                /* Prioritize in case IRAM is too small for everything */
                if (dev->devtype->product == CODA_7541) {
                        iram_info->search_ram_size = round_up(mb_width * 16 *
                                                              36 + 2048, 1024);
                        iram_info->search_ram_paddr = coda_iram_alloc(iram_info,
                                                iram_info->search_ram_size);
                        if (!iram_info->search_ram_paddr) {
                                pr_err("IRAM is smaller than the search ram size\n");
                                goto out;
                        }
                        iram_info->axi_sram_use |= CODA7_USE_HOST_ME_ENABLE |
                                                   CODA7_USE_ME_ENABLE;
                }

                /* Only H.264BP and H.263P3 are considered */
                iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64);
                iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64);
                /*
                 * Both requests have the same size and a failed allocation
                 * leaves the pool untouched, so checking the second result
                 * covers the first as well.
                 */
                if (!iram_info->buf_dbk_c_use)
                        goto out;
                iram_info->axi_sram_use |= dbk_bits;

                iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
                if (!iram_info->buf_bit_use)
                        goto out;
                iram_info->axi_sram_use |= bit_bits;

                iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
                if (!iram_info->buf_ip_ac_dc_use)
                        goto out;
                iram_info->axi_sram_use |= ip_bits;

                /* OVL and BTP disabled for encoder */
        } else if (ctx->inst_type == CODA_INST_DECODER) {
                struct coda_q_data *q_data_dst;

                /* Buffer sizes scale with the capture width in macroblocks */
                q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
                mb_width = DIV_ROUND_UP(q_data_dst->width, 16);
                w128 = mb_width * 128;

                iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128);
                iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128);
                /* Same-size requests: the second result covers both */
                if (!iram_info->buf_dbk_c_use)
                        goto out;
                iram_info->axi_sram_use |= dbk_bits;

                iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
                if (!iram_info->buf_bit_use)
                        goto out;
                iram_info->axi_sram_use |= bit_bits;

                iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
                if (!iram_info->buf_ip_ac_dc_use)
                        goto out;
                iram_info->axi_sram_use |= ip_bits;

                /* OVL and BTP unused as there is no VC1 support yet */
        }

out:
        /*
         * The IP buffer is allocated last, so a missing IP enable bit means
         * at least one allocation failed.
         * NOTE(review): this tests the CODA7 bit on CODA960 as well, while
         * ip_bits uses CODA9_USE_HOST_IP_ENABLE there - confirm that the
         * two constants are interchangeable for this check.
         */
        if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE))
                v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
                         "IRAM smaller than needed\n");

        if (dev->devtype->product == CODA_7541) {
                /* TODO - Enabling these causes picture errors on CODA7541 */
                if (ctx->inst_type == CODA_INST_DECODER) {
                        /* fw 1.4.50 */
                        iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
                                                     CODA7_USE_IP_ENABLE);
                } else {
                        /* fw 13.4.29 */
                        iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
                                                     CODA7_USE_HOST_DBK_ENABLE |
                                                     CODA7_USE_IP_ENABLE |
                                                     CODA7_USE_DBK_ENABLE);
                }
        }
}
620
621 static u32 coda_supported_firmwares[] = {
622         CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5),
623         CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50),
624         CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5),
625 };
626
627 static bool coda_firmware_supported(u32 vernum)
628 {
629         int i;
630
631         for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++)
632                 if (vernum == coda_supported_firmwares[i])
633                         return true;
634         return false;
635 }
636
637 int coda_check_firmware(struct coda_dev *dev)
638 {
639         u16 product, major, minor, release;
640         u32 data;
641         int ret;
642
643         ret = clk_prepare_enable(dev->clk_per);
644         if (ret)
645                 goto err_clk_per;
646
647         ret = clk_prepare_enable(dev->clk_ahb);
648         if (ret)
649                 goto err_clk_ahb;
650
651         coda_write(dev, 0, CODA_CMD_FIRMWARE_VERNUM);
652         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
653         coda_write(dev, 0, CODA_REG_BIT_RUN_INDEX);
654         coda_write(dev, 0, CODA_REG_BIT_RUN_COD_STD);
655         coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND);
656         if (coda_wait_timeout(dev)) {
657                 v4l2_err(&dev->v4l2_dev, "firmware get command error\n");
658                 ret = -EIO;
659                 goto err_run_cmd;
660         }
661
662         if (dev->devtype->product == CODA_960) {
663                 data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV);
664                 v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n",
665                           data);
666         }
667
668         /* Check we are compatible with the loaded firmware */
669         data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM);
670         product = CODA_FIRMWARE_PRODUCT(data);
671         major = CODA_FIRMWARE_MAJOR(data);
672         minor = CODA_FIRMWARE_MINOR(data);
673         release = CODA_FIRMWARE_RELEASE(data);
674
675         clk_disable_unprepare(dev->clk_per);
676         clk_disable_unprepare(dev->clk_ahb);
677
678         if (product != dev->devtype->product) {
679                 v4l2_err(&dev->v4l2_dev,
680                          "Wrong firmware. Hw: %s, Fw: %s, Version: %u.%u.%u\n",
681                          coda_product_name(dev->devtype->product),
682                          coda_product_name(product), major, minor, release);
683                 return -EINVAL;
684         }
685
686         v4l2_info(&dev->v4l2_dev, "Initialized %s.\n",
687                   coda_product_name(product));
688
689         if (coda_firmware_supported(data)) {
690                 v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n",
691                           major, minor, release);
692         } else {
693                 v4l2_warn(&dev->v4l2_dev,
694                           "Unsupported firmware version: %u.%u.%u\n",
695                           major, minor, release);
696         }
697
698         return 0;
699
700 err_run_cmd:
701         clk_disable_unprepare(dev->clk_ahb);
702 err_clk_ahb:
703         clk_disable_unprepare(dev->clk_per);
704 err_clk_per:
705         return ret;
706 }
707
708 /*
709  * Encoder context operations
710  */
711
712 static int coda_start_encoding(struct coda_ctx *ctx)
713 {
714         struct coda_dev *dev = ctx->dev;
715         struct v4l2_device *v4l2_dev = &dev->v4l2_dev;
716         struct coda_q_data *q_data_src, *q_data_dst;
717         u32 bitstream_buf, bitstream_size;
718         struct vb2_buffer *buf;
719         int gamma, ret, value;
720         u32 dst_fourcc;
721         u32 stride;
722
723         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
724         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
725         dst_fourcc = q_data_dst->fourcc;
726
727         /* Allocate per-instance buffers */
728         ret = coda_alloc_context_buffers(ctx, q_data_src);
729         if (ret < 0)
730                 return ret;
731
732         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
733         bitstream_buf = vb2_dma_contig_plane_dma_addr(buf, 0);
734         bitstream_size = q_data_dst->sizeimage;
735
736         if (!coda_is_initialized(dev)) {
737                 v4l2_err(v4l2_dev, "coda is not initialized.\n");
738                 return -EFAULT;
739         }
740
741         if (dst_fourcc == V4L2_PIX_FMT_JPEG) {
742                 if (!ctx->params.jpeg_qmat_tab[0])
743                         ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
744                 if (!ctx->params.jpeg_qmat_tab[1])
745                         ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
746                 coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
747         }
748
749         mutex_lock(&dev->coda_mutex);
750
751         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
752         coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
753         coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
754         switch (dev->devtype->product) {
755         case CODA_DX6:
756                 coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN |
757                         CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
758                 break;
759         case CODA_960:
760                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
761                 /* fallthrough */
762         case CODA_7541:
763                 coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN |
764                         CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
765                 break;
766         }
767
768         ctx->frame_mem_ctrl &= ~CODA_FRAME_CHROMA_INTERLEAVE;
769         if (q_data_src->fourcc == V4L2_PIX_FMT_NV12)
770                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
771         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
772
773         if (dev->devtype->product == CODA_DX6) {
774                 /* Configure the coda */
775                 coda_write(dev, dev->iram.paddr,
776                            CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR);
777         }
778
779         /* Could set rotation here if needed */
780         value = 0;
781         switch (dev->devtype->product) {
782         case CODA_DX6:
783                 value = (q_data_src->width & CODADX6_PICWIDTH_MASK)
784                         << CODADX6_PICWIDTH_OFFSET;
785                 value |= (q_data_src->height & CODADX6_PICHEIGHT_MASK)
786                          << CODA_PICHEIGHT_OFFSET;
787                 break;
788         case CODA_7541:
789                 if (dst_fourcc == V4L2_PIX_FMT_H264) {
790                         value = (round_up(q_data_src->width, 16) &
791                                  CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET;
792                         value |= (round_up(q_data_src->height, 16) &
793                                  CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET;
794                         break;
795                 }
796                 /* fallthrough */
797         case CODA_960:
798                 value = (q_data_src->width & CODA7_PICWIDTH_MASK)
799                         << CODA7_PICWIDTH_OFFSET;
800                 value |= (q_data_src->height & CODA7_PICHEIGHT_MASK)
801                          << CODA_PICHEIGHT_OFFSET;
802         }
803         coda_write(dev, value, CODA_CMD_ENC_SEQ_SRC_SIZE);
804         if (dst_fourcc == V4L2_PIX_FMT_JPEG)
805                 ctx->params.framerate = 0;
806         coda_write(dev, ctx->params.framerate,
807                    CODA_CMD_ENC_SEQ_SRC_F_RATE);
808
809         ctx->params.codec_mode = ctx->codec->mode;
810         switch (dst_fourcc) {
811         case V4L2_PIX_FMT_MPEG4:
812                 if (dev->devtype->product == CODA_960)
813                         coda_write(dev, CODA9_STD_MPEG4,
814                                    CODA_CMD_ENC_SEQ_COD_STD);
815                 else
816                         coda_write(dev, CODA_STD_MPEG4,
817                                    CODA_CMD_ENC_SEQ_COD_STD);
818                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_MP4_PARA);
819                 break;
820         case V4L2_PIX_FMT_H264:
821                 if (dev->devtype->product == CODA_960)
822                         coda_write(dev, CODA9_STD_H264,
823                                    CODA_CMD_ENC_SEQ_COD_STD);
824                 else
825                         coda_write(dev, CODA_STD_H264,
826                                    CODA_CMD_ENC_SEQ_COD_STD);
827                 if (ctx->params.h264_deblk_enabled) {
828                         value = ((ctx->params.h264_deblk_alpha &
829                                   CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) <<
830                                  CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) |
831                                 ((ctx->params.h264_deblk_beta &
832                                   CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) <<
833                                  CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET);
834                 } else {
835                         value = 1 << CODA_264PARAM_DISABLEDEBLK_OFFSET;
836                 }
837                 coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA);
838                 break;
839         case V4L2_PIX_FMT_JPEG:
840                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_PARA);
841                 coda_write(dev, ctx->params.jpeg_restart_interval,
842                                 CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL);
843                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN);
844                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE);
845                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET);
846
847                 coda_jpeg_write_tables(ctx);
848                 break;
849         default:
850                 v4l2_err(v4l2_dev,
851                          "dst format (0x%08x) invalid.\n", dst_fourcc);
852                 ret = -EINVAL;
853                 goto out;
854         }
855
856         /*
857          * slice mode and GOP size registers are used for thumb size/offset
858          * in JPEG mode
859          */
860         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
861                 switch (ctx->params.slice_mode) {
862                 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE:
863                         value = 0;
864                         break;
865                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_MB:
866                         value  = (ctx->params.slice_max_mb &
867                                   CODA_SLICING_SIZE_MASK)
868                                  << CODA_SLICING_SIZE_OFFSET;
869                         value |= (1 & CODA_SLICING_UNIT_MASK)
870                                  << CODA_SLICING_UNIT_OFFSET;
871                         value |=  1 & CODA_SLICING_MODE_MASK;
872                         break;
873                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES:
874                         value  = (ctx->params.slice_max_bits &
875                                   CODA_SLICING_SIZE_MASK)
876                                  << CODA_SLICING_SIZE_OFFSET;
877                         value |= (0 & CODA_SLICING_UNIT_MASK)
878                                  << CODA_SLICING_UNIT_OFFSET;
879                         value |=  1 & CODA_SLICING_MODE_MASK;
880                         break;
881                 }
882                 coda_write(dev, value, CODA_CMD_ENC_SEQ_SLICE_MODE);
883                 value = ctx->params.gop_size & CODA_GOP_SIZE_MASK;
884                 coda_write(dev, value, CODA_CMD_ENC_SEQ_GOP_SIZE);
885         }
886
887         if (ctx->params.bitrate) {
888                 /* Rate control enabled */
889                 value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK)
890                         << CODA_RATECONTROL_BITRATE_OFFSET;
891                 value |=  1 & CODA_RATECONTROL_ENABLE_MASK;
892                 if (dev->devtype->product == CODA_960)
893                         value |= BIT(31); /* disable autoskip */
894         } else {
895                 value = 0;
896         }
897         coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_PARA);
898
899         coda_write(dev, 0, CODA_CMD_ENC_SEQ_RC_BUF_SIZE);
900         coda_write(dev, ctx->params.intra_refresh,
901                    CODA_CMD_ENC_SEQ_INTRA_REFRESH);
902
903         coda_write(dev, bitstream_buf, CODA_CMD_ENC_SEQ_BB_START);
904         coda_write(dev, bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE);
905
906
907         value = 0;
908         if (dev->devtype->product == CODA_960)
909                 gamma = CODA9_DEFAULT_GAMMA;
910         else
911                 gamma = CODA_DEFAULT_GAMMA;
912         if (gamma > 0) {
913                 coda_write(dev, (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET,
914                            CODA_CMD_ENC_SEQ_RC_GAMMA);
915         }
916
917         if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) {
918                 coda_write(dev,
919                            ctx->params.h264_min_qp << CODA_QPMIN_OFFSET |
920                            ctx->params.h264_max_qp << CODA_QPMAX_OFFSET,
921                            CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX);
922         }
923         if (dev->devtype->product == CODA_960) {
924                 if (ctx->params.h264_max_qp)
925                         value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET;
926                 if (CODA_DEFAULT_GAMMA > 0)
927                         value |= 1 << CODA9_OPTION_GAMMA_OFFSET;
928         } else {
929                 if (CODA_DEFAULT_GAMMA > 0) {
930                         if (dev->devtype->product == CODA_DX6)
931                                 value |= 1 << CODADX6_OPTION_GAMMA_OFFSET;
932                         else
933                                 value |= 1 << CODA7_OPTION_GAMMA_OFFSET;
934                 }
935                 if (ctx->params.h264_min_qp)
936                         value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET;
937                 if (ctx->params.h264_max_qp)
938                         value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET;
939         }
940         coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION);
941
942         coda_write(dev, 0, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE);
943
944         coda_setup_iram(ctx);
945
946         if (dst_fourcc == V4L2_PIX_FMT_H264) {
947                 switch (dev->devtype->product) {
948                 case CODA_DX6:
949                         value = FMO_SLICE_SAVE_BUF_SIZE << 7;
950                         coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO);
951                         break;
952                 case CODA_7541:
953                         coda_write(dev, ctx->iram_info.search_ram_paddr,
954                                         CODA7_CMD_ENC_SEQ_SEARCH_BASE);
955                         coda_write(dev, ctx->iram_info.search_ram_size,
956                                         CODA7_CMD_ENC_SEQ_SEARCH_SIZE);
957                         break;
958                 case CODA_960:
959                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_ME_OPTION);
960                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT);
961                 }
962         }
963
964         ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
965         if (ret < 0) {
966                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
967                 goto out;
968         }
969
970         if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) {
971                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n");
972                 ret = -EFAULT;
973                 goto out;
974         }
975
976         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
977                 if (dev->devtype->product == CODA_960)
978                         ctx->num_internal_frames = 4;
979                 else
980                         ctx->num_internal_frames = 2;
981                 ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc);
982                 if (ret < 0) {
983                         v4l2_err(v4l2_dev, "failed to allocate framebuffers\n");
984                         goto out;
985                 }
986                 stride = q_data_src->bytesperline;
987         } else {
988                 ctx->num_internal_frames = 0;
989                 stride = 0;
990         }
991         coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
992         coda_write(dev, stride, CODA_CMD_SET_FRAME_BUF_STRIDE);
993
994         if (dev->devtype->product == CODA_7541) {
995                 coda_write(dev, q_data_src->bytesperline,
996                                 CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
997         }
998         if (dev->devtype->product != CODA_DX6) {
999                 coda_write(dev, ctx->iram_info.buf_bit_use,
1000                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1001                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1002                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1003                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1004                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1005                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1006                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1007                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1008                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1009                 if (dev->devtype->product == CODA_960) {
1010                         coda_write(dev, ctx->iram_info.buf_btp_use,
1011                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1012
1013                         /* FIXME */
1014                         coda_write(dev, ctx->internal_frames[2].paddr,
1015                                    CODA9_CMD_SET_FRAME_SUBSAMP_A);
1016                         coda_write(dev, ctx->internal_frames[3].paddr,
1017                                    CODA9_CMD_SET_FRAME_SUBSAMP_B);
1018                 }
1019         }
1020
1021         ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF);
1022         if (ret < 0) {
1023                 v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1024                 goto out;
1025         }
1026
1027         /* Save stream headers */
1028         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1029         switch (dst_fourcc) {
1030         case V4L2_PIX_FMT_H264:
1031                 /*
1032                  * Get SPS in the first frame and copy it to an
1033                  * intermediate buffer.
1034                  */
1035                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS,
1036                                          &ctx->vpu_header[0][0],
1037                                          &ctx->vpu_header_size[0]);
1038                 if (ret < 0)
1039                         goto out;
1040
1041                 /*
1042                  * Get PPS in the first frame and copy it to an
1043                  * intermediate buffer.
1044                  */
1045                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS,
1046                                          &ctx->vpu_header[1][0],
1047                                          &ctx->vpu_header_size[1]);
1048                 if (ret < 0)
1049                         goto out;
1050
1051                 /*
1052                  * Length of H.264 headers is variable and thus it might not be
1053                  * aligned for the coda to append the encoded frame. If that is
1054                  * the case a filler NAL must be added to header 2.
1055                  */
1056                 ctx->vpu_header_size[2] = coda_h264_padding(
1057                                         (ctx->vpu_header_size[0] +
1058                                          ctx->vpu_header_size[1]),
1059                                          ctx->vpu_header[2]);
1060                 break;
1061         case V4L2_PIX_FMT_MPEG4:
1062                 /*
1063                  * Get VOS in the first frame and copy it to an
1064                  * intermediate buffer
1065                  */
1066                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS,
1067                                          &ctx->vpu_header[0][0],
1068                                          &ctx->vpu_header_size[0]);
1069                 if (ret < 0)
1070                         goto out;
1071
1072                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS,
1073                                          &ctx->vpu_header[1][0],
1074                                          &ctx->vpu_header_size[1]);
1075                 if (ret < 0)
1076                         goto out;
1077
1078                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL,
1079                                          &ctx->vpu_header[2][0],
1080                                          &ctx->vpu_header_size[2]);
1081                 if (ret < 0)
1082                         goto out;
1083                 break;
1084         default:
1085                 /* No more formats need to save headers at the moment */
1086                 break;
1087         }
1088
1089 out:
1090         mutex_unlock(&dev->coda_mutex);
1091         return ret;
1092 }
1093
1094 static int coda_prepare_encode(struct coda_ctx *ctx)
1095 {
1096         struct coda_q_data *q_data_src, *q_data_dst;
1097         struct vb2_buffer *src_buf, *dst_buf;
1098         struct coda_dev *dev = ctx->dev;
1099         int force_ipicture;
1100         int quant_param = 0;
1101         u32 pic_stream_buffer_addr, pic_stream_buffer_size;
1102         u32 rot_mode = 0;
1103         u32 dst_fourcc;
1104         u32 reg;
1105
1106         src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1107         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1108         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1109         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1110         dst_fourcc = q_data_dst->fourcc;
1111
1112         src_buf->v4l2_buf.sequence = ctx->osequence;
1113         dst_buf->v4l2_buf.sequence = ctx->osequence;
1114         ctx->osequence++;
1115
1116         /*
1117          * Workaround coda firmware BUG that only marks the first
1118          * frame as IDR. This is a problem for some decoders that can't
1119          * recover when a frame is lost.
1120          */
1121         if (src_buf->v4l2_buf.sequence % ctx->params.gop_size) {
1122                 src_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_PFRAME;
1123                 src_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1124         } else {
1125                 src_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_KEYFRAME;
1126                 src_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_PFRAME;
1127         }
1128
1129         if (dev->devtype->product == CODA_960)
1130                 coda_set_gdi_regs(ctx);
1131
1132         /*
1133          * Copy headers at the beginning of the first frame for H.264 only.
1134          * In MPEG4 they are already copied by the coda.
1135          */
1136         if (src_buf->v4l2_buf.sequence == 0) {
1137                 pic_stream_buffer_addr =
1138                         vb2_dma_contig_plane_dma_addr(dst_buf, 0) +
1139                         ctx->vpu_header_size[0] +
1140                         ctx->vpu_header_size[1] +
1141                         ctx->vpu_header_size[2];
1142                 pic_stream_buffer_size = q_data_dst->sizeimage -
1143                         ctx->vpu_header_size[0] -
1144                         ctx->vpu_header_size[1] -
1145                         ctx->vpu_header_size[2];
1146                 memcpy(vb2_plane_vaddr(dst_buf, 0),
1147                        &ctx->vpu_header[0][0], ctx->vpu_header_size[0]);
1148                 memcpy(vb2_plane_vaddr(dst_buf, 0) + ctx->vpu_header_size[0],
1149                        &ctx->vpu_header[1][0], ctx->vpu_header_size[1]);
1150                 memcpy(vb2_plane_vaddr(dst_buf, 0) + ctx->vpu_header_size[0] +
1151                         ctx->vpu_header_size[1], &ctx->vpu_header[2][0],
1152                         ctx->vpu_header_size[2]);
1153         } else {
1154                 pic_stream_buffer_addr =
1155                         vb2_dma_contig_plane_dma_addr(dst_buf, 0);
1156                 pic_stream_buffer_size = q_data_dst->sizeimage;
1157         }
1158
1159         if (src_buf->v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) {
1160                 force_ipicture = 1;
1161                 switch (dst_fourcc) {
1162                 case V4L2_PIX_FMT_H264:
1163                         quant_param = ctx->params.h264_intra_qp;
1164                         break;
1165                 case V4L2_PIX_FMT_MPEG4:
1166                         quant_param = ctx->params.mpeg4_intra_qp;
1167                         break;
1168                 case V4L2_PIX_FMT_JPEG:
1169                         quant_param = 30;
1170                         break;
1171                 default:
1172                         v4l2_warn(&ctx->dev->v4l2_dev,
1173                                 "cannot set intra qp, fmt not supported\n");
1174                         break;
1175                 }
1176         } else {
1177                 force_ipicture = 0;
1178                 switch (dst_fourcc) {
1179                 case V4L2_PIX_FMT_H264:
1180                         quant_param = ctx->params.h264_inter_qp;
1181                         break;
1182                 case V4L2_PIX_FMT_MPEG4:
1183                         quant_param = ctx->params.mpeg4_inter_qp;
1184                         break;
1185                 default:
1186                         v4l2_warn(&ctx->dev->v4l2_dev,
1187                                 "cannot set inter qp, fmt not supported\n");
1188                         break;
1189                 }
1190         }
1191
1192         /* submit */
1193         if (ctx->params.rot_mode)
1194                 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1195         coda_write(dev, rot_mode, CODA_CMD_ENC_PIC_ROT_MODE);
1196         coda_write(dev, quant_param, CODA_CMD_ENC_PIC_QS);
1197
1198         if (dev->devtype->product == CODA_960) {
1199                 coda_write(dev, 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX);
1200                 coda_write(dev, q_data_src->width, CODA9_CMD_ENC_PIC_SRC_STRIDE);
1201                 coda_write(dev, 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC);
1202
1203                 reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y;
1204         } else {
1205                 reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y;
1206         }
1207         coda_write_base(ctx, q_data_src, src_buf, reg);
1208
1209         coda_write(dev, force_ipicture << 1 & 0x2,
1210                    CODA_CMD_ENC_PIC_OPTION);
1211
1212         coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START);
1213         coda_write(dev, pic_stream_buffer_size / 1024,
1214                    CODA_CMD_ENC_PIC_BB_SIZE);
1215
1216         if (!ctx->streamon_out) {
1217                 /* After streamoff on the output side, set stream end flag */
1218                 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
1219                 coda_write(dev, ctx->bit_stream_param,
1220                            CODA_REG_BIT_BIT_STREAM_PARAM);
1221         }
1222
1223         if (dev->devtype->product != CODA_DX6)
1224                 coda_write(dev, ctx->iram_info.axi_sram_use,
1225                                 CODA7_REG_BIT_AXI_SRAM_USE);
1226
1227         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1228
1229         return 0;
1230 }
1231
1232 static void coda_finish_encode(struct coda_ctx *ctx)
1233 {
1234         struct vb2_buffer *src_buf, *dst_buf;
1235         struct coda_dev *dev = ctx->dev;
1236         u32 wr_ptr, start_ptr;
1237
1238         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1239         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1240
1241         /* Get results from the coda */
1242         start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START);
1243         wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
1244
1245         /* Calculate bytesused field */
1246         if (dst_buf->v4l2_buf.sequence == 0) {
1247                 vb2_set_plane_payload(dst_buf, 0, wr_ptr - start_ptr +
1248                                         ctx->vpu_header_size[0] +
1249                                         ctx->vpu_header_size[1] +
1250                                         ctx->vpu_header_size[2]);
1251         } else {
1252                 vb2_set_plane_payload(dst_buf, 0, wr_ptr - start_ptr);
1253         }
1254
1255         v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n",
1256                  wr_ptr - start_ptr);
1257
1258         coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM);
1259         coda_read(dev, CODA_RET_ENC_PIC_FLAG);
1260
1261         if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) {
1262                 dst_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_KEYFRAME;
1263                 dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_PFRAME;
1264         } else {
1265                 dst_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_PFRAME;
1266                 dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1267         }
1268
1269         dst_buf->v4l2_buf.timestamp = src_buf->v4l2_buf.timestamp;
1270         dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1271         dst_buf->v4l2_buf.flags |=
1272                 src_buf->v4l2_buf.flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1273         dst_buf->v4l2_buf.timecode = src_buf->v4l2_buf.timecode;
1274
1275         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1276
1277         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1278         v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_DONE);
1279
1280         ctx->gopcounter--;
1281         if (ctx->gopcounter < 0)
1282                 ctx->gopcounter = ctx->params.gop_size - 1;
1283
1284         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1285                 "job finished: encoding frame (%d) (%s)\n",
1286                 dst_buf->v4l2_buf.sequence,
1287                 (dst_buf->v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) ?
1288                 "KEYFRAME" : "PFRAME");
1289 }
1290
1291 static void coda_seq_end_work(struct work_struct *work)
1292 {
1293         struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work);
1294         struct coda_dev *dev = ctx->dev;
1295
1296         mutex_lock(&ctx->buffer_mutex);
1297         mutex_lock(&dev->coda_mutex);
1298
1299         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1300                  "%d: %s: sent command 'SEQ_END' to coda\n", ctx->idx,
1301                  __func__);
1302         if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
1303                 v4l2_err(&dev->v4l2_dev,
1304                          "CODA_COMMAND_SEQ_END failed\n");
1305         }
1306
1307         kfifo_init(&ctx->bitstream_fifo,
1308                 ctx->bitstream.vaddr, ctx->bitstream.size);
1309
1310         coda_free_framebuffers(ctx);
1311         coda_free_context_buffers(ctx);
1312
1313         mutex_unlock(&dev->coda_mutex);
1314         mutex_unlock(&ctx->buffer_mutex);
1315 }
1316
/*
 * coda_bit_release() - release callback of the BIT processor context ops
 * @ctx: context being released
 *
 * Frees the context's framebuffers and context buffers.
 *
 * NOTE(review): unlike coda_seq_end_work(), no mutex is taken here;
 * presumably the caller guarantees nothing else touches the context any
 * more - confirm against the release path.
 */
static void coda_bit_release(struct coda_ctx *ctx)
{
	coda_free_framebuffers(ctx);
	coda_free_context_buffers(ctx);
}
1322
1323 const struct coda_context_ops coda_bit_encode_ops = {
1324         .queue_init = coda_encoder_queue_init,
1325         .start_streaming = coda_start_encoding,
1326         .prepare_run = coda_prepare_encode,
1327         .finish_run = coda_finish_encode,
1328         .seq_end_work = coda_seq_end_work,
1329         .release = coda_bit_release,
1330 };
1331
1332 /*
1333  * Decoder context operations
1334  */
1335
1336 static int __coda_start_decoding(struct coda_ctx *ctx)
1337 {
1338         struct coda_q_data *q_data_src, *q_data_dst;
1339         u32 bitstream_buf, bitstream_size;
1340         struct coda_dev *dev = ctx->dev;
1341         int width, height;
1342         u32 src_fourcc, dst_fourcc;
1343         u32 val;
1344         int ret;
1345
1346         /* Start decoding */
1347         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1348         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1349         bitstream_buf = ctx->bitstream.paddr;
1350         bitstream_size = ctx->bitstream.size;
1351         src_fourcc = q_data_src->fourcc;
1352         dst_fourcc = q_data_dst->fourcc;
1353
1354         /* Allocate per-instance buffers */
1355         ret = coda_alloc_context_buffers(ctx, q_data_src);
1356         if (ret < 0)
1357                 return ret;
1358
1359         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
1360
1361         /* Update coda bitstream read and write pointers from kfifo */
1362         coda_kfifo_sync_to_device_full(ctx);
1363
1364         ctx->frame_mem_ctrl &= ~CODA_FRAME_CHROMA_INTERLEAVE;
1365         if (dst_fourcc == V4L2_PIX_FMT_NV12)
1366                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
1367         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
1368
1369         ctx->display_idx = -1;
1370         ctx->frm_dis_flg = 0;
1371         coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
1372
1373         coda_write(dev, CODA_BIT_DEC_SEQ_INIT_ESCAPE,
1374                         CODA_REG_BIT_BIT_STREAM_PARAM);
1375
1376         coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
1377         coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
1378         val = 0;
1379         if ((dev->devtype->product == CODA_7541) ||
1380             (dev->devtype->product == CODA_960))
1381                 val |= CODA_REORDER_ENABLE;
1382         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
1383                 val |= CODA_NO_INT_ENABLE;
1384         coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);
1385
1386         ctx->params.codec_mode = ctx->codec->mode;
1387         if (dev->devtype->product == CODA_960 &&
1388             src_fourcc == V4L2_PIX_FMT_MPEG4)
1389                 ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4;
1390         else
1391                 ctx->params.codec_mode_aux = 0;
1392         if (src_fourcc == V4L2_PIX_FMT_H264) {
1393                 if (dev->devtype->product == CODA_7541) {
1394                         coda_write(dev, ctx->psbuf.paddr,
1395                                         CODA_CMD_DEC_SEQ_PS_BB_START);
1396                         coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
1397                                         CODA_CMD_DEC_SEQ_PS_BB_SIZE);
1398                 }
1399                 if (dev->devtype->product == CODA_960) {
1400                         coda_write(dev, 0, CODA_CMD_DEC_SEQ_X264_MV_EN);
1401                         coda_write(dev, 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE);
1402                 }
1403         }
1404         if (dev->devtype->product != CODA_960)
1405                 coda_write(dev, 0, CODA_CMD_DEC_SEQ_SRC_SIZE);
1406
1407         if (coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT)) {
1408                 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1409                 coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
1410                 return -ETIMEDOUT;
1411         }
1412
1413         /* Update kfifo out pointer from coda bitstream read pointer */
1414         coda_kfifo_sync_from_device(ctx);
1415
1416         coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
1417
1418         if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
1419                 v4l2_err(&dev->v4l2_dev,
1420                         "CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
1421                         coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
1422                 return -EAGAIN;
1423         }
1424
1425         val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
1426         if (dev->devtype->product == CODA_DX6) {
1427                 width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK;
1428                 height = val & CODADX6_PICHEIGHT_MASK;
1429         } else {
1430                 width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK;
1431                 height = val & CODA7_PICHEIGHT_MASK;
1432         }
1433
1434         if (width > q_data_dst->width || height > q_data_dst->height) {
1435                 v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
1436                          width, height, q_data_dst->width, q_data_dst->height);
1437                 return -EINVAL;
1438         }
1439
1440         width = round_up(width, 16);
1441         height = round_up(height, 16);
1442
1443         v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d now: %dx%d\n",
1444                  __func__, ctx->idx, width, height);
1445
1446         ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED);
1447         if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
1448                 v4l2_err(&dev->v4l2_dev,
1449                          "not enough framebuffers to decode (%d < %d)\n",
1450                          CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
1451                 return -EINVAL;
1452         }
1453
1454         if (src_fourcc == V4L2_PIX_FMT_H264) {
1455                 u32 left_right;
1456                 u32 top_bottom;
1457
1458                 left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT);
1459                 top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM);
1460
1461                 q_data_dst->rect.left = (left_right >> 10) & 0x3ff;
1462                 q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff;
1463                 q_data_dst->rect.width = width - q_data_dst->rect.left -
1464                                          (left_right & 0x3ff);
1465                 q_data_dst->rect.height = height - q_data_dst->rect.top -
1466                                           (top_bottom & 0x3ff);
1467         }
1468
1469         ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc);
1470         if (ret < 0) {
1471                 v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n");
1472                 return ret;
1473         }
1474
1475         /* Tell the decoder how many frame buffers we allocated. */
1476         coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
1477         coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE);
1478
1479         if (dev->devtype->product != CODA_DX6) {
1480                 /* Set secondary AXI IRAM */
1481                 coda_setup_iram(ctx);
1482
1483                 coda_write(dev, ctx->iram_info.buf_bit_use,
1484                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1485                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1486                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1487                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1488                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1489                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1490                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1491                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1492                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1493                 if (dev->devtype->product == CODA_960)
1494                         coda_write(dev, ctx->iram_info.buf_btp_use,
1495                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1496         }
1497
1498         if (dev->devtype->product == CODA_960) {
1499                 int cbb_size, crb_size;
1500
1501                 coda_write(dev, -1, CODA9_CMD_SET_FRAME_DELAY);
1502                 /* Luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size */
1503                 coda_write(dev, 0x20262024, CODA9_CMD_SET_FRAME_CACHE_SIZE);
1504
1505                 if (dst_fourcc == V4L2_PIX_FMT_NV12) {
1506                         cbb_size = 0;
1507                         crb_size = 16;
1508                 } else {
1509                         cbb_size = 8;
1510                         crb_size = 8;
1511                 }
1512                 coda_write(dev, 2 << CODA9_CACHE_PAGEMERGE_OFFSET |
1513                                 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
1514                                 cbb_size << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET |
1515                                 crb_size << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET,
1516                                 CODA9_CMD_SET_FRAME_CACHE_CONFIG);
1517         }
1518
1519         if (src_fourcc == V4L2_PIX_FMT_H264) {
1520                 coda_write(dev, ctx->slicebuf.paddr,
1521                                 CODA_CMD_SET_FRAME_SLICE_BB_START);
1522                 coda_write(dev, ctx->slicebuf.size / 1024,
1523                                 CODA_CMD_SET_FRAME_SLICE_BB_SIZE);
1524         }
1525
1526         if (dev->devtype->product == CODA_7541) {
1527                 int max_mb_x = 1920 / 16;
1528                 int max_mb_y = 1088 / 16;
1529                 int max_mb_num = max_mb_x * max_mb_y;
1530
1531                 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1532                                 CODA7_CMD_SET_FRAME_MAX_DEC_SIZE);
1533         } else if (dev->devtype->product == CODA_960) {
1534                 int max_mb_x = 1920 / 16;
1535                 int max_mb_y = 1088 / 16;
1536                 int max_mb_num = max_mb_x * max_mb_y;
1537
1538                 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1539                                 CODA9_CMD_SET_FRAME_MAX_DEC_SIZE);
1540         }
1541
1542         if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) {
1543                 v4l2_err(&ctx->dev->v4l2_dev,
1544                          "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1545                 return -ETIMEDOUT;
1546         }
1547
1548         return 0;
1549 }
1550
1551 static int coda_start_decoding(struct coda_ctx *ctx)
1552 {
1553         struct coda_dev *dev = ctx->dev;
1554         int ret;
1555
1556         mutex_lock(&dev->coda_mutex);
1557         ret = __coda_start_decoding(ctx);
1558         mutex_unlock(&dev->coda_mutex);
1559
1560         return ret;
1561 }
1562
1563 static int coda_prepare_decode(struct coda_ctx *ctx)
1564 {
1565         struct vb2_buffer *dst_buf;
1566         struct coda_dev *dev = ctx->dev;
1567         struct coda_q_data *q_data_dst;
1568         struct coda_buffer_meta *meta;
1569         u32 reg_addr, reg_stride;
1570
1571         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1572         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1573
1574         /* Try to copy source buffer contents into the bitstream ringbuffer */
1575         mutex_lock(&ctx->bitstream_mutex);
1576         coda_fill_bitstream(ctx);
1577         mutex_unlock(&ctx->bitstream_mutex);
1578
1579         if (coda_get_bitstream_payload(ctx) < 512 &&
1580             (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
1581                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1582                          "bitstream payload: %d, skipping\n",
1583                          coda_get_bitstream_payload(ctx));
1584                 v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1585                 return -EAGAIN;
1586         }
1587
1588         /* Run coda_start_decoding (again) if not yet initialized */
1589         if (!ctx->initialized) {
1590                 int ret = __coda_start_decoding(ctx);
1591
1592                 if (ret < 0) {
1593                         v4l2_err(&dev->v4l2_dev, "failed to start decoding\n");
1594                         v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1595                         return -EAGAIN;
1596                 } else {
1597                         ctx->initialized = 1;
1598                 }
1599         }
1600
1601         if (dev->devtype->product == CODA_960)
1602                 coda_set_gdi_regs(ctx);
1603
1604         if (dev->devtype->product == CODA_960) {
1605                 /*
1606                  * The CODA960 seems to have an internal list of buffers with
1607                  * 64 entries that includes the registered frame buffers as
1608                  * well as the rotator buffer output.
1609                  * ROT_INDEX needs to be < 0x40, but > ctx->num_internal_frames.
1610                  */
1611                 coda_write(dev, CODA_MAX_FRAMEBUFFERS + dst_buf->v4l2_buf.index,
1612                                 CODA9_CMD_DEC_PIC_ROT_INDEX);
1613
1614                 reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y;
1615                 reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE;
1616         } else {
1617                 reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y;
1618                 reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE;
1619         }
1620         coda_write_base(ctx, q_data_dst, dst_buf, reg_addr);
1621         coda_write(dev, q_data_dst->bytesperline, reg_stride);
1622
1623         coda_write(dev, CODA_ROT_MIR_ENABLE | ctx->params.rot_mode,
1624                         CODA_CMD_DEC_PIC_ROT_MODE);
1625
1626         switch (dev->devtype->product) {
1627         case CODA_DX6:
1628                 /* TBD */
1629         case CODA_7541:
1630                 coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION);
1631                 break;
1632         case CODA_960:
1633                 /* 'hardcode to use interrupt disable mode'? */
1634                 coda_write(dev, (1 << 10), CODA_CMD_DEC_PIC_OPTION);
1635                 break;
1636         }
1637
1638         coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM);
1639
1640         coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START);
1641         coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE);
1642
1643         if (dev->devtype->product != CODA_DX6)
1644                 coda_write(dev, ctx->iram_info.axi_sram_use,
1645                                 CODA7_REG_BIT_AXI_SRAM_USE);
1646
1647         meta = list_first_entry_or_null(&ctx->buffer_meta_list,
1648                                         struct coda_buffer_meta, list);
1649
1650         if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) {
1651
1652                 /* If this is the last buffer in the bitstream, add padding */
1653                 if (meta->end == (ctx->bitstream_fifo.kfifo.in &
1654                                   ctx->bitstream_fifo.kfifo.mask)) {
1655                         static unsigned char buf[512];
1656                         unsigned int pad;
1657
1658                         /* Pad to multiple of 256 and then add 256 more */
1659                         pad = ((0 - meta->end) & 0xff) + 256;
1660
1661                         memset(buf, 0xff, sizeof(buf));
1662
1663                         kfifo_in(&ctx->bitstream_fifo, buf, pad);
1664                 }
1665         }
1666
1667         coda_kfifo_sync_to_device_full(ctx);
1668
1669         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1670
1671         return 0;
1672 }
1673
1674 static void coda_finish_decode(struct coda_ctx *ctx)
1675 {
1676         struct coda_dev *dev = ctx->dev;
1677         struct coda_q_data *q_data_src;
1678         struct coda_q_data *q_data_dst;
1679         struct vb2_buffer *dst_buf;
1680         struct coda_buffer_meta *meta;
1681         unsigned long payload;
1682         int width, height;
1683         int decoded_idx;
1684         int display_idx;
1685         u32 src_fourcc;
1686         int success;
1687         u32 err_mb;
1688         u32 val;
1689
1690         /* Update kfifo out pointer from coda bitstream read pointer */
1691         coda_kfifo_sync_from_device(ctx);
1692
1693         /*
1694          * in stream-end mode, the read pointer can overshoot the write pointer
1695          * by up to 512 bytes
1696          */
1697         if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) {
1698                 if (coda_get_bitstream_payload(ctx) >= CODA_MAX_FRAME_SIZE - 512)
1699                         kfifo_init(&ctx->bitstream_fifo,
1700                                 ctx->bitstream.vaddr, ctx->bitstream.size);
1701         }
1702
1703         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1704         src_fourcc = q_data_src->fourcc;
1705
1706         val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS);
1707         if (val != 1)
1708                 pr_err("DEC_PIC_SUCCESS = %d\n", val);
1709
1710         success = val & 0x1;
1711         if (!success)
1712                 v4l2_err(&dev->v4l2_dev, "decode failed\n");
1713
1714         if (src_fourcc == V4L2_PIX_FMT_H264) {
1715                 if (val & (1 << 3))
1716                         v4l2_err(&dev->v4l2_dev,
1717                                  "insufficient PS buffer space (%d bytes)\n",
1718                                  ctx->psbuf.size);
1719                 if (val & (1 << 2))
1720                         v4l2_err(&dev->v4l2_dev,
1721                                  "insufficient slice buffer space (%d bytes)\n",
1722                                  ctx->slicebuf.size);
1723         }
1724
1725         val = coda_read(dev, CODA_RET_DEC_PIC_SIZE);
1726         width = (val >> 16) & 0xffff;
1727         height = val & 0xffff;
1728
1729         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1730
1731         /* frame crop information */
1732         if (src_fourcc == V4L2_PIX_FMT_H264) {
1733                 u32 left_right;
1734                 u32 top_bottom;
1735
1736                 left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT);
1737                 top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM);
1738
1739                 if (left_right == 0xffffffff && top_bottom == 0xffffffff) {
1740                         /* Keep current crop information */
1741                 } else {
1742                         struct v4l2_rect *rect = &q_data_dst->rect;
1743
1744                         rect->left = left_right >> 16 & 0xffff;
1745                         rect->top = top_bottom >> 16 & 0xffff;
1746                         rect->width = width - rect->left -
1747                                       (left_right & 0xffff);
1748                         rect->height = height - rect->top -
1749                                        (top_bottom & 0xffff);
1750                 }
1751         } else {
1752                 /* no cropping */
1753         }
1754
1755         err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
1756         if (err_mb > 0)
1757                 v4l2_err(&dev->v4l2_dev,
1758                          "errors in %d macroblocks\n", err_mb);
1759
1760         if (dev->devtype->product == CODA_7541) {
1761                 val = coda_read(dev, CODA_RET_DEC_PIC_OPTION);
1762                 if (val == 0) {
1763                         /* not enough bitstream data */
1764                         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1765                                  "prescan failed: %d\n", val);
1766                         ctx->hold = true;
1767                         return;
1768                 }
1769         }
1770
1771         ctx->frm_dis_flg = coda_read(dev,
1772                                      CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
1773
1774         /*
1775          * The previous display frame was copied out by the rotator,
1776          * now it can be overwritten again
1777          */
1778         if (ctx->display_idx >= 0 &&
1779             ctx->display_idx < ctx->num_internal_frames) {
1780                 ctx->frm_dis_flg &= ~(1 << ctx->display_idx);
1781                 coda_write(dev, ctx->frm_dis_flg,
1782                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
1783         }
1784
1785         /*
1786          * The index of the last decoded frame, not necessarily in
1787          * display order, and the index of the next display frame.
1788          * The latter could have been decoded in a previous run.
1789          */
1790         decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX);
1791         display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX);
1792
1793         if (decoded_idx == -1) {
1794                 /* no frame was decoded, but we might have a display frame */
1795                 if (display_idx >= 0 && display_idx < ctx->num_internal_frames)
1796                         ctx->sequence_offset++;
1797                 else if (ctx->display_idx < 0)
1798                         ctx->hold = true;
1799         } else if (decoded_idx == -2) {
1800                 /* no frame was decoded, we still return remaining buffers */
1801         } else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) {
1802                 v4l2_err(&dev->v4l2_dev,
1803                          "decoded frame index out of range: %d\n", decoded_idx);
1804         } else {
1805                 val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM) - 1;
1806                 val -= ctx->sequence_offset;
1807                 mutex_lock(&ctx->bitstream_mutex);
1808                 if (!list_empty(&ctx->buffer_meta_list)) {
1809                         meta = list_first_entry(&ctx->buffer_meta_list,
1810                                               struct coda_buffer_meta, list);
1811                         list_del(&meta->list);
1812                         if (val != (meta->sequence & 0xffff)) {
1813                                 v4l2_err(&dev->v4l2_dev,
1814                                          "sequence number mismatch (%d(%d) != %d)\n",
1815                                          val, ctx->sequence_offset,
1816                                          meta->sequence);
1817                         }
1818                         ctx->frame_metas[decoded_idx] = *meta;
1819                         kfree(meta);
1820                 } else {
1821                         v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n");
1822                         memset(&ctx->frame_metas[decoded_idx], 0,
1823                                sizeof(struct coda_buffer_meta));
1824                         ctx->frame_metas[decoded_idx].sequence = val;
1825                         ctx->sequence_offset++;
1826                 }
1827                 mutex_unlock(&ctx->bitstream_mutex);
1828
1829                 val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7;
1830                 if (val == 0)
1831                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_KEYFRAME;
1832                 else if (val == 1)
1833                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_PFRAME;
1834                 else
1835                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_BFRAME;
1836
1837                 ctx->frame_errors[decoded_idx] = err_mb;
1838         }
1839
1840         if (display_idx == -1) {
1841                 /*
1842                  * no more frames to be decoded, but there could still
1843                  * be rotator output to dequeue
1844                  */
1845                 ctx->hold = true;
1846         } else if (display_idx == -3) {
1847                 /* possibly prescan failure */
1848         } else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) {
1849                 v4l2_err(&dev->v4l2_dev,
1850                          "presentation frame index out of range: %d\n",
1851                          display_idx);
1852         }
1853
1854         /* If a frame was copied out, return it */
1855         if (ctx->display_idx >= 0 &&
1856             ctx->display_idx < ctx->num_internal_frames) {
1857                 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1858                 dst_buf->v4l2_buf.sequence = ctx->osequence++;
1859
1860                 dst_buf->v4l2_buf.flags &= ~(V4L2_BUF_FLAG_KEYFRAME |
1861                                              V4L2_BUF_FLAG_PFRAME |
1862                                              V4L2_BUF_FLAG_BFRAME);
1863                 dst_buf->v4l2_buf.flags |= ctx->frame_types[ctx->display_idx];
1864                 meta = &ctx->frame_metas[ctx->display_idx];
1865                 dst_buf->v4l2_buf.timecode = meta->timecode;
1866                 dst_buf->v4l2_buf.timestamp = meta->timestamp;
1867
1868                 switch (q_data_dst->fourcc) {
1869                 case V4L2_PIX_FMT_YUV420:
1870                 case V4L2_PIX_FMT_YVU420:
1871                 case V4L2_PIX_FMT_NV12:
1872                 default:
1873                         payload = width * height * 3 / 2;
1874                         break;
1875                 case V4L2_PIX_FMT_YUV422P:
1876                         payload = width * height * 2;
1877                         break;
1878                 }
1879                 vb2_set_plane_payload(dst_buf, 0, payload);
1880
1881                 v4l2_m2m_buf_done(dst_buf, ctx->frame_errors[display_idx] ?
1882                                   VB2_BUF_STATE_ERROR : VB2_BUF_STATE_DONE);
1883
1884                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1885                         "job finished: decoding frame (%d) (%s)\n",
1886                         dst_buf->v4l2_buf.sequence,
1887                         (dst_buf->v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) ?
1888                         "KEYFRAME" : "PFRAME");
1889         } else {
1890                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1891                         "job finished: no frame decoded\n");
1892         }
1893
1894         /* The rotator will copy the current display frame next time */
1895         ctx->display_idx = display_idx;
1896 }
1897
1898 const struct coda_context_ops coda_bit_decode_ops = {
1899         .queue_init = coda_decoder_queue_init,
1900         .start_streaming = coda_start_decoding,
1901         .prepare_run = coda_prepare_decode,
1902         .finish_run = coda_finish_decode,
1903         .seq_end_work = coda_seq_end_work,
1904         .release = coda_bit_release,
1905 };
1906
1907 irqreturn_t coda_irq_handler(int irq, void *data)
1908 {
1909         struct coda_dev *dev = data;
1910         struct coda_ctx *ctx;
1911
1912         /* read status register to attend the IRQ */
1913         coda_read(dev, CODA_REG_BIT_INT_STATUS);
1914         coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET,
1915                       CODA_REG_BIT_INT_CLEAR);
1916
1917         ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
1918         if (ctx == NULL) {
1919                 v4l2_err(&dev->v4l2_dev,
1920                          "Instance released before the end of transaction\n");
1921                 mutex_unlock(&dev->coda_mutex);
1922                 return IRQ_HANDLED;
1923         }
1924
1925         if (ctx->aborting) {
1926                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
1927                          "task has been aborted\n");
1928         }
1929
1930         if (coda_isbusy(ctx->dev)) {
1931                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
1932                          "coda is still busy!!!!\n");
1933                 return IRQ_NONE;
1934         }
1935
1936         complete(&ctx->completion);
1937
1938         return IRQ_HANDLED;
1939 }