]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - arch/sparc/kernel/ldc.c
sparc: Make LDC use common iommu poll management functions
[karo-tx-linux.git] / arch / sparc / kernel / ldc.c
1 /* ldc.c: Logical Domain Channel link-layer protocol driver.
2  *
3  * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
4  */
5
6 #include <linux/kernel.h>
7 #include <linux/export.h>
8 #include <linux/slab.h>
9 #include <linux/spinlock.h>
10 #include <linux/delay.h>
11 #include <linux/errno.h>
12 #include <linux/string.h>
13 #include <linux/scatterlist.h>
14 #include <linux/interrupt.h>
15 #include <linux/list.h>
16 #include <linux/init.h>
17 #include <linux/bitmap.h>
18 #include <linux/hash.h>
19 #include <linux/iommu-common.h>
20
21 #include <asm/hypervisor.h>
22 #include <asm/iommu.h>
23 #include <asm/page.h>
24 #include <asm/ldc.h>
25 #include <asm/mdesc.h>
26
/* Driver identification strings.  PFX is the prefix placed in front
 * of this file's printk() messages.
 */
#define DRV_MODULE_NAME         "ldc"
#define PFX DRV_MODULE_NAME     ": "
#define DRV_MODULE_VERSION      "1.1"
#define DRV_MODULE_RELDATE      "July 22, 2008"

/* The top four bits of a cookie hold a page-size code; see
 * ldc_cookie_to_index() for how the code selects the shift that is
 * applied to the remaining bits.
 */
#define COOKIE_PGSZ_CODE        0xf000000000000000ULL
#define COOKIE_PGSZ_CODE_SHIFT  60ULL

/* NOTE(review): presumably a per-CPU hint used when picking an IOMMU
 * pool -- no user is visible in this part of the file, confirm.
 */
static DEFINE_PER_CPU(unsigned int, ldc_pool_hash);

/* Version banner assembled from the DRV_MODULE_* strings above. */
static char version[] =
        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
/* Size in bytes of one LDC packet; queue offsets advance in this unit. */
#define LDC_PACKET_SIZE         64
40
/* Packet header layout for unreliable and reliable mode frames.
 * When in RAW mode, packets are simply straight 64-byte payloads
 * with no headers.
 *
 * The structure is exactly LDC_PACKET_SIZE bytes: four single-byte
 * header fields, a 32-bit sequence id, and a payload union sized to
 * fill the remainder.  Field order is part of the layout and must
 * not be changed.
 */
struct ldc_packet {
        /* Frame class; ldc_rx() dispatches on this value. */
        u8                      type;
#define LDC_CTRL                0x01
#define LDC_DATA                0x02
#define LDC_ERR                 0x10

        /* Frame sub-type: informational, positive or negative ack. */
        u8                      stype;
#define LDC_INFO                0x01
#define LDC_ACK                 0x02
#define LDC_NACK                0x04

        /* Control message code, meaningful for LDC_CTRL frames. */
        u8                      ctrl;
#define LDC_VERS                0x01 /* Link Version            */
#define LDC_RTS                 0x02 /* Request To Send         */
#define LDC_RTR                 0x03 /* Ready To Receive        */
#define LDC_RDX                 0x04 /* Ready for Data eXchange */
#define LDC_CTRL_MSK            0x0f

        /* Envelope byte.  The RTS/RTR senders store the channel mode
         * here.  NOTE(review): for data frames this presumably holds
         * the payload length and START/STOP fragment bits -- confirm
         * against the read/write paths.
         */
        u8                      env;
#define LDC_LEN                 0x3f
#define LDC_FRAG_MASK           0xc0
#define LDC_START               0x40
#define LDC_STOP                0x80

        /* Sequence id of this frame. */
        u32                     seqid;

        union {
                /* Payload when no ack header is carried. */
                u8              u_data[LDC_PACKET_SIZE - 8];
                /* Payload preceded by an ack id (used by RDX and
                 * data NACK frames in this file).
                 */
                struct {
                        u32     pad;
                        u32     ackid;
                        u8      r_data[LDC_PACKET_SIZE - 8 - 8];
                } r;
        } u;
};
80
/* Protocol version pair.  It is copied verbatim into the payload of
 * LDC_VERS control frames, so its layout is part of the wire format.
 */
struct ldc_version {
        u16 major;
        u16 minor;
};
85
/* Versions this driver can speak, ordered from largest major to
 * lowest.  find_by_major() relies on that ordering, and
 * start_handshake() offers the first entry.
 */
static struct ldc_version ver_arr[] = {
        { .major = 1, .minor = 0 },
};
90
/* Default MTU is four packets' worth of bytes; default queue length
 * is however many packets fit in one page.
 */
#define LDC_DEFAULT_MTU                 (4 * LDC_PACKET_SIZE)
#define LDC_DEFAULT_NUM_ENTRIES         (PAGE_SIZE / LDC_PACKET_SIZE)

/* Forward declaration; the full definition follows further down. */
struct ldc_channel;
95
/* Per-mode read/write entry points.  Each takes the channel, a
 * buffer and a size, and returns an int.
 * NOTE(review): presumably bytes transferred or a negative errno --
 * the implementations are outside this part of the file, confirm.
 */
struct ldc_mode_ops {
        int (*write)(struct ldc_channel *, const void *, unsigned int);
        int (*read)(struct ldc_channel *, void *, unsigned int);
};
100
/* Tentative declarations of the three operation tables; their
 * initialisers are not in this part of the file.
 */
static const struct ldc_mode_ops raw_ops;
static const struct ldc_mode_ops nonraw_ops;
static const struct ldc_mode_ops stream_ops;

/* NOTE(review): presumably non-zero when logical domaining is
 * available on this machine -- the writer is not visible here.
 */
int ldom_domaining_enabled;
106
/* Per-channel mapping state: a lock, a table of ldc_mtable_entry
 * records and the common iommu_table bookkeeping structure.
 */
struct ldc_iommu {
        /* Protects ldc_unmap.  */
        spinlock_t                      lock;
        struct ldc_mtable_entry         *page_table;
        struct iommu_table              iommu_table;
};
113
/* Software state of one logical domain channel. */
struct ldc_channel {
        /* Protects all operations that depend upon channel state.  */
        spinlock_t                      lock;

        /* Channel id handed to every sun4v_ldc_*() hypervisor call. */
        unsigned long                   id;

        /* NOTE(review): presumably a staging buffer for stream-mode
         * reads -- its users are outside this part of the file.
         */
        u8                              *mssbuf;
        u32                             mssbuf_len;
        u32                             mssbuf_off;

        /* TX queue.  tx_head and tx_tail are byte offsets into the
         * queue (they are divided by LDC_PACKET_SIZE to index
         * tx_base); tx_ra is the address given to the hypervisor
         * when the queue is configured.
         */
        struct ldc_packet               *tx_base;
        unsigned long                   tx_head;
        unsigned long                   tx_tail;
        unsigned long                   tx_num_entries;
        unsigned long                   tx_ra;

        /* Offset just past the last TX packet known to be acked;
         * see head_for_data() and process_data_ack().
         */
        unsigned long                   tx_acked;

        /* RX queue, same conventions as the TX queue above. */
        struct ldc_packet               *rx_base;
        unsigned long                   rx_head;
        unsigned long                   rx_tail;
        unsigned long                   rx_num_entries;
        unsigned long                   rx_ra;

        /* Last sequence id received and last sequence id sent. */
        u32                             rcv_nxt;
        u32                             snd_nxt;

        /* Channel state as last reported by the hypervisor. */
        unsigned long                   chan_state;

        /* Caller-supplied configuration and the opaque argument
         * passed back to cfg.event().
         */
        struct ldc_channel_config       cfg;
        void                            *event_arg;

        const struct ldc_mode_ops       *mops;

        struct ldc_iommu                iommu;

        /* Protocol version agreed during the handshake. */
        struct ldc_version              ver;

        /* Handshake progress. */
        u8                              hs_state;
#define LDC_HS_CLOSED                   0x00
#define LDC_HS_OPEN                     0x01
#define LDC_HS_GOTVERS                  0x02
#define LDC_HS_SENTRTR                  0x03
#define LDC_HS_GOTRTR                   0x04
#define LDC_HS_COMPLETE                 0x10

        /* Resource and reset bookkeeping bits. */
        u8                              flags;
#define LDC_FLAG_ALLOCED_QUEUES         0x01
#define LDC_FLAG_REGISTERED_QUEUES      0x02
#define LDC_FLAG_REGISTERED_IRQS        0x04
#define LDC_FLAG_RESET                  0x10

        /* Payload bytes per packet, used for TX space accounting. */
        u8                              mss;
        /* Driver-level LDC_STATE_* value; see ldc_set_state(). */
        u8                              state;

#define LDC_IRQ_NAME_MAX                32
        char                            rx_irq_name[LDC_IRQ_NAME_MAX];
        char                            tx_irq_name[LDC_IRQ_NAME_MAX];

        struct hlist_head               mh_list;

        /* Linkage on ldc_channel_list. */
        struct hlist_node               list;
};
177
/* Conditional debug print.  Emits the message only when the
 * LDC_DEBUG_<TYPE> bit is set in lp->cfg.debug, and prefixes it with
 * the channel id.  The macro expects a variable named "lp" (a
 * struct ldc_channel pointer) to be in scope at the call site.
 */
#define ldcdbg(TYPE, f, a...) \
do {    if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
                printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
} while (0)
182
183 static const char *state_to_str(u8 state)
184 {
185         switch (state) {
186         case LDC_STATE_INVALID:
187                 return "INVALID";
188         case LDC_STATE_INIT:
189                 return "INIT";
190         case LDC_STATE_BOUND:
191                 return "BOUND";
192         case LDC_STATE_READY:
193                 return "READY";
194         case LDC_STATE_CONNECTED:
195                 return "CONNECTED";
196         default:
197                 return "<UNKNOWN>";
198         }
199 }
200
201 static void ldc_set_state(struct ldc_channel *lp, u8 state)
202 {
203         ldcdbg(STATE, "STATE (%s) --> (%s)\n",
204                state_to_str(lp->state),
205                state_to_str(state));
206
207         lp->state = state;
208 }
209
210 static unsigned long __advance(unsigned long off, unsigned long num_entries)
211 {
212         off += LDC_PACKET_SIZE;
213         if (off == (num_entries * LDC_PACKET_SIZE))
214                 off = 0;
215
216         return off;
217 }
218
219 static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
220 {
221         return __advance(off, lp->rx_num_entries);
222 }
223
224 static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
225 {
226         return __advance(off, lp->tx_num_entries);
227 }
228
229 static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
230                                                   unsigned long *new_tail)
231 {
232         struct ldc_packet *p;
233         unsigned long t;
234
235         t = tx_advance(lp, lp->tx_tail);
236         if (t == lp->tx_head)
237                 return NULL;
238
239         *new_tail = t;
240
241         p = lp->tx_base;
242         return p + (lp->tx_tail / LDC_PACKET_SIZE);
243 }
244
245 /* When we are in reliable or stream mode, have to track the next packet
246  * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
247  * to be careful not to stomp over the queue past that point.  During
248  * the handshake, we don't have TX data packets pending in the queue
249  * and that's why handshake_get_tx_packet() need not be mindful of
250  * lp->tx_acked.
251  */
252 static unsigned long head_for_data(struct ldc_channel *lp)
253 {
254         if (lp->cfg.mode == LDC_MODE_STREAM)
255                 return lp->tx_acked;
256         return lp->tx_head;
257 }
258
259 static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
260 {
261         unsigned long limit, tail, new_tail, diff;
262         unsigned int mss;
263
264         limit = head_for_data(lp);
265         tail = lp->tx_tail;
266         new_tail = tx_advance(lp, tail);
267         if (new_tail == limit)
268                 return 0;
269
270         if (limit > new_tail)
271                 diff = limit - new_tail;
272         else
273                 diff = (limit +
274                         ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
275         diff /= LDC_PACKET_SIZE;
276         mss = lp->mss;
277
278         if (diff * mss < size)
279                 return 0;
280
281         return 1;
282 }
283
284 static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
285                                              unsigned long *new_tail)
286 {
287         struct ldc_packet *p;
288         unsigned long h, t;
289
290         h = head_for_data(lp);
291         t = tx_advance(lp, lp->tx_tail);
292         if (t == h)
293                 return NULL;
294
295         *new_tail = t;
296
297         p = lp->tx_base;
298         return p + (lp->tx_tail / LDC_PACKET_SIZE);
299 }
300
301 static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
302 {
303         unsigned long orig_tail = lp->tx_tail;
304         int limit = 1000;
305
306         lp->tx_tail = tail;
307         while (limit-- > 0) {
308                 unsigned long err;
309
310                 err = sun4v_ldc_tx_set_qtail(lp->id, tail);
311                 if (!err)
312                         return 0;
313
314                 if (err != HV_EWOULDBLOCK) {
315                         lp->tx_tail = orig_tail;
316                         return -EINVAL;
317                 }
318                 udelay(1);
319         }
320
321         lp->tx_tail = orig_tail;
322         return -EBUSY;
323 }
324
325 /* This just updates the head value in the hypervisor using
326  * a polling loop with a timeout.  The caller takes care of
327  * upating software state representing the head change, if any.
328  */
329 static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
330 {
331         int limit = 1000;
332
333         while (limit-- > 0) {
334                 unsigned long err;
335
336                 err = sun4v_ldc_rx_set_qhead(lp->id, head);
337                 if (!err)
338                         return 0;
339
340                 if (err != HV_EWOULDBLOCK)
341                         return -EINVAL;
342
343                 udelay(1);
344         }
345
346         return -EBUSY;
347 }
348
349 static int send_tx_packet(struct ldc_channel *lp,
350                           struct ldc_packet *p,
351                           unsigned long new_tail)
352 {
353         BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
354
355         return set_tx_tail(lp, new_tail);
356 }
357
358 static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
359                                                  u8 stype, u8 ctrl,
360                                                  void *data, int dlen,
361                                                  unsigned long *new_tail)
362 {
363         struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
364
365         if (p) {
366                 memset(p, 0, sizeof(*p));
367                 p->type = LDC_CTRL;
368                 p->stype = stype;
369                 p->ctrl = ctrl;
370                 if (data)
371                         memcpy(p->u.u_data, data, dlen);
372         }
373         return p;
374 }
375
376 static int start_handshake(struct ldc_channel *lp)
377 {
378         struct ldc_packet *p;
379         struct ldc_version *ver;
380         unsigned long new_tail;
381
382         ver = &ver_arr[0];
383
384         ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
385                ver->major, ver->minor);
386
387         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
388                                    ver, sizeof(*ver), &new_tail);
389         if (p) {
390                 int err = send_tx_packet(lp, p, new_tail);
391                 if (!err)
392                         lp->flags &= ~LDC_FLAG_RESET;
393                 return err;
394         }
395         return -EBUSY;
396 }
397
398 static int send_version_nack(struct ldc_channel *lp,
399                              u16 major, u16 minor)
400 {
401         struct ldc_packet *p;
402         struct ldc_version ver;
403         unsigned long new_tail;
404
405         ver.major = major;
406         ver.minor = minor;
407
408         p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
409                                    &ver, sizeof(ver), &new_tail);
410         if (p) {
411                 ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
412                        ver.major, ver.minor);
413
414                 return send_tx_packet(lp, p, new_tail);
415         }
416         return -EBUSY;
417 }
418
419 static int send_version_ack(struct ldc_channel *lp,
420                             struct ldc_version *vp)
421 {
422         struct ldc_packet *p;
423         unsigned long new_tail;
424
425         p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
426                                    vp, sizeof(*vp), &new_tail);
427         if (p) {
428                 ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
429                        vp->major, vp->minor);
430
431                 return send_tx_packet(lp, p, new_tail);
432         }
433         return -EBUSY;
434 }
435
436 static int send_rts(struct ldc_channel *lp)
437 {
438         struct ldc_packet *p;
439         unsigned long new_tail;
440
441         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
442                                    &new_tail);
443         if (p) {
444                 p->env = lp->cfg.mode;
445                 p->seqid = 0;
446                 lp->rcv_nxt = 0;
447
448                 ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
449                        p->env, p->seqid);
450
451                 return send_tx_packet(lp, p, new_tail);
452         }
453         return -EBUSY;
454 }
455
456 static int send_rtr(struct ldc_channel *lp)
457 {
458         struct ldc_packet *p;
459         unsigned long new_tail;
460
461         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
462                                    &new_tail);
463         if (p) {
464                 p->env = lp->cfg.mode;
465                 p->seqid = 0;
466
467                 ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
468                        p->env, p->seqid);
469
470                 return send_tx_packet(lp, p, new_tail);
471         }
472         return -EBUSY;
473 }
474
475 static int send_rdx(struct ldc_channel *lp)
476 {
477         struct ldc_packet *p;
478         unsigned long new_tail;
479
480         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
481                                    &new_tail);
482         if (p) {
483                 p->env = 0;
484                 p->seqid = ++lp->snd_nxt;
485                 p->u.r.ackid = lp->rcv_nxt;
486
487                 ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
488                        p->env, p->seqid, p->u.r.ackid);
489
490                 return send_tx_packet(lp, p, new_tail);
491         }
492         return -EBUSY;
493 }
494
495 static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
496 {
497         struct ldc_packet *p;
498         unsigned long new_tail;
499         int err;
500
501         p = data_get_tx_packet(lp, &new_tail);
502         if (!p)
503                 return -EBUSY;
504         memset(p, 0, sizeof(*p));
505         p->type = data_pkt->type;
506         p->stype = LDC_NACK;
507         p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
508         p->seqid = lp->snd_nxt + 1;
509         p->u.r.ackid = lp->rcv_nxt;
510
511         ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
512                p->type, p->ctrl, p->seqid, p->u.r.ackid);
513
514         err = send_tx_packet(lp, p, new_tail);
515         if (!err)
516                 lp->snd_nxt++;
517
518         return err;
519 }
520
521 static int ldc_abort(struct ldc_channel *lp)
522 {
523         unsigned long hv_err;
524
525         ldcdbg(STATE, "ABORT\n");
526
527         /* We report but do not act upon the hypervisor errors because
528          * there really isn't much we can do if they fail at this point.
529          */
530         hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
531         if (hv_err)
532                 printk(KERN_ERR PFX "ldc_abort: "
533                        "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
534                        lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
535
536         hv_err = sun4v_ldc_tx_get_state(lp->id,
537                                         &lp->tx_head,
538                                         &lp->tx_tail,
539                                         &lp->chan_state);
540         if (hv_err)
541                 printk(KERN_ERR PFX "ldc_abort: "
542                        "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
543                        lp->id, hv_err);
544
545         hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
546         if (hv_err)
547                 printk(KERN_ERR PFX "ldc_abort: "
548                        "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
549                        lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
550
551         /* Refetch the RX queue state as well, because we could be invoked
552          * here in the queue processing context.
553          */
554         hv_err = sun4v_ldc_rx_get_state(lp->id,
555                                         &lp->rx_head,
556                                         &lp->rx_tail,
557                                         &lp->chan_state);
558         if (hv_err)
559                 printk(KERN_ERR PFX "ldc_abort: "
560                        "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
561                        lp->id, hv_err);
562
563         return -ECONNRESET;
564 }
565
566 static struct ldc_version *find_by_major(u16 major)
567 {
568         struct ldc_version *ret = NULL;
569         int i;
570
571         for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
572                 struct ldc_version *v = &ver_arr[i];
573                 if (v->major <= major) {
574                         ret = v;
575                         break;
576                 }
577         }
578         return ret;
579 }
580
581 static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
582 {
583         struct ldc_version *vap;
584         int err;
585
586         ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
587                vp->major, vp->minor);
588
589         if (lp->hs_state == LDC_HS_GOTVERS) {
590                 lp->hs_state = LDC_HS_OPEN;
591                 memset(&lp->ver, 0, sizeof(lp->ver));
592         }
593
594         vap = find_by_major(vp->major);
595         if (!vap) {
596                 err = send_version_nack(lp, 0, 0);
597         } else if (vap->major != vp->major) {
598                 err = send_version_nack(lp, vap->major, vap->minor);
599         } else {
600                 struct ldc_version ver = *vp;
601                 if (ver.minor > vap->minor)
602                         ver.minor = vap->minor;
603                 err = send_version_ack(lp, &ver);
604                 if (!err) {
605                         lp->ver = ver;
606                         lp->hs_state = LDC_HS_GOTVERS;
607                 }
608         }
609         if (err)
610                 return ldc_abort(lp);
611
612         return 0;
613 }
614
615 static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
616 {
617         ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
618                vp->major, vp->minor);
619
620         if (lp->hs_state == LDC_HS_GOTVERS) {
621                 if (lp->ver.major != vp->major ||
622                     lp->ver.minor != vp->minor)
623                         return ldc_abort(lp);
624         } else {
625                 lp->ver = *vp;
626                 lp->hs_state = LDC_HS_GOTVERS;
627         }
628         if (send_rts(lp))
629                 return ldc_abort(lp);
630         return 0;
631 }
632
633 static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
634 {
635         struct ldc_version *vap;
636         struct ldc_packet *p;
637         unsigned long new_tail;
638
639         if (vp->major == 0 && vp->minor == 0)
640                 return ldc_abort(lp);
641
642         vap = find_by_major(vp->major);
643         if (!vap)
644                 return ldc_abort(lp);
645
646         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
647                                            vap, sizeof(*vap),
648                                            &new_tail);
649         if (!p)
650                 return ldc_abort(lp);
651
652         return send_tx_packet(lp, p, new_tail);
653 }
654
655 static int process_version(struct ldc_channel *lp,
656                            struct ldc_packet *p)
657 {
658         struct ldc_version *vp;
659
660         vp = (struct ldc_version *) p->u.u_data;
661
662         switch (p->stype) {
663         case LDC_INFO:
664                 return process_ver_info(lp, vp);
665
666         case LDC_ACK:
667                 return process_ver_ack(lp, vp);
668
669         case LDC_NACK:
670                 return process_ver_nack(lp, vp);
671
672         default:
673                 return ldc_abort(lp);
674         }
675 }
676
677 static int process_rts(struct ldc_channel *lp,
678                        struct ldc_packet *p)
679 {
680         ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
681                p->stype, p->seqid, p->env);
682
683         if (p->stype     != LDC_INFO       ||
684             lp->hs_state != LDC_HS_GOTVERS ||
685             p->env       != lp->cfg.mode)
686                 return ldc_abort(lp);
687
688         lp->snd_nxt = p->seqid;
689         lp->rcv_nxt = p->seqid;
690         lp->hs_state = LDC_HS_SENTRTR;
691         if (send_rtr(lp))
692                 return ldc_abort(lp);
693
694         return 0;
695 }
696
697 static int process_rtr(struct ldc_channel *lp,
698                        struct ldc_packet *p)
699 {
700         ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
701                p->stype, p->seqid, p->env);
702
703         if (p->stype     != LDC_INFO ||
704             p->env       != lp->cfg.mode)
705                 return ldc_abort(lp);
706
707         lp->snd_nxt = p->seqid;
708         lp->hs_state = LDC_HS_COMPLETE;
709         ldc_set_state(lp, LDC_STATE_CONNECTED);
710         send_rdx(lp);
711
712         return LDC_EVENT_UP;
713 }
714
715 static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
716 {
717         return lp->rcv_nxt + 1 == seqid;
718 }
719
720 static int process_rdx(struct ldc_channel *lp,
721                        struct ldc_packet *p)
722 {
723         ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
724                p->stype, p->seqid, p->env, p->u.r.ackid);
725
726         if (p->stype != LDC_INFO ||
727             !(rx_seq_ok(lp, p->seqid)))
728                 return ldc_abort(lp);
729
730         lp->rcv_nxt = p->seqid;
731
732         lp->hs_state = LDC_HS_COMPLETE;
733         ldc_set_state(lp, LDC_STATE_CONNECTED);
734
735         return LDC_EVENT_UP;
736 }
737
738 static int process_control_frame(struct ldc_channel *lp,
739                                  struct ldc_packet *p)
740 {
741         switch (p->ctrl) {
742         case LDC_VERS:
743                 return process_version(lp, p);
744
745         case LDC_RTS:
746                 return process_rts(lp, p);
747
748         case LDC_RTR:
749                 return process_rtr(lp, p);
750
751         case LDC_RDX:
752                 return process_rdx(lp, p);
753
754         default:
755                 return ldc_abort(lp);
756         }
757 }
758
/* An LDC_ERR frame always aborts the channel; the frame contents
 * are not examined.
 */
static int process_error_frame(struct ldc_channel *lp,
                               struct ldc_packet *p)
{
        int err = ldc_abort(lp);

        return err;
}
764
765 static int process_data_ack(struct ldc_channel *lp,
766                             struct ldc_packet *ack)
767 {
768         unsigned long head = lp->tx_acked;
769         u32 ackid = ack->u.r.ackid;
770
771         while (1) {
772                 struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
773
774                 head = tx_advance(lp, head);
775
776                 if (p->seqid == ackid) {
777                         lp->tx_acked = head;
778                         return 0;
779                 }
780                 if (head == lp->tx_tail)
781                         return ldc_abort(lp);
782         }
783
784         return 0;
785 }
786
787 static void send_events(struct ldc_channel *lp, unsigned int event_mask)
788 {
789         if (event_mask & LDC_EVENT_RESET)
790                 lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
791         if (event_mask & LDC_EVENT_UP)
792                 lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
793         if (event_mask & LDC_EVENT_DATA_READY)
794                 lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
795 }
796
/* RX interrupt handler; dev_id is the struct ldc_channel.
 *
 * Under lp->lock it refreshes the RX head/tail and channel state
 * from the hypervisor and then either:
 *   - flushes the queue if the channel is flagged as reset,
 *   - only computes events if the handshake is already complete, or
 *   - consumes queued frames one at a time to drive the handshake.
 * The accumulated events are delivered through send_events() after
 * the lock has been dropped.  Always returns IRQ_HANDLED.
 */
static irqreturn_t ldc_rx(int irq, void *dev_id)
{
        struct ldc_channel *lp = dev_id;
        unsigned long orig_state, flags;
        unsigned int event_mask;

        spin_lock_irqsave(&lp->lock, flags);

        /* Remember the previous channel state to detect transitions. */
        orig_state = lp->chan_state;

        /* We should probably check for hypervisor errors here and
         * reset the LDC channel if we get one.
         */
        sun4v_ldc_rx_get_state(lp->id,
                               &lp->rx_head,
                               &lp->rx_tail,
                               &lp->chan_state);

        ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
               orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);

        event_mask = 0;

        /* RAW mode has no handshake: a channel that is up is treated
         * as connected immediately.  orig_state is resynchronised so
         * the transition check below does not fire a second event.
         */
        if (lp->cfg.mode == LDC_MODE_RAW &&
            lp->chan_state == LDC_CHANNEL_UP) {
                lp->hs_state = LDC_HS_COMPLETE;
                ldc_set_state(lp, LDC_STATE_CONNECTED);

                event_mask |= LDC_EVENT_UP;

                orig_state = lp->chan_state;
        }

        /* If we are in reset state, flush the RX queue and ignore
         * everything.
         */
        if (lp->flags & LDC_FLAG_RESET) {
                (void) __set_rx_head(lp, lp->rx_tail);
                goto out;
        }

        /* Once we finish the handshake, we let the ldc_read()
         * paths do all of the control frame and state management.
         * Just trigger the callback.
         */
        if (lp->hs_state == LDC_HS_COMPLETE) {
                /* Also entered by goto from the frame loop below, as
                 * soon as a frame completes the handshake.
                 */
handshake_complete:
                if (lp->chan_state != orig_state) {
                        unsigned int event = LDC_EVENT_RESET;

                        if (lp->chan_state == LDC_CHANNEL_UP)
                                event = LDC_EVENT_UP;

                        event_mask |= event;
                }
                if (lp->rx_head != lp->rx_tail)
                        event_mask |= LDC_EVENT_DATA_READY;

                goto out;
        }

        /* Handshake still in progress: if the channel state changed
         * underneath us, do not process any frames.
         */
        if (lp->chan_state != orig_state)
                goto out;

        /* Consume handshake frames one at a time. */
        while (lp->rx_head != lp->rx_tail) {
                struct ldc_packet *p;
                unsigned long new;
                int err;

                p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);

                switch (p->type) {
                case LDC_CTRL:
                        /* Positive returns are LDC_EVENT_* bits. */
                        err = process_control_frame(lp, p);
                        if (err > 0)
                                event_mask |= err;
                        break;

                case LDC_DATA:
                        event_mask |= LDC_EVENT_DATA_READY;
                        err = 0;
                        break;

                case LDC_ERR:
                        err = process_error_frame(lp, p);
                        break;

                default:
                        err = ldc_abort(lp);
                        break;
                }

                /* A negative result means the channel was aborted;
                 * stop without advancing the head.
                 */
                if (err < 0)
                        break;

                /* Advance the head by one packet, with wrap-around,
                 * and tell the hypervisor.
                 */
                new = lp->rx_head;
                new += LDC_PACKET_SIZE;
                if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
                        new = 0;
                lp->rx_head = new;

                err = __set_rx_head(lp, new);
                if (err < 0) {
                        (void) ldc_abort(lp);
                        break;
                }
                if (lp->hs_state == LDC_HS_COMPLETE)
                        goto handshake_complete;
        }

out:
        spin_unlock_irqrestore(&lp->lock, flags);

        /* Callbacks run without the channel lock held. */
        send_events(lp, event_mask);

        return IRQ_HANDLED;
}
914
915 static irqreturn_t ldc_tx(int irq, void *dev_id)
916 {
917         struct ldc_channel *lp = dev_id;
918         unsigned long flags, orig_state;
919         unsigned int event_mask = 0;
920
921         spin_lock_irqsave(&lp->lock, flags);
922
923         orig_state = lp->chan_state;
924
925         /* We should probably check for hypervisor errors here and
926          * reset the LDC channel if we get one.
927          */
928         sun4v_ldc_tx_get_state(lp->id,
929                                &lp->tx_head,
930                                &lp->tx_tail,
931                                &lp->chan_state);
932
933         ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
934                orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
935
936         if (lp->cfg.mode == LDC_MODE_RAW &&
937             lp->chan_state == LDC_CHANNEL_UP) {
938                 lp->hs_state = LDC_HS_COMPLETE;
939                 ldc_set_state(lp, LDC_STATE_CONNECTED);
940
941                 event_mask |= LDC_EVENT_UP;
942         }
943
944         spin_unlock_irqrestore(&lp->lock, flags);
945
946         send_events(lp, event_mask);
947
948         return IRQ_HANDLED;
949 }
950
/* List of all allocated channels, linked through ldc_channel.list.
 *
 * XXX ldc_alloc() and ldc_free() need to run under a mutex so
 * XXX that addition to and removal from the ldc_channel_list are
 * XXX atomic; otherwise the __ldc_channel_exists() check is
 * XXX totally pointless, as another thread can slip into ldc_alloc()
 * XXX and add a channel with the same ID.  There also needs to be
 * XXX a spinlock for ldc_channel_list.
 */
static HLIST_HEAD(ldc_channel_list);
959
960 static int __ldc_channel_exists(unsigned long id)
961 {
962         struct ldc_channel *lp;
963
964         hlist_for_each_entry(lp, &ldc_channel_list, list) {
965                 if (lp->id == id)
966                         return 1;
967         }
968         return 0;
969 }
970
971 static int alloc_queue(const char *name, unsigned long num_entries,
972                        struct ldc_packet **base, unsigned long *ra)
973 {
974         unsigned long size, order;
975         void *q;
976
977         size = num_entries * LDC_PACKET_SIZE;
978         order = get_order(size);
979
980         q = (void *) __get_free_pages(GFP_KERNEL, order);
981         if (!q) {
982                 printk(KERN_ERR PFX "Alloc of %s queue failed with "
983                        "size=%lu order=%lu\n", name, size, order);
984                 return -ENOMEM;
985         }
986
987         memset(q, 0, PAGE_SIZE << order);
988
989         *base = q;
990         *ra = __pa(q);
991
992         return 0;
993 }
994
995 static void free_queue(unsigned long num_entries, struct ldc_packet *q)
996 {
997         unsigned long size, order;
998
999         if (!q)
1000                 return;
1001
1002         size = num_entries * LDC_PACKET_SIZE;
1003         order = get_order(size);
1004
1005         free_pages((unsigned long)q, order);
1006 }
1007
1008 static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
1009 {
1010         u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1011         /* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */
1012
1013         cookie &= ~COOKIE_PGSZ_CODE;
1014
1015         return (cookie >> (13ULL + (szcode * 3ULL)));
1016 }
1017
1018 struct ldc_demap_arg {
1019         struct ldc_iommu *ldc_iommu;
1020         u64 cookie;
1021         unsigned long id;
1022 };
1023
1024 static void ldc_demap(void *arg, unsigned long entry, unsigned long npages)
1025 {
1026         struct ldc_demap_arg *ldc_demap_arg = arg;
1027         struct ldc_iommu *iommu = ldc_demap_arg->ldc_iommu;
1028         unsigned long id = ldc_demap_arg->id;
1029         u64 cookie = ldc_demap_arg->cookie;
1030         struct ldc_mtable_entry *base;
1031         unsigned long i, shift;
1032
1033         shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
1034         base = iommu->page_table + entry;
1035         for (i = 0; i < npages; i++) {
1036                 if (base->cookie)
1037                         sun4v_ldc_revoke(id, cookie + (i << shift),
1038                                          base->cookie);
1039                 base->mte = 0;
1040         }
1041 }
1042
1043 /* XXX Make this configurable... XXX */
1044 #define LDC_IOTABLE_SIZE        (8 * 1024)
1045
1046 struct iommu_tbl_ops ldc_iommu_ops = {
1047         .cookie_to_index = ldc_cookie_to_index,
1048         .demap = ldc_demap,
1049 };
1050
1051 static void setup_ldc_pool_hash(void)
1052 {
1053         unsigned int i;
1054         static bool do_once;
1055
1056         if (do_once)
1057                 return;
1058         do_once = true;
1059         for_each_possible_cpu(i)
1060                 per_cpu(ldc_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
1061 }
1062
1063
1064 static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
1065 {
1066         unsigned long sz, num_tsb_entries, tsbsize, order;
1067         struct ldc_iommu *ldc_iommu = &lp->iommu;
1068         struct iommu_table *iommu = &ldc_iommu->iommu_table;
1069         struct ldc_mtable_entry *table;
1070         unsigned long hv_err;
1071         int err;
1072
1073         num_tsb_entries = LDC_IOTABLE_SIZE;
1074         tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1075         setup_ldc_pool_hash();
1076         spin_lock_init(&ldc_iommu->lock);
1077
1078         sz = num_tsb_entries / 8;
1079         sz = (sz + 7UL) & ~7UL;
1080         iommu->map = kzalloc(sz, GFP_KERNEL);
1081         if (!iommu->map) {
1082                 printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1083                 return -ENOMEM;
1084         }
1085         iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
1086                             &ldc_iommu_ops, false, 1);
1087
1088         order = get_order(tsbsize);
1089
1090         table = (struct ldc_mtable_entry *)
1091                 __get_free_pages(GFP_KERNEL, order);
1092         err = -ENOMEM;
1093         if (!table) {
1094                 printk(KERN_ERR PFX "Alloc of MTE table failed, "
1095                        "size=%lu order=%lu\n", tsbsize, order);
1096                 goto out_free_map;
1097         }
1098
1099         memset(table, 0, PAGE_SIZE << order);
1100
1101         ldc_iommu->page_table = table;
1102
1103         hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1104                                          num_tsb_entries);
1105         err = -EINVAL;
1106         if (hv_err)
1107                 goto out_free_table;
1108
1109         return 0;
1110
1111 out_free_table:
1112         free_pages((unsigned long) table, order);
1113         ldc_iommu->page_table = NULL;
1114
1115 out_free_map:
1116         kfree(iommu->map);
1117         iommu->map = NULL;
1118
1119         return err;
1120 }
1121
1122 static void ldc_iommu_release(struct ldc_channel *lp)
1123 {
1124         struct ldc_iommu *ldc_iommu = &lp->iommu;
1125         struct iommu_table *iommu = &ldc_iommu->iommu_table;
1126         unsigned long num_tsb_entries, tsbsize, order;
1127
1128         (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1129
1130         num_tsb_entries = iommu->poolsize * iommu->nr_pools;
1131         tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1132         order = get_order(tsbsize);
1133
1134         free_pages((unsigned long) ldc_iommu->page_table, order);
1135         ldc_iommu->page_table = NULL;
1136
1137         kfree(iommu->map);
1138         iommu->map = NULL;
1139 }
1140
1141 struct ldc_channel *ldc_alloc(unsigned long id,
1142                               const struct ldc_channel_config *cfgp,
1143                               void *event_arg,
1144                               const char *name)
1145 {
1146         struct ldc_channel *lp;
1147         const struct ldc_mode_ops *mops;
1148         unsigned long dummy1, dummy2, hv_err;
1149         u8 mss, *mssbuf;
1150         int err;
1151
1152         err = -ENODEV;
1153         if (!ldom_domaining_enabled)
1154                 goto out_err;
1155
1156         err = -EINVAL;
1157         if (!cfgp)
1158                 goto out_err;
1159         if (!name)
1160                 goto out_err;
1161
1162         switch (cfgp->mode) {
1163         case LDC_MODE_RAW:
1164                 mops = &raw_ops;
1165                 mss = LDC_PACKET_SIZE;
1166                 break;
1167
1168         case LDC_MODE_UNRELIABLE:
1169                 mops = &nonraw_ops;
1170                 mss = LDC_PACKET_SIZE - 8;
1171                 break;
1172
1173         case LDC_MODE_STREAM:
1174                 mops = &stream_ops;
1175                 mss = LDC_PACKET_SIZE - 8 - 8;
1176                 break;
1177
1178         default:
1179                 goto out_err;
1180         }
1181
1182         if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1183                 goto out_err;
1184
1185         hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1186         err = -ENODEV;
1187         if (hv_err == HV_ECHANNEL)
1188                 goto out_err;
1189
1190         err = -EEXIST;
1191         if (__ldc_channel_exists(id))
1192                 goto out_err;
1193
1194         mssbuf = NULL;
1195
1196         lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1197         err = -ENOMEM;
1198         if (!lp)
1199                 goto out_err;
1200
1201         spin_lock_init(&lp->lock);
1202
1203         lp->id = id;
1204
1205         err = ldc_iommu_init(name, lp);
1206         if (err)
1207                 goto out_free_ldc;
1208
1209         lp->mops = mops;
1210         lp->mss = mss;
1211
1212         lp->cfg = *cfgp;
1213         if (!lp->cfg.mtu)
1214                 lp->cfg.mtu = LDC_DEFAULT_MTU;
1215
1216         if (lp->cfg.mode == LDC_MODE_STREAM) {
1217                 mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1218                 if (!mssbuf) {
1219                         err = -ENOMEM;
1220                         goto out_free_iommu;
1221                 }
1222                 lp->mssbuf = mssbuf;
1223         }
1224
1225         lp->event_arg = event_arg;
1226
1227         /* XXX allow setting via ldc_channel_config to override defaults
1228          * XXX or use some formula based upon mtu
1229          */
1230         lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1231         lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1232
1233         err = alloc_queue("TX", lp->tx_num_entries,
1234                           &lp->tx_base, &lp->tx_ra);
1235         if (err)
1236                 goto out_free_mssbuf;
1237
1238         err = alloc_queue("RX", lp->rx_num_entries,
1239                           &lp->rx_base, &lp->rx_ra);
1240         if (err)
1241                 goto out_free_txq;
1242
1243         lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1244
1245         lp->hs_state = LDC_HS_CLOSED;
1246         ldc_set_state(lp, LDC_STATE_INIT);
1247
1248         INIT_HLIST_NODE(&lp->list);
1249         hlist_add_head(&lp->list, &ldc_channel_list);
1250
1251         INIT_HLIST_HEAD(&lp->mh_list);
1252
1253         snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1254         snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1255
1256         err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
1257                           lp->rx_irq_name, lp);
1258         if (err)
1259                 goto out_free_txq;
1260
1261         err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
1262                           lp->tx_irq_name, lp);
1263         if (err) {
1264                 free_irq(lp->cfg.rx_irq, lp);
1265                 goto out_free_txq;
1266         }
1267
1268         return lp;
1269
1270 out_free_txq:
1271         free_queue(lp->tx_num_entries, lp->tx_base);
1272
1273 out_free_mssbuf:
1274         kfree(mssbuf);
1275
1276 out_free_iommu:
1277         ldc_iommu_release(lp);
1278
1279 out_free_ldc:
1280         kfree(lp);
1281
1282 out_err:
1283         return ERR_PTR(err);
1284 }
1285 EXPORT_SYMBOL(ldc_alloc);
1286
1287 void ldc_unbind(struct ldc_channel *lp)
1288 {
1289         if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1290                 free_irq(lp->cfg.rx_irq, lp);
1291                 free_irq(lp->cfg.tx_irq, lp);
1292                 lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1293         }
1294
1295         if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1296                 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1297                 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1298                 lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1299         }
1300         if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1301                 free_queue(lp->tx_num_entries, lp->tx_base);
1302                 free_queue(lp->rx_num_entries, lp->rx_base);
1303                 lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1304         }
1305
1306         ldc_set_state(lp, LDC_STATE_INIT);
1307 }
1308 EXPORT_SYMBOL(ldc_unbind);
1309
1310 void ldc_free(struct ldc_channel *lp)
1311 {
1312         ldc_unbind(lp);
1313         hlist_del(&lp->list);
1314         kfree(lp->mssbuf);
1315         ldc_iommu_release(lp);
1316
1317         kfree(lp);
1318 }
1319 EXPORT_SYMBOL(ldc_free);
1320
1321 /* Bind the channel.  This registers the LDC queues with
1322  * the hypervisor and puts the channel into a pseudo-listening
1323  * state.  This does not initiate a handshake, ldc_connect() does
1324  * that.
1325  */
1326 int ldc_bind(struct ldc_channel *lp)
1327 {
1328         unsigned long hv_err, flags;
1329         int err = -EINVAL;
1330
1331         if (lp->state != LDC_STATE_INIT)
1332                 return -EINVAL;
1333
1334         spin_lock_irqsave(&lp->lock, flags);
1335
1336         enable_irq(lp->cfg.rx_irq);
1337         enable_irq(lp->cfg.tx_irq);
1338
1339         lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1340
1341         err = -ENODEV;
1342         hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1343         if (hv_err)
1344                 goto out_free_irqs;
1345
1346         hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1347         if (hv_err)
1348                 goto out_free_irqs;
1349
1350         hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1351         if (hv_err)
1352                 goto out_unmap_tx;
1353
1354         hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1355         if (hv_err)
1356                 goto out_unmap_tx;
1357
1358         lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1359
1360         hv_err = sun4v_ldc_tx_get_state(lp->id,
1361                                         &lp->tx_head,
1362                                         &lp->tx_tail,
1363                                         &lp->chan_state);
1364         err = -EBUSY;
1365         if (hv_err)
1366                 goto out_unmap_rx;
1367
1368         lp->tx_acked = lp->tx_head;
1369
1370         lp->hs_state = LDC_HS_OPEN;
1371         ldc_set_state(lp, LDC_STATE_BOUND);
1372
1373         spin_unlock_irqrestore(&lp->lock, flags);
1374
1375         return 0;
1376
1377 out_unmap_rx:
1378         lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1379         sun4v_ldc_rx_qconf(lp->id, 0, 0);
1380
1381 out_unmap_tx:
1382         sun4v_ldc_tx_qconf(lp->id, 0, 0);
1383
1384 out_free_irqs:
1385         lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1386         free_irq(lp->cfg.tx_irq, lp);
1387         free_irq(lp->cfg.rx_irq, lp);
1388
1389         spin_unlock_irqrestore(&lp->lock, flags);
1390
1391         return err;
1392 }
1393 EXPORT_SYMBOL(ldc_bind);
1394
1395 int ldc_connect(struct ldc_channel *lp)
1396 {
1397         unsigned long flags;
1398         int err;
1399
1400         if (lp->cfg.mode == LDC_MODE_RAW)
1401                 return -EINVAL;
1402
1403         spin_lock_irqsave(&lp->lock, flags);
1404
1405         if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1406             !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1407             lp->hs_state != LDC_HS_OPEN)
1408                 err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
1409         else
1410                 err = start_handshake(lp);
1411
1412         spin_unlock_irqrestore(&lp->lock, flags);
1413
1414         return err;
1415 }
1416 EXPORT_SYMBOL(ldc_connect);
1417
1418 int ldc_disconnect(struct ldc_channel *lp)
1419 {
1420         unsigned long hv_err, flags;
1421         int err;
1422
1423         if (lp->cfg.mode == LDC_MODE_RAW)
1424                 return -EINVAL;
1425
1426         if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1427             !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1428                 return -EINVAL;
1429
1430         spin_lock_irqsave(&lp->lock, flags);
1431
1432         err = -ENODEV;
1433         hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1434         if (hv_err)
1435                 goto out_err;
1436
1437         hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1438         if (hv_err)
1439                 goto out_err;
1440
1441         hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1442         if (hv_err)
1443                 goto out_err;
1444
1445         hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1446         if (hv_err)
1447                 goto out_err;
1448
1449         ldc_set_state(lp, LDC_STATE_BOUND);
1450         lp->hs_state = LDC_HS_OPEN;
1451         lp->flags |= LDC_FLAG_RESET;
1452
1453         spin_unlock_irqrestore(&lp->lock, flags);
1454
1455         return 0;
1456
1457 out_err:
1458         sun4v_ldc_tx_qconf(lp->id, 0, 0);
1459         sun4v_ldc_rx_qconf(lp->id, 0, 0);
1460         free_irq(lp->cfg.tx_irq, lp);
1461         free_irq(lp->cfg.rx_irq, lp);
1462         lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1463                        LDC_FLAG_REGISTERED_QUEUES);
1464         ldc_set_state(lp, LDC_STATE_INIT);
1465
1466         spin_unlock_irqrestore(&lp->lock, flags);
1467
1468         return err;
1469 }
1470 EXPORT_SYMBOL(ldc_disconnect);
1471
1472 int ldc_state(struct ldc_channel *lp)
1473 {
1474         return lp->state;
1475 }
1476 EXPORT_SYMBOL(ldc_state);
1477
1478 static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1479 {
1480         struct ldc_packet *p;
1481         unsigned long new_tail;
1482         int err;
1483
1484         if (size > LDC_PACKET_SIZE)
1485                 return -EMSGSIZE;
1486
1487         p = data_get_tx_packet(lp, &new_tail);
1488         if (!p)
1489                 return -EAGAIN;
1490
1491         memcpy(p, buf, size);
1492
1493         err = send_tx_packet(lp, p, new_tail);
1494         if (!err)
1495                 err = size;
1496
1497         return err;
1498 }
1499
1500 static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1501 {
1502         struct ldc_packet *p;
1503         unsigned long hv_err, new;
1504         int err;
1505
1506         if (size < LDC_PACKET_SIZE)
1507                 return -EINVAL;
1508
1509         hv_err = sun4v_ldc_rx_get_state(lp->id,
1510                                         &lp->rx_head,
1511                                         &lp->rx_tail,
1512                                         &lp->chan_state);
1513         if (hv_err)
1514                 return ldc_abort(lp);
1515
1516         if (lp->chan_state == LDC_CHANNEL_DOWN ||
1517             lp->chan_state == LDC_CHANNEL_RESETTING)
1518                 return -ECONNRESET;
1519
1520         if (lp->rx_head == lp->rx_tail)
1521                 return 0;
1522
1523         p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1524         memcpy(buf, p, LDC_PACKET_SIZE);
1525
1526         new = rx_advance(lp, lp->rx_head);
1527         lp->rx_head = new;
1528
1529         err = __set_rx_head(lp, new);
1530         if (err < 0)
1531                 err = -ECONNRESET;
1532         else
1533                 err = LDC_PACKET_SIZE;
1534
1535         return err;
1536 }
1537
1538 static const struct ldc_mode_ops raw_ops = {
1539         .write          =       write_raw,
1540         .read           =       read_raw,
1541 };
1542
1543 static int write_nonraw(struct ldc_channel *lp, const void *buf,
1544                         unsigned int size)
1545 {
1546         unsigned long hv_err, tail;
1547         unsigned int copied;
1548         u32 seq;
1549         int err;
1550
1551         hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1552                                         &lp->chan_state);
1553         if (unlikely(hv_err))
1554                 return -EBUSY;
1555
1556         if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1557                 return ldc_abort(lp);
1558
1559         if (!tx_has_space_for(lp, size))
1560                 return -EAGAIN;
1561
1562         seq = lp->snd_nxt;
1563         copied = 0;
1564         tail = lp->tx_tail;
1565         while (copied < size) {
1566                 struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1567                 u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1568                             p->u.u_data :
1569                             p->u.r.r_data);
1570                 int data_len;
1571
1572                 p->type = LDC_DATA;
1573                 p->stype = LDC_INFO;
1574                 p->ctrl = 0;
1575
1576                 data_len = size - copied;
1577                 if (data_len > lp->mss)
1578                         data_len = lp->mss;
1579
1580                 BUG_ON(data_len > LDC_LEN);
1581
1582                 p->env = (data_len |
1583                           (copied == 0 ? LDC_START : 0) |
1584                           (data_len == size - copied ? LDC_STOP : 0));
1585
1586                 p->seqid = ++seq;
1587
1588                 ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1589                        p->type,
1590                        p->stype,
1591                        p->ctrl,
1592                        p->env,
1593                        p->seqid);
1594
1595                 memcpy(data, buf, data_len);
1596                 buf += data_len;
1597                 copied += data_len;
1598
1599                 tail = tx_advance(lp, tail);
1600         }
1601
1602         err = set_tx_tail(lp, tail);
1603         if (!err) {
1604                 lp->snd_nxt = seq;
1605                 err = size;
1606         }
1607
1608         return err;
1609 }
1610
1611 static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1612                       struct ldc_packet *first_frag)
1613 {
1614         int err;
1615
1616         if (first_frag)
1617                 lp->rcv_nxt = first_frag->seqid - 1;
1618
1619         err = send_data_nack(lp, p);
1620         if (err)
1621                 return err;
1622
1623         err = __set_rx_head(lp, lp->rx_tail);
1624         if (err < 0)
1625                 return ldc_abort(lp);
1626
1627         return 0;
1628 }
1629
1630 static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1631 {
1632         if (p->stype & LDC_ACK) {
1633                 int err = process_data_ack(lp, p);
1634                 if (err)
1635                         return err;
1636         }
1637         if (p->stype & LDC_NACK)
1638                 return ldc_abort(lp);
1639
1640         return 0;
1641 }
1642
1643 static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1644 {
1645         unsigned long dummy;
1646         int limit = 1000;
1647
1648         ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1649                cur_head, lp->rx_head, lp->rx_tail);
1650         while (limit-- > 0) {
1651                 unsigned long hv_err;
1652
1653                 hv_err = sun4v_ldc_rx_get_state(lp->id,
1654                                                 &dummy,
1655                                                 &lp->rx_tail,
1656                                                 &lp->chan_state);
1657                 if (hv_err)
1658                         return ldc_abort(lp);
1659
1660                 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1661                     lp->chan_state == LDC_CHANNEL_RESETTING)
1662                         return -ECONNRESET;
1663
1664                 if (cur_head != lp->rx_tail) {
1665                         ldcdbg(DATA, "DATA WAIT DONE "
1666                                "head[%lx] tail[%lx] chan_state[%lx]\n",
1667                                dummy, lp->rx_tail, lp->chan_state);
1668                         return 0;
1669                 }
1670
1671                 udelay(1);
1672         }
1673         return -EAGAIN;
1674 }
1675
1676 static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1677 {
1678         int err = __set_rx_head(lp, head);
1679
1680         if (err < 0)
1681                 return ldc_abort(lp);
1682
1683         lp->rx_head = head;
1684         return 0;
1685 }
1686
1687 static void send_data_ack(struct ldc_channel *lp)
1688 {
1689         unsigned long new_tail;
1690         struct ldc_packet *p;
1691
1692         p = data_get_tx_packet(lp, &new_tail);
1693         if (likely(p)) {
1694                 int err;
1695
1696                 memset(p, 0, sizeof(*p));
1697                 p->type = LDC_DATA;
1698                 p->stype = LDC_ACK;
1699                 p->ctrl = 0;
1700                 p->seqid = lp->snd_nxt + 1;
1701                 p->u.r.ackid = lp->rcv_nxt;
1702
1703                 err = send_tx_packet(lp, p, new_tail);
1704                 if (!err)
1705                         lp->snd_nxt++;
1706         }
1707 }
1708
1709 static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1710 {
1711         struct ldc_packet *first_frag;
1712         unsigned long hv_err, new;
1713         int err, copied;
1714
1715         hv_err = sun4v_ldc_rx_get_state(lp->id,
1716                                         &lp->rx_head,
1717                                         &lp->rx_tail,
1718                                         &lp->chan_state);
1719         if (hv_err)
1720                 return ldc_abort(lp);
1721
1722         if (lp->chan_state == LDC_CHANNEL_DOWN ||
1723             lp->chan_state == LDC_CHANNEL_RESETTING)
1724                 return -ECONNRESET;
1725
1726         if (lp->rx_head == lp->rx_tail)
1727                 return 0;
1728
1729         first_frag = NULL;
1730         copied = err = 0;
1731         new = lp->rx_head;
1732         while (1) {
1733                 struct ldc_packet *p;
1734                 int pkt_len;
1735
1736                 BUG_ON(new == lp->rx_tail);
1737                 p = lp->rx_base + (new / LDC_PACKET_SIZE);
1738
1739                 ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1740                        "rcv_nxt[%08x]\n",
1741                        p->type,
1742                        p->stype,
1743                        p->ctrl,
1744                        p->env,
1745                        p->seqid,
1746                        p->u.r.ackid,
1747                        lp->rcv_nxt);
1748
1749                 if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1750                         err = rx_bad_seq(lp, p, first_frag);
1751                         copied = 0;
1752                         break;
1753                 }
1754
1755                 if (p->type & LDC_CTRL) {
1756                         err = process_control_frame(lp, p);
1757                         if (err < 0)
1758                                 break;
1759                         err = 0;
1760                 }
1761
1762                 lp->rcv_nxt = p->seqid;
1763
1764                 if (!(p->type & LDC_DATA)) {
1765                         new = rx_advance(lp, new);
1766                         goto no_data;
1767                 }
1768                 if (p->stype & (LDC_ACK | LDC_NACK)) {
1769                         err = data_ack_nack(lp, p);
1770                         if (err)
1771                                 break;
1772                 }
1773                 if (!(p->stype & LDC_INFO)) {
1774                         new = rx_advance(lp, new);
1775                         err = rx_set_head(lp, new);
1776                         if (err)
1777                                 break;
1778                         goto no_data;
1779                 }
1780
1781                 pkt_len = p->env & LDC_LEN;
1782
1783                 /* Every initial packet starts with the START bit set.
1784                  *
1785                  * Singleton packets will have both START+STOP set.
1786                  *
1787                  * Fragments will have START set in the first frame, STOP
1788                  * set in the last frame, and neither bit set in middle
1789                  * frames of the packet.
1790                  *
1791                  * Therefore if we are at the beginning of a packet and
1792                  * we don't see START, or we are in the middle of a fragmented
1793                  * packet and do see START, we are unsynchronized and should
1794                  * flush the RX queue.
1795                  */
1796                 if ((first_frag == NULL && !(p->env & LDC_START)) ||
1797                     (first_frag != NULL &&  (p->env & LDC_START))) {
1798                         if (!first_frag)
1799                                 new = rx_advance(lp, new);
1800
1801                         err = rx_set_head(lp, new);
1802                         if (err)
1803                                 break;
1804
1805                         if (!first_frag)
1806                                 goto no_data;
1807                 }
1808                 if (!first_frag)
1809                         first_frag = p;
1810
1811                 if (pkt_len > size - copied) {
1812                         /* User didn't give us a big enough buffer,
1813                          * what to do?  This is a pretty serious error.
1814                          *
1815                          * Since we haven't updated the RX ring head to
1816                          * consume any of the packets, signal the error
1817                          * to the user and just leave the RX ring alone.
1818                          *
1819                          * This seems the best behavior because this allows
1820                          * a user of the LDC layer to start with a small
1821                          * RX buffer for ldc_read() calls and use -EMSGSIZE
1822                          * as a cue to enlarge it's read buffer.
1823                          */
1824                         err = -EMSGSIZE;
1825                         break;
1826                 }
1827
1828                 /* Ok, we are gonna eat this one.  */
1829                 new = rx_advance(lp, new);
1830
1831                 memcpy(buf,
1832                        (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1833                         p->u.u_data : p->u.r.r_data), pkt_len);
1834                 buf += pkt_len;
1835                 copied += pkt_len;
1836
1837                 if (p->env & LDC_STOP)
1838                         break;
1839
1840 no_data:
1841                 if (new == lp->rx_tail) {
1842                         err = rx_data_wait(lp, new);
1843                         if (err)
1844                                 break;
1845                 }
1846         }
1847
1848         if (!err)
1849                 err = rx_set_head(lp, new);
1850
1851         if (err && first_frag)
1852                 lp->rcv_nxt = first_frag->seqid - 1;
1853
1854         if (!err) {
1855                 err = copied;
1856                 if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1857                         send_data_ack(lp);
1858         }
1859
1860         return err;
1861 }
1862
1863 static const struct ldc_mode_ops nonraw_ops = {
1864         .write          =       write_nonraw,
1865         .read           =       read_nonraw,
1866 };
1867
1868 static int write_stream(struct ldc_channel *lp, const void *buf,
1869                         unsigned int size)
1870 {
1871         if (size > lp->cfg.mtu)
1872                 size = lp->cfg.mtu;
1873         return write_nonraw(lp, buf, size);
1874 }
1875
1876 static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1877 {
1878         if (!lp->mssbuf_len) {
1879                 int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1880                 if (err < 0)
1881                         return err;
1882
1883                 lp->mssbuf_len = err;
1884                 lp->mssbuf_off = 0;
1885         }
1886
1887         if (size > lp->mssbuf_len)
1888                 size = lp->mssbuf_len;
1889         memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1890
1891         lp->mssbuf_off += size;
1892         lp->mssbuf_len -= size;
1893
1894         return size;
1895 }
1896
1897 static const struct ldc_mode_ops stream_ops = {
1898         .write          =       write_stream,
1899         .read           =       read_stream,
1900 };
1901
1902 int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1903 {
1904         unsigned long flags;
1905         int err;
1906
1907         if (!buf)
1908                 return -EINVAL;
1909
1910         if (!size)
1911                 return 0;
1912
1913         spin_lock_irqsave(&lp->lock, flags);
1914
1915         if (lp->hs_state != LDC_HS_COMPLETE)
1916                 err = -ENOTCONN;
1917         else
1918                 err = lp->mops->write(lp, buf, size);
1919
1920         spin_unlock_irqrestore(&lp->lock, flags);
1921
1922         return err;
1923 }
1924 EXPORT_SYMBOL(ldc_write);
1925
1926 int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1927 {
1928         unsigned long flags;
1929         int err;
1930
1931         if (!buf)
1932                 return -EINVAL;
1933
1934         if (!size)
1935                 return 0;
1936
1937         spin_lock_irqsave(&lp->lock, flags);
1938
1939         if (lp->hs_state != LDC_HS_COMPLETE)
1940                 err = -ENOTCONN;
1941         else
1942                 err = lp->mops->read(lp, buf, size);
1943
1944         spin_unlock_irqrestore(&lp->lock, flags);
1945
1946         return err;
1947 }
1948 EXPORT_SYMBOL(ldc_read);
1949
1950 static u64 pagesize_code(void)
1951 {
1952         switch (PAGE_SIZE) {
1953         default:
1954         case (8ULL * 1024ULL):
1955                 return 0;
1956         case (64ULL * 1024ULL):
1957                 return 1;
1958         case (512ULL * 1024ULL):
1959                 return 2;
1960         case (4ULL * 1024ULL * 1024ULL):
1961                 return 3;
1962         case (32ULL * 1024ULL * 1024ULL):
1963                 return 4;
1964         case (256ULL * 1024ULL * 1024ULL):
1965                 return 5;
1966         }
1967 }
1968
1969 static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1970 {
1971         return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1972                 (index << PAGE_SHIFT) |
1973                 page_offset);
1974 }
1975
1976
1977 static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1978                                              unsigned long npages)
1979 {
1980         long entry;
1981
1982         entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_table, npages,
1983                                      NULL, __this_cpu_read(ldc_pool_hash));
1984         if (unlikely(entry < 0))
1985                 return NULL;
1986
1987         return iommu->page_table + entry;
1988 }
1989
1990 static u64 perm_to_mte(unsigned int map_perm)
1991 {
1992         u64 mte_base;
1993
1994         mte_base = pagesize_code();
1995
1996         if (map_perm & LDC_MAP_SHADOW) {
1997                 if (map_perm & LDC_MAP_R)
1998                         mte_base |= LDC_MTE_COPY_R;
1999                 if (map_perm & LDC_MAP_W)
2000                         mte_base |= LDC_MTE_COPY_W;
2001         }
2002         if (map_perm & LDC_MAP_DIRECT) {
2003                 if (map_perm & LDC_MAP_R)
2004                         mte_base |= LDC_MTE_READ;
2005                 if (map_perm & LDC_MAP_W)
2006                         mte_base |= LDC_MTE_WRITE;
2007                 if (map_perm & LDC_MAP_X)
2008                         mte_base |= LDC_MTE_EXEC;
2009         }
2010         if (map_perm & LDC_MAP_IO) {
2011                 if (map_perm & LDC_MAP_R)
2012                         mte_base |= LDC_MTE_IOMMU_R;
2013                 if (map_perm & LDC_MAP_W)
2014                         mte_base |= LDC_MTE_IOMMU_W;
2015         }
2016
2017         return mte_base;
2018 }
2019
2020 static int pages_in_region(unsigned long base, long len)
2021 {
2022         int count = 0;
2023
2024         do {
2025                 unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2026
2027                 len -= (new - base);
2028                 base = new;
2029                 count++;
2030         } while (len > 0);
2031
2032         return count;
2033 }
2034
2035 struct cookie_state {
2036         struct ldc_mtable_entry         *page_table;
2037         struct ldc_trans_cookie         *cookies;
2038         u64                             mte_base;
2039         u64                             prev_cookie;
2040         u32                             pte_idx;
2041         u32                             nc;
2042 };
2043
2044 static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2045                          unsigned long off, unsigned long len)
2046 {
2047         do {
2048                 unsigned long tlen, new = pa + PAGE_SIZE;
2049                 u64 this_cookie;
2050
2051                 sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2052
2053                 tlen = PAGE_SIZE;
2054                 if (off)
2055                         tlen = PAGE_SIZE - off;
2056                 if (tlen > len)
2057                         tlen = len;
2058
2059                 this_cookie = make_cookie(sp->pte_idx,
2060                                           pagesize_code(), off);
2061
2062                 off = 0;
2063
2064                 if (this_cookie == sp->prev_cookie) {
2065                         sp->cookies[sp->nc - 1].cookie_size += tlen;
2066                 } else {
2067                         sp->cookies[sp->nc].cookie_addr = this_cookie;
2068                         sp->cookies[sp->nc].cookie_size = tlen;
2069                         sp->nc++;
2070                 }
2071                 sp->prev_cookie = this_cookie + tlen;
2072
2073                 sp->pte_idx++;
2074
2075                 len -= tlen;
2076                 pa = new;
2077         } while (len > 0);
2078 }
2079
2080 static int sg_count_one(struct scatterlist *sg)
2081 {
2082         unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2083         long len = sg->length;
2084
2085         if ((sg->offset | len) & (8UL - 1))
2086                 return -EFAULT;
2087
2088         return pages_in_region(base + sg->offset, len);
2089 }
2090
2091 static int sg_count_pages(struct scatterlist *sg, int num_sg)
2092 {
2093         int count;
2094         int i;
2095
2096         count = 0;
2097         for (i = 0; i < num_sg; i++) {
2098                 int err = sg_count_one(sg + i);
2099                 if (err < 0)
2100                         return err;
2101                 count += err;
2102         }
2103
2104         return count;
2105 }
2106
2107 int ldc_map_sg(struct ldc_channel *lp,
2108                struct scatterlist *sg, int num_sg,
2109                struct ldc_trans_cookie *cookies, int ncookies,
2110                unsigned int map_perm)
2111 {
2112         unsigned long i, npages;
2113         struct ldc_mtable_entry *base;
2114         struct cookie_state state;
2115         struct ldc_iommu *iommu;
2116         int err;
2117
2118         if (map_perm & ~LDC_MAP_ALL)
2119                 return -EINVAL;
2120
2121         err = sg_count_pages(sg, num_sg);
2122         if (err < 0)
2123                 return err;
2124
2125         npages = err;
2126         if (err > ncookies)
2127                 return -EMSGSIZE;
2128
2129         iommu = &lp->iommu;
2130
2131         base = alloc_npages(iommu, npages);
2132
2133         if (!base)
2134                 return -ENOMEM;
2135
2136         state.page_table = iommu->page_table;
2137         state.cookies = cookies;
2138         state.mte_base = perm_to_mte(map_perm);
2139         state.prev_cookie = ~(u64)0;
2140         state.pte_idx = (base - iommu->page_table);
2141         state.nc = 0;
2142
2143         for (i = 0; i < num_sg; i++)
2144                 fill_cookies(&state, page_to_pfn(sg_page(&sg[i])) << PAGE_SHIFT,
2145                              sg[i].offset, sg[i].length);
2146
2147         return state.nc;
2148 }
2149 EXPORT_SYMBOL(ldc_map_sg);
2150
2151 int ldc_map_single(struct ldc_channel *lp,
2152                    void *buf, unsigned int len,
2153                    struct ldc_trans_cookie *cookies, int ncookies,
2154                    unsigned int map_perm)
2155 {
2156         unsigned long npages, pa;
2157         struct ldc_mtable_entry *base;
2158         struct cookie_state state;
2159         struct ldc_iommu *iommu;
2160
2161         if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2162                 return -EINVAL;
2163
2164         pa = __pa(buf);
2165         if ((pa | len) & (8UL - 1))
2166                 return -EFAULT;
2167
2168         npages = pages_in_region(pa, len);
2169
2170         iommu = &lp->iommu;
2171
2172         base = alloc_npages(iommu, npages);
2173
2174         if (!base)
2175                 return -ENOMEM;
2176
2177         state.page_table = iommu->page_table;
2178         state.cookies = cookies;
2179         state.mte_base = perm_to_mte(map_perm);
2180         state.prev_cookie = ~(u64)0;
2181         state.pte_idx = (base - iommu->page_table);
2182         state.nc = 0;
2183         fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2184         BUG_ON(state.nc > ncookies);
2185
2186         return state.nc;
2187 }
2188 EXPORT_SYMBOL(ldc_map_single);
2189
2190
2191 static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2192                         u64 cookie, u64 size)
2193 {
2194         unsigned long npages;
2195         struct ldc_demap_arg demap_arg;
2196
2197         demap_arg.ldc_iommu = iommu;
2198         demap_arg.cookie = cookie;
2199         demap_arg.id = id;
2200
2201         npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2202         iommu_tbl_range_free(&iommu->iommu_table, cookie, npages, true,
2203                              &demap_arg);
2204
2205 }
2206
2207 void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2208                int ncookies)
2209 {
2210         struct ldc_iommu *iommu = &lp->iommu;
2211         int i;
2212         unsigned long flags;
2213
2214         spin_lock_irqsave(&iommu->lock, flags);
2215         for (i = 0; i < ncookies; i++) {
2216                 u64 addr = cookies[i].cookie_addr;
2217                 u64 size = cookies[i].cookie_size;
2218
2219                 free_npages(lp->id, iommu, addr, size);
2220         }
2221         spin_unlock_irqrestore(&iommu->lock, flags);
2222 }
2223 EXPORT_SYMBOL(ldc_unmap);
2224
2225 int ldc_copy(struct ldc_channel *lp, int copy_dir,
2226              void *buf, unsigned int len, unsigned long offset,
2227              struct ldc_trans_cookie *cookies, int ncookies)
2228 {
2229         unsigned int orig_len;
2230         unsigned long ra;
2231         int i;
2232
2233         if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2234                 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2235                        lp->id, copy_dir);
2236                 return -EINVAL;
2237         }
2238
2239         ra = __pa(buf);
2240         if ((ra | len | offset) & (8UL - 1)) {
2241                 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2242                        "ra[%lx] len[%x] offset[%lx]\n",
2243                        lp->id, ra, len, offset);
2244                 return -EFAULT;
2245         }
2246
2247         if (lp->hs_state != LDC_HS_COMPLETE ||
2248             (lp->flags & LDC_FLAG_RESET)) {
2249                 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2250                        "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2251                 return -ECONNRESET;
2252         }
2253
2254         orig_len = len;
2255         for (i = 0; i < ncookies; i++) {
2256                 unsigned long cookie_raddr = cookies[i].cookie_addr;
2257                 unsigned long this_len = cookies[i].cookie_size;
2258                 unsigned long actual_len;
2259
2260                 if (unlikely(offset)) {
2261                         unsigned long this_off = offset;
2262
2263                         if (this_off > this_len)
2264                                 this_off = this_len;
2265
2266                         offset -= this_off;
2267                         this_len -= this_off;
2268                         if (!this_len)
2269                                 continue;
2270                         cookie_raddr += this_off;
2271                 }
2272
2273                 if (this_len > len)
2274                         this_len = len;
2275
2276                 while (1) {
2277                         unsigned long hv_err;
2278
2279                         hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2280                                                 cookie_raddr, ra,
2281                                                 this_len, &actual_len);
2282                         if (unlikely(hv_err)) {
2283                                 printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2284                                        "HV error %lu\n",
2285                                        lp->id, hv_err);
2286                                 if (lp->hs_state != LDC_HS_COMPLETE ||
2287                                     (lp->flags & LDC_FLAG_RESET))
2288                                         return -ECONNRESET;
2289                                 else
2290                                         return -EFAULT;
2291                         }
2292
2293                         cookie_raddr += actual_len;
2294                         ra += actual_len;
2295                         len -= actual_len;
2296                         if (actual_len == this_len)
2297                                 break;
2298
2299                         this_len -= actual_len;
2300                 }
2301
2302                 if (!len)
2303                         break;
2304         }
2305
2306         /* It is caller policy what to do about short copies.
2307          * For example, a networking driver can declare the
2308          * packet a runt and drop it.
2309          */
2310
2311         return orig_len - len;
2312 }
2313 EXPORT_SYMBOL(ldc_copy);
2314
2315 void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2316                           struct ldc_trans_cookie *cookies, int *ncookies,
2317                           unsigned int map_perm)
2318 {
2319         void *buf;
2320         int err;
2321
2322         if (len & (8UL - 1))
2323                 return ERR_PTR(-EINVAL);
2324
2325         buf = kzalloc(len, GFP_KERNEL);
2326         if (!buf)
2327                 return ERR_PTR(-ENOMEM);
2328
2329         err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2330         if (err < 0) {
2331                 kfree(buf);
2332                 return ERR_PTR(err);
2333         }
2334         *ncookies = err;
2335
2336         return buf;
2337 }
2338 EXPORT_SYMBOL(ldc_alloc_exp_dring);
2339
/* Undo ldc_alloc_exp_dring(): unmap the cookies, then free the
 * buffer.  'len' is accepted but not used here.
 */
void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
			struct ldc_trans_cookie *cookies, int ncookies)
{
	ldc_unmap(lp, cookies, ncookies);

	kfree(buf);
}
EXPORT_SYMBOL(ldc_free_exp_dring);
2347
2348 static int __init ldc_init(void)
2349 {
2350         unsigned long major, minor;
2351         struct mdesc_handle *hp;
2352         const u64 *v;
2353         int err;
2354         u64 mp;
2355
2356         hp = mdesc_grab();
2357         if (!hp)
2358                 return -ENODEV;
2359
2360         mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2361         err = -ENODEV;
2362         if (mp == MDESC_NODE_NULL)
2363                 goto out;
2364
2365         v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2366         if (!v)
2367                 goto out;
2368
2369         major = 1;
2370         minor = 0;
2371         if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2372                 printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2373                 goto out;
2374         }
2375
2376         printk(KERN_INFO "%s", version);
2377
2378         if (!*v) {
2379                 printk(KERN_INFO PFX "Domaining disabled.\n");
2380                 goto out;
2381         }
2382         ldom_domaining_enabled = 1;
2383         err = 0;
2384
2385 out:
2386         mdesc_release(hp);
2387         return err;
2388 }
2389
2390 core_initcall(ldc_init);