]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - Documentation/mic/mpssd/mpssd.c
Merge remote-tracking branch 'access_once/linux-next'
[karo-tx-linux.git] / Documentation / mic / mpssd / mpssd.c
1 /*
2  * Intel MIC Platform Software Stack (MPSS)
3  *
4  * Copyright(c) 2013 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License, version 2, as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * General Public License for more details.
14  *
15  * The full GNU General Public License is included in this distribution in
16  * the file called "COPYING".
17  *
18  * Intel MIC User Space Tools.
19  */
20
21 #define _GNU_SOURCE
22
23 #include <stdlib.h>
24 #include <fcntl.h>
25 #include <getopt.h>
26 #include <assert.h>
27 #include <unistd.h>
28 #include <stdbool.h>
29 #include <signal.h>
30 #include <poll.h>
31 #include <features.h>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/mman.h>
35 #include <sys/socket.h>
36 #include <linux/virtio_ring.h>
37 #include <linux/virtio_net.h>
38 #include <linux/virtio_console.h>
39 #include <linux/virtio_blk.h>
40 #include <linux/version.h>
41 #include "mpssd.h"
42 #include <linux/mic_ioctl.h>
43 #include <linux/mic_common.h>
44 #include <tools/endian.h>
45
/*
 * Forward declaration of init_mic(). Its void *(*)(void *) shape matches a
 * pthread start routine - presumably it is used as one; confirm at the
 * call site.
 */
static void *init_mic(void *arg);

/* Log stream - presumably the one mpsslog() writes to; TODO confirm. */
static FILE *logfp;
/*
 * File-scope struct mic_info instance. NOTE(review): the name suggests it
 * anchors the list of MIC devices - confirm against its users.
 */
static struct mic_info mic_list;
50
/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/*
 * Minimum of x and y after converting both to 'type'.
 * Each argument is evaluated exactly once.
 */
#define min_t(type, x, y) ({                            \
                type __min1 = (x);                      \
                type __min2 = (y);                      \
                __min1 < __min2 ? __min1 : __min2; })

/*
 * Align addr on a size boundary - adjust address up/down if needed.
 * 'size' has to be a power of two for the mask arithmetic to hold.
 * Both arguments are fully parenthesized so that expression arguments
 * (e.g. "a + b" or "c ? x : y") are aligned as a whole.
 */
#define _ALIGN_DOWN(addr, size)  ((addr) & ~((size) - 1))
#define _ALIGN_UP(addr, size)    _ALIGN_DOWN((addr) + (size) - 1, size)

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)     _ALIGN_UP(addr, size)

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)        _ALIGN(addr, PAGE_SIZE)

/* Force a single volatile access to x so the compiler cannot cache it. */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

#define GSO_ENABLED             1
#define MAX_GSO_SIZE            (64 * 1024)
#define ETH_H_LEN               14
/* Largest packet buffer: GSO payload plus Ethernet header, 64-byte aligned */
#define MAX_NET_PKT_SIZE        (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
/* Offset of the first vring in the mapping set up by init_vr() */
#define MIC_DEVICE_PAGE_END     0x1000

#ifndef VIRTIO_NET_HDR_F_DATA_VALID
#define VIRTIO_NET_HDR_F_DATA_VALID     2       /* Csum is valid */
#endif
79
80 static struct {
81         struct mic_device_desc dd;
82         struct mic_vqconfig vqconfig[2];
83         __u32 host_features, guest_acknowledgements;
84         struct virtio_console_config cons_config;
85 } virtcons_dev_page = {
86         .dd = {
87                 .type = VIRTIO_ID_CONSOLE,
88                 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
89                 .feature_len = sizeof(virtcons_dev_page.host_features),
90                 .config_len = sizeof(virtcons_dev_page.cons_config),
91         },
92         .vqconfig[0] = {
93                 .num = htole16(MIC_VRING_ENTRIES),
94         },
95         .vqconfig[1] = {
96                 .num = htole16(MIC_VRING_ENTRIES),
97         },
98 };
99
100 static struct {
101         struct mic_device_desc dd;
102         struct mic_vqconfig vqconfig[2];
103         __u32 host_features, guest_acknowledgements;
104         struct virtio_net_config net_config;
105 } virtnet_dev_page = {
106         .dd = {
107                 .type = VIRTIO_ID_NET,
108                 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
109                 .feature_len = sizeof(virtnet_dev_page.host_features),
110                 .config_len = sizeof(virtnet_dev_page.net_config),
111         },
112         .vqconfig[0] = {
113                 .num = htole16(MIC_VRING_ENTRIES),
114         },
115         .vqconfig[1] = {
116                 .num = htole16(MIC_VRING_ENTRIES),
117         },
118 #if GSO_ENABLED
119         .host_features = htole32(
120                 1 << VIRTIO_NET_F_CSUM |
121                 1 << VIRTIO_NET_F_GSO |
122                 1 << VIRTIO_NET_F_GUEST_TSO4 |
123                 1 << VIRTIO_NET_F_GUEST_TSO6 |
124                 1 << VIRTIO_NET_F_GUEST_ECN),
125 #else
126                 .host_features = 0,
127 #endif
128 };
129
130 static const char *mic_config_dir = "/etc/mpss";
131 static const char *virtblk_backend = "VIRTBLK_BACKEND";
132 static struct {
133         struct mic_device_desc dd;
134         struct mic_vqconfig vqconfig[1];
135         __u32 host_features, guest_acknowledgements;
136         struct virtio_blk_config blk_config;
137 } virtblk_dev_page = {
138         .dd = {
139                 .type = VIRTIO_ID_BLOCK,
140                 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
141                 .feature_len = sizeof(virtblk_dev_page.host_features),
142                 .config_len = sizeof(virtblk_dev_page.blk_config),
143         },
144         .vqconfig[0] = {
145                 .num = htole16(MIC_VRING_ENTRIES),
146         },
147         .host_features =
148                 htole32(1<<VIRTIO_BLK_F_SEG_MAX),
149         .blk_config = {
150                 .seg_max = htole32(MIC_VRING_ENTRIES - 2),
151                 .capacity = htole64(0),
152          }
153 };
154
155 static char *myname;
156
157 static int
158 tap_configure(struct mic_info *mic, char *dev)
159 {
160         pid_t pid;
161         char *ifargv[7];
162         char ipaddr[IFNAMSIZ];
163         int ret = 0;
164
165         pid = fork();
166         if (pid == 0) {
167                 ifargv[0] = "ip";
168                 ifargv[1] = "link";
169                 ifargv[2] = "set";
170                 ifargv[3] = dev;
171                 ifargv[4] = "up";
172                 ifargv[5] = NULL;
173                 mpsslog("Configuring %s\n", dev);
174                 ret = execvp("ip", ifargv);
175                 if (ret < 0) {
176                         mpsslog("%s execvp failed errno %s\n",
177                                 mic->name, strerror(errno));
178                         return ret;
179                 }
180         }
181         if (pid < 0) {
182                 mpsslog("%s fork failed errno %s\n",
183                         mic->name, strerror(errno));
184                 return ret;
185         }
186
187         ret = waitpid(pid, NULL, 0);
188         if (ret < 0) {
189                 mpsslog("%s waitpid failed errno %s\n",
190                         mic->name, strerror(errno));
191                 return ret;
192         }
193
194         snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
195
196         pid = fork();
197         if (pid == 0) {
198                 ifargv[0] = "ip";
199                 ifargv[1] = "addr";
200                 ifargv[2] = "add";
201                 ifargv[3] = ipaddr;
202                 ifargv[4] = "dev";
203                 ifargv[5] = dev;
204                 ifargv[6] = NULL;
205                 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
206                 ret = execvp("ip", ifargv);
207                 if (ret < 0) {
208                         mpsslog("%s execvp failed errno %s\n",
209                                 mic->name, strerror(errno));
210                         return ret;
211                 }
212         }
213         if (pid < 0) {
214                 mpsslog("%s fork failed errno %s\n",
215                         mic->name, strerror(errno));
216                 return ret;
217         }
218
219         ret = waitpid(pid, NULL, 0);
220         if (ret < 0) {
221                 mpsslog("%s waitpid failed errno %s\n",
222                         mic->name, strerror(errno));
223                 return ret;
224         }
225         mpsslog("MIC name %s %s %d DONE!\n",
226                 mic->name, __func__, __LINE__);
227         return 0;
228 }
229
230 static int tun_alloc(struct mic_info *mic, char *dev)
231 {
232         struct ifreq ifr;
233         int fd, err;
234 #if GSO_ENABLED
235         unsigned offload;
236 #endif
237         fd = open("/dev/net/tun", O_RDWR);
238         if (fd < 0) {
239                 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
240                 goto done;
241         }
242
243         memset(&ifr, 0, sizeof(ifr));
244
245         ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
246         if (*dev)
247                 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
248
249         err = ioctl(fd, TUNSETIFF, (void *)&ifr);
250         if (err < 0) {
251                 mpsslog("%s %s %d TUNSETIFF failed %s\n",
252                         mic->name, __func__, __LINE__, strerror(errno));
253                 close(fd);
254                 return err;
255         }
256 #if GSO_ENABLED
257         offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
258
259         err = ioctl(fd, TUNSETOFFLOAD, offload);
260         if (err < 0) {
261                 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
262                         mic->name, __func__, __LINE__, strerror(errno));
263                 close(fd);
264                 return err;
265         }
266 #endif
267         strcpy(dev, ifr.ifr_name);
268         mpsslog("Created TAP %s\n", dev);
269 done:
270         return fd;
271 }
272
/* Slots of the pollfd array used by virtio_net() */
#define NET_FD_VIRTIO_NET 0	/* virtio net file descriptor */
#define NET_FD_TUN 1		/* TAP file descriptor */
#define MAX_NET_FD 2		/* number of slots in the array */
276
277 static void set_dp(struct mic_info *mic, int type, void *dp)
278 {
279         switch (type) {
280         case VIRTIO_ID_CONSOLE:
281                 mic->mic_console.console_dp = dp;
282                 return;
283         case VIRTIO_ID_NET:
284                 mic->mic_net.net_dp = dp;
285                 return;
286         case VIRTIO_ID_BLOCK:
287                 mic->mic_virtblk.block_dp = dp;
288                 return;
289         }
290         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
291         assert(0);
292 }
293
294 static void *get_dp(struct mic_info *mic, int type)
295 {
296         switch (type) {
297         case VIRTIO_ID_CONSOLE:
298                 return mic->mic_console.console_dp;
299         case VIRTIO_ID_NET:
300                 return mic->mic_net.net_dp;
301         case VIRTIO_ID_BLOCK:
302                 return mic->mic_virtblk.block_dp;
303         }
304         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
305         assert(0);
306         return NULL;
307 }
308
309 static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
310 {
311         struct mic_device_desc *d;
312         int i;
313         void *dp = get_dp(mic, type);
314
315         for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
316                 i += mic_total_desc_size(d)) {
317                 d = dp + i;
318
319                 /* End of list */
320                 if (d->type == 0)
321                         break;
322
323                 if (d->type == -1)
324                         continue;
325
326                 mpsslog("%s %s d-> type %d d %p\n",
327                         mic->name, __func__, d->type, d);
328
329                 if (d->type == (__u8)type)
330                         return d;
331         }
332         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
333         return NULL;
334 }
335
336 /* See comments in vhost.c for explanation of next_desc() */
337 static unsigned next_desc(struct vring_desc *desc)
338 {
339         unsigned int next;
340
341         if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
342                 return -1U;
343         next = le16toh(desc->next);
344         return next;
345 }
346
347 /* Sum up all the IOVEC length */
348 static ssize_t
349 sum_iovec_len(struct mic_copy_desc *copy)
350 {
351         ssize_t sum = 0;
352         int i;
353
354         for (i = 0; i < copy->iovcnt; i++)
355                 sum += copy->iov[i].iov_len;
356         return sum;
357 }
358
359 static inline void verify_out_len(struct mic_info *mic,
360         struct mic_copy_desc *copy)
361 {
362         if (copy->out_len != sum_iovec_len(copy)) {
363                 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
364                         mic->name, __func__, __LINE__,
365                         copy->out_len, sum_iovec_len(copy));
366                 assert(copy->out_len == sum_iovec_len(copy));
367         }
368 }
369
370 /* Display an iovec */
371 static void
372 disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
373            const char *s, int line)
374 {
375         int i;
376
377         for (i = 0; i < copy->iovcnt; i++)
378                 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
379                         mic->name, s, line, i,
380                         copy->iov[i].iov_base, copy->iov[i].iov_len);
381 }
382
383 static inline __u16 read_avail_idx(struct mic_vring *vr)
384 {
385         return ACCESS_ONCE(vr->info->avail_idx);
386 }
387
388 static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
389                                 struct mic_copy_desc *copy, ssize_t len)
390 {
391         copy->vr_idx = tx ? 0 : 1;
392         copy->update_used = true;
393         if (type == VIRTIO_ID_NET)
394                 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
395         else
396                 copy->iov[0].iov_len = len;
397 }
398
399 /* Central API which triggers the copies */
400 static int
401 mic_virtio_copy(struct mic_info *mic, int fd,
402                 struct mic_vring *vr, struct mic_copy_desc *copy)
403 {
404         int ret;
405
406         ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
407         if (ret) {
408                 mpsslog("%s %s %d errno %s ret %d\n",
409                         mic->name, __func__, __LINE__,
410                         strerror(errno), ret);
411         }
412         return ret;
413 }
414
/*
 * Size in bytes of a vring with 'num' entries: the descriptor table and
 * the available ring (three __u16 words plus one per entry) rounded up to
 * 'align', followed by the used ring (three __u16 words plus one
 * vring_used_elem per entry). 'align' has to be a power of two.
 */
static inline unsigned _vring_size(unsigned int num, unsigned long align)
{
        size_t desc_and_avail = sizeof(struct vring_desc) * num
                                + sizeof(__u16) * (3 + num);
        size_t used = sizeof(__u16) * 3
                      + sizeof(struct vring_used_elem) * num;

        return ((desc_and_avail + align - 1) & ~(align - 1)) + used;
}
421
422 /*
423  * This initialization routine requires at least one
424  * vring i.e. vr0. vr1 is optional.
425  */
426 static void *
427 init_vr(struct mic_info *mic, int fd, int type,
428         struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
429 {
430         int vr_size;
431         char *va;
432
433         vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
434                                          MIC_VIRTIO_RING_ALIGN) +
435                              sizeof(struct _mic_vring_info));
436         va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
437                 PROT_READ, MAP_SHARED, fd, 0);
438         if (MAP_FAILED == va) {
439                 mpsslog("%s %s %d mmap failed errno %s\n",
440                         mic->name, __func__, __LINE__,
441                         strerror(errno));
442                 goto done;
443         }
444         set_dp(mic, type, va);
445         vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
446         vr0->info = vr0->va +
447                 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
448         vring_init(&vr0->vr,
449                    MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
450         mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
451                 __func__, mic->name, vr0->va, vr0->info, vr_size,
452                 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
453         mpsslog("magic 0x%x expected 0x%x\n",
454                 le32toh(vr0->info->magic), MIC_MAGIC + type);
455         assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
456         if (vr1) {
457                 vr1->va = (struct mic_vring *)
458                         &va[MIC_DEVICE_PAGE_END + vr_size];
459                 vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
460                         MIC_VIRTIO_RING_ALIGN);
461                 vring_init(&vr1->vr,
462                            MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
463                 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
464                         __func__, mic->name, vr1->va, vr1->info, vr_size,
465                         _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
466                 mpsslog("magic 0x%x expected 0x%x\n",
467                         le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
468                 assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
469         }
470 done:
471         return va;
472 }
473
474 static int
475 wait_for_card_driver(struct mic_info *mic, int fd, int type)
476 {
477         struct pollfd pollfd;
478         int err;
479         struct mic_device_desc *desc = get_device_desc(mic, type);
480         __u8 prev_status;
481
482         if (!desc)
483                 return -ENODEV;
484         prev_status = desc->status;
485         pollfd.fd = fd;
486         mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
487                 mic->name, __func__, type, desc->status);
488
489         while (1) {
490                 pollfd.events = POLLIN;
491                 pollfd.revents = 0;
492                 err = poll(&pollfd, 1, -1);
493                 if (err < 0) {
494                         mpsslog("%s %s poll failed %s\n",
495                                 mic->name, __func__, strerror(errno));
496                         continue;
497                 }
498
499                 if (pollfd.revents) {
500                         if (desc->status != prev_status) {
501                                 mpsslog("%s %s Waiting... desc-> type %d "
502                                         "status 0x%x\n",
503                                         mic->name, __func__, type,
504                                         desc->status);
505                                 prev_status = desc->status;
506                         }
507                         if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
508                                 mpsslog("%s %s poll.revents %d\n",
509                                         mic->name, __func__, pollfd.revents);
510                                 mpsslog("%s %s desc-> type %d status 0x%x\n",
511                                         mic->name, __func__, type,
512                                         desc->status);
513                                 break;
514                         }
515                 }
516         }
517         return 0;
518 }
519
520 /* Spin till we have some descriptors */
521 static void
522 spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
523 {
524         __u16 avail_idx = read_avail_idx(vr);
525
526         while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
527 #ifdef DEBUG
528                 mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
529                         mic->name, __func__,
530                         le16toh(vr->vr.avail->idx), vr->info->avail_idx);
531 #endif
532                 sched_yield();
533         }
534 }
535
536 static void *
537 virtio_net(void *arg)
538 {
539         static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
540         static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
541         struct iovec vnet_iov[2][2] = {
542                 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
543                   { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
544                 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
545                   { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
546         };
547         struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
548         struct mic_info *mic = (struct mic_info *)arg;
549         char if_name[IFNAMSIZ];
550         struct pollfd net_poll[MAX_NET_FD];
551         struct mic_vring tx_vr, rx_vr;
552         struct mic_copy_desc copy;
553         struct mic_device_desc *desc;
554         int err;
555
556         snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
557         mic->mic_net.tap_fd = tun_alloc(mic, if_name);
558         if (mic->mic_net.tap_fd < 0)
559                 goto done;
560
561         if (tap_configure(mic, if_name))
562                 goto done;
563         mpsslog("MIC name %s id %d\n", mic->name, mic->id);
564
565         net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
566         net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
567         net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
568         net_poll[NET_FD_TUN].events = POLLIN;
569
570         if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
571                                   VIRTIO_ID_NET, &tx_vr, &rx_vr,
572                 virtnet_dev_page.dd.num_vq)) {
573                 mpsslog("%s init_vr failed %s\n",
574                         mic->name, strerror(errno));
575                 goto done;
576         }
577
578         copy.iovcnt = 2;
579         desc = get_device_desc(mic, VIRTIO_ID_NET);
580
581         while (1) {
582                 ssize_t len;
583
584                 net_poll[NET_FD_VIRTIO_NET].revents = 0;
585                 net_poll[NET_FD_TUN].revents = 0;
586
587                 /* Start polling for data from tap and virtio net */
588                 err = poll(net_poll, 2, -1);
589                 if (err < 0) {
590                         mpsslog("%s poll failed %s\n",
591                                 __func__, strerror(errno));
592                         continue;
593                 }
594                 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
595                         err = wait_for_card_driver(mic,
596                                                    mic->mic_net.virtio_net_fd,
597                                                    VIRTIO_ID_NET);
598                         if (err) {
599                                 mpsslog("%s %s %d Exiting...\n",
600                                         mic->name, __func__, __LINE__);
601                                 break;
602                         }
603                 }
604                 /*
605                  * Check if there is data to be read from TUN and write to
606                  * virtio net fd if there is.
607                  */
608                 if (net_poll[NET_FD_TUN].revents & POLLIN) {
609                         copy.iov = iov0;
610                         len = readv(net_poll[NET_FD_TUN].fd,
611                                 copy.iov, copy.iovcnt);
612                         if (len > 0) {
613                                 struct virtio_net_hdr *hdr
614                                         = (struct virtio_net_hdr *)vnet_hdr[0];
615
616                                 /* Disable checksums on the card since we are on
617                                    a reliable PCIe link */
618                                 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
619 #ifdef DEBUG
620                                 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
621                                         __func__, __LINE__, hdr->flags);
622                                 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
623                                         copy.out_len, hdr->gso_type);
624 #endif
625 #ifdef DEBUG
626                                 disp_iovec(mic, copy, __func__, __LINE__);
627                                 mpsslog("%s %s %d read from tap 0x%lx\n",
628                                         mic->name, __func__, __LINE__,
629                                         len);
630 #endif
631                                 spin_for_descriptors(mic, &tx_vr);
632                                 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
633                                              len);
634
635                                 err = mic_virtio_copy(mic,
636                                         mic->mic_net.virtio_net_fd, &tx_vr,
637                                         &copy);
638                                 if (err < 0) {
639                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
640                                                 mic->name, __func__, __LINE__,
641                                                 strerror(errno));
642                                 }
643                                 if (!err)
644                                         verify_out_len(mic, &copy);
645 #ifdef DEBUG
646                                 disp_iovec(mic, copy, __func__, __LINE__);
647                                 mpsslog("%s %s %d wrote to net 0x%lx\n",
648                                         mic->name, __func__, __LINE__,
649                                         sum_iovec_len(&copy));
650 #endif
651                                 /* Reinitialize IOV for next run */
652                                 iov0[1].iov_len = MAX_NET_PKT_SIZE;
653                         } else if (len < 0) {
654                                 disp_iovec(mic, &copy, __func__, __LINE__);
655                                 mpsslog("%s %s %d read failed %s ", mic->name,
656                                         __func__, __LINE__, strerror(errno));
657                                 mpsslog("cnt %d sum %zd\n",
658                                         copy.iovcnt, sum_iovec_len(&copy));
659                         }
660                 }
661
662                 /*
663                  * Check if there is data to be read from virtio net and
664                  * write to TUN if there is.
665                  */
666                 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
667                         while (rx_vr.info->avail_idx !=
668                                 le16toh(rx_vr.vr.avail->idx)) {
669                                 copy.iov = iov1;
670                                 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
671                                              MAX_NET_PKT_SIZE
672                                         + sizeof(struct virtio_net_hdr));
673
674                                 err = mic_virtio_copy(mic,
675                                         mic->mic_net.virtio_net_fd, &rx_vr,
676                                         &copy);
677                                 if (!err) {
678 #ifdef DEBUG
679                                         struct virtio_net_hdr *hdr
680                                                 = (struct virtio_net_hdr *)
681                                                         vnet_hdr[1];
682
683                                         mpsslog("%s %s %d hdr->flags 0x%x, ",
684                                                 mic->name, __func__, __LINE__,
685                                                 hdr->flags);
686                                         mpsslog("out_len %d gso_type 0x%x\n",
687                                                 copy.out_len,
688                                                 hdr->gso_type);
689 #endif
690                                         /* Set the correct output iov_len */
691                                         iov1[1].iov_len = copy.out_len -
692                                                 sizeof(struct virtio_net_hdr);
693                                         verify_out_len(mic, &copy);
694 #ifdef DEBUG
695                                         disp_iovec(mic, copy, __func__,
696                                                    __LINE__);
697                                         mpsslog("%s %s %d ",
698                                                 mic->name, __func__, __LINE__);
699                                         mpsslog("read from net 0x%lx\n",
700                                                 sum_iovec_len(copy));
701 #endif
702                                         len = writev(net_poll[NET_FD_TUN].fd,
703                                                 copy.iov, copy.iovcnt);
704                                         if (len != sum_iovec_len(&copy)) {
705                                                 mpsslog("Tun write failed %s ",
706                                                         strerror(errno));
707                                                 mpsslog("len 0x%zx ", len);
708                                                 mpsslog("read_len 0x%zx\n",
709                                                         sum_iovec_len(&copy));
710                                         } else {
711 #ifdef DEBUG
712                                                 disp_iovec(mic, &copy, __func__,
713                                                            __LINE__);
714                                                 mpsslog("%s %s %d ",
715                                                         mic->name, __func__,
716                                                         __LINE__);
717                                                 mpsslog("wrote to tap 0x%lx\n",
718                                                         len);
719 #endif
720                                         }
721                                 } else {
722                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
723                                                 mic->name, __func__, __LINE__,
724                                                 strerror(errno));
725                                         break;
726                                 }
727                         }
728                 }
729                 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
730                         mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
731         }
732 done:
733         pthread_exit(NULL);
734 }
735
/* virtio_console: slots of the pollfd array used by virtio_console() */
#define VIRTIO_CONSOLE_FD 0			/* virtio console file descriptor */
#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)	/* pseudoterminal master */
#define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
/* NOTE(review): no user of MAX_BUFFER_SIZE is visible here - confirm use */
#define MAX_BUFFER_SIZE PAGE_SIZE
741
742 static void *
743 virtio_console(void *arg)
744 {
745         static __u8 vcons_buf[2][PAGE_SIZE];
746         struct iovec vcons_iov[2] = {
747                 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
748                 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
749         };
750         struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
751         struct mic_info *mic = (struct mic_info *)arg;
752         int err;
753         struct pollfd console_poll[MAX_CONSOLE_FD];
754         int pty_fd;
755         char *pts_name;
756         ssize_t len;
757         struct mic_vring tx_vr, rx_vr;
758         struct mic_copy_desc copy;
759         struct mic_device_desc *desc;
760
761         pty_fd = posix_openpt(O_RDWR);
762         if (pty_fd < 0) {
763                 mpsslog("can't open a pseudoterminal master device: %s\n",
764                         strerror(errno));
765                 goto _return;
766         }
767         pts_name = ptsname(pty_fd);
768         if (pts_name == NULL) {
769                 mpsslog("can't get pts name\n");
770                 goto _close_pty;
771         }
772         printf("%s console message goes to %s\n", mic->name, pts_name);
773         mpsslog("%s console message goes to %s\n", mic->name, pts_name);
774         err = grantpt(pty_fd);
775         if (err < 0) {
776                 mpsslog("can't grant access: %s %s\n",
777                         pts_name, strerror(errno));
778                 goto _close_pty;
779         }
780         err = unlockpt(pty_fd);
781         if (err < 0) {
782                 mpsslog("can't unlock a pseudoterminal: %s %s\n",
783                         pts_name, strerror(errno));
784                 goto _close_pty;
785         }
786         console_poll[MONITOR_FD].fd = pty_fd;
787         console_poll[MONITOR_FD].events = POLLIN;
788
789         console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
790         console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
791
792         if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
793                                   VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
794                 virtcons_dev_page.dd.num_vq)) {
795                 mpsslog("%s init_vr failed %s\n",
796                         mic->name, strerror(errno));
797                 goto _close_pty;
798         }
799
800         copy.iovcnt = 1;
801         desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
802
803         for (;;) {
804                 console_poll[MONITOR_FD].revents = 0;
805                 console_poll[VIRTIO_CONSOLE_FD].revents = 0;
806                 err = poll(console_poll, MAX_CONSOLE_FD, -1);
807                 if (err < 0) {
808                         mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
809                                 strerror(errno));
810                         continue;
811                 }
812                 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
813                         err = wait_for_card_driver(mic,
814                                         mic->mic_console.virtio_console_fd,
815                                         VIRTIO_ID_CONSOLE);
816                         if (err) {
817                                 mpsslog("%s %s %d Exiting...\n",
818                                         mic->name, __func__, __LINE__);
819                                 break;
820                         }
821                 }
822
823                 if (console_poll[MONITOR_FD].revents & POLLIN) {
824                         copy.iov = iov0;
825                         len = readv(pty_fd, copy.iov, copy.iovcnt);
826                         if (len > 0) {
827 #ifdef DEBUG
828                                 disp_iovec(mic, copy, __func__, __LINE__);
829                                 mpsslog("%s %s %d read from tap 0x%lx\n",
830                                         mic->name, __func__, __LINE__,
831                                         len);
832 #endif
833                                 spin_for_descriptors(mic, &tx_vr);
834                                 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
835                                              &copy, len);
836
837                                 err = mic_virtio_copy(mic,
838                                         mic->mic_console.virtio_console_fd,
839                                         &tx_vr, &copy);
840                                 if (err < 0) {
841                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
842                                                 mic->name, __func__, __LINE__,
843                                                 strerror(errno));
844                                 }
845                                 if (!err)
846                                         verify_out_len(mic, &copy);
847 #ifdef DEBUG
848                                 disp_iovec(mic, copy, __func__, __LINE__);
849                                 mpsslog("%s %s %d wrote to net 0x%lx\n",
850                                         mic->name, __func__, __LINE__,
851                                         sum_iovec_len(copy));
852 #endif
853                                 /* Reinitialize IOV for next run */
854                                 iov0->iov_len = PAGE_SIZE;
855                         } else if (len < 0) {
856                                 disp_iovec(mic, &copy, __func__, __LINE__);
857                                 mpsslog("%s %s %d read failed %s ",
858                                         mic->name, __func__, __LINE__,
859                                         strerror(errno));
860                                 mpsslog("cnt %d sum %zd\n",
861                                         copy.iovcnt, sum_iovec_len(&copy));
862                         }
863                 }
864
865                 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
866                         while (rx_vr.info->avail_idx !=
867                                 le16toh(rx_vr.vr.avail->idx)) {
868                                 copy.iov = iov1;
869                                 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
870                                              &copy, PAGE_SIZE);
871
872                                 err = mic_virtio_copy(mic,
873                                         mic->mic_console.virtio_console_fd,
874                                         &rx_vr, &copy);
875                                 if (!err) {
876                                         /* Set the correct output iov_len */
877                                         iov1->iov_len = copy.out_len;
878                                         verify_out_len(mic, &copy);
879 #ifdef DEBUG
880                                         disp_iovec(mic, copy, __func__,
881                                                    __LINE__);
882                                         mpsslog("%s %s %d ",
883                                                 mic->name, __func__, __LINE__);
884                                         mpsslog("read from net 0x%lx\n",
885                                                 sum_iovec_len(copy));
886 #endif
887                                         len = writev(pty_fd,
888                                                 copy.iov, copy.iovcnt);
889                                         if (len != sum_iovec_len(&copy)) {
890                                                 mpsslog("Tun write failed %s ",
891                                                         strerror(errno));
892                                                 mpsslog("len 0x%zx ", len);
893                                                 mpsslog("read_len 0x%zx\n",
894                                                         sum_iovec_len(&copy));
895                                         } else {
896 #ifdef DEBUG
897                                                 disp_iovec(mic, copy, __func__,
898                                                            __LINE__);
899                                                 mpsslog("%s %s %d ",
900                                                         mic->name, __func__,
901                                                         __LINE__);
902                                                 mpsslog("wrote to tap 0x%lx\n",
903                                                         len);
904 #endif
905                                         }
906                                 } else {
907                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
908                                                 mic->name, __func__, __LINE__,
909                                                 strerror(errno));
910                                         break;
911                                 }
912                         }
913                 }
914                 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
915                         mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
916         }
917 _close_pty:
918         close(pty_fd);
919 _return:
920         pthread_exit(NULL);
921 }
922
923 static void
924 add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
925 {
926         char path[PATH_MAX];
927         int fd, err;
928
929         snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
930         fd = open(path, O_RDWR);
931         if (fd < 0) {
932                 mpsslog("Could not open %s %s\n", path, strerror(errno));
933                 return;
934         }
935
936         err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
937         if (err < 0) {
938                 mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
939                 close(fd);
940                 return;
941         }
942         switch (dd->type) {
943         case VIRTIO_ID_NET:
944                 mic->mic_net.virtio_net_fd = fd;
945                 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
946                 break;
947         case VIRTIO_ID_CONSOLE:
948                 mic->mic_console.virtio_console_fd = fd;
949                 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
950                 break;
951         case VIRTIO_ID_BLOCK:
952                 mic->mic_virtblk.virtio_block_fd = fd;
953                 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
954                 break;
955         }
956 }
957
958 static bool
959 set_backend_file(struct mic_info *mic)
960 {
961         FILE *config;
962         char buff[PATH_MAX], *line, *evv, *p;
963
964         snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
965         config = fopen(buff, "r");
966         if (config == NULL)
967                 return false;
968         do {  /* look for "virtblk_backend=XXXX" */
969                 line = fgets(buff, PATH_MAX, config);
970                 if (line == NULL)
971                         break;
972                 if (*line == '#')
973                         continue;
974                 p = strchr(line, '\n');
975                 if (p)
976                         *p = '\0';
977         } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
978         fclose(config);
979         if (line == NULL)
980                 return false;
981         evv = strchr(line, '=');
982         if (evv == NULL)
983                 return false;
984         mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
985         if (mic->mic_virtblk.backend_file == NULL) {
986                 mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
987                 return false;
988         }
989         strcpy(mic->mic_virtblk.backend_file, evv + 1);
990         return true;
991 }
992
993 #define SECTOR_SIZE 512
994 static bool
995 set_backend_size(struct mic_info *mic)
996 {
997         mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
998                 SEEK_END);
999         if (mic->mic_virtblk.backend_size < 0) {
1000                 mpsslog("%s: can't seek: %s\n",
1001                         mic->name, mic->mic_virtblk.backend_file);
1002                 return false;
1003         }
1004         virtblk_dev_page.blk_config.capacity =
1005                 mic->mic_virtblk.backend_size / SECTOR_SIZE;
1006         if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
1007                 virtblk_dev_page.blk_config.capacity++;
1008
1009         virtblk_dev_page.blk_config.capacity =
1010                 htole64(virtblk_dev_page.blk_config.capacity);
1011
1012         return true;
1013 }
1014
1015 static bool
1016 open_backend(struct mic_info *mic)
1017 {
1018         if (!set_backend_file(mic))
1019                 goto _error_exit;
1020         mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
1021         if (mic->mic_virtblk.backend < 0) {
1022                 mpsslog("%s: can't open: %s\n", mic->name,
1023                         mic->mic_virtblk.backend_file);
1024                 goto _error_free;
1025         }
1026         if (!set_backend_size(mic))
1027                 goto _error_close;
1028         mic->mic_virtblk.backend_addr = mmap(NULL,
1029                 mic->mic_virtblk.backend_size,
1030                 PROT_READ|PROT_WRITE, MAP_SHARED,
1031                 mic->mic_virtblk.backend, 0L);
1032         if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1033                 mpsslog("%s: can't map: %s %s\n",
1034                         mic->name, mic->mic_virtblk.backend_file,
1035                         strerror(errno));
1036                 goto _error_close;
1037         }
1038         return true;
1039
1040  _error_close:
1041         close(mic->mic_virtblk.backend);
1042  _error_free:
1043         free(mic->mic_virtblk.backend_file);
1044  _error_exit:
1045         return false;
1046 }
1047
1048 static void
1049 close_backend(struct mic_info *mic)
1050 {
1051         munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1052         close(mic->mic_virtblk.backend);
1053         free(mic->mic_virtblk.backend_file);
1054 }
1055
1056 static bool
1057 start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1058 {
1059         if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1060                 mpsslog("%s: blk_config is not 8 byte aligned.\n",
1061                         mic->name);
1062                 return false;
1063         }
1064         add_virtio_device(mic, &virtblk_dev_page.dd);
1065         if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1066                                   VIRTIO_ID_BLOCK, vring, NULL,
1067                                   virtblk_dev_page.dd.num_vq)) {
1068                 mpsslog("%s init_vr failed %s\n",
1069                         mic->name, strerror(errno));
1070                 return false;
1071         }
1072         return true;
1073 }
1074
1075 static void
1076 stop_virtblk(struct mic_info *mic)
1077 {
1078         int vr_size, ret;
1079
1080         vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
1081                                          MIC_VIRTIO_RING_ALIGN) +
1082                              sizeof(struct _mic_vring_info));
1083         ret = munmap(mic->mic_virtblk.block_dp,
1084                 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1085         if (ret < 0)
1086                 mpsslog("%s munmap errno %d\n", mic->name, errno);
1087         close(mic->mic_virtblk.virtio_block_fd);
1088 }
1089
/*
 * header_error_check - validate the first descriptor of a block request.
 * @desc: descriptor expected to carry a struct virtio_blk_outhdr.
 *
 * The descriptor must be exactly header sized, must be followed by another
 * descriptor and must be readable by the device (not device-writable).
 *
 * Returns 0 when the descriptor is acceptable and VIRTIO_BLK_S_IOERR
 * otherwise.  The result is a status byte, so a negative errno (which
 * would be truncated by the __u8 return type) is never returned.
 */
static __u8
header_error_check(struct vring_desc *desc)
{
	__u16 flags = le16toh(desc->flags);

	if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
		mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
			__func__, __LINE__);
		return VIRTIO_BLK_S_IOERR;
	}
	if (!(flags & VRING_DESC_F_NEXT)) {
		mpsslog("%s() %d: alone\n",
			__func__, __LINE__);
		return VIRTIO_BLK_S_IOERR;
	}
	if (flags & VRING_DESC_F_WRITE) {
		mpsslog("%s() %d: not read\n",
			__func__, __LINE__);
		return VIRTIO_BLK_S_IOERR;
	}
	return 0;
}
1110
1111 static int
1112 read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1113 {
1114         struct iovec iovec;
1115         struct mic_copy_desc copy;
1116
1117         iovec.iov_len = sizeof(*hdr);
1118         iovec.iov_base = hdr;
1119         copy.iov = &iovec;
1120         copy.iovcnt = 1;
1121         copy.vr_idx = 0;  /* only one vring on virtio_block */
1122         copy.update_used = false;  /* do not update used index */
1123         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1124 }
1125
1126 static int
1127 transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1128 {
1129         struct mic_copy_desc copy;
1130
1131         copy.iov = iovec;
1132         copy.iovcnt = iovcnt;
1133         copy.vr_idx = 0;  /* only one vring on virtio_block */
1134         copy.update_used = false;  /* do not update used index */
1135         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1136 }
1137
/*
 * status_error_check - validate the status descriptor of a block request.
 * @desc: descriptor expected to hold the one byte request status.
 *
 * Returns 0 when the descriptor is exactly one byte long and
 * VIRTIO_BLK_S_IOERR otherwise.  The result is a status byte, so a
 * negative errno (which the __u8 return type would truncate) is never
 * returned.
 */
static __u8
status_error_check(struct vring_desc *desc)
{
	if (le32toh(desc->len) != sizeof(__u8)) {
		mpsslog("%s() %d: length is not sizeof(status)\n",
			__func__, __LINE__);
		return VIRTIO_BLK_S_IOERR;
	}
	return 0;
}
1148
1149 static int
1150 write_status(int fd, __u8 *status)
1151 {
1152         struct iovec iovec;
1153         struct mic_copy_desc copy;
1154
1155         iovec.iov_base = status;
1156         iovec.iov_len = sizeof(*status);
1157         copy.iov = &iovec;
1158         copy.iovcnt = 1;
1159         copy.vr_idx = 0;  /* only one vring on virtio_block */
1160         copy.update_used = true; /* Update used index */
1161         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1162 }
1163
1164 #ifndef VIRTIO_BLK_T_GET_ID
1165 #define VIRTIO_BLK_T_GET_ID    8
1166 #endif
1167
1168 static void *
1169 virtio_block(void *arg)
1170 {
1171         struct mic_info *mic = (struct mic_info *)arg;
1172         int ret;
1173         struct pollfd block_poll;
1174         struct mic_vring vring;
1175         __u16 avail_idx;
1176         __u32 desc_idx;
1177         struct vring_desc *desc;
1178         struct iovec *iovec, *piov;
1179         __u8 status;
1180         __u32 buffer_desc_idx;
1181         struct virtio_blk_outhdr hdr;
1182         void *fos;
1183
1184         for (;;) {  /* forever */
1185                 if (!open_backend(mic)) { /* No virtblk */
1186                         for (mic->mic_virtblk.signaled = 0;
1187                                 !mic->mic_virtblk.signaled;)
1188                                 sleep(1);
1189                         continue;
1190                 }
1191
1192                 /* backend file is specified. */
1193                 if (!start_virtblk(mic, &vring))
1194                         goto _close_backend;
1195                 iovec = malloc(sizeof(*iovec) *
1196                         le32toh(virtblk_dev_page.blk_config.seg_max));
1197                 if (!iovec) {
1198                         mpsslog("%s: can't alloc iovec: %s\n",
1199                                 mic->name, strerror(ENOMEM));
1200                         goto _stop_virtblk;
1201                 }
1202
1203                 block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1204                 block_poll.events = POLLIN;
1205                 for (mic->mic_virtblk.signaled = 0;
1206                      !mic->mic_virtblk.signaled;) {
1207                         block_poll.revents = 0;
1208                                         /* timeout in 1 sec to see signaled */
1209                         ret = poll(&block_poll, 1, 1000);
1210                         if (ret < 0) {
1211                                 mpsslog("%s %d: poll failed: %s\n",
1212                                         __func__, __LINE__,
1213                                         strerror(errno));
1214                                 continue;
1215                         }
1216
1217                         if (!(block_poll.revents & POLLIN)) {
1218 #ifdef DEBUG
1219                                 mpsslog("%s %d: block_poll.revents=0x%x\n",
1220                                         __func__, __LINE__, block_poll.revents);
1221 #endif
1222                                 continue;
1223                         }
1224
1225                         /* POLLIN */
1226                         while (vring.info->avail_idx !=
1227                                 le16toh(vring.vr.avail->idx)) {
1228                                 /* read header element */
1229                                 avail_idx =
1230                                         vring.info->avail_idx &
1231                                         (vring.vr.num - 1);
1232                                 desc_idx = le16toh(
1233                                         vring.vr.avail->ring[avail_idx]);
1234                                 desc = &vring.vr.desc[desc_idx];
1235 #ifdef DEBUG
1236                                 mpsslog("%s() %d: avail_idx=%d ",
1237                                         __func__, __LINE__,
1238                                         vring.info->avail_idx);
1239                                 mpsslog("vring.vr.num=%d desc=%p\n",
1240                                         vring.vr.num, desc);
1241 #endif
1242                                 status = header_error_check(desc);
1243                                 ret = read_header(
1244                                         mic->mic_virtblk.virtio_block_fd,
1245                                         &hdr, desc_idx);
1246                                 if (ret < 0) {
1247                                         mpsslog("%s() %d %s: ret=%d %s\n",
1248                                                 __func__, __LINE__,
1249                                                 mic->name, ret,
1250                                                 strerror(errno));
1251                                         break;
1252                                 }
1253                                 /* buffer element */
1254                                 piov = iovec;
1255                                 status = 0;
1256                                 fos = mic->mic_virtblk.backend_addr +
1257                                         (hdr.sector * SECTOR_SIZE);
1258                                 buffer_desc_idx = next_desc(desc);
1259                                 desc_idx = buffer_desc_idx;
1260                                 for (desc = &vring.vr.desc[buffer_desc_idx];
1261                                      desc->flags & VRING_DESC_F_NEXT;
1262                                      desc_idx = next_desc(desc),
1263                                              desc = &vring.vr.desc[desc_idx]) {
1264                                         piov->iov_len = desc->len;
1265                                         piov->iov_base = fos;
1266                                         piov++;
1267                                         fos += desc->len;
1268                                 }
1269                                 /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1270                                 if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1271                                         VIRTIO_BLK_T_GET_ID)) {
1272                                         /*
1273                                           VIRTIO_BLK_T_IN - does not do
1274                                           anything. Probably for documenting.
1275                                           VIRTIO_BLK_T_SCSI_CMD - for
1276                                           virtio_scsi.
1277                                           VIRTIO_BLK_T_FLUSH - turned off in
1278                                           config space.
1279                                           VIRTIO_BLK_T_BARRIER - defined but not
1280                                           used in anywhere.
1281                                         */
1282                                         mpsslog("%s() %d: type %x ",
1283                                                 __func__, __LINE__,
1284                                                 hdr.type);
1285                                         mpsslog("is not supported\n");
1286                                         status = -ENOTSUP;
1287
1288                                 } else {
1289                                         ret = transfer_blocks(
1290                                         mic->mic_virtblk.virtio_block_fd,
1291                                                 iovec,
1292                                                 piov - iovec);
1293                                         if (ret < 0 &&
1294                                             status != 0)
1295                                                 status = ret;
1296                                 }
1297                                 /* write status and update used pointer */
1298                                 if (status != 0)
1299                                         status = status_error_check(desc);
1300                                 ret = write_status(
1301                                         mic->mic_virtblk.virtio_block_fd,
1302                                         &status);
1303 #ifdef DEBUG
1304                                 mpsslog("%s() %d: write status=%d on desc=%p\n",
1305                                         __func__, __LINE__,
1306                                         status, desc);
1307 #endif
1308                         }
1309                 }
1310                 free(iovec);
1311 _stop_virtblk:
1312                 stop_virtblk(mic);
1313 _close_backend:
1314                 close_backend(mic);
1315         }  /* forever */
1316
1317         pthread_exit(NULL);
1318 }
1319
1320 static void
1321 reset(struct mic_info *mic)
1322 {
1323 #define RESET_TIMEOUT 120
1324         int i = RESET_TIMEOUT;
1325         setsysfs(mic->name, "state", "reset");
1326         while (i) {
1327                 char *state;
1328                 state = readsysfs(mic->name, "state");
1329                 if (!state)
1330                         goto retry;
1331                 mpsslog("%s: %s %d state %s\n",
1332                         mic->name, __func__, __LINE__, state);
1333
1334                 if (!strcmp(state, "ready")) {
1335                         free(state);
1336                         break;
1337                 }
1338                 free(state);
1339 retry:
1340                 sleep(1);
1341                 i--;
1342         }
1343 }
1344
1345 static int
1346 get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1347 {
1348         if (!strcmp(shutdown_status, "nop"))
1349                 return MIC_NOP;
1350         if (!strcmp(shutdown_status, "crashed"))
1351                 return MIC_CRASHED;
1352         if (!strcmp(shutdown_status, "halted"))
1353                 return MIC_HALTED;
1354         if (!strcmp(shutdown_status, "poweroff"))
1355                 return MIC_POWER_OFF;
1356         if (!strcmp(shutdown_status, "restart"))
1357                 return MIC_RESTART;
1358         mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1359         /* Invalid state */
1360         assert(0);
1361 };
1362
1363 static int get_mic_state(struct mic_info *mic)
1364 {
1365         char *state = NULL;
1366         enum mic_states mic_state;
1367
1368         while (!state) {
1369                 state = readsysfs(mic->name, "state");
1370                 sleep(1);
1371         }
1372         mpsslog("%s: %s %d state %s\n",
1373                 mic->name, __func__, __LINE__, state);
1374
1375         if (!strcmp(state, "ready")) {
1376                 mic_state = MIC_READY;
1377         } else if (!strcmp(state, "booting")) {
1378                 mic_state = MIC_BOOTING;
1379         } else if (!strcmp(state, "online")) {
1380                 mic_state = MIC_ONLINE;
1381         } else if (!strcmp(state, "shutting_down")) {
1382                 mic_state = MIC_SHUTTING_DOWN;
1383         } else if (!strcmp(state, "reset_failed")) {
1384                 mic_state = MIC_RESET_FAILED;
1385         } else if (!strcmp(state, "resetting")) {
1386                 mic_state = MIC_RESETTING;
1387         } else {
1388                 mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1389                 assert(0);
1390         }
1391
1392         free(state);
1393         return mic_state;
1394 };
1395
1396 static void mic_handle_shutdown(struct mic_info *mic)
1397 {
1398 #define SHUTDOWN_TIMEOUT 60
1399         int i = SHUTDOWN_TIMEOUT;
1400         char *shutdown_status;
1401         while (i) {
1402                 shutdown_status = readsysfs(mic->name, "shutdown_status");
1403                 if (!shutdown_status) {
1404                         sleep(1);
1405                         continue;
1406                 }
1407                 mpsslog("%s: %s %d shutdown_status %s\n",
1408                         mic->name, __func__, __LINE__, shutdown_status);
1409                 switch (get_mic_shutdown_status(mic, shutdown_status)) {
1410                 case MIC_RESTART:
1411                         mic->restart = 1;
1412                 case MIC_HALTED:
1413                 case MIC_POWER_OFF:
1414                 case MIC_CRASHED:
1415                         free(shutdown_status);
1416                         goto reset;
1417                 default:
1418                         break;
1419                 }
1420                 free(shutdown_status);
1421                 sleep(1);
1422                 i--;
1423         }
1424 reset:
1425         if (!i)
1426                 mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
1427                         mic->name, __func__, __LINE__, shutdown_status);
1428         reset(mic);
1429 }
1430
1431 static int open_state_fd(struct mic_info *mic)
1432 {
1433         char pathname[PATH_MAX];
1434         int fd;
1435
1436         snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1437                  MICSYSFSDIR, mic->name, "state");
1438
1439         fd = open(pathname, O_RDONLY);
1440         if (fd < 0)
1441                 mpsslog("%s: opening file %s failed %s\n",
1442                         mic->name, pathname, strerror(errno));
1443         return fd;
1444 }
1445
1446 static int block_till_state_change(int fd, struct mic_info *mic)
1447 {
1448         struct pollfd ufds[1];
1449         char value[PAGE_SIZE];
1450         int ret;
1451
1452         ufds[0].fd = fd;
1453         ufds[0].events = POLLERR | POLLPRI;
1454         ret = poll(ufds, 1, -1);
1455         if (ret < 0) {
1456                 mpsslog("%s: %s %d poll failed %s\n",
1457                         mic->name, __func__, __LINE__, strerror(errno));
1458                 return ret;
1459         }
1460
1461         ret = lseek(fd, 0, SEEK_SET);
1462         if (ret < 0) {
1463                 mpsslog("%s: %s %d Failed to seek to 0: %s\n",
1464                         mic->name, __func__, __LINE__, strerror(errno));
1465                 return ret;
1466         }
1467
1468         ret = read(fd, value, sizeof(value));
1469         if (ret < 0) {
1470                 mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
1471                         mic->name, __func__, __LINE__, strerror(errno));
1472                 return ret;
1473         }
1474
1475         return 0;
1476 }
1477
1478 static void *
1479 mic_config(void *arg)
1480 {
1481         struct mic_info *mic = (struct mic_info *)arg;
1482         int fd, ret, stat = 0;
1483
1484         fd = open_state_fd(mic);
1485         if (fd < 0) {
1486                 mpsslog("%s: %s %d open state fd failed %s\n",
1487                         mic->name, __func__, __LINE__, strerror(errno));
1488                 goto exit;
1489         }
1490
1491         do {
1492                 ret = block_till_state_change(fd, mic);
1493                 if (ret < 0) {
1494                         mpsslog("%s: %s %d block_till_state_change error %s\n",
1495                                 mic->name, __func__, __LINE__, strerror(errno));
1496                         goto close_exit;
1497                 }
1498
1499                 switch (get_mic_state(mic)) {
1500                 case MIC_SHUTTING_DOWN:
1501                         mic_handle_shutdown(mic);
1502                         break;
1503                 case MIC_READY:
1504                 case MIC_RESET_FAILED:
1505                         ret = kill(mic->pid, SIGTERM);
1506                         mpsslog("%s: %s %d kill pid %d ret %d\n",
1507                                 mic->name, __func__, __LINE__,
1508                                 mic->pid, ret);
1509                         if (!ret) {
1510                                 ret = waitpid(mic->pid, &stat,
1511                                               WIFSIGNALED(stat));
1512                                 mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1513                                         mic->name, __func__, __LINE__,
1514                                         ret, mic->pid);
1515                         }
1516                         if (mic->boot_on_resume) {
1517                                 setsysfs(mic->name, "state", "boot");
1518                                 mic->boot_on_resume = 0;
1519                         }
1520                         goto close_exit;
1521                 default:
1522                         break;
1523                 }
1524         } while (1);
1525
1526 close_exit:
1527         close(fd);
1528 exit:
1529         init_mic(mic);
1530         pthread_exit(NULL);
1531 }
1532
1533 static void
1534 set_cmdline(struct mic_info *mic)
1535 {
1536         char buffer[PATH_MAX];
1537         int len;
1538
1539         len = snprintf(buffer, PATH_MAX,
1540                 "clocksource=tsc highres=off nohz=off ");
1541         len += snprintf(buffer + len, PATH_MAX,
1542                 "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1543         len += snprintf(buffer + len, PATH_MAX,
1544                 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1545                 mic->id + 1);
1546
1547         setsysfs(mic->name, "cmdline", buffer);
1548         mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1549         snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
1550         mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1551 }
1552
1553 static void
1554 set_log_buf_info(struct mic_info *mic)
1555 {
1556         int fd;
1557         off_t len;
1558         char system_map[] = "/lib/firmware/mic/System.map";
1559         char *map, *temp, log_buf[17] = {'\0'};
1560
1561         fd = open(system_map, O_RDONLY);
1562         if (fd < 0) {
1563                 mpsslog("%s: Opening System.map failed: %d\n",
1564                         mic->name, errno);
1565                 return;
1566         }
1567         len = lseek(fd, 0, SEEK_END);
1568         if (len < 0) {
1569                 mpsslog("%s: Reading System.map size failed: %d\n",
1570                         mic->name, errno);
1571                 close(fd);
1572                 return;
1573         }
1574         map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1575         if (map == MAP_FAILED) {
1576                 mpsslog("%s: mmap of System.map failed: %d\n",
1577                         mic->name, errno);
1578                 close(fd);
1579                 return;
1580         }
1581         temp = strstr(map, "__log_buf");
1582         if (!temp) {
1583                 mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1584                 munmap(map, len);
1585                 close(fd);
1586                 return;
1587         }
1588         strncpy(log_buf, temp - 19, 16);
1589         setsysfs(mic->name, "log_buf_addr", log_buf);
1590         mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1591         temp = strstr(map, "log_buf_len");
1592         if (!temp) {
1593                 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1594                 munmap(map, len);
1595                 close(fd);
1596                 return;
1597         }
1598         strncpy(log_buf, temp - 19, 16);
1599         setsysfs(mic->name, "log_buf_len", log_buf);
1600         mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1601         munmap(map, len);
1602         close(fd);
1603 }
1604
1605 static void
1606 change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1607 {
1608         struct mic_info *mic;
1609
1610         for (mic = mic_list.next; mic != NULL; mic = mic->next)
1611                 mic->mic_virtblk.signaled = 1/* true */;
1612 }
1613
/*
 * set_mic_boot_params - write the boot-time sysfs parameters for @mic:
 * first the log buffer information, then the kernel command line.
 */
static void
set_mic_boot_params(struct mic_info *mic)
{
	set_log_buf_info(mic);
	set_cmdline(mic);
}
1620
1621 static void *
1622 init_mic(void *arg)
1623 {
1624         struct mic_info *mic = (struct mic_info *)arg;
1625         struct sigaction ignore = {
1626                 .sa_flags = 0,
1627                 .sa_handler = SIG_IGN
1628         };
1629         struct sigaction act = {
1630                 .sa_flags = SA_SIGINFO,
1631                 .sa_sigaction = change_virtblk_backend,
1632         };
1633         char buffer[PATH_MAX];
1634         int err, fd;
1635
1636         /*
1637          * Currently, one virtio block device is supported for each MIC card
1638          * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1639          * The signal informs the virtio block backend about a change in the
1640          * configuration file which specifies the virtio backend file name on
1641          * the host. Virtio block backend then re-reads the configuration file
1642          * and switches to the new block device. This signalling mechanism may
1643          * not be required once multiple virtio block devices are supported by
1644          * the MIC daemon.
1645          */
1646         sigaction(SIGUSR1, &ignore, NULL);
1647 retry:
1648         fd = open_state_fd(mic);
1649         if (fd < 0) {
1650                 mpsslog("%s: %s %d open state fd failed %s\n",
1651                         mic->name, __func__, __LINE__, strerror(errno));
1652                 sleep(2);
1653                 goto retry;
1654         }
1655
1656         if (mic->restart) {
1657                 snprintf(buffer, PATH_MAX, "boot");
1658                 setsysfs(mic->name, "state", buffer);
1659                 mpsslog("%s restarting mic %d\n",
1660                         mic->name, mic->restart);
1661                 mic->restart = 0;
1662         }
1663
1664         while (1) {
1665                 while (block_till_state_change(fd, mic)) {
1666                         mpsslog("%s: %s %d block_till_state_change error %s\n",
1667                                 mic->name, __func__, __LINE__, strerror(errno));
1668                         sleep(2);
1669                         continue;
1670                 }
1671
1672                 if (get_mic_state(mic) == MIC_BOOTING)
1673                         break;
1674         }
1675
1676         mic->pid = fork();
1677         switch (mic->pid) {
1678         case 0:
1679                 add_virtio_device(mic, &virtcons_dev_page.dd);
1680                 add_virtio_device(mic, &virtnet_dev_page.dd);
1681                 err = pthread_create(&mic->mic_console.console_thread, NULL,
1682                         virtio_console, mic);
1683                 if (err)
1684                         mpsslog("%s virtcons pthread_create failed %s\n",
1685                                 mic->name, strerror(err));
1686                 err = pthread_create(&mic->mic_net.net_thread, NULL,
1687                         virtio_net, mic);
1688                 if (err)
1689                         mpsslog("%s virtnet pthread_create failed %s\n",
1690                                 mic->name, strerror(err));
1691                 err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1692                         virtio_block, mic);
1693                 if (err)
1694                         mpsslog("%s virtblk pthread_create failed %s\n",
1695                                 mic->name, strerror(err));
1696                 sigemptyset(&act.sa_mask);
1697                 err = sigaction(SIGUSR1, &act, NULL);
1698                 if (err)
1699                         mpsslog("%s sigaction SIGUSR1 failed %s\n",
1700                                 mic->name, strerror(errno));
1701                 while (1)
1702                         sleep(60);
1703         case -1:
1704                 mpsslog("fork failed MIC name %s id %d errno %d\n",
1705                         mic->name, mic->id, errno);
1706                 break;
1707         default:
1708                 err = pthread_create(&mic->config_thread, NULL,
1709                                      mic_config, mic);
1710                 if (err)
1711                         mpsslog("%s mic_config pthread_create failed %s\n",
1712                                 mic->name, strerror(err));
1713         }
1714
1715         return NULL;
1716 }
1717
1718 static void
1719 start_daemon(void)
1720 {
1721         struct mic_info *mic;
1722         int err;
1723
1724         for (mic = mic_list.next; mic; mic = mic->next) {
1725                 set_mic_boot_params(mic);
1726                 err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
1727                 if (err)
1728                         mpsslog("%s init_mic pthread_create failed %s\n",
1729                                 mic->name, strerror(err));
1730         }
1731
1732         while (1)
1733                 sleep(60);
1734 }
1735
1736 static int
1737 init_mic_list(void)
1738 {
1739         struct mic_info *mic = &mic_list;
1740         struct dirent *file;
1741         DIR *dp;
1742         int cnt = 0;
1743
1744         dp = opendir(MICSYSFSDIR);
1745         if (!dp)
1746                 return 0;
1747
1748         while ((file = readdir(dp)) != NULL) {
1749                 if (!strncmp(file->d_name, "mic", 3)) {
1750                         mic->next = calloc(1, sizeof(struct mic_info));
1751                         if (mic->next) {
1752                                 mic = mic->next;
1753                                 mic->id = atoi(&file->d_name[3]);
1754                                 mic->name = malloc(strlen(file->d_name) + 16);
1755                                 if (mic->name)
1756                                         strcpy(mic->name, file->d_name);
1757                                 mpsslog("MIC name %s id %d\n", mic->name,
1758                                         mic->id);
1759                                 cnt++;
1760                         }
1761                 }
1762         }
1763
1764         closedir(dp);
1765         return cnt;
1766 }
1767
1768 void
1769 mpsslog(char *format, ...)
1770 {
1771         va_list args;
1772         char buffer[4096];
1773         char ts[52], *ts1;
1774         time_t t;
1775
1776         if (logfp == NULL)
1777                 return;
1778
1779         va_start(args, format);
1780         vsprintf(buffer, format, args);
1781         va_end(args);
1782
1783         time(&t);
1784         ts1 = ctime_r(&t, ts);
1785         ts1[strlen(ts1) - 1] = '\0';
1786         fprintf(logfp, "%s: %s", ts1, buffer);
1787
1788         fflush(logfp);
1789 }
1790
1791 int
1792 main(int argc, char *argv[])
1793 {
1794         int cnt;
1795         pid_t pid;
1796
1797         myname = argv[0];
1798
1799         logfp = fopen(LOGFILE_NAME, "a+");
1800         if (!logfp) {
1801                 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1802                 exit(1);
1803         }
1804         pid = fork();
1805         switch (pid) {
1806         case 0:
1807                 break;
1808         case -1:
1809                 exit(2);
1810         default:
1811                 exit(0);
1812         }
1813
1814         mpsslog("MIC Daemon start\n");
1815
1816         cnt = init_mic_list();
1817         if (cnt == 0) {
1818                 mpsslog("MIC module not loaded\n");
1819                 exit(3);
1820         }
1821         mpsslog("MIC found %d devices\n", cnt);
1822
1823         start_daemon();
1824
1825         exit(0);
1826 }