1 #include "kvm/virtio-net.h"
2 #include "kvm/virtio-pci-dev.h"
3 #include "kvm/virtio.h"
4 #include "kvm/ioport.h"
12 #include <linux/virtio_net.h>
13 #include <linux/if_tun.h>
15 #include <arpa/inet.h>
22 #include <sys/socket.h>
23 #include <sys/ioctl.h>
24 #include <sys/types.h>
/*
 * Ring size used for every virtqueue: passed to vring_init(), reported to the
 * guest on a VIRTIO_PCI_QUEUE_NUM read, and used to size the iovec arrays in
 * the RX/TX worker threads.
 */
27 #define VIRTIO_NET_QUEUE_SIZE 128
/* Number of entries in ndev.vqs[]. */
28 #define VIRTIO_NET_NUM_QUEUES 2
/* Indices into ndev.vqs[] for the receive and transmit queues. */
29 #define VIRTIO_NET_RX_QUEUE 0
30 #define VIRTIO_NET_TX_QUEUE 1
/*
 * PCI configuration header for the virtio-net device.  BAR 0 is an I/O port
 * BAR based at IOPORT_VIRTIO_NET.  The irq_pin and irq_line members are not
 * set here; virtio_net__init() assigns them after registering the device
 * with the IRQ layer.
 */
32 static struct pci_device_header pci_header = {
33 .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET,
34 .device_id = PCI_DEVICE_ID_VIRTIO_NET,
35 .header_type = PCI_HEADER_TYPE_NORMAL,
38 .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
39 .subsys_id = VIRTIO_ID_NET,
40 .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO,
/* Held by the port I/O read and write handlers while they touch device state. */
44 pthread_mutex_t mutex;
/* Virtqueues, indexed by VIRTIO_NET_RX_QUEUE and VIRTIO_NET_TX_QUEUE. */
46 struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES];
/* Device-specific configuration area; the guest reads it one byte at a time. */
47 struct virtio_net_config config;
/* RX worker thread and the lock/condition pair it sleeps on until notified. */
55 pthread_t io_rx_thread;
56 pthread_mutex_t io_rx_lock;
57 pthread_cond_t io_rx_cond;
/* TX worker thread and the lock/condition pair it sleeps on until notified. */
59 pthread_t io_tx_thread;
60 pthread_mutex_t io_tx_lock;
61 pthread_cond_t io_tx_cond;
/* Interface name copied out of the ifreq after the TUNSETIFF ioctl. */
64 char tap_name[IFNAMSIZ];
/*
 * The single virtio-net device instance used throughout this file.
 * host_features is the feature bitmap returned to the guest on a
 * VIRTIO_PCI_HOST_FEATURES read.
 */
67 static struct net_device ndev = {
68 .mutex = PTHREAD_MUTEX_INITIALIZER,
/*
 * NOTE(review): the enclosing initializer lines are elided in this view;
 * presumably .mac and .status belong to the .config sub-struct -- confirm.
 * virtio_net__tap_init() later overwrites ndev.config.mac from the parameters.
 */
71 .mac = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
72 .status = VIRTIO_NET_S_LINK_UP,
/* Advertise MAC, checksum, and host/guest UFO and TSO4/TSO6 feature bits. */
74 .host_features = 1UL << VIRTIO_NET_F_MAC
75 | 1UL << VIRTIO_NET_F_CSUM
76 | 1UL << VIRTIO_NET_F_HOST_UFO
77 | 1UL << VIRTIO_NET_F_HOST_TSO4
78 | 1UL << VIRTIO_NET_F_HOST_TSO6
79 | 1UL << VIRTIO_NET_F_GUEST_UFO
80 | 1UL << VIRTIO_NET_F_GUEST_TSO4
81 | 1UL << VIRTIO_NET_F_GUEST_TSO6,
/*
 * RX worker thread body (started by virtio_net__io_thread_init()).
 *
 * Sleeps on io_rx_cond while the RX virtqueue has no available buffers, then
 * drains the queue: for each available descriptor chain it reads from the tap
 * file descriptor into the guest-supplied iovecs, marks the chain as used with
 * the returned length, and raises the device interrupt.
 *
 * p: thread argument; presumably the struct kvm * handed to pthread_create()
 *    (the assignment is elided in this view -- confirm).
 */
84 static void *virtio_net_rx_thread(void *p)
86 struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
87 struct virt_queue *vq;
94 vq = &ndev.vqs[VIRTIO_NET_RX_QUEUE];
/* Block until the notify handler signals io_rx_cond, unless work is already pending. */
97 mutex_lock(&ndev.io_rx_lock);
98 if (!virt_queue__available(vq))
99 pthread_cond_wait(&ndev.io_rx_cond, &ndev.io_rx_lock);
100 mutex_unlock(&ndev.io_rx_lock);
102 while (virt_queue__available(vq)) {
103 head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
/*
 * Only the first `in` iovec entries are filled from the tap device.
 * NOTE(review): the readv() result is passed on unchecked, so a failure
 * would be reported as the used length -- confirm this is acceptable.
 */
104 len = readv(ndev.tap_fd, iov, in);
105 virt_queue__set_used_elem(vq, head, len);
107 /* We should interrupt guest right now, otherwise latency is huge. */
108 virt_queue__trigger_irq(vq, pci_header.irq_line, &ndev.isr, kvm);
/*
 * TX worker thread body (started by virtio_net__io_thread_init()).
 *
 * Sleeps on io_tx_cond while the TX virtqueue has no available buffers, then
 * drains the queue: each available descriptor chain is written to the tap
 * file descriptor, marked as used with the returned length, and the device
 * interrupt is raised.
 *
 * p: thread argument; presumably the struct kvm * handed to pthread_create()
 *    (the assignment is elided in this view -- confirm).
 */
118 static void *virtio_net_tx_thread(void *p)
120 struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
121 struct virt_queue *vq;
128 vq = &ndev.vqs[VIRTIO_NET_TX_QUEUE];
/* Block until the notify handler signals io_tx_cond, unless work is already pending. */
131 mutex_lock(&ndev.io_tx_lock);
132 if (!virt_queue__available(vq))
133 pthread_cond_wait(&ndev.io_tx_cond, &ndev.io_tx_lock);
134 mutex_unlock(&ndev.io_tx_lock);
136 while (virt_queue__available(vq)) {
137 head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
/*
 * Only the first `out` iovec entries are sent to the tap device.
 * NOTE(review): the writev() result is passed on unchecked, so a failure
 * or short write would be reported as the used length -- confirm.
 */
138 len = writev(ndev.tap_fd, iov, out);
139 virt_queue__set_used_elem(vq, head, len);
142 virt_queue__trigger_irq(vq, pci_header.irq_line, &ndev.isr, kvm);
/*
 * Serve a guest read from the device-specific configuration area.
 *
 * data:   destination buffer for the value returned to the guest.
 * offset: register offset relative to IOPORT_VIRTIO_NET; the config byte
 *         index is offset - VIRTIO_MSI_CONFIG_VECTOR.
 * size:   access width in bytes; only 1 is handled.
 * count:  repeat count; only 1 is handled.
 */
152 static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, u32 count)
154 u8 *config_space = (u8 *)&ndev.config;
156 if (size != 1 || count != 1)
/*
 * Valid byte indices are 0 .. sizeof(struct virtio_net_config) - 1, so an
 * index equal to the size is already out of range: compare with >=, not >.
 * NOTE(review): whether error() returns is not visible here; if it does, the
 * read below still runs with the rejected index -- confirm.
 */
159 if ((offset - VIRTIO_MSI_CONFIG_VECTOR) >= sizeof(struct virtio_net_config))
160 error("config offset is too big: %li", offset - VIRTIO_MSI_CONFIG_VECTOR);
162 ioport__write8(data, config_space[offset - VIRTIO_MSI_CONFIG_VECTOR]);
/*
 * Handle a guest read from the device's I/O port window.
 *
 * The register is selected by the port's offset from IOPORT_VIRTIO_NET and
 * the value is stored into `data`.  All register access happens with
 * ndev.mutex held.  Offsets not matched by a case label are forwarded to
 * virtio_net_pci_io_device_specific_in().
 */
167 static bool virtio_net_pci_io_in(struct kvm *kvm, u16 port, void *data, int size, u32 count)
169 unsigned long offset = port - IOPORT_VIRTIO_NET;
172 mutex_lock(&ndev.mutex);
175 case VIRTIO_PCI_HOST_FEATURES:
176 ioport__write32(data, ndev.host_features);
178 case VIRTIO_PCI_GUEST_FEATURES:
/*
 * NOTE(review): queue_selector is written by the guest and no range check is
 * visible on this read path; the QUEUE_PFN write path asserts it is below
 * VIRTIO_NET_NUM_QUEUES -- confirm the index cannot be out of range here.
 */
181 case VIRTIO_PCI_QUEUE_PFN:
182 ioport__write32(data, ndev.vqs[ndev.queue_selector].pfn);
184 case VIRTIO_PCI_QUEUE_NUM:
185 ioport__write16(data, VIRTIO_NET_QUEUE_SIZE);
187 case VIRTIO_PCI_QUEUE_SEL:
188 case VIRTIO_PCI_QUEUE_NOTIFY:
191 case VIRTIO_PCI_STATUS:
192 ioport__write8(data, ndev.status);
/*
 * Return the interrupt status, then lower the interrupt line and clear the
 * stored status.  (The case label is elided in this view; presumably the ISR
 * register -- confirm.)
 */
195 ioport__write8(data, ndev.isr);
196 kvm__irq_line(kvm, pci_header.irq_line, VIRTIO_IRQ_LOW);
197 ndev.isr = VIRTIO_IRQ_LOW;
199 case VIRTIO_MSI_CONFIG_VECTOR:
200 ioport__write16(data, ndev.config_vector);
/* Remaining offsets are treated as device-specific configuration reads. */
203 ret = virtio_net_pci_io_device_specific_in(data, offset, size, count);
206 mutex_unlock(&ndev.mutex);
/*
 * React to a guest queue notification by waking the matching worker thread.
 *
 * queue_index: VIRTIO_NET_TX_QUEUE signals io_tx_cond, VIRTIO_NET_RX_QUEUE
 *              signals io_rx_cond; each signal is sent with the corresponding
 *              lock held.  A warning is logged for an unknown index.
 */
211 static void virtio_net_handle_callback(struct kvm *kvm, u16 queue_index)
213 switch (queue_index) {
214 case VIRTIO_NET_TX_QUEUE: {
215 mutex_lock(&ndev.io_tx_lock);
216 pthread_cond_signal(&ndev.io_tx_cond);
217 mutex_unlock(&ndev.io_tx_lock);
220 case VIRTIO_NET_RX_QUEUE: {
221 mutex_lock(&ndev.io_rx_lock);
222 pthread_cond_signal(&ndev.io_rx_cond);
223 mutex_unlock(&ndev.io_rx_lock);
227 warning("Unknown queue index %u", queue_index);
/*
 * Handle a guest write to the device's I/O port window.
 *
 * The register is selected by the port's offset from IOPORT_VIRTIO_NET and
 * the written value is taken from `data`.  All register access, including
 * the queue-notify callback, happens with ndev.mutex held.
 */
231 static bool virtio_net_pci_io_out(struct kvm *kvm, u16 port, void *data, int size, u32 count)
233 unsigned long offset = port - IOPORT_VIRTIO_NET;
236 mutex_lock(&ndev.mutex);
239 case VIRTIO_PCI_GUEST_FEATURES:
240 ndev.guest_features = ioport__read32(data);
/* Record the page frame number for the currently selected queue and set up its vring. */
242 case VIRTIO_PCI_QUEUE_PFN: {
243 struct virt_queue *queue;
246 assert(ndev.queue_selector < VIRTIO_NET_NUM_QUEUES);
248 queue = &ndev.vqs[ndev.queue_selector];
249 queue->pfn = ioport__read32(data);
/* Presumably maps the guest page frame to a host pointer for the ring memory -- confirm. */
250 p = guest_pfn_to_host(kvm, queue->pfn);
252 vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);
/* The selector is stored as written; it is range-checked by the assert in the QUEUE_PFN case. */
256 case VIRTIO_PCI_QUEUE_SEL:
257 ndev.queue_selector = ioport__read16(data);
/* Guest kick: wake the worker thread that services the notified queue. */
259 case VIRTIO_PCI_QUEUE_NOTIFY: {
262 queue_index = ioport__read16(data);
263 virtio_net_handle_callback(kvm, queue_index);
266 case VIRTIO_PCI_STATUS:
267 ndev.status = ioport__read8(data);
/* The value written by the guest is not read; the vector is always set to VIRTIO_MSI_NO_VECTOR. */
269 case VIRTIO_MSI_CONFIG_VECTOR:
270 ndev.config_vector = VIRTIO_MSI_NO_VECTOR;
272 case VIRTIO_MSI_QUEUE_VECTOR:
278 mutex_unlock(&ndev.mutex);
/*
 * Port I/O callbacks for the device; registered for the IOPORT_VIRTIO_NET
 * range by virtio_net__init().
 */
283 static struct ioport_operations virtio_net_io_ops = {
284 .io_in = virtio_net_pci_io_in,
285 .io_out = virtio_net_pci_io_out,
/*
 * Create and configure the host tap device that backs the virtio-net device.
 *
 * params: supplies the guest MAC address, the setup script path (or "none"),
 *         and the host IP address to assign to the tap interface.
 *
 * The caller treats a true return value as success.
 */
288 static bool virtio_net__tap_init(const struct virtio_net_parameters *params)
/*
 * Socket used for the SIOCSIFADDR / SIOCGIFFLAGS / SIOCSIFFLAGS interface
 * ioctls below.
 * NOTE(review): no check of the socket() result is visible in this view.
 */
290 int sock = socket(AF_INET, SOCK_STREAM, 0);
291 int i, pid, status, offload, hdr_len;
292 struct sockaddr_in sin = {0};
/* Publish the configured guest MAC address in the device config space. */
295 for (i = 0 ; i < 6 ; i++)
296 ndev.config.mac[i] = params->guest_mac[i];
298 ndev.tap_fd = open("/dev/net/tun", O_RDWR);
299 if (ndev.tap_fd < 0) {
300 warning("Unable to open /dev/net/tun");
/* Request a tap interface with no packet-info header and with a virtio-net header. */
304 memset(&ifr, 0, sizeof(ifr));
305 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
306 if (ioctl(ndev.tap_fd, TUNSETIFF, &ifr) < 0) {
307 warning("Config tap device error. Are you root?");
/* Remember the interface name left in the ifreq by TUNSETIFF. */
311 strncpy(ndev.tap_name, ifr.ifr_name, sizeof(ndev.tap_name));
313 if (ioctl(ndev.tap_fd, TUNSETNOCSUM, 1) < 0) {
314 warning("Config tap device TUNSETNOCSUM error");
/* Tell the tap device the size of the virtio-net header used on this fd. */
318 hdr_len = sizeof(struct virtio_net_hdr);
319 if (ioctl(ndev.tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) {
320 warning("Config tap device TUNSETVNETHDRSZ error");
324 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO;
325 if (ioctl(ndev.tap_fd, TUNSETOFFLOAD, offload) < 0) {
326 warning("Config tap device TUNSETOFFLOAD error");
/* strcmp() is non-zero when the script is anything other than "none": run it with the tap name as argument. */
330 if (strcmp(params->script, "none")) {
333 execl(params->script, params->script, ndev.tap_name, NULL);
/* Wait for the script and warn if it exited with a non-zero status. */
336 waitpid(pid, &status, 0);
337 if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
338 warning("Fail to setup tap by %s", params->script);
/*
 * Assign params->host_ip to the tap interface.  The branch structure is
 * elided in this view; presumably this is the path taken when no script is
 * used -- confirm.
 */
343 memset(&ifr, 0, sizeof(ifr));
344 strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name));
345 sin.sin_addr.s_addr = inet_addr(params->host_ip);
346 memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
347 ifr.ifr_addr.sa_family = AF_INET;
348 if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
349 warning("Could not set ip address on tap device");
/*
 * Fetch the current interface flags and add IFF_UP | IFF_RUNNING.
 * NOTE(review): the SIOCGIFFLAGS result is not checked.
 */
354 memset(&ifr, 0, sizeof(ifr));
355 strncpy(ifr.ifr_name, ndev.tap_name, sizeof(ndev.tap_name));
356 ioctl(sock, SIOCGIFFLAGS, &ifr);
357 ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
358 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
359 warning("Could not bring tap device up");
368 if (ndev.tap_fd >= 0)
/*
 * Initialise the RX and TX synchronisation objects and start both worker
 * threads.
 *
 * kvm: passed to each worker thread as its start argument.
 *
 * Each direction needs its own lock and condition variable: the RX thread
 * waits on io_rx_cond under io_rx_lock, and the TX thread waits on io_tx_cond
 * under io_tx_lock.  All four objects must therefore be initialised before
 * the threads are created.
 */
374 static void virtio_net__io_thread_init(struct kvm *kvm)
376 pthread_mutex_init(&ndev.io_rx_lock, NULL);
377 pthread_cond_init(&ndev.io_rx_cond, NULL);
379 pthread_mutex_init(&ndev.io_tx_lock, NULL);
380 pthread_cond_init(&ndev.io_tx_cond, NULL);
382 pthread_create(&ndev.io_rx_thread, NULL, virtio_net_rx_thread, (void *)kvm);
383 pthread_create(&ndev.io_tx_thread, NULL, virtio_net_tx_thread, (void *)kvm);
/*
 * Bring up the virtio-net device.
 *
 * params: tap/network settings forwarded to virtio_net__tap_init();
 *         params->kvm is handed to the worker threads.
 *
 * When tap setup succeeds, the device is registered with the IRQ layer, its
 * PCI header is completed and registered, the I/O port range is hooked up,
 * and the RX/TX worker threads are started.
 */
386 void virtio_net__init(const struct virtio_net_parameters *params)
388 if (virtio_net__tap_init(params)) {
391 if (irq__register_device(VIRTIO_ID_NET, &dev, &pin, &line) < 0)
/* Store the assigned interrupt pin and line in the PCI header before registering it. */
394 pci_header.irq_pin = pin;
395 pci_header.irq_line = line;
396 pci__register(&pci_header, dev);
397 ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE);
399 virtio_net__io_thread_init(params->kvm);