[DCCP]: Initial dccp_poll implementation
/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *      This program is free software; you can redistribute it and/or modify it
 *      under the terms of the GNU General Public License version 2 as
 *      published by the Free Software Foundation.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"

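/*
 * SNMP-style MIB counters for DCCP. The per-cpu blocks behind the two
 * array slots are allocated in init_dccp_v4_mibs() below.
 */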
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

static struct net_protocol dccp_protocol = {
        .handler        = dccp_v4_rcv,
        .err_handler    = dccp_v4_err,
};

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static const char *dccp_state_names[] = {
        [DCCP_OPEN]       = "OPEN",
        [DCCP_REQUESTING] = "REQUESTING",
        [DCCP_PARTOPEN]   = "PARTOPEN",
        [DCCP_LISTEN]     = "LISTEN",
        [DCCP_RESPOND]    = "RESPOND",
        [DCCP_CLOSING]    = "CLOSING",
        [DCCP_TIME_WAIT]  = "TIME_WAIT",
        [DCCP_CLOSED]     = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

static inline int dccp_listen_start(struct sock *sk)
{
        dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
        return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}

int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /* ABORT function of RFC793 */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        /* FIXME: do the active reset thing */
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->sk_receive_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

/*
 *      Wait for a DCCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 */
static unsigned int dccp_poll(struct file *file, struct socket *sock,
                              poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* The socket is not locked. We are protected from async events
         * by the poll logic, and correct handling of state changes made
         * by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after the
                                 * wspace test but before the flags are set,
                                 * the I/O signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}
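
/*
 * Example of the semantics above from userspace (an illustrative sketch;
 * "fd" is a hypothetical, already-connected SOCK_DCCP descriptor).
 * dccp_poll() lets DCCP sockets be multiplexed with poll(2) like any
 * other connection-oriented socket:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, timeout_ms) > 0) {
 *		if (pfd.revents & POLLIN)
 *			... read one packet with recvmsg() ...
 *		if (pfd.revents & POLLHUP)
 *			... both directions are shut down ...
 *	}
 */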

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        dccp_pr_debug("entry\n");
        return -ENOIOCTLCMD;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        dccp_pr_debug("entry\n");

        if (level != SOL_DCCP)
                return ip_setsockopt(sk, level, optname, optval, optlen);

        return -EOPNOTSUPP;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        dccp_pr_debug("entry\n");

        if (level != SOL_DCCP)
                return ip_getsockopt(sk, level, optname, optval, optlen);

        return -EOPNOTSUPP;
}

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);
        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        rc = dccp_write_xmit(sk, skb, len);
        /*
         * XXX we don't use sk_write_queue, so just discard the packet.
         *     The current plan, however, is to _use_ sk_write_queue with
         *     an algorithm similar to tcp_sendmsg, where the main difference
         *     is that in DCCP we have to respect packet boundaries, so
         *     no coalescing of skbs.
         *
         *     This bug was _quickly_ found & fixed by just looking at an OSTRA
         *     generated callgraph 8) -acme
         */
        if (rc != 0)
                goto out_discard;
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}
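
/*
 * Userspace consequence of the dccps_mss_cache check above (illustrative
 * sketch; "fd" is a hypothetical connected SOCK_DCCP descriptor): each
 * send() becomes exactly one DCCP packet, so an oversized write fails
 * instead of being split across packets:
 *
 *	if (send(fd, buf, len, 0) < 0 && errno == EMSGSIZE)
 *		... retry with len reduced to the connection's MSS ...
 */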

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                if (dh->dccph_type == DCCP_PKT_DATA ||
                    dh->dccph_type == DCCP_PKT_DATAACK)
                        goto found_ok_skb;

                if (dh->dccph_type == DCCP_PKT_RESET ||
                    dh->dccph_type == DCCP_PKT_CLOSE) {
                        dccp_pr_debug("found fin ok!\n");
                        len = 0;
                        goto found_fin_ok;
                }
                dccp_pr_debug("packet_type=%s\n",
                              dccp_packet_name(dh->dccph_type));
                sk_eat_skb(sk, skb);
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when the user tries to read
                                 * from a never-connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}
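
/*
 * Note on the loop above: each call consumes at most one DCCP-Data or
 * DCCP-DataAck packet. If the caller's buffer is smaller than the packet,
 * the excess is discarded and MSG_TRUNC is set in msg_flags -- datagram
 * semantics, not TCP's byte-stream ones.
 */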

static int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

static const unsigned char dccp_new_state[] = {
        /* current state:   new state:      action:     */
        [0]               = DCCP_CLOSED,
        [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_REQUESTING] = DCCP_CLOSED,
        [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_LISTEN]     = DCCP_CLOSED,
        [DCCP_RESPOND]    = DCCP_CLOSED,
        [DCCP_CLOSING]    = DCCP_CLOSED,
        [DCCP_TIME_WAIT]  = DCCP_CLOSED,
        [DCCP_CLOSED]     = DCCP_CLOSED,
};
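
/*
 * Each entry above packs two things into one byte: the next state (low
 * bits, extracted with DCCP_STATE_MASK) and an optional DCCP_ACTION_FIN
 * flag. E.g. the DCCP_OPEN entry means "go to DCCP_CLOSING and send a
 * CLOSE/CLOSEREQ", which is exactly how dccp_close_state() below
 * consumes it.
 */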

static int dccp_close_state(struct sock *sk)
{
        const int next = dccp_new_state[sk->sk_state];
        const int ns = next & DCCP_STATE_MASK;

        if (ns != sk->sk_state)
                dccp_set_state(sk, ns);

        return next & DCCP_ACTION_FIN;
}

void dccp_close(struct sock *sk, long timeout)
{
        struct sk_buff *skb;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        /* FIXME: check for unread data */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                __kfree_skb(skb);
        }

        if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (dccp_close_state(sk)) {
                dccp_send_close(sk, 1);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now the socket is owned by the kernel and we acquire the BH lock
         * to finish the close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        sock_hold(sk);
        sock_orphan(sk);

        /*
         * The last release_sock may have processed the CLOSE or RESET
         * packet moving the sock to the CLOSED state; if not, we have to
         * fire the CLOSE/CLOSEREQ retransmission timer, see "8.3.
         * Termination" in draft-ietf-dccp-spec-11. -acme
         */
        if (sk->sk_state == DCCP_CLOSING) {
                /* FIXME: should start at 2 * RTT */
                /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          inet_csk(sk)->icsk_rto,
                                          DCCP_RTO_MAX);
#if 0
                /* Yeah, we should use sk->sk_prot->orphan_count, etc */
                dccp_set_state(sk, DCCP_CLOSED);
#endif
        }

        atomic_inc(sk->sk_prot->orphan_count);
        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, the socket is reprieved until protocol close. */

        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}

static struct proto_ops inet_dccp_ops = {
        .family         = PF_INET,
        .owner          = THIS_MODULE,
        .release        = inet_release,
        .bind           = inet_bind,
        .connect        = inet_stream_connect,
        .socketpair     = sock_no_socketpair,
        .accept         = inet_accept,
        .getname        = inet_getname,
        /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
        .poll           = dccp_poll,
        .ioctl          = inet_ioctl,
        /* FIXME: work on inet_listen to rename it to sock_common_listen */
        .listen         = inet_dccp_listen,
        .shutdown       = inet_shutdown,
        .setsockopt     = sock_common_setsockopt,
        .getsockopt     = sock_common_getsockopt,
        .sendmsg        = inet_sendmsg,
        .recvmsg        = sock_common_recvmsg,
        .mmap           = sock_no_mmap,
        .sendpage       = sock_no_sendpage,
};

extern struct net_proto_family inet_family_ops;

static struct inet_protosw dccp_v4_protosw = {
        .type           = SOCK_DCCP,
        .protocol       = IPPROTO_DCCP,
        .prot           = &dccp_v4_prot,
        .ops            = &inet_dccp_ops,
        .capability     = -1,
        .no_check       = 0,
        .flags          = 0,
};

/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock is created for
 * this socket at initialization time.
 */
struct socket *dccp_ctl_socket;

static char dccp_ctl_socket_err_msg[] __initdata =
        KERN_ERR "DCCP: Failed to create the control socket.\n";

static int __init dccp_ctl_sock_init(void)
{
        int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
                                  &dccp_ctl_socket);
        if (rc < 0)
                printk(dccp_ctl_socket_err_msg);
        else {
                dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
                inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;

                /* Unhash it so that IP input processing does not even
                 * see it, we do not wish this socket to see incoming
                 * packets.
                 */
                dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
        }

        return rc;
}
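
/*
 * On the GFP_ATOMIC above: replies to OOTB packets (e.g. Resets) are
 * generated from the receive path, i.e. softirq context, where sleeping
 * allocations are not allowed, so the control socket must not block on
 * memory. (A likely rationale, inferred from usage; the original does
 * not spell it out.)
 */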

#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
void dccp_ctl_sock_exit(void)
{
        if (dccp_ctl_socket != NULL) {
                sock_release(dccp_ctl_socket);
                dccp_ctl_socket = NULL;
        }
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif

static int __init init_dccp_v4_mibs(void)
{
        int rc = -ENOMEM;

        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[0] == NULL)
                goto out;

        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[1] == NULL)
                goto out_free_one;

        rc = 0;
out:
        return rc;
out_free_one:
        free_percpu(dccp_statistics[0]);
        dccp_statistics[0] = NULL;
        goto out;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
#endif

static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = proto_register(&dccp_v4_prot, 1);

        if (rc)
                goto out;

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out_proto_unregister;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
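        /*
         * A worked example of the sizing below (illustrative; assumes
         * 4 KiB pages, i.e. PAGE_SHIFT == 12): with num_physpages ==
         * 128 * 1024 (512 MiB of RAM), goal = 131072 >> 9 == 256 pages,
         * so the loop settles on ehash_order == 8, i.e. a 1 MiB
         * established hash. ehash_size is then halved (the upper half
         * of the table holds timewait buckets, as in tcp_init) and
         * rounded down to a power of two.
         */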
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                dccp_hashinfo.ehash_size >>= 1;
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                printk(KERN_CRIT "Failed to allocate DCCP "
                                 "established hash table\n");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
                rwlock_init(&dccp_hashinfo.ehash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
        }

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
                goto out_free_dccp_ehash;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        if (init_dccp_v4_mibs())
                goto out_free_dccp_bhash;

        rc = -EAGAIN;
        if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
                goto out_free_dccp_v4_mibs;

        inet_register_protosw(&dccp_v4_protosw);

        rc = dccp_ctl_sock_init();
        if (rc)
                goto out_unregister_protosw;
out:
        return rc;
out_unregister_protosw:
        inet_unregister_protosw(&dccp_v4_protosw);
        inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
        proto_unregister(&dccp_v4_prot);
        goto out;
}

static const char dccp_del_proto_err_msg[] __exitdata =
        KERN_ERR "can't remove dccp net_protocol\n";

static void __exit dccp_fini(void)
{
        inet_unregister_protosw(&dccp_v4_protosw);

        if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
                printk(dccp_del_proto_err_msg);

        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        proto_unregister(&dccp_v4_prot);
}

module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use the SOCK_DCCP (6) and
 * IPPROTO_DCCP (33) values directly. Also cover the case where the
 * protocol is not specified, i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP.
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");