]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - net/dccp/proto.c
dccp: change L/R must have at least one byte in the dccpsf_val field
[karo-tx-linux.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
35
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
39
/*
 * DCCP MIB (SNMP) counters.
 * NOTE(review): presumably the storage behind DCCP_INC_STATS()/DCCP_DEC_STATS()
 * used further down in this file -- confirm against dccp.h.
 */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Orphan-socket counter for DCCP; starts out at zero. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/*
 * DCCP's inet hash-table descriptor.  Only the listening-hash lock, its user
 * count and its wait queue are initialised statically here; every other
 * member starts out zeroed.
 */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock     = RW_LOCK_UNLOCKED,
        .lhash_users    = ATOMIC_INIT(0),
        .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * The maximum queue length for tx in packets. 0 is no limit.
 * Enforced in dccp_sendmsg(), which fails with -EAGAIN once sk_write_queue
 * already holds this many packets.
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
58
59 void dccp_set_state(struct sock *sk, const int state)
60 {
61         const int oldstate = sk->sk_state;
62
63         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
64                       dccp_state_name(oldstate), dccp_state_name(state));
65         WARN_ON(state == oldstate);
66
67         switch (state) {
68         case DCCP_OPEN:
69                 if (oldstate != DCCP_OPEN)
70                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
71                 break;
72
73         case DCCP_CLOSED:
74                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
75                     oldstate == DCCP_CLOSING)
76                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
77
78                 sk->sk_prot->unhash(sk);
79                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
80                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
81                         inet_put_port(sk);
82                 /* fall through */
83         default:
84                 if (oldstate == DCCP_OPEN)
85                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
86         }
87
88         /* Change state AFTER socket is unhashed to avoid closed
89          * socket sitting in hash tables.
90          */
91         sk->sk_state = state;
92 }
93
94 EXPORT_SYMBOL_GPL(dccp_set_state);
95
96 static void dccp_finish_passive_close(struct sock *sk)
97 {
98         switch (sk->sk_state) {
99         case DCCP_PASSIVE_CLOSE:
100                 /* Node (client or server) has received Close packet. */
101                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
102                 dccp_set_state(sk, DCCP_CLOSED);
103                 break;
104         case DCCP_PASSIVE_CLOSEREQ:
105                 /*
106                  * Client received CloseReq. We set the `active' flag so that
107                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
108                  */
109                 dccp_send_close(sk, 1);
110                 dccp_set_state(sk, DCCP_CLOSING);
111         }
112 }
113
114 void dccp_done(struct sock *sk)
115 {
116         dccp_set_state(sk, DCCP_CLOSED);
117         dccp_clear_xmit_timers(sk);
118
119         sk->sk_shutdown = SHUTDOWN_MASK;
120
121         if (!sock_flag(sk, SOCK_DEAD))
122                 sk->sk_state_change(sk);
123         else
124                 inet_csk_destroy_sock(sk);
125 }
126
127 EXPORT_SYMBOL_GPL(dccp_done);
128
129 const char *dccp_packet_name(const int type)
130 {
131         static const char *dccp_packet_names[] = {
132                 [DCCP_PKT_REQUEST]  = "REQUEST",
133                 [DCCP_PKT_RESPONSE] = "RESPONSE",
134                 [DCCP_PKT_DATA]     = "DATA",
135                 [DCCP_PKT_ACK]      = "ACK",
136                 [DCCP_PKT_DATAACK]  = "DATAACK",
137                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
138                 [DCCP_PKT_CLOSE]    = "CLOSE",
139                 [DCCP_PKT_RESET]    = "RESET",
140                 [DCCP_PKT_SYNC]     = "SYNC",
141                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
142         };
143
144         if (type >= DCCP_NR_PKT_TYPES)
145                 return "INVALID";
146         else
147                 return dccp_packet_names[type];
148 }
149
150 EXPORT_SYMBOL_GPL(dccp_packet_name);
151
152 const char *dccp_state_name(const int state)
153 {
154         static char *dccp_state_names[] = {
155         [DCCP_OPEN]             = "OPEN",
156         [DCCP_REQUESTING]       = "REQUESTING",
157         [DCCP_PARTOPEN]         = "PARTOPEN",
158         [DCCP_LISTEN]           = "LISTEN",
159         [DCCP_RESPOND]          = "RESPOND",
160         [DCCP_CLOSING]          = "CLOSING",
161         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
162         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
163         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
164         [DCCP_TIME_WAIT]        = "TIME_WAIT",
165         [DCCP_CLOSED]           = "CLOSED",
166         };
167
168         if (state >= DCCP_MAX_STATES)
169                 return "INVALID STATE!";
170         else
171                 return dccp_state_names[state];
172 }
173
174 EXPORT_SYMBOL_GPL(dccp_state_name);
175
176 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
177 {
178         struct dccp_sock *dp = dccp_sk(sk);
179         struct dccp_minisock *dmsk = dccp_msk(sk);
180         struct inet_connection_sock *icsk = inet_csk(sk);
181
182         dccp_minisock_init(&dp->dccps_minisock);
183
184         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
185         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
186         sk->sk_state            = DCCP_CLOSED;
187         sk->sk_write_space      = dccp_write_space;
188         icsk->icsk_sync_mss     = dccp_sync_mss;
189         dp->dccps_mss_cache     = 536;
190         dp->dccps_rate_last     = jiffies;
191         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
192         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
193         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
194
195         dccp_init_xmit_timers(sk);
196
197         /*
198          * FIXME: We're hardcoding the CCID, and doing this at this point makes
199          * the listening (master) sock get CCID control blocks, which is not
200          * necessary, but for now, to not mess with the test userspace apps,
201          * lets leave it here, later the real solution is to do this in a
202          * setsockopt(CCIDs-I-want/accept). -acme
203          */
204         if (likely(ctl_sock_initialized)) {
205                 int rc = dccp_feat_init(dmsk);
206
207                 if (rc)
208                         return rc;
209
210                 if (dmsk->dccpms_send_ack_vector) {
211                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
212                         if (dp->dccps_hc_rx_ackvec == NULL)
213                                 return -ENOMEM;
214                 }
215                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
216                                                       sk, GFP_KERNEL);
217                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
218                                                       sk, GFP_KERNEL);
219                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
220                              dp->dccps_hc_tx_ccid == NULL)) {
221                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
222                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
223                         if (dmsk->dccpms_send_ack_vector) {
224                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
225                                 dp->dccps_hc_rx_ackvec = NULL;
226                         }
227                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
228                         return -ENOMEM;
229                 }
230         } else {
231                 /* control socket doesn't need feat nego */
232                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
233                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
234         }
235
236         return 0;
237 }
238
239 EXPORT_SYMBOL_GPL(dccp_init_sock);
240
241 int dccp_destroy_sock(struct sock *sk)
242 {
243         struct dccp_sock *dp = dccp_sk(sk);
244         struct dccp_minisock *dmsk = dccp_msk(sk);
245
246         /*
247          * DCCP doesn't use sk_write_queue, just sk_send_head
248          * for retransmissions
249          */
250         if (sk->sk_send_head != NULL) {
251                 kfree_skb(sk->sk_send_head);
252                 sk->sk_send_head = NULL;
253         }
254
255         /* Clean up a referenced DCCP bind bucket. */
256         if (inet_csk(sk)->icsk_bind_hash != NULL)
257                 inet_put_port(sk);
258
259         kfree(dp->dccps_service_list);
260         dp->dccps_service_list = NULL;
261
262         if (dmsk->dccpms_send_ack_vector) {
263                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
264                 dp->dccps_hc_rx_ackvec = NULL;
265         }
266         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
267         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
268         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
269
270         /* clean up feature negotiation state */
271         dccp_feat_clean(dmsk);
272
273         return 0;
274 }
275
276 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
277
278 static inline int dccp_listen_start(struct sock *sk, int backlog)
279 {
280         struct dccp_sock *dp = dccp_sk(sk);
281
282         dp->dccps_role = DCCP_ROLE_LISTEN;
283         return inet_csk_listen_start(sk, backlog);
284 }
285
286 static inline int dccp_need_reset(int state)
287 {
288         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
289                state != DCCP_REQUESTING;
290 }
291
292 int dccp_disconnect(struct sock *sk, int flags)
293 {
294         struct inet_connection_sock *icsk = inet_csk(sk);
295         struct inet_sock *inet = inet_sk(sk);
296         int err = 0;
297         const int old_state = sk->sk_state;
298
299         if (old_state != DCCP_CLOSED)
300                 dccp_set_state(sk, DCCP_CLOSED);
301
302         /*
303          * This corresponds to the ABORT function of RFC793, sec. 3.8
304          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
305          */
306         if (old_state == DCCP_LISTEN) {
307                 inet_csk_listen_stop(sk);
308         } else if (dccp_need_reset(old_state)) {
309                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
310                 sk->sk_err = ECONNRESET;
311         } else if (old_state == DCCP_REQUESTING)
312                 sk->sk_err = ECONNRESET;
313
314         dccp_clear_xmit_timers(sk);
315         __skb_queue_purge(&sk->sk_receive_queue);
316         if (sk->sk_send_head != NULL) {
317                 __kfree_skb(sk->sk_send_head);
318                 sk->sk_send_head = NULL;
319         }
320
321         inet->dport = 0;
322
323         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
324                 inet_reset_saddr(sk);
325
326         sk->sk_shutdown = 0;
327         sock_reset_flag(sk, SOCK_DONE);
328
329         icsk->icsk_backoff = 0;
330         inet_csk_delack_init(sk);
331         __sk_dst_reset(sk);
332
333         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
334
335         sk->sk_error_report(sk);
336         return err;
337 }
338
339 EXPORT_SYMBOL_GPL(dccp_disconnect);
340
341 /*
342  *      Wait for a DCCP event.
343  *
344  *      Note that we don't need to lock the socket, as the upper poll layers
345  *      take care of normal races (between the test and the event) and we don't
346  *      go look at any of the socket buffers directly.
347  */
348 unsigned int dccp_poll(struct file *file, struct socket *sock,
349                        poll_table *wait)
350 {
351         unsigned int mask;
352         struct sock *sk = sock->sk;
353
354         poll_wait(file, sk->sk_sleep, wait);
355         if (sk->sk_state == DCCP_LISTEN)
356                 return inet_csk_listen_poll(sk);
357
358         /* Socket is not locked. We are protected from async events
359            by poll logic and correct handling of state changes
360            made by another threads is impossible in any case.
361          */
362
363         mask = 0;
364         if (sk->sk_err)
365                 mask = POLLERR;
366
367         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
368                 mask |= POLLHUP;
369         if (sk->sk_shutdown & RCV_SHUTDOWN)
370                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
371
372         /* Connected? */
373         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
374                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
375                         mask |= POLLIN | POLLRDNORM;
376
377                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
378                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
379                                 mask |= POLLOUT | POLLWRNORM;
380                         } else {  /* send SIGIO later */
381                                 set_bit(SOCK_ASYNC_NOSPACE,
382                                         &sk->sk_socket->flags);
383                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
384
385                                 /* Race breaker. If space is freed after
386                                  * wspace test but before the flags are set,
387                                  * IO signal will be lost.
388                                  */
389                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
390                                         mask |= POLLOUT | POLLWRNORM;
391                         }
392                 }
393         }
394         return mask;
395 }
396
397 EXPORT_SYMBOL_GPL(dccp_poll);
398
399 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
400 {
401         int rc = -ENOTCONN;
402
403         lock_sock(sk);
404
405         if (sk->sk_state == DCCP_LISTEN)
406                 goto out;
407
408         switch (cmd) {
409         case SIOCINQ: {
410                 struct sk_buff *skb;
411                 unsigned long amount = 0;
412
413                 skb = skb_peek(&sk->sk_receive_queue);
414                 if (skb != NULL) {
415                         /*
416                          * We will only return the amount of this packet since
417                          * that is all that will be read.
418                          */
419                         amount = skb->len;
420                 }
421                 rc = put_user(amount, (int __user *)arg);
422         }
423                 break;
424         default:
425                 rc = -ENOIOCTLCMD;
426                 break;
427         }
428 out:
429         release_sock(sk);
430         return rc;
431 }
432
433 EXPORT_SYMBOL_GPL(dccp_ioctl);
434
435 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
436                                    char __user *optval, int optlen)
437 {
438         struct dccp_sock *dp = dccp_sk(sk);
439         struct dccp_service_list *sl = NULL;
440
441         if (service == DCCP_SERVICE_INVALID_VALUE ||
442             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
443                 return -EINVAL;
444
445         if (optlen > sizeof(service)) {
446                 sl = kmalloc(optlen, GFP_KERNEL);
447                 if (sl == NULL)
448                         return -ENOMEM;
449
450                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
451                 if (copy_from_user(sl->dccpsl_list,
452                                    optval + sizeof(service),
453                                    optlen - sizeof(service)) ||
454                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
455                         kfree(sl);
456                         return -EFAULT;
457                 }
458         }
459
460         lock_sock(sk);
461         dp->dccps_service = service;
462
463         kfree(dp->dccps_service_list);
464
465         dp->dccps_service_list = sl;
466         release_sock(sk);
467         return 0;
468 }
469
470 /* byte 1 is feature.  the rest is the preference list */
471 static int dccp_setsockopt_change(struct sock *sk, int type,
472                                   struct dccp_so_feat __user *optval)
473 {
474         struct dccp_so_feat opt;
475         u8 *val;
476         int rc;
477
478         if (copy_from_user(&opt, optval, sizeof(opt)))
479                 return -EFAULT;
480         /*
481          * rfc4340: 6.1. Change Options
482          */
483         if (opt.dccpsf_len < 1)
484                 return -EINVAL;
485
486         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
487         if (!val)
488                 return -ENOMEM;
489
490         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
491                 rc = -EFAULT;
492                 goto out_free_val;
493         }
494
495         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
496                               val, opt.dccpsf_len, GFP_KERNEL);
497         if (rc)
498                 goto out_free_val;
499
500 out:
501         return rc;
502
503 out_free_val:
504         kfree(val);
505         goto out;
506 }
507
508 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
509                 char __user *optval, int optlen)
510 {
511         struct dccp_sock *dp = dccp_sk(sk);
512         int val, err = 0;
513
514         if (optlen < sizeof(int))
515                 return -EINVAL;
516
517         if (get_user(val, (int __user *)optval))
518                 return -EFAULT;
519
520         if (optname == DCCP_SOCKOPT_SERVICE)
521                 return dccp_setsockopt_service(sk, val, optval, optlen);
522
523         lock_sock(sk);
524         switch (optname) {
525         case DCCP_SOCKOPT_PACKET_SIZE:
526                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
527                 err = 0;
528                 break;
529         case DCCP_SOCKOPT_CHANGE_L:
530                 if (optlen != sizeof(struct dccp_so_feat))
531                         err = -EINVAL;
532                 else
533                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
534                                                      (struct dccp_so_feat __user *)
535                                                      optval);
536                 break;
537         case DCCP_SOCKOPT_CHANGE_R:
538                 if (optlen != sizeof(struct dccp_so_feat))
539                         err = -EINVAL;
540                 else
541                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
542                                                      (struct dccp_so_feat __user *)
543                                                      optval);
544                 break;
545         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
546                 if (dp->dccps_role != DCCP_ROLE_SERVER)
547                         err = -EOPNOTSUPP;
548                 else
549                         dp->dccps_server_timewait = (val != 0);
550                 break;
551         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
552                 if (val < 0 || val > 15)
553                         err = -EINVAL;
554                 else
555                         dp->dccps_pcslen = val;
556                 break;
557         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
558                 if (val < 0 || val > 15)
559                         err = -EINVAL;
560                 else {
561                         dp->dccps_pcrlen = val;
562                         /* FIXME: add feature negotiation,
563                          * ChangeL(MinimumChecksumCoverage, val) */
564                 }
565                 break;
566         default:
567                 err = -ENOPROTOOPT;
568                 break;
569         }
570
571         release_sock(sk);
572         return err;
573 }
574
575 int dccp_setsockopt(struct sock *sk, int level, int optname,
576                     char __user *optval, int optlen)
577 {
578         if (level != SOL_DCCP)
579                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
580                                                              optname, optval,
581                                                              optlen);
582         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
583 }
584
585 EXPORT_SYMBOL_GPL(dccp_setsockopt);
586
#ifdef CONFIG_COMPAT
/*
 * Compat setsockopt entry point: SOL_DCCP options use the same worker as the
 * native path, other levels go to the generic compat helper.
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_setsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
599
600 static int dccp_getsockopt_service(struct sock *sk, int len,
601                                    __be32 __user *optval,
602                                    int __user *optlen)
603 {
604         const struct dccp_sock *dp = dccp_sk(sk);
605         const struct dccp_service_list *sl;
606         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
607
608         lock_sock(sk);
609         if ((sl = dp->dccps_service_list) != NULL) {
610                 slen = sl->dccpsl_nr * sizeof(u32);
611                 total_len += slen;
612         }
613
614         err = -EINVAL;
615         if (total_len > len)
616                 goto out;
617
618         err = 0;
619         if (put_user(total_len, optlen) ||
620             put_user(dp->dccps_service, optval) ||
621             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
622                 err = -EFAULT;
623 out:
624         release_sock(sk);
625         return err;
626 }
627
628 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
629                     char __user *optval, int __user *optlen)
630 {
631         struct dccp_sock *dp;
632         int val, len;
633
634         if (get_user(len, optlen))
635                 return -EFAULT;
636
637         if (len < (int)sizeof(int))
638                 return -EINVAL;
639
640         dp = dccp_sk(sk);
641
642         switch (optname) {
643         case DCCP_SOCKOPT_PACKET_SIZE:
644                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
645                 return 0;
646         case DCCP_SOCKOPT_SERVICE:
647                 return dccp_getsockopt_service(sk, len,
648                                                (__be32 __user *)optval, optlen);
649         case DCCP_SOCKOPT_GET_CUR_MPS:
650                 val = dp->dccps_mss_cache;
651                 break;
652         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
653                 val = dp->dccps_server_timewait;
654                 break;
655         case DCCP_SOCKOPT_SEND_CSCOV:
656                 val = dp->dccps_pcslen;
657                 break;
658         case DCCP_SOCKOPT_RECV_CSCOV:
659                 val = dp->dccps_pcrlen;
660                 break;
661         case 128 ... 191:
662                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
663                                              len, (u32 __user *)optval, optlen);
664         case 192 ... 255:
665                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
666                                              len, (u32 __user *)optval, optlen);
667         default:
668                 return -ENOPROTOOPT;
669         }
670
671         len = sizeof(val);
672         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
673                 return -EFAULT;
674
675         return 0;
676 }
677
678 int dccp_getsockopt(struct sock *sk, int level, int optname,
679                     char __user *optval, int __user *optlen)
680 {
681         if (level != SOL_DCCP)
682                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
683                                                              optname, optval,
684                                                              optlen);
685         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
686 }
687
688 EXPORT_SYMBOL_GPL(dccp_getsockopt);
689
#ifdef CONFIG_COMPAT
/*
 * Compat getsockopt entry point: SOL_DCCP options use the same worker as the
 * native path, other levels go to the generic compat helper.
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_getsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
702
/**
 * dccp_sendmsg  -  queue one datagram for transmission
 * @iocb: not used by this function
 * @sk:   socket to send on
 * @msg:  message header; msg_flags and msg_iov are read
 * @len:  payload length in bytes
 *
 * Returns @len on success.  Fails with -EMSGSIZE when @len exceeds the cached
 * MSS, with -EAGAIN when the transmit queue already holds sysctl_dccp_tx_qlen
 * packets, and otherwise with the error reported by sk_stream_wait_connect(),
 * sock_alloc_send_skb() or memcpy_fromiovec().
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        /* checked before the socket lock is taken */
        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        /* a limit of 0 disables the queue length check */
        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        /* the socket lock is dropped for the duration of the allocation */
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        /* leave headroom for the headers, then copy in the payload */
        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk,0);
out_release:
        release_sock(sk);
        /* rc if it is non-zero (an error), otherwise the number of bytes */
        return rc ? : len;
out_discard:
        /* the skb was never queued, so it has to be freed here */
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
758
/**
 * dccp_recvmsg  -  receive one datagram
 * @iocb:     not used by this function
 * @sk:       socket to read from
 * @msg:      message header; data goes to msg_iov, MSG_TRUNC may be set in
 *            msg_flags
 * @len:      size of the user buffer in bytes
 * @nonblock: passed to sock_rcvtimeo() to select the receive timeout
 * @flags:    receive flags; only MSG_PEEK is examined here
 * @addr_len: not used by this function
 *
 * Looks at the packet at the head of the receive queue.  Data and DataAck
 * packets are copied to the user; Close, CloseReq and Reset packets end the
 * read with a return value of 0; any other packet type is dropped and the
 * socket status is checked before waiting for more data.
 *
 * Returns the number of bytes copied, 0 at end of connection, or a negative
 * errno.  Note that @len is a size_t which is also used to carry the negative
 * error codes back to the caller.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                /* only peek: the skb is removed further down, if at all */
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        /* a peek must not advance the closing handshake */
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        /* nothing for the user in this packet: drop it */
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb, 0);
                }
verify_sock_status:
                /* nothing to deliver: decide whether to give up or to wait */
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                /* no timeout left (or a non-blocking read) */
                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                /* wait for data, then look at the queue head again */
                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                /* copy at most one packet; flag it if the buffer is short */
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                /* the packet stays queued when the caller was only peeking */
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
864
865 int inet_dccp_listen(struct socket *sock, int backlog)
866 {
867         struct sock *sk = sock->sk;
868         unsigned char old_state;
869         int err;
870
871         lock_sock(sk);
872
873         err = -EINVAL;
874         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
875                 goto out;
876
877         old_state = sk->sk_state;
878         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
879                 goto out;
880
881         /* Really, if the socket is already in listen state
882          * we can only allow the backlog to be adjusted.
883          */
884         if (old_state != DCCP_LISTEN) {
885                 /*
886                  * FIXME: here it probably should be sk->sk_prot->listen_start
887                  * see tcp_listen_start
888                  */
889                 err = dccp_listen_start(sk, backlog);
890                 if (err)
891                         goto out;
892         }
893         sk->sk_max_ack_backlog = backlog;
894         err = 0;
895
896 out:
897         release_sock(sk);
898         return err;
899 }
900
901 EXPORT_SYMBOL_GPL(inet_dccp_listen);
902
903 static void dccp_terminate_connection(struct sock *sk)
904 {
905         u8 next_state = DCCP_CLOSED;
906
907         switch (sk->sk_state) {
908         case DCCP_PASSIVE_CLOSE:
909         case DCCP_PASSIVE_CLOSEREQ:
910                 dccp_finish_passive_close(sk);
911                 break;
912         case DCCP_PARTOPEN:
913                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
914                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
915                 /* fall through */
916         case DCCP_OPEN:
917                 dccp_send_close(sk, 1);
918
919                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
920                     !dccp_sk(sk)->dccps_server_timewait)
921                         next_state = DCCP_ACTIVE_CLOSEREQ;
922                 else
923                         next_state = DCCP_CLOSING;
924                 /* fall through */
925         default:
926                 dccp_set_state(sk, next_state);
927         }
928 }
929
930 void dccp_close(struct sock *sk, long timeout)
931 {
932         struct dccp_sock *dp = dccp_sk(sk);
933         struct sk_buff *skb;
934         u32 data_was_unread = 0;
935         int state;
936
937         lock_sock(sk);
938
939         sk->sk_shutdown = SHUTDOWN_MASK;
940
941         if (sk->sk_state == DCCP_LISTEN) {
942                 dccp_set_state(sk, DCCP_CLOSED);
943
944                 /* Special case. */
945                 inet_csk_listen_stop(sk);
946
947                 goto adjudge_to_death;
948         }
949
950         sk_stop_timer(sk, &dp->dccps_xmit_timer);
951
952         /*
953          * We need to flush the recv. buffs.  We do this only on the
954          * descriptor close, not protocol-sourced closes, because the
955           *reader process may not have drained the data yet!
956          */
957         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
958                 data_was_unread += skb->len;
959                 __kfree_skb(skb);
960         }
961
962         if (data_was_unread) {
963                 /* Unread data was tossed, send an appropriate Reset Code */
964                 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
965                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
966                 dccp_set_state(sk, DCCP_CLOSED);
967         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
968                 /* Check zero linger _after_ checking for unread data. */
969                 sk->sk_prot->disconnect(sk, 0);
970         } else if (sk->sk_state != DCCP_CLOSED) {
971                 dccp_terminate_connection(sk);
972         }
973
974         sk_stream_wait_close(sk, timeout);
975
976 adjudge_to_death:
977         state = sk->sk_state;
978         sock_hold(sk);
979         sock_orphan(sk);
980         atomic_inc(sk->sk_prot->orphan_count);
981
982         /*
983          * It is the last release_sock in its life. It will remove backlog.
984          */
985         release_sock(sk);
986         /*
987          * Now socket is owned by kernel and we acquire BH lock
988          * to finish close. No need to check for user refs.
989          */
990         local_bh_disable();
991         bh_lock_sock(sk);
992         BUG_TRAP(!sock_owned_by_user(sk));
993
994         /* Have we already been destroyed by a softirq or backlog? */
995         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
996                 goto out;
997
998         if (sk->sk_state == DCCP_CLOSED)
999                 inet_csk_destroy_sock(sk);
1000
1001         /* Otherwise, socket is reprieved until protocol close. */
1002
1003 out:
1004         bh_unlock_sock(sk);
1005         local_bh_enable();
1006         sock_put(sk);
1007 }
1008
1009 EXPORT_SYMBOL_GPL(dccp_close);
1010
/*
 * dccp_shutdown  -  shutdown() hook
 * @sk:  socket the call was made on (not used)
 * @how: shutdown mode requested by the caller
 *
 * Currently a stub: the request is only reported through the DCCP
 * debug log, the socket itself is left untouched.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1017
1018 static int __init dccp_mib_init(void)
1019 {
1020         int rc = -ENOMEM;
1021
1022         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
1023         if (dccp_statistics[0] == NULL)
1024                 goto out;
1025
1026         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
1027         if (dccp_statistics[1] == NULL)
1028                 goto out_free_one;
1029
1030         rc = 0;
1031 out:
1032         return rc;
1033 out_free_one:
1034         free_percpu(dccp_statistics[0]);
1035         dccp_statistics[0] = NULL;
1036         goto out;
1037
1038 }
1039
1040 static void dccp_mib_exit(void)
1041 {
1042         free_percpu(dccp_statistics[0]);
1043         free_percpu(dccp_statistics[1]);
1044         dccp_statistics[0] = dccp_statistics[1] = NULL;
1045 }
1046
/*
 * Module parameter (permission bits 0444). When non-zero, dccp_init()
 * derives the size of the established hash table from this value instead
 * of from the amount of physical memory.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug switch, only built when CONFIG_IP_DCCP_DEBUG is set. Exposed as
 * a module parameter (permission bits 0444) and exported GPL-only.
 * NOTE(review): presumably this is what gates dccp_pr_debug() output --
 * confirm against dccp.h.
 */
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1058
1059 static int __init dccp_init(void)
1060 {
1061         unsigned long goal;
1062         int ehash_order, bhash_order, i;
1063         int rc = -ENOBUFS;
1064
1065         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1066                      FIELD_SIZEOF(struct sk_buff, cb));
1067
1068         dccp_hashinfo.bind_bucket_cachep =
1069                 kmem_cache_create("dccp_bind_bucket",
1070                                   sizeof(struct inet_bind_bucket), 0,
1071                                   SLAB_HWCACHE_ALIGN, NULL);
1072         if (!dccp_hashinfo.bind_bucket_cachep)
1073                 goto out;
1074
1075         /*
1076          * Size and allocate the main established and bind bucket
1077          * hash tables.
1078          *
1079          * The methodology is similar to that of the buffer cache.
1080          */
1081         if (num_physpages >= (128 * 1024))
1082                 goal = num_physpages >> (21 - PAGE_SHIFT);
1083         else
1084                 goal = num_physpages >> (23 - PAGE_SHIFT);
1085
1086         if (thash_entries)
1087                 goal = (thash_entries *
1088                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1089         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1090                 ;
1091         do {
1092                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1093                                         sizeof(struct inet_ehash_bucket);
1094                 while (dccp_hashinfo.ehash_size &
1095                        (dccp_hashinfo.ehash_size - 1))
1096                         dccp_hashinfo.ehash_size--;
1097                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1098                         __get_free_pages(GFP_ATOMIC, ehash_order);
1099         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1100
1101         if (!dccp_hashinfo.ehash) {
1102                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1103                 goto out_free_bind_bucket_cachep;
1104         }
1105
1106         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1107                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1108                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1109         }
1110
1111         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1112                         goto out_free_dccp_ehash;
1113
1114         bhash_order = ehash_order;
1115
1116         do {
1117                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1118                                         sizeof(struct inet_bind_hashbucket);
1119                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1120                     bhash_order > 0)
1121                         continue;
1122                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1123                         __get_free_pages(GFP_ATOMIC, bhash_order);
1124         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1125
1126         if (!dccp_hashinfo.bhash) {
1127                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1128                 goto out_free_dccp_locks;
1129         }
1130
1131         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1132                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1133                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1134         }
1135
1136         rc = dccp_mib_init();
1137         if (rc)
1138                 goto out_free_dccp_bhash;
1139
1140         rc = dccp_ackvec_init();
1141         if (rc)
1142                 goto out_free_dccp_mib;
1143
1144         rc = dccp_sysctl_init();
1145         if (rc)
1146                 goto out_ackvec_exit;
1147
1148         dccp_timestamping_init();
1149 out:
1150         return rc;
1151 out_ackvec_exit:
1152         dccp_ackvec_exit();
1153 out_free_dccp_mib:
1154         dccp_mib_exit();
1155 out_free_dccp_bhash:
1156         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1157         dccp_hashinfo.bhash = NULL;
1158 out_free_dccp_locks:
1159         inet_ehash_locks_free(&dccp_hashinfo);
1160 out_free_dccp_ehash:
1161         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1162         dccp_hashinfo.ehash = NULL;
1163 out_free_bind_bucket_cachep:
1164         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1165         dccp_hashinfo.bind_bucket_cachep = NULL;
1166         goto out;
1167 }
1168
1169 static void __exit dccp_fini(void)
1170 {
1171         dccp_mib_exit();
1172         free_pages((unsigned long)dccp_hashinfo.bhash,
1173                    get_order(dccp_hashinfo.bhash_size *
1174                              sizeof(struct inet_bind_hashbucket)));
1175         free_pages((unsigned long)dccp_hashinfo.ehash,
1176                    get_order(dccp_hashinfo.ehash_size *
1177                              sizeof(struct inet_ehash_bucket)));
1178         inet_ehash_locks_free(&dccp_hashinfo);
1179         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1180         dccp_ackvec_exit();
1181         dccp_sysctl_exit();
1182 }
1183
/* Register dccp_init() and dccp_fini() as the module's init and exit hooks. */
module_init(dccp_init);
module_exit(dccp_fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");