kernel/bpf/syscall.c

   1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   2  *
   3  * This program is free software; you can redistribute it and/or
   4  * modify it under the terms of version 2 of the GNU General Public
   5  * License as published by the Free Software Foundation.
   6  *
   7  * This program is distributed in the hope that it will be useful, but
   8  * WITHOUT ANY WARRANTY; without even the implied warranty of
   9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10  * General Public License for more details.
  11  */
  12 #include <linux/bpf.h>
  13 #include <linux/bpf_trace.h>
  14 #include <linux/syscalls.h>
  15 #include <linux/slab.h>
  16 #include <linux/sched/signal.h>
  17 #include <linux/vmalloc.h>
  18 #include <linux/mmzone.h>
  19 #include <linux/anon_inodes.h>
  20 #include <linux/file.h>
  21 #include <linux/license.h>
  22 #include <linux/filter.h>
  23 #include <linux/version.h>
  24 #include <linux/kernel.h>
  25
/* Per-CPU counter. In this file it is incremented (with preemption
 * disabled) around map update and delete operations issued through the
 * syscall, see map_update_elem() and map_delete_elem().
 */
DEFINE_PER_CPU(int, bpf_prog_active);

/* When non-zero, the bpf syscall entry point returns -EPERM to callers
 * that do not have CAP_SYS_ADMIN.
 */
int sysctl_unprivileged_bpf_disabled __read_mostly;
  29
/* Table of map implementations, indexed by map type id. It is generated by
 * including <linux/bpf_types.h> with BPF_MAP_TYPE() expanding to a designated
 * initializer and BPF_PROG_TYPE() expanding to nothing, so only the map
 * entries are emitted. Ids without an entry stay NULL, which
 * find_and_alloc_map() treats as "unknown map type".
 */
static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops) \
        [_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};
  38
  39 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
  40 {
  41         struct bpf_map *map;
  42
  43         if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
  44             !bpf_map_types[attr->map_type])
  45                 return ERR_PTR(-EINVAL);
  46
  47         map = bpf_map_types[attr->map_type]->map_alloc(attr);
  48         if (IS_ERR(map))
  49                 return map;
  50         map->ops = bpf_map_types[attr->map_type];
  51         map->map_type = attr->map_type;
  52         return map;
  53 }
  54
  55 void *bpf_map_area_alloc(size_t size)
  56 {
  57         /* We definitely need __GFP_NORETRY, so OOM killer doesn't
  58          * trigger under memory pressure as we really just want to
  59          * fail instead.
  60          */
  61         const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
  62         void *area;
  63
  64         if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
  65                 area = kmalloc(size, GFP_USER | flags);
  66                 if (area != NULL)
  67                         return area;
  68         }
  69
  70         return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
  71 }
  72
  73 void bpf_map_area_free(void *area)
  74 {
  75         kvfree(area);
  76 }
  77
  78 int bpf_map_precharge_memlock(u32 pages)
  79 {
  80         struct user_struct *user = get_current_user();
  81         unsigned long memlock_limit, cur;
  82
  83         memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
  84         cur = atomic_long_read(&user->locked_vm);
  85         free_uid(user);
  86         if (cur + pages > memlock_limit)
  87                 return -EPERM;
  88         return 0;
  89 }
  90
  91 static int bpf_map_charge_memlock(struct bpf_map *map)
  92 {
  93         struct user_struct *user = get_current_user();
  94         unsigned long memlock_limit;
  95
  96         memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
  97
  98         atomic_long_add(map->pages, &user->locked_vm);
  99
 100         if (atomic_long_read(&user->locked_vm) > memlock_limit) {
 101                 atomic_long_sub(map->pages, &user->locked_vm);
 102                 free_uid(user);
 103                 return -EPERM;
 104         }
 105         map->user = user;
 106         return 0;
 107 }
 108
 109 static void bpf_map_uncharge_memlock(struct bpf_map *map)
 110 {
 111         struct user_struct *user = map->user;
 112
 113         atomic_long_sub(map->pages, &user->locked_vm);
 114         free_uid(user);
 115 }
 116
 117 /* called from workqueue */
 118 static void bpf_map_free_deferred(struct work_struct *work)
 119 {
 120         struct bpf_map *map = container_of(work, struct bpf_map, work);
 121
 122         bpf_map_uncharge_memlock(map);
 123         /* implementation dependent freeing */
 124         map->ops->map_free(map);
 125 }
 126
 127 static void bpf_map_put_uref(struct bpf_map *map)
 128 {
 129         if (atomic_dec_and_test(&map->usercnt)) {
 130                 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
 131                         bpf_fd_array_map_clear(map);
 132         }
 133 }
 134
 135 /* decrement map refcnt and schedule it for freeing via workqueue
 136  * (unrelying map implementation ops->map_free() might sleep)
 137  */
 138 void bpf_map_put(struct bpf_map *map)
 139 {
 140         if (atomic_dec_and_test(&map->refcnt)) {
 141                 INIT_WORK(&map->work, bpf_map_free_deferred);
 142                 schedule_work(&map->work);
 143         }
 144 }
 145
 146 void bpf_map_put_with_uref(struct bpf_map *map)
 147 {
 148         bpf_map_put_uref(map);
 149         bpf_map_put(map);
 150 }
 151
 152 static int bpf_map_release(struct inode *inode, struct file *filp)
 153 {
 154         struct bpf_map *map = filp->private_data;
 155
 156         if (map->ops->map_release)
 157                 map->ops->map_release(map, filp);
 158
 159         bpf_map_put_with_uref(map);
 160         return 0;
 161 }
 162
 163 #ifdef CONFIG_PROC_FS
 164 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 165 {
 166         const struct bpf_map *map = filp->private_data;
 167         const struct bpf_array *array;
 168         u32 owner_prog_type = 0;
 169
 170         if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
 171                 array = container_of(map, struct bpf_array, map);
 172                 owner_prog_type = array->owner_prog_type;
 173         }
 174
 175         seq_printf(m,
 176                    "map_type:\t%u\n"
 177                    "key_size:\t%u\n"
 178                    "value_size:\t%u\n"
 179                    "max_entries:\t%u\n"
 180                    "map_flags:\t%#x\n"
 181                    "memlock:\t%llu\n",
 182                    map->map_type,
 183                    map->key_size,
 184                    map->value_size,
 185                    map->max_entries,
 186                    map->map_flags,
 187                    map->pages * 1ULL << PAGE_SHIFT);
 188
 189         if (owner_prog_type)
 190                 seq_printf(m, "owner_prog_type:\t%u\n",
 191                            owner_prog_type);
 192 }
 193 #endif
 194
/* File operations of the anonymous inode backing a map fd. The address of
 * this table is also what __bpf_map_get() compares f_op against to decide
 * whether a file refers to a map.
 */
static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
        .show_fdinfo    = bpf_map_show_fdinfo,
#endif
        .release        = bpf_map_release,
};
 201
 202 int bpf_map_new_fd(struct bpf_map *map)
 203 {
 204         return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
 205                                 O_RDWR | O_CLOEXEC);
 206 }
 207
 208 /* helper macro to check that unused fields 'union bpf_attr' are zero */
 209 #define CHECK_ATTR(CMD) \
 210         memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
 211                    sizeof(attr->CMD##_LAST_FIELD), 0, \
 212                    sizeof(*attr) - \
 213                    offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
 214                    sizeof(attr->CMD##_LAST_FIELD)) != NULL
 215
 216 #define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
 217 /* called via syscall */
 218 static int map_create(union bpf_attr *attr)
 219 {
 220         struct bpf_map *map;
 221         int err;
 222
 223         err = CHECK_ATTR(BPF_MAP_CREATE);
 224         if (err)
 225                 return -EINVAL;
 226
 227         /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
 228         map = find_and_alloc_map(attr);
 229         if (IS_ERR(map))
 230                 return PTR_ERR(map);
 231
 232         atomic_set(&map->refcnt, 1);
 233         atomic_set(&map->usercnt, 1);
 234
 235         err = bpf_map_charge_memlock(map);
 236         if (err)
 237                 goto free_map_nouncharge;
 238
 239         err = bpf_map_new_fd(map);
 240         if (err < 0)
 241                 /* failed to allocate fd */
 242                 goto free_map;
 243
 244         trace_bpf_map_create(map, err);
 245         return err;
 246
 247 free_map:
 248         bpf_map_uncharge_memlock(map);
 249 free_map_nouncharge:
 250         map->ops->map_free(map);
 251         return err;
 252 }
 253
 254 /* if error is returned, fd is released.
 255  * On success caller should complete fd access with matching fdput()
 256  */
 257 struct bpf_map *__bpf_map_get(struct fd f)
 258 {
 259         if (!f.file)
 260                 return ERR_PTR(-EBADF);
 261         if (f.file->f_op != &bpf_map_fops) {
 262                 fdput(f);
 263                 return ERR_PTR(-EINVAL);
 264         }
 265
 266         return f.file->private_data;
 267 }
 268
 269 /* prog's and map's refcnt limit */
 270 #define BPF_MAX_REFCNT 32768
 271
 272 struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
 273 {
 274         if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
 275                 atomic_dec(&map->refcnt);
 276                 return ERR_PTR(-EBUSY);
 277         }
 278         if (uref)
 279                 atomic_inc(&map->usercnt);
 280         return map;
 281 }
 282
 283 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 284 {
 285         struct fd f = fdget(ufd);
 286         struct bpf_map *map;
 287
 288         map = __bpf_map_get(f);
 289         if (IS_ERR(map))
 290                 return map;
 291
 292         map = bpf_map_inc(map, true);
 293         fdput(f);
 294
 295         return map;
 296 }
 297
/* Weak default that always reports -ENOTSUPP; being __weak, it can be
 * replaced by a strong definition elsewhere. map_lookup_elem() calls it
 * for BPF_MAP_TYPE_STACK_TRACE maps.
 */
int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
        return -ENOTSUPP;
}
 302
 303 /* last field in 'union bpf_attr' used by this command */
 304 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
 305
 306 static int map_lookup_elem(union bpf_attr *attr)
 307 {
 308         void __user *ukey = u64_to_user_ptr(attr->key);
 309         void __user *uvalue = u64_to_user_ptr(attr->value);
 310         int ufd = attr->map_fd;
 311         struct bpf_map *map;
 312         void *key, *value, *ptr;
 313         u32 value_size;
 314         struct fd f;
 315         int err;
 316
 317         if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
 318                 return -EINVAL;
 319
 320         f = fdget(ufd);
 321         map = __bpf_map_get(f);
 322         if (IS_ERR(map))
 323                 return PTR_ERR(map);
 324
 325         err = -ENOMEM;
 326         key = kmalloc(map->key_size, GFP_USER);
 327         if (!key)
 328                 goto err_put;
 329
 330         err = -EFAULT;
 331         if (copy_from_user(key, ukey, map->key_size) != 0)
 332                 goto free_key;
 333
 334         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 335             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
 336             map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
 337                 value_size = round_up(map->value_size, 8) * num_possible_cpus();
 338         else
 339                 value_size = map->value_size;
 340
 341         err = -ENOMEM;
 342         value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
 343         if (!value)
 344                 goto free_key;
 345
 346         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 347             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 348                 err = bpf_percpu_hash_copy(map, key, value);
 349         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 350                 err = bpf_percpu_array_copy(map, key, value);
 351         } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
 352                 err = bpf_stackmap_copy(map, key, value);
 353         } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
 354                    map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
 355                 err = -ENOTSUPP;
 356         } else {
 357                 rcu_read_lock();
 358                 ptr = map->ops->map_lookup_elem(map, key);
 359                 if (ptr)
 360                         memcpy(value, ptr, value_size);
 361                 rcu_read_unlock();
 362                 err = ptr ? 0 : -ENOENT;
 363         }
 364
 365         if (err)
 366                 goto free_value;
 367
 368         err = -EFAULT;
 369         if (copy_to_user(uvalue, value, value_size) != 0)
 370                 goto free_value;
 371
 372         trace_bpf_map_lookup_elem(map, ufd, key, value);
 373         err = 0;
 374
 375 free_value:
 376         kfree(value);
 377 free_key:
 378         kfree(key);
 379 err_put:
 380         fdput(f);
 381         return err;
 382 }
 383
 384 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
 385
 386 static int map_update_elem(union bpf_attr *attr)
 387 {
 388         void __user *ukey = u64_to_user_ptr(attr->key);
 389         void __user *uvalue = u64_to_user_ptr(attr->value);
 390         int ufd = attr->map_fd;
 391         struct bpf_map *map;
 392         void *key, *value;
 393         u32 value_size;
 394         struct fd f;
 395         int err;
 396
 397         if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
 398                 return -EINVAL;
 399
 400         f = fdget(ufd);
 401         map = __bpf_map_get(f);
 402         if (IS_ERR(map))
 403                 return PTR_ERR(map);
 404
 405         err = -ENOMEM;
 406         key = kmalloc(map->key_size, GFP_USER);
 407         if (!key)
 408                 goto err_put;
 409
 410         err = -EFAULT;
 411         if (copy_from_user(key, ukey, map->key_size) != 0)
 412                 goto free_key;
 413
 414         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 415             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
 416             map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
 417                 value_size = round_up(map->value_size, 8) * num_possible_cpus();
 418         else
 419                 value_size = map->value_size;
 420
 421         err = -ENOMEM;
 422         value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
 423         if (!value)
 424                 goto free_key;
 425
 426         err = -EFAULT;
 427         if (copy_from_user(value, uvalue, value_size) != 0)
 428                 goto free_value;
 429
 430         /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
 431          * inside bpf map update or delete otherwise deadlocks are possible
 432          */
 433         preempt_disable();
 434         __this_cpu_inc(bpf_prog_active);
 435         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 436             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 437                 err = bpf_percpu_hash_update(map, key, value, attr->flags);
 438         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 439                 err = bpf_percpu_array_update(map, key, value, attr->flags);
 440         } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
 441                    map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
 442                    map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
 443                    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
 444                 rcu_read_lock();
 445                 err = bpf_fd_array_map_update_elem(map, f.file, key, value,
 446                                                    attr->flags);
 447                 rcu_read_unlock();
 448         } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
 449                 rcu_read_lock();
 450                 err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
 451                                                   attr->flags);
 452                 rcu_read_unlock();
 453         } else {
 454                 rcu_read_lock();
 455                 err = map->ops->map_update_elem(map, key, value, attr->flags);
 456                 rcu_read_unlock();
 457         }
 458         __this_cpu_dec(bpf_prog_active);
 459         preempt_enable();
 460
 461         if (!err)
 462                 trace_bpf_map_update_elem(map, ufd, key, value);
 463 free_value:
 464         kfree(value);
 465 free_key:
 466         kfree(key);
 467 err_put:
 468         fdput(f);
 469         return err;
 470 }
 471
 472 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key
 473
 474 static int map_delete_elem(union bpf_attr *attr)
 475 {
 476         void __user *ukey = u64_to_user_ptr(attr->key);
 477         int ufd = attr->map_fd;
 478         struct bpf_map *map;
 479         struct fd f;
 480         void *key;
 481         int err;
 482
 483         if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
 484                 return -EINVAL;
 485
 486         f = fdget(ufd);
 487         map = __bpf_map_get(f);
 488         if (IS_ERR(map))
 489                 return PTR_ERR(map);
 490
 491         err = -ENOMEM;
 492         key = kmalloc(map->key_size, GFP_USER);
 493         if (!key)
 494                 goto err_put;
 495
 496         err = -EFAULT;
 497         if (copy_from_user(key, ukey, map->key_size) != 0)
 498                 goto free_key;
 499
 500         preempt_disable();
 501         __this_cpu_inc(bpf_prog_active);
 502         rcu_read_lock();
 503         err = map->ops->map_delete_elem(map, key);
 504         rcu_read_unlock();
 505         __this_cpu_dec(bpf_prog_active);
 506         preempt_enable();
 507
 508         if (!err)
 509                 trace_bpf_map_delete_elem(map, ufd, key);
 510 free_key:
 511         kfree(key);
 512 err_put:
 513         fdput(f);
 514         return err;
 515 }
 516
 517 /* last field in 'union bpf_attr' used by this command */
 518 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
 519
 520 static int map_get_next_key(union bpf_attr *attr)
 521 {
 522         void __user *ukey = u64_to_user_ptr(attr->key);
 523         void __user *unext_key = u64_to_user_ptr(attr->next_key);
 524         int ufd = attr->map_fd;
 525         struct bpf_map *map;
 526         void *key, *next_key;
 527         struct fd f;
 528         int err;
 529
 530         if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
 531                 return -EINVAL;
 532
 533         f = fdget(ufd);
 534         map = __bpf_map_get(f);
 535         if (IS_ERR(map))
 536                 return PTR_ERR(map);
 537
 538         if (ukey) {
 539                 err = -ENOMEM;
 540                 key = kmalloc(map->key_size, GFP_USER);
 541                 if (!key)
 542                         goto err_put;
 543
 544                 err = -EFAULT;
 545                 if (copy_from_user(key, ukey, map->key_size) != 0)
 546                         goto free_key;
 547         } else {
 548                 key = NULL;
 549         }
 550
 551         err = -ENOMEM;
 552         next_key = kmalloc(map->key_size, GFP_USER);
 553         if (!next_key)
 554                 goto free_key;
 555
 556         rcu_read_lock();
 557         err = map->ops->map_get_next_key(map, key, next_key);
 558         rcu_read_unlock();
 559         if (err)
 560                 goto free_next_key;
 561
 562         err = -EFAULT;
 563         if (copy_to_user(unext_key, next_key, map->key_size) != 0)
 564                 goto free_next_key;
 565
 566         trace_bpf_map_next_key(map, ufd, key, next_key);
 567         err = 0;
 568
 569 free_next_key:
 570         kfree(next_key);
 571 free_key:
 572         kfree(key);
 573 err_put:
 574         fdput(f);
 575         return err;
 576 }
 577
/* Table of verifier ops, indexed by program type id. It is generated by
 * including <linux/bpf_types.h> with BPF_PROG_TYPE() expanding to a
 * designated initializer and BPF_MAP_TYPE() expanding to nothing, so only
 * the program entries are emitted. Ids without an entry stay NULL, which
 * find_prog_type() treats as "unknown program type".
 */
static const struct bpf_verifier_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _ops) \
        [_id] = &_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};
 586
 587 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
 588 {
 589         if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
 590                 return -EINVAL;
 591
 592         prog->aux->ops = bpf_prog_types[type];
 593         prog->type = type;
 594         return 0;
 595 }
 596
 597 /* drop refcnt on maps used by eBPF program and free auxilary data */
 598 static void free_used_maps(struct bpf_prog_aux *aux)
 599 {
 600         int i;
 601
 602         for (i = 0; i < aux->used_map_cnt; i++)
 603                 bpf_map_put(aux->used_maps[i]);
 604
 605         kfree(aux->used_maps);
 606 }
 607
 608 int __bpf_prog_charge(struct user_struct *user, u32 pages)
 609 {
 610         unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 611         unsigned long user_bufs;
 612
 613         if (user) {
 614                 user_bufs = atomic_long_add_return(pages, &user->locked_vm);
 615                 if (user_bufs > memlock_limit) {
 616                         atomic_long_sub(pages, &user->locked_vm);
 617                         return -EPERM;
 618                 }
 619         }
 620
 621         return 0;
 622 }
 623
 624 void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
 625 {
 626         if (user)
 627                 atomic_long_sub(pages, &user->locked_vm);
 628 }
 629
 630 static int bpf_prog_charge_memlock(struct bpf_prog *prog)
 631 {
 632         struct user_struct *user = get_current_user();
 633         int ret;
 634
 635         ret = __bpf_prog_charge(user, prog->pages);
 636         if (ret) {
 637                 free_uid(user);
 638                 return ret;
 639         }
 640
 641         prog->aux->user = user;
 642         return 0;
 643 }
 644
 645 static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
 646 {
 647         struct user_struct *user = prog->aux->user;
 648
 649         __bpf_prog_uncharge(user, prog->pages);
 650         free_uid(user);
 651 }
 652
 653 static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 654 {
 655         struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
 656
 657         free_used_maps(aux);
 658         bpf_prog_uncharge_memlock(aux->prog);
 659         bpf_prog_free(aux->prog);
 660 }
 661
 662 void bpf_prog_put(struct bpf_prog *prog)
 663 {
 664         if (atomic_dec_and_test(&prog->aux->refcnt)) {
 665                 trace_bpf_prog_put_rcu(prog);
 666                 bpf_prog_kallsyms_del(prog);
 667                 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
 668         }
 669 }
 670 EXPORT_SYMBOL_GPL(bpf_prog_put);
 671
 672 static int bpf_prog_release(struct inode *inode, struct file *filp)
 673 {
 674         struct bpf_prog *prog = filp->private_data;
 675
 676         bpf_prog_put(prog);
 677         return 0;
 678 }
 679
 680 #ifdef CONFIG_PROC_FS
 681 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 682 {
 683         const struct bpf_prog *prog = filp->private_data;
 684         char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
 685
 686         bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
 687         seq_printf(m,
 688                    "prog_type:\t%u\n"
 689                    "prog_jited:\t%u\n"
 690                    "prog_tag:\t%s\n"
 691                    "memlock:\t%llu\n",
 692                    prog->type,
 693                    prog->jited,
 694                    prog_tag,
 695                    prog->pages * 1ULL << PAGE_SHIFT);
 696 }
 697 #endif
 698
/* File operations of the anonymous inode backing a program fd. The address
 * of this table is also what ____bpf_prog_get() compares f_op against to
 * decide whether a file refers to a program.
 */
static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
        .show_fdinfo    = bpf_prog_show_fdinfo,
#endif
        .release        = bpf_prog_release,
};
 705
 706 int bpf_prog_new_fd(struct bpf_prog *prog)
 707 {
 708         return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
 709                                 O_RDWR | O_CLOEXEC);
 710 }
 711
 712 static struct bpf_prog *____bpf_prog_get(struct fd f)
 713 {
 714         if (!f.file)
 715                 return ERR_PTR(-EBADF);
 716         if (f.file->f_op != &bpf_prog_fops) {
 717                 fdput(f);
 718                 return ERR_PTR(-EINVAL);
 719         }
 720
 721         return f.file->private_data;
 722 }
 723
 724 struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
 725 {
 726         if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
 727                 atomic_sub(i, &prog->aux->refcnt);
 728                 return ERR_PTR(-EBUSY);
 729         }
 730         return prog;
 731 }
 732 EXPORT_SYMBOL_GPL(bpf_prog_add);
 733
 734 void bpf_prog_sub(struct bpf_prog *prog, int i)
 735 {
 736         /* Only to be used for undoing previous bpf_prog_add() in some
 737          * error path. We still know that another entity in our call
 738          * path holds a reference to the program, thus atomic_sub() can
 739          * be safely used in such cases!
 740          */
 741         WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
 742 }
 743 EXPORT_SYMBOL_GPL(bpf_prog_sub);
 744
 745 struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
 746 {
 747         return bpf_prog_add(prog, 1);
 748 }
 749 EXPORT_SYMBOL_GPL(bpf_prog_inc);
 750
 751 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
 752 {
 753         struct fd f = fdget(ufd);
 754         struct bpf_prog *prog;
 755
 756         prog = ____bpf_prog_get(f);
 757         if (IS_ERR(prog))
 758                 return prog;
 759         if (type && prog->type != *type) {
 760                 prog = ERR_PTR(-EINVAL);
 761                 goto out;
 762         }
 763
 764         prog = bpf_prog_inc(prog);
 765 out:
 766         fdput(f);
 767         return prog;
 768 }
 769
 770 struct bpf_prog *bpf_prog_get(u32 ufd)
 771 {
 772         return __bpf_prog_get(ufd, NULL);
 773 }
 774
 775 struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
 776 {
 777         struct bpf_prog *prog = __bpf_prog_get(ufd, &type);
 778
 779         if (!IS_ERR(prog))
 780                 trace_bpf_prog_get_type(prog);
 781         return prog;
 782 }
 783 EXPORT_SYMBOL_GPL(bpf_prog_get_type);
 784
 785 /* last field in 'union bpf_attr' used by this command */
 786 #define BPF_PROG_LOAD_LAST_FIELD kern_version
 787
 788 static int bpf_prog_load(union bpf_attr *attr)
 789 {
 790         enum bpf_prog_type type = attr->prog_type;
 791         struct bpf_prog *prog;
 792         int err;
 793         char license[128];
 794         bool is_gpl;
 795
 796         if (CHECK_ATTR(BPF_PROG_LOAD))
 797                 return -EINVAL;
 798
 799         /* copy eBPF program license from user space */
 800         if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
 801                               sizeof(license) - 1) < 0)
 802                 return -EFAULT;
 803         license[sizeof(license) - 1] = 0;
 804
 805         /* eBPF programs must be GPL compatible to use GPL-ed functions */
 806         is_gpl = license_is_gpl_compatible(license);
 807
 808         if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
 809                 return -E2BIG;
 810
 811         if (type == BPF_PROG_TYPE_KPROBE &&
 812             attr->kern_version != LINUX_VERSION_CODE)
 813                 return -EINVAL;
 814
 815         if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
 816                 return -EPERM;
 817
 818         /* plain bpf_prog allocation */
 819         prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
 820         if (!prog)
 821                 return -ENOMEM;
 822
 823         err = bpf_prog_charge_memlock(prog);
 824         if (err)
 825                 goto free_prog_nouncharge;
 826
 827         prog->len = attr->insn_cnt;
 828
 829         err = -EFAULT;
 830         if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
 831                            bpf_prog_insn_size(prog)) != 0)
 832                 goto free_prog;
 833
 834         prog->orig_prog = NULL;
 835         prog->jited = 0;
 836
 837         atomic_set(&prog->aux->refcnt, 1);
 838         prog->gpl_compatible = is_gpl ? 1 : 0;
 839
 840         /* find program type: socket_filter vs tracing_filter */
 841         err = find_prog_type(type, prog);
 842         if (err < 0)
 843                 goto free_prog;
 844
 845         /* run eBPF verifier */
 846         err = bpf_check(&prog, attr);
 847         if (err < 0)
 848                 goto free_used_maps;
 849
 850         /* eBPF program is ready to be JITed */
 851         prog = bpf_prog_select_runtime(prog, &err);
 852         if (err < 0)
 853                 goto free_used_maps;
 854
 855         err = bpf_prog_new_fd(prog);
 856         if (err < 0)
 857                 /* failed to allocate fd */
 858                 goto free_used_maps;
 859
 860         bpf_prog_kallsyms_add(prog);
 861         trace_bpf_prog_load(prog, err);
 862         return err;
 863
 864 free_used_maps:
 865         free_used_maps(prog->aux);
 866 free_prog:
 867         bpf_prog_uncharge_memlock(prog);
 868 free_prog_nouncharge:
 869         bpf_prog_free(prog);
 870         return err;
 871 }
 872
 873 #define BPF_OBJ_LAST_FIELD bpf_fd
 874
 875 static int bpf_obj_pin(const union bpf_attr *attr)
 876 {
 877         if (CHECK_ATTR(BPF_OBJ))
 878                 return -EINVAL;
 879
 880         return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
 881 }
 882
 883 static int bpf_obj_get(const union bpf_attr *attr)
 884 {
 885         if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
 886                 return -EINVAL;
 887
 888         return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
 889 }
 890
 891 #ifdef CONFIG_CGROUP_BPF
 892
 893 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 894
 895 static int bpf_prog_attach(const union bpf_attr *attr)
 896 {
 897         enum bpf_prog_type ptype;
 898         struct bpf_prog *prog;
 899         struct cgroup *cgrp;
 900         int ret;
 901
 902         if (!capable(CAP_NET_ADMIN))
 903                 return -EPERM;
 904
 905         if (CHECK_ATTR(BPF_PROG_ATTACH))
 906                 return -EINVAL;
 907
 908         if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
 909                 return -EINVAL;
 910
 911         switch (attr->attach_type) {
 912         case BPF_CGROUP_INET_INGRESS:
 913         case BPF_CGROUP_INET_EGRESS:
 914                 ptype = BPF_PROG_TYPE_CGROUP_SKB;
 915                 break;
 916         case BPF_CGROUP_INET_SOCK_CREATE:
 917                 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
 918                 break;
 919         default:
 920                 return -EINVAL;
 921         }
 922
 923         prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
 924         if (IS_ERR(prog))
 925                 return PTR_ERR(prog);
 926
 927         cgrp = cgroup_get_from_fd(attr->target_fd);
 928         if (IS_ERR(cgrp)) {
 929                 bpf_prog_put(prog);
 930                 return PTR_ERR(cgrp);
 931         }
 932
 933         ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
 934                                 attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
 935         if (ret)
 936                 bpf_prog_put(prog);
 937         cgroup_put(cgrp);
 938
 939         return ret;
 940 }
 941
 942 #define BPF_PROG_DETACH_LAST_FIELD attach_type
 943
 944 static int bpf_prog_detach(const union bpf_attr *attr)
 945 {
 946         struct cgroup *cgrp;
 947         int ret;
 948
 949         if (!capable(CAP_NET_ADMIN))
 950                 return -EPERM;
 951
 952         if (CHECK_ATTR(BPF_PROG_DETACH))
 953                 return -EINVAL;
 954
 955         switch (attr->attach_type) {
 956         case BPF_CGROUP_INET_INGRESS:
 957         case BPF_CGROUP_INET_EGRESS:
 958         case BPF_CGROUP_INET_SOCK_CREATE:
 959                 cgrp = cgroup_get_from_fd(attr->target_fd);
 960                 if (IS_ERR(cgrp))
 961                         return PTR_ERR(cgrp);
 962
 963                 ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
 964                 cgroup_put(cgrp);
 965                 break;
 966
 967         default:
 968                 return -EINVAL;
 969         }
 970
 971         return ret;
 972 }
 973 #endif /* CONFIG_CGROUP_BPF */
 974
 975 #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
 976
 977 static int bpf_prog_test_run(const union bpf_attr *attr,
 978                              union bpf_attr __user *uattr)
 979 {
 980         struct bpf_prog *prog;
 981         int ret = -ENOTSUPP;
 982
 983         if (CHECK_ATTR(BPF_PROG_TEST_RUN))
 984                 return -EINVAL;
 985
 986         prog = bpf_prog_get(attr->test.prog_fd);
 987         if (IS_ERR(prog))
 988                 return PTR_ERR(prog);
 989
 990         if (prog->aux->ops->test_run)
 991                 ret = prog->aux->ops->test_run(prog, attr, uattr);
 992
 993         bpf_prog_put(prog);
 994         return ret;
 995 }
 996
 997 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 998 {
 999         union bpf_attr attr = {};
1000         int err;
1001
1002         if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
1003                 return -EPERM;
1004
1005         if (!access_ok(VERIFY_READ, uattr, 1))
1006                 return -EFAULT;
1007
1008         if (size > PAGE_SIZE)   /* silly large */
1009                 return -E2BIG;
1010
1011         /* If we're handed a bigger struct than we know of,
1012          * ensure all the unknown bits are 0 - i.e. new
1013          * user-space does not rely on any kernel feature
1014          * extensions we dont know about yet.
1015          */
1016         if (size > sizeof(attr)) {
1017                 unsigned char __user *addr;
1018                 unsigned char __user *end;
1019                 unsigned char val;
1020
1021                 addr = (void __user *)uattr + sizeof(attr);
1022                 end  = (void __user *)uattr + size;
1023
1024                 for (; addr < end; addr++) {
1025                         err = get_user(val, addr);
1026                         if (err)
1027                                 return err;
1028                         if (val)
1029                                 return -E2BIG;
1030                 }
1031                 size = sizeof(attr);
1032         }
1033
1034         /* copy attributes from user space, may be less than sizeof(bpf_attr) */
1035         if (copy_from_user(&attr, uattr, size) != 0)
1036                 return -EFAULT;
1037
1038         switch (cmd) {
1039         case BPF_MAP_CREATE:
1040                 err = map_create(&attr);
1041                 break;
1042         case BPF_MAP_LOOKUP_ELEM:
1043                 err = map_lookup_elem(&attr);
1044                 break;
1045         case BPF_MAP_UPDATE_ELEM:
1046                 err = map_update_elem(&attr);
1047                 break;
1048         case BPF_MAP_DELETE_ELEM:
1049                 err = map_delete_elem(&attr);
1050                 break;
1051         case BPF_MAP_GET_NEXT_KEY:
1052                 err = map_get_next_key(&attr);
1053                 break;
1054         case BPF_PROG_LOAD:
1055                 err = bpf_prog_load(&attr);
1056                 break;
1057         case BPF_OBJ_PIN:
1058                 err = bpf_obj_pin(&attr);
1059                 break;
1060         case BPF_OBJ_GET:
1061                 err = bpf_obj_get(&attr);
1062                 break;
1063 #ifdef CONFIG_CGROUP_BPF
1064         case BPF_PROG_ATTACH:
1065                 err = bpf_prog_attach(&attr);
1066                 break;
1067         case BPF_PROG_DETACH:
1068                 err = bpf_prog_detach(&attr);
1069                 break;
1070 #endif
1071         case BPF_PROG_TEST_RUN:
1072                 err = bpf_prog_test_run(&attr, uattr);
1073                 break;
1074         default:
1075                 err = -EINVAL;
1076                 break;
1077         }
1078
1079         return err;
1080 }