drivers/gpu/drm/amd/scheduler/gpu_scheduler.c

   1 /*
   2  * Copyright 2015 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  *
  23  */
  24 #include <linux/kthread.h>
  25 #include <linux/wait.h>
  26 #include <linux/sched.h>
  27 #include <drm/drmP.h>
  28 #include "gpu_scheduler.h"
  29
  30 #define CREATE_TRACE_POINTS
  31 #include "gpu_sched_trace.h"
  32
/* Forward declarations for helpers that are used before they are defined. */
static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);

/*
 * Slab cache sized for struct amd_sched_fence. It is created by the
 * amd_sched_init() call that raises sched_fence_slab_ref to 1 and destroyed
 * by the amd_sched_fini() call that drops the counter back to zero.
 */
struct kmem_cache *sched_fence_slab;
/* Incremented by every amd_sched_init(), decremented by every amd_sched_fini(). */
atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);
  38
  39 /* Initialize a given run queue struct */
  40 static void amd_sched_rq_init(struct amd_sched_rq *rq)
  41 {
  42         spin_lock_init(&rq->lock);
  43         INIT_LIST_HEAD(&rq->entities);
  44         rq->current_entity = NULL;
  45 }
  46
  47 static void amd_sched_rq_add_entity(struct amd_sched_rq *rq,
  48                                     struct amd_sched_entity *entity)
  49 {
  50         if (!list_empty(&entity->list))
  51                 return;
  52         spin_lock(&rq->lock);
  53         list_add_tail(&entity->list, &rq->entities);
  54         spin_unlock(&rq->lock);
  55 }
  56
  57 static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
  58                                        struct amd_sched_entity *entity)
  59 {
  60         if (list_empty(&entity->list))
  61                 return;
  62         spin_lock(&rq->lock);
  63         list_del_init(&entity->list);
  64         if (rq->current_entity == entity)
  65                 rq->current_entity = NULL;
  66         spin_unlock(&rq->lock);
  67 }
  68
  69 /**
  70  * Select an entity which could provide a job to run
  71  *
  72  * @rq          The run queue to check.
  73  *
  74  * Try to find a ready entity, returns NULL if none found.
  75  */
  76 static struct amd_sched_entity *
  77 amd_sched_rq_select_entity(struct amd_sched_rq *rq)
  78 {
  79         struct amd_sched_entity *entity;
  80
  81         spin_lock(&rq->lock);
  82
  83         entity = rq->current_entity;
  84         if (entity) {
  85                 list_for_each_entry_continue(entity, &rq->entities, list) {
  86                         if (amd_sched_entity_is_ready(entity)) {
  87                                 rq->current_entity = entity;
  88                                 spin_unlock(&rq->lock);
  89                                 return entity;
  90                         }
  91                 }
  92         }
  93
  94         list_for_each_entry(entity, &rq->entities, list) {
  95
  96                 if (amd_sched_entity_is_ready(entity)) {
  97                         rq->current_entity = entity;
  98                         spin_unlock(&rq->lock);
  99                         return entity;
 100                 }
 101
 102                 if (entity == rq->current_entity)
 103                         break;
 104         }
 105
 106         spin_unlock(&rq->lock);
 107
 108         return NULL;
 109 }
 110
 111 /**
 112  * Init a context entity used by scheduler when submit to HW ring.
 113  *
 114  * @sched       The pointer to the scheduler
 115  * @entity      The pointer to a valid amd_sched_entity
 116  * @rq          The run queue this entity belongs
 117  * @kernel      If this is an entity for the kernel
 118  * @jobs        The max number of jobs in the job queue
 119  *
 120  * return 0 if succeed. negative error code on failure
 121 */
 122 int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
 123                           struct amd_sched_entity *entity,
 124                           struct amd_sched_rq *rq,
 125                           uint32_t jobs)
 126 {
 127         int r;
 128
 129         if (!(sched && entity && rq))
 130                 return -EINVAL;
 131
 132         memset(entity, 0, sizeof(struct amd_sched_entity));
 133         INIT_LIST_HEAD(&entity->list);
 134         entity->rq = rq;
 135         entity->sched = sched;
 136
 137         spin_lock_init(&entity->queue_lock);
 138         r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL);
 139         if (r)
 140                 return r;
 141
 142         atomic_set(&entity->fence_seq, 0);
 143         entity->fence_context = fence_context_alloc(2);
 144
 145         return 0;
 146 }
 147
 148 /**
 149  * Query if entity is initialized
 150  *
 151  * @sched       Pointer to scheduler instance
 152  * @entity      The pointer to a valid scheduler entity
 153  *
 154  * return true if entity is initialized, false otherwise
 155 */
 156 static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,
 157                                             struct amd_sched_entity *entity)
 158 {
 159         return entity->sched == sched &&
 160                 entity->rq != NULL;
 161 }
 162
 163 /**
 164  * Check if entity is idle
 165  *
 166  * @entity      The pointer to a valid scheduler entity
 167  *
 168  * Return true if entity don't has any unscheduled jobs.
 169  */
 170 static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
 171 {
 172         rmb();
 173         if (kfifo_is_empty(&entity->job_queue))
 174                 return true;
 175
 176         return false;
 177 }
 178
 179 /**
 180  * Check if entity is ready
 181  *
 182  * @entity      The pointer to a valid scheduler entity
 183  *
 184  * Return true if entity could provide a job.
 185  */
 186 static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)
 187 {
 188         if (kfifo_is_empty(&entity->job_queue))
 189                 return false;
 190
 191         if (ACCESS_ONCE(entity->dependency))
 192                 return false;
 193
 194         return true;
 195 }
 196
 197 /**
 198  * Destroy a context entity
 199  *
 200  * @sched       Pointer to scheduler instance
 201  * @entity      The pointer to a valid scheduler entity
 202  *
 203  * Cleanup and free the allocated resources.
 204  */
 205 void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
 206                            struct amd_sched_entity *entity)
 207 {
 208         struct amd_sched_rq *rq = entity->rq;
 209
 210         if (!amd_sched_entity_is_initialized(sched, entity))
 211                 return;
 212
 213         /**
 214          * The client will not queue more IBs during this fini, consume existing
 215          * queued IBs
 216         */
 217         wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity));
 218
 219         amd_sched_rq_remove_entity(rq, entity);
 220         kfifo_free(&entity->job_queue);
 221 }
 222
 223 static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb)
 224 {
 225         struct amd_sched_entity *entity =
 226                 container_of(cb, struct amd_sched_entity, cb);
 227         entity->dependency = NULL;
 228         fence_put(f);
 229         amd_sched_wakeup(entity->sched);
 230 }
 231
 232 static void amd_sched_entity_clear_dep(struct fence *f, struct fence_cb *cb)
 233 {
 234         struct amd_sched_entity *entity =
 235                 container_of(cb, struct amd_sched_entity, cb);
 236         entity->dependency = NULL;
 237         fence_put(f);
 238 }
 239
 240 static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 241 {
 242         struct amd_gpu_scheduler *sched = entity->sched;
 243         struct fence * fence = entity->dependency;
 244         struct amd_sched_fence *s_fence;
 245
 246         if (fence->context == entity->fence_context) {
 247                 /* We can ignore fences from ourself */
 248                 fence_put(entity->dependency);
 249                 return false;
 250         }
 251
 252         s_fence = to_amd_sched_fence(fence);
 253         if (s_fence && s_fence->sched == sched) {
 254
 255                 /*
 256                  * Fence is from the same scheduler, only need to wait for
 257                  * it to be scheduled
 258                  */
 259                 fence = fence_get(&s_fence->scheduled);
 260                 fence_put(entity->dependency);
 261                 entity->dependency = fence;
 262                 if (!fence_add_callback(fence, &entity->cb,
 263                                         amd_sched_entity_clear_dep))
 264                         return true;
 265
 266                 /* Ignore it when it is already scheduled */
 267                 fence_put(fence);
 268                 return false;
 269         }
 270
 271         if (!fence_add_callback(entity->dependency, &entity->cb,
 272                                 amd_sched_entity_wakeup))
 273                 return true;
 274
 275         fence_put(entity->dependency);
 276         return false;
 277 }
 278
 279 static struct amd_sched_job *
 280 amd_sched_entity_pop_job(struct amd_sched_entity *entity)
 281 {
 282         struct amd_gpu_scheduler *sched = entity->sched;
 283         struct amd_sched_job *sched_job;
 284
 285         if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job)))
 286                 return NULL;
 287
 288         while ((entity->dependency = sched->ops->dependency(sched_job)))
 289                 if (amd_sched_entity_add_dependency_cb(entity))
 290                         return NULL;
 291
 292         return sched_job;
 293 }
 294
 295 /**
 296  * Helper to submit a job to the job queue
 297  *
 298  * @sched_job           The pointer to job required to submit
 299  *
 300  * Returns true if we could submit the job.
 301  */
 302 static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
 303 {
 304         struct amd_gpu_scheduler *sched = sched_job->sched;
 305         struct amd_sched_entity *entity = sched_job->s_entity;
 306         bool added, first = false;
 307
 308         spin_lock(&entity->queue_lock);
 309         added = kfifo_in(&entity->job_queue, &sched_job,
 310                         sizeof(sched_job)) == sizeof(sched_job);
 311
 312         if (added && kfifo_len(&entity->job_queue) == sizeof(sched_job))
 313                 first = true;
 314
 315         spin_unlock(&entity->queue_lock);
 316
 317         /* first job wakes up scheduler */
 318         if (first) {
 319                 /* Add the entity to the run queue */
 320                 amd_sched_rq_add_entity(entity->rq, entity);
 321                 amd_sched_wakeup(sched);
 322         }
 323         return added;
 324 }
 325
 326 /* job_finish is called after hw fence signaled, and
 327  * the job had already been deleted from ring_mirror_list
 328  */
 329 static void amd_sched_job_finish(struct work_struct *work)
 330 {
 331         struct amd_sched_job *s_job = container_of(work, struct amd_sched_job,
 332                                                    finish_work);
 333         struct amd_gpu_scheduler *sched = s_job->sched;
 334
 335         /* remove job from ring_mirror_list */
 336         spin_lock(&sched->job_list_lock);
 337         list_del_init(&s_job->node);
 338         if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
 339                 struct amd_sched_job *next;
 340
 341                 spin_unlock(&sched->job_list_lock);
 342                 cancel_delayed_work_sync(&s_job->work_tdr);
 343                 spin_lock(&sched->job_list_lock);
 344
 345                 /* queue TDR for next job */
 346                 next = list_first_entry_or_null(&sched->ring_mirror_list,
 347                                                 struct amd_sched_job, node);
 348
 349                 if (next)
 350                         schedule_delayed_work(&next->work_tdr, sched->timeout);
 351         }
 352         spin_unlock(&sched->job_list_lock);
 353         sched->ops->free_job(s_job);
 354 }
 355
 356 static void amd_sched_job_finish_cb(struct fence *f, struct fence_cb *cb)
 357 {
 358         struct amd_sched_job *job = container_of(cb, struct amd_sched_job,
 359                                                  finish_cb);
 360         schedule_work(&job->finish_work);
 361 }
 362
 363 static void amd_sched_job_begin(struct amd_sched_job *s_job)
 364 {
 365         struct amd_gpu_scheduler *sched = s_job->sched;
 366
 367         spin_lock(&sched->job_list_lock);
 368         list_add_tail(&s_job->node, &sched->ring_mirror_list);
 369         if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
 370             list_first_entry_or_null(&sched->ring_mirror_list,
 371                                      struct amd_sched_job, node) == s_job)
 372                 schedule_delayed_work(&s_job->work_tdr, sched->timeout);
 373         spin_unlock(&sched->job_list_lock);
 374 }
 375
 376 static void amd_sched_job_timedout(struct work_struct *work)
 377 {
 378         struct amd_sched_job *job = container_of(work, struct amd_sched_job,
 379                                                  work_tdr.work);
 380
 381         job->sched->ops->timedout_job(job);
 382 }
 383
 384 void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
 385 {
 386         struct amd_sched_job *s_job;
 387
 388         spin_lock(&sched->job_list_lock);
 389         list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
 390                 if (fence_remove_callback(s_job->s_fence->parent, &s_job->s_fence->cb)) {
 391                         fence_put(s_job->s_fence->parent);
 392                         s_job->s_fence->parent = NULL;
 393                 }
 394         }
 395         spin_unlock(&sched->job_list_lock);
 396 }
 397
 398 /**
 399  * Submit a job to the job queue
 400  *
 401  * @sched_job           The pointer to job required to submit
 402  *
 403  * Returns 0 for success, negative error code otherwise.
 404  */
 405 void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 406 {
 407         struct amd_sched_entity *entity = sched_job->s_entity;
 408
 409         trace_amd_sched_job(sched_job);
 410         fence_add_callback(&sched_job->s_fence->finished, &sched_job->finish_cb,
 411                            amd_sched_job_finish_cb);
 412         wait_event(entity->sched->job_scheduled,
 413                    amd_sched_entity_in(sched_job));
 414 }
 415
 416 /* init a sched_job with basic field */
 417 int amd_sched_job_init(struct amd_sched_job *job,
 418                        struct amd_gpu_scheduler *sched,
 419                        struct amd_sched_entity *entity,
 420                        void *owner)
 421 {
 422         job->sched = sched;
 423         job->s_entity = entity;
 424         job->s_fence = amd_sched_fence_create(entity, owner);
 425         if (!job->s_fence)
 426                 return -ENOMEM;
 427
 428         INIT_WORK(&job->finish_work, amd_sched_job_finish);
 429         INIT_LIST_HEAD(&job->node);
 430         INIT_DELAYED_WORK(&job->work_tdr, amd_sched_job_timedout);
 431
 432         return 0;
 433 }
 434
 435 /**
 436  * Return ture if we can push more jobs to the hw.
 437  */
 438 static bool amd_sched_ready(struct amd_gpu_scheduler *sched)
 439 {
 440         return atomic_read(&sched->hw_rq_count) <
 441                 sched->hw_submission_limit;
 442 }
 443
 444 /**
 445  * Wake up the scheduler when it is ready
 446  */
 447 static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
 448 {
 449         if (amd_sched_ready(sched))
 450                 wake_up_interruptible(&sched->wake_up_worker);
 451 }
 452
 453 /**
 454  * Select next entity to process
 455 */
 456 static struct amd_sched_entity *
 457 amd_sched_select_entity(struct amd_gpu_scheduler *sched)
 458 {
 459         struct amd_sched_entity *entity;
 460         int i;
 461
 462         if (!amd_sched_ready(sched))
 463                 return NULL;
 464
 465         /* Kernel run queue has higher priority than normal run queue*/
 466         for (i = 0; i < AMD_SCHED_MAX_PRIORITY; i++) {
 467                 entity = amd_sched_rq_select_entity(&sched->sched_rq[i]);
 468                 if (entity)
 469                         break;
 470         }
 471
 472         return entity;
 473 }
 474
 475 static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
 476 {
 477         struct amd_sched_fence *s_fence =
 478                 container_of(cb, struct amd_sched_fence, cb);
 479         struct amd_gpu_scheduler *sched = s_fence->sched;
 480
 481         atomic_dec(&sched->hw_rq_count);
 482         amd_sched_fence_finished(s_fence);
 483
 484         trace_amd_sched_process_job(s_fence);
 485         fence_put(&s_fence->finished);
 486         wake_up_interruptible(&sched->wake_up_worker);
 487 }
 488
 489 static bool amd_sched_blocked(struct amd_gpu_scheduler *sched)
 490 {
 491         if (kthread_should_park()) {
 492                 kthread_parkme();
 493                 return true;
 494         }
 495
 496         return false;
 497 }
 498
 499 static int amd_sched_main(void *param)
 500 {
 501         struct sched_param sparam = {.sched_priority = 1};
 502         struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
 503         int r, count;
 504
 505         sched_setscheduler(current, SCHED_FIFO, &sparam);
 506
 507         while (!kthread_should_stop()) {
 508                 struct amd_sched_entity *entity = NULL;
 509                 struct amd_sched_fence *s_fence;
 510                 struct amd_sched_job *sched_job;
 511                 struct fence *fence;
 512
 513                 wait_event_interruptible(sched->wake_up_worker,
 514                                          (!amd_sched_blocked(sched) &&
 515                                           (entity = amd_sched_select_entity(sched))) ||
 516                                          kthread_should_stop());
 517
 518                 if (!entity)
 519                         continue;
 520
 521                 sched_job = amd_sched_entity_pop_job(entity);
 522                 if (!sched_job)
 523                         continue;
 524
 525                 s_fence = sched_job->s_fence;
 526
 527                 atomic_inc(&sched->hw_rq_count);
 528                 amd_sched_job_begin(sched_job);
 529
 530                 fence = sched->ops->run_job(sched_job);
 531                 amd_sched_fence_scheduled(s_fence);
 532                 if (fence) {
 533                         s_fence->parent = fence_get(fence);
 534                         r = fence_add_callback(fence, &s_fence->cb,
 535                                                amd_sched_process_job);
 536                         if (r == -ENOENT)
 537                                 amd_sched_process_job(fence, &s_fence->cb);
 538                         else if (r)
 539                                 DRM_ERROR("fence add callback failed (%d)\n",
 540                                           r);
 541                         fence_put(fence);
 542                 } else {
 543                         DRM_ERROR("Failed to run job!\n");
 544                         amd_sched_process_job(NULL, &s_fence->cb);
 545                 }
 546
 547                 count = kfifo_out(&entity->job_queue, &sched_job,
 548                                 sizeof(sched_job));
 549                 WARN_ON(count != sizeof(sched_job));
 550                 wake_up(&sched->job_scheduled);
 551         }
 552         return 0;
 553 }
 554
 555 /**
 556  * Init a gpu scheduler instance
 557  *
 558  * @sched               The pointer to the scheduler
 559  * @ops                 The backend operations for this scheduler.
 560  * @hw_submissions      Number of hw submissions to do.
 561  * @name                Name used for debugging
 562  *
 563  * Return 0 on success, otherwise error code.
 564 */
 565 int amd_sched_init(struct amd_gpu_scheduler *sched,
 566                    const struct amd_sched_backend_ops *ops,
 567                    unsigned hw_submission, long timeout, const char *name)
 568 {
 569         int i;
 570         sched->ops = ops;
 571         sched->hw_submission_limit = hw_submission;
 572         sched->name = name;
 573         sched->timeout = timeout;
 574         for (i = 0; i < AMD_SCHED_MAX_PRIORITY; i++)
 575                 amd_sched_rq_init(&sched->sched_rq[i]);
 576
 577         init_waitqueue_head(&sched->wake_up_worker);
 578         init_waitqueue_head(&sched->job_scheduled);
 579         INIT_LIST_HEAD(&sched->ring_mirror_list);
 580         spin_lock_init(&sched->job_list_lock);
 581         atomic_set(&sched->hw_rq_count, 0);
 582         if (atomic_inc_return(&sched_fence_slab_ref) == 1) {
 583                 sched_fence_slab = kmem_cache_create(
 584                         "amd_sched_fence", sizeof(struct amd_sched_fence), 0,
 585                         SLAB_HWCACHE_ALIGN, NULL);
 586                 if (!sched_fence_slab)
 587                         return -ENOMEM;
 588         }
 589
 590         /* Each scheduler will run on a seperate kernel thread */
 591         sched->thread = kthread_run(amd_sched_main, sched, sched->name);
 592         if (IS_ERR(sched->thread)) {
 593                 DRM_ERROR("Failed to create scheduler for %s.\n", name);
 594                 return PTR_ERR(sched->thread);
 595         }
 596
 597         return 0;
 598 }
 599
 600 /**
 601  * Destroy a gpu scheduler
 602  *
 603  * @sched       The pointer to the scheduler
 604  */
 605 void amd_sched_fini(struct amd_gpu_scheduler *sched)
 606 {
 607         if (sched->thread)
 608                 kthread_stop(sched->thread);
 609         if (atomic_dec_and_test(&sched_fence_slab_ref))
 610                 kmem_cache_destroy(sched_fence_slab);
 611 }