Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1462
1 file changed, 963 insertions, 499 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b17771dd5ce7..6904d794d60a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -134,48 +134,6 @@ struct amdgpu_prt_cb { }; /** - * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm - * - * @base: base structure for tracking BO usage in a VM - * @vm: vm to which bo is to be added - * @bo: amdgpu buffer object - * - * Initialize a bo_va_base structure and add it to the appropriate lists - * - */ -static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, - struct amdgpu_vm *vm, - struct amdgpu_bo *bo) -{ - base->vm = vm; - base->bo = bo; - INIT_LIST_HEAD(&base->bo_list); - INIT_LIST_HEAD(&base->vm_status); - - if (!bo) - return; - list_add_tail(&base->bo_list, &bo->va); - - if (bo->tbo.type == ttm_bo_type_kernel) - list_move(&base->vm_status, &vm->relocated); - - if (bo->tbo.resv != vm->root.base.bo->tbo.resv) - return; - - if (bo->preferred_domains & - amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) - return; - - /* - * we checked all the prerequisites, but it looks like this per vm bo - * is currently evicted. add the bo to the evicted list to make sure it - * is validated on next vm use to avoid fault. - * */ - list_move_tail(&base->vm_status, &vm->evicted); - base->moved = true; -} - -/** * amdgpu_vm_level_shift - return the addr shift for each level * * @adev: amdgpu_device pointer @@ -233,6 +191,26 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, } /** + * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The mask to extract the entry number of a PD/PT from an address. + */ +static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev, + unsigned int level) +{ + if (level <= adev->vm_manager.root_level) + return 0xffffffff; + else if (level != AMDGPU_VM_PTB) + return 0x1ff; + else + return AMDGPU_VM_PTE_COUNT(adev) - 1; +} + +/** * amdgpu_vm_bo_size - returns the size of the BOs in bytes * * @adev: amdgpu_device pointer @@ -247,6 +225,382 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level) } /** + * amdgpu_vm_bo_evicted - vm_bo is evicted + * + * @vm_bo: vm_bo which is evicted + * + * State for PDs/PTs and per VM BOs which are not at the location they should + * be. + */ +static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) +{ + struct amdgpu_vm *vm = vm_bo->vm; + struct amdgpu_bo *bo = vm_bo->bo; + + vm_bo->moved = true; + if (bo->tbo.type == ttm_bo_type_kernel) + list_move(&vm_bo->vm_status, &vm->evicted); + else + list_move_tail(&vm_bo->vm_status, &vm->evicted); +} + +/** + * amdgpu_vm_bo_relocated - vm_bo is reloacted + * + * @vm_bo: vm_bo which is relocated + * + * State for PDs/PTs which needs to update their parent PD. + */ +static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) +{ + list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); +} + +/** + * amdgpu_vm_bo_moved - vm_bo is moved + * + * @vm_bo: vm_bo which is moved + * + * State for per VM BOs which are moved, but that change is not yet reflected + * in the page tables. 
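/*
 * Illustrative sketch, not part of this patch: how the new
 * amdgpu_vm_entries_mask() pairs with amdgpu_vm_level_shift() to extract the
 * entry index of an address inside a PD/PT, the same arithmetic the cursor
 * walk below uses.  The helper name and the concrete numbers are assumptions;
 * the real shift/mask values depend on the ASIC and the configured block size.
 */
static unsigned example_entry_index(struct amdgpu_device *adev,
                                    unsigned level, uint64_t pfn)
{
        unsigned shift = amdgpu_vm_level_shift(adev, level);
        uint32_t mask = amdgpu_vm_entries_mask(adev, level);

        /* e.g. at the PTB level on a GMC9 part shift is 0 and mask is
         * AMDGPU_VM_PTE_COUNT(adev) - 1, so this is simply pfn % 512 */
        return (pfn >> shift) & mask;
}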
+ */ +static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) +{ + list_move(&vm_bo->vm_status, &vm_bo->vm->moved); +} + +/** + * amdgpu_vm_bo_idle - vm_bo is idle + * + * @vm_bo: vm_bo which is now idle + * + * State for PDs/PTs and per VM BOs which have gone through the state machine + * and are now idle. + */ +static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) +{ + list_move(&vm_bo->vm_status, &vm_bo->vm->idle); + vm_bo->moved = false; +} + +/** + * amdgpu_vm_bo_invalidated - vm_bo is invalidated + * + * @vm_bo: vm_bo which is now invalidated + * + * State for normal BOs which are invalidated and that change not yet reflected + * in the PTs. + */ +static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) +{ + spin_lock(&vm_bo->vm->invalidated_lock); + list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated); + spin_unlock(&vm_bo->vm->invalidated_lock); +} + +/** + * amdgpu_vm_bo_done - vm_bo is done + * + * @vm_bo: vm_bo which is now done + * + * State for normal BOs which are invalidated and that change has been updated + * in the PTs. + */ +static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) +{ + spin_lock(&vm_bo->vm->invalidated_lock); + list_del_init(&vm_bo->vm_status); + spin_unlock(&vm_bo->vm->invalidated_lock); +} + +/** + * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm + * + * @base: base structure for tracking BO usage in a VM + * @vm: vm to which bo is to be added + * @bo: amdgpu buffer object + * + * Initialize a bo_va_base structure and add it to the appropriate lists + * + */ +static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo) +{ + base->vm = vm; + base->bo = bo; + base->next = NULL; + INIT_LIST_HEAD(&base->vm_status); + + if (!bo) + return; + base->next = bo->vm_bo; + bo->vm_bo = base; + + if (bo->tbo.resv != vm->root.base.bo->tbo.resv) + return; + + vm->bulk_moveable = false; + if (bo->tbo.type == ttm_bo_type_kernel) + amdgpu_vm_bo_relocated(base); + else + amdgpu_vm_bo_idle(base); + + if (bo->preferred_domains & + amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) + return; + + /* + * we checked all the prerequisites, but it looks like this per vm bo + * is currently evicted. add the bo to the evicted list to make sure it + * is validated on next vm use to avoid fault. + * */ + amdgpu_vm_bo_evicted(base); +} + +/** + * amdgpu_vm_pt_parent - get the parent page directory + * + * @pt: child page table + * + * Helper to get the parent entry for the child page table. NULL if we are at + * the root page directory. + */ +static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt) +{ + struct amdgpu_bo *parent = pt->base.bo->parent; + + if (!parent) + return NULL; + + return container_of(parent->vm_bo, struct amdgpu_vm_pt, base); +} + +/** + * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt + */ +struct amdgpu_vm_pt_cursor { + uint64_t pfn; + struct amdgpu_vm_pt *parent; + struct amdgpu_vm_pt *entry; + unsigned level; +}; + +/** + * amdgpu_vm_pt_start - start PD/PT walk + * + * @adev: amdgpu_device pointer + * @vm: amdgpu_vm structure + * @start: start address of the walk + * @cursor: state to initialize + * + * Initialize a amdgpu_vm_pt_cursor to start a walk. 
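/*
 * Illustrative sketch, not part of this patch: bo->va (a list_head) is
 * replaced by a singly linked chain hanging off bo->vm_bo, which
 * amdgpu_vm_bo_base_init() above now prepends to.  Finding the bo_va of a BO
 * in a given VM is then a plain pointer walk, the same pattern
 * amdgpu_vm_bo_find() uses further down in this change; the helper name here
 * is hypothetical.
 */
static struct amdgpu_bo_va *example_find_bo_va(struct amdgpu_bo *bo,
                                               struct amdgpu_vm *vm)
{
        struct amdgpu_vm_bo_base *base;

        for (base = bo->vm_bo; base; base = base->next)
                if (base->vm == vm)
                        return container_of(base, struct amdgpu_bo_va, base);

        return NULL;
}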
+ */ +static void amdgpu_vm_pt_start(struct amdgpu_device *adev, + struct amdgpu_vm *vm, uint64_t start, + struct amdgpu_vm_pt_cursor *cursor) +{ + cursor->pfn = start; + cursor->parent = NULL; + cursor->entry = &vm->root; + cursor->level = adev->vm_manager.root_level; +} + +/** + * amdgpu_vm_pt_descendant - go to child node + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk to the child node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + unsigned mask, shift, idx; + + if (!cursor->entry->entries) + return false; + + BUG_ON(!cursor->entry->base.bo); + mask = amdgpu_vm_entries_mask(adev, cursor->level); + shift = amdgpu_vm_level_shift(adev, cursor->level); + + ++cursor->level; + idx = (cursor->pfn >> shift) & mask; + cursor->parent = cursor->entry; + cursor->entry = &cursor->entry->entries[idx]; + return true; +} + +/** + * amdgpu_vm_pt_sibling - go to sibling node + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk to the sibling node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + unsigned shift, num_entries; + + /* Root doesn't have a sibling */ + if (!cursor->parent) + return false; + + /* Go to our parents and see if we got a sibling */ + shift = amdgpu_vm_level_shift(adev, cursor->level - 1); + num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1); + + if (cursor->entry == &cursor->parent->entries[num_entries - 1]) + return false; + + cursor->pfn += 1ULL << shift; + cursor->pfn &= ~((1ULL << shift) - 1); + ++cursor->entry; + return true; +} + +/** + * amdgpu_vm_pt_ancestor - go to parent node + * + * @cursor: current state + * + * Walk to the parent node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor) +{ + if (!cursor->parent) + return false; + + --cursor->level; + cursor->entry = cursor->parent; + cursor->parent = amdgpu_vm_pt_parent(cursor->parent); + return true; +} + +/** + * amdgpu_vm_pt_next - get next PD/PT in hieratchy + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk the PD/PT tree to the next node. + */ +static void amdgpu_vm_pt_next(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + /* First try a newborn child */ + if (amdgpu_vm_pt_descendant(adev, cursor)) + return; + + /* If that didn't worked try to find a sibling */ + while (!amdgpu_vm_pt_sibling(adev, cursor)) { + /* No sibling, go to our parents and grandparents */ + if (!amdgpu_vm_pt_ancestor(cursor)) { + cursor->pfn = ~0ll; + return; + } + } +} + +/** + * amdgpu_vm_pt_first_leaf - get first leaf PD/PT + * + * @adev: amdgpu_device pointer + * @vm: amdgpu_vm structure + * @start: start addr of the walk + * @cursor: state to initialize + * + * Start a walk and go directly to the leaf node. + */ +static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev, + struct amdgpu_vm *vm, uint64_t start, + struct amdgpu_vm_pt_cursor *cursor) +{ + amdgpu_vm_pt_start(adev, vm, start, cursor); + while (amdgpu_vm_pt_descendant(adev, cursor)); +} + +/** + * amdgpu_vm_pt_next_leaf - get next leaf PD/PT + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk the PD/PT tree to the next leaf node. 
+ */ +static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + amdgpu_vm_pt_next(adev, cursor); + while (amdgpu_vm_pt_descendant(adev, cursor)); +} + +/** + * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy + */ +#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor) \ + for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor)); \ + (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor))) + +/** + * amdgpu_vm_pt_first_dfs - start a deep first search + * + * @adev: amdgpu_device structure + * @vm: amdgpu_vm structure + * @cursor: state to initialize + * + * Starts a deep first traversal of the PD/PT tree. + */ +static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *cursor) +{ + amdgpu_vm_pt_start(adev, vm, 0, cursor); + while (amdgpu_vm_pt_descendant(adev, cursor)); +} + +/** + * amdgpu_vm_pt_next_dfs - get the next node for a deep first search + * + * @adev: amdgpu_device structure + * @cursor: current state + * + * Move the cursor to the next node in a deep first search. + */ +static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + if (!cursor->entry) + return; + + if (!cursor->parent) + cursor->entry = NULL; + else if (amdgpu_vm_pt_sibling(adev, cursor)) + while (amdgpu_vm_pt_descendant(adev, cursor)); + else + amdgpu_vm_pt_ancestor(cursor); +} + +/** + * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs + */ +#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) \ + for (amdgpu_vm_pt_first_dfs((adev), (vm), &(cursor)), \ + (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ + (entry); (entry) = (cursor).entry, \ + amdgpu_vm_pt_next_dfs((adev), &(cursor))) + +/** * amdgpu_vm_get_pd_bo - add the VM PD to a validation list * * @vm: vm providing the BOs @@ -260,15 +614,55 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, struct amdgpu_bo_list_entry *entry) { - entry->robj = vm->root.base.bo; entry->priority = 0; - entry->tv.bo = &entry->robj->tbo; + entry->tv.bo = &vm->root.base.bo->tbo; entry->tv.shared = true; entry->user_pages = NULL; list_add(&entry->tv.head, validated); } /** + * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU + * + * @adev: amdgpu device pointer + * @vm: vm providing the BOs + * + * Move all BOs to the end of LRU and remember their positions to put them + * together. 
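/*
 * Illustrative sketch, not part of this patch: the leaf iterator visits the
 * deepest PD/PT entry covering each part of a pfn range, which is how
 * amdgpu_vm_alloc_pts() below drives its allocations.  A hypothetical helper
 * counting how many of those entries already have a backing BO would look
 * like this.
 */
static unsigned example_count_leaf_pts(struct amdgpu_device *adev,
                                       struct amdgpu_vm *vm,
                                       uint64_t start, uint64_t end)
{
        struct amdgpu_vm_pt_cursor cursor;
        unsigned count = 0;

        for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
                if (cursor.entry->base.bo)
                        ++count;

        return count;
}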
+ */ +void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + struct ttm_bo_global *glob = adev->mman.bdev.glob; + struct amdgpu_vm_bo_base *bo_base; + + if (vm->bulk_moveable) { + spin_lock(&glob->lru_lock); + ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); + spin_unlock(&glob->lru_lock); + return; + } + + memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); + + spin_lock(&glob->lru_lock); + list_for_each_entry(bo_base, &vm->idle, vm_status) { + struct amdgpu_bo *bo = bo_base->bo; + + if (!bo->parent) + continue; + + ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move); + if (bo->shadow) + ttm_bo_move_to_lru_tail(&bo->shadow->tbo, + &vm->lru_bulk_move); + } + spin_unlock(&glob->lru_lock); + + vm->bulk_moveable = true; +} + +/** * amdgpu_vm_validate_pt_bos - validate the page table BOs * * @adev: amdgpu device pointer @@ -285,47 +679,36 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*validate)(void *p, struct amdgpu_bo *bo), void *param) { - struct ttm_bo_global *glob = adev->mman.bdev.glob; struct amdgpu_vm_bo_base *bo_base, *tmp; int r = 0; + vm->bulk_moveable &= list_empty(&vm->evicted); + list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { struct amdgpu_bo *bo = bo_base->bo; - if (bo->parent) { - r = validate(param, bo); - if (r) - break; - - spin_lock(&glob->lru_lock); - ttm_bo_move_to_lru_tail(&bo->tbo); - if (bo->shadow) - ttm_bo_move_to_lru_tail(&bo->shadow->tbo); - spin_unlock(&glob->lru_lock); - } + r = validate(param, bo); + if (r) + break; if (bo->tbo.type != ttm_bo_type_kernel) { - spin_lock(&vm->moved_lock); - list_move(&bo_base->vm_status, &vm->moved); - spin_unlock(&vm->moved_lock); + amdgpu_vm_bo_moved(bo_base); } else { - list_move(&bo_base->vm_status, &vm->relocated); + if (vm->use_cpu_for_update) + r = amdgpu_bo_kmap(bo, NULL); + else + r = amdgpu_ttm_alloc_gart(&bo->tbo); + if (r) + break; + if (bo->shadow) { + r = amdgpu_ttm_alloc_gart(&bo->shadow->tbo); + if (r) + break; + } + amdgpu_vm_bo_relocated(bo_base); } } - spin_lock(&glob->lru_lock); - list_for_each_entry(bo_base, &vm->idle, vm_status) { - struct amdgpu_bo *bo = bo_base->bo; - - if (!bo->parent) - continue; - - ttm_bo_move_to_lru_tail(&bo->tbo); - if (bo->shadow) - ttm_bo_move_to_lru_tail(&bo->shadow->tbo); - } - spin_unlock(&glob->lru_lock); - return r; } @@ -376,7 +759,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (level == adev->vm_manager.root_level) { ats_entries = amdgpu_vm_level_shift(adev, level); ats_entries += AMDGPU_GPU_PAGE_SHIFT; - ats_entries = AMDGPU_VA_HOLE_START >> ats_entries; + ats_entries = AMDGPU_GMC_HOLE_START >> ats_entries; ats_entries = min(ats_entries, entries); entries -= ats_entries; } else { @@ -397,6 +780,10 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (r) goto error; + r = amdgpu_ttm_alloc_gart(&bo->tbo); + if (r) + return r; + r = amdgpu_job_alloc_with_ib(adev, 64, &job); if (r) goto error; @@ -448,117 +835,33 @@ error: } /** - * amdgpu_vm_alloc_levels - allocate the PD/PT levels + * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation * * @adev: amdgpu_device pointer - * @vm: requested vm - * @parent: parent PT - * @saddr: start of the address range - * @eaddr: end of the address range - * @level: VMPT level - * @ats: indicate ATS support from PTE - * - * Make sure the page directories and page tables are allocated - * - * Returns: - * 0 on success, errno otherwise. 
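/*
 * Illustrative sketch, not part of this patch: amdgpu_vm_validate_pt_bos()
 * below drops vm->bulk_moveable when BOs had to be moved back, and a caller
 * is expected to bump the idle BOs to the LRU tail in one bulk operation
 * afterwards.  The helper name and the exact call site (likely the command
 * submission path) are assumptions.
 */
static int example_validate_and_requeue(struct amdgpu_device *adev,
                                        struct amdgpu_vm *vm,
                                        int (*validate)(void *p, struct amdgpu_bo *bo),
                                        void *param)
{
        int r = amdgpu_vm_validate_pt_bos(adev, vm, validate, param);

        if (r)
                return r;

        amdgpu_vm_move_to_lru_tail(adev, vm);
        return 0;
}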
+ * @vm: requesting vm + * @bp: resulting BO allocation parameters */ -static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt *parent, - uint64_t saddr, uint64_t eaddr, - unsigned level, bool ats) +static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int level, struct amdgpu_bo_param *bp) { - unsigned shift = amdgpu_vm_level_shift(adev, level); - unsigned pt_idx, from, to; - u64 flags; - int r; - - if (!parent->entries) { - unsigned num_entries = amdgpu_vm_num_entries(adev, level); - - parent->entries = kvmalloc_array(num_entries, - sizeof(struct amdgpu_vm_pt), - GFP_KERNEL | __GFP_ZERO); - if (!parent->entries) - return -ENOMEM; - memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); - } - - from = saddr >> shift; - to = eaddr >> shift; - if (from >= amdgpu_vm_num_entries(adev, level) || - to >= amdgpu_vm_num_entries(adev, level)) - return -EINVAL; - - ++level; - saddr = saddr & ((1 << shift) - 1); - eaddr = eaddr & ((1 << shift) - 1); - - flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - if (vm->root.base.bo->shadow) - flags |= AMDGPU_GEM_CREATE_SHADOW; + memset(bp, 0, sizeof(*bp)); + + bp->size = amdgpu_vm_bo_size(adev, level); + bp->byte_align = AMDGPU_GPU_PAGE_SIZE; + bp->domain = AMDGPU_GEM_DOMAIN_VRAM; + if (bp->size <= PAGE_SIZE && adev->asic_type >= CHIP_VEGA10 && + adev->flags & AMD_IS_APU) + bp->domain |= AMDGPU_GEM_DOMAIN_GTT; + bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain); + bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; if (vm->use_cpu_for_update) - flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - else - flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; - - /* walk over the address space and allocate the page tables */ - for (pt_idx = from; pt_idx <= to; ++pt_idx) { - struct reservation_object *resv = vm->root.base.bo->tbo.resv; - struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; - struct amdgpu_bo *pt; - - if (!entry->base.bo) { - struct amdgpu_bo_param bp; - - memset(&bp, 0, sizeof(bp)); - bp.size = amdgpu_vm_bo_size(adev, level); - bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = flags; - bp.type = ttm_bo_type_kernel; - bp.resv = resv; - r = amdgpu_bo_create(adev, &bp, &pt); - if (r) - return r; - - r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats); - if (r) { - amdgpu_bo_unref(&pt->shadow); - amdgpu_bo_unref(&pt); - return r; - } - - if (vm->use_cpu_for_update) { - r = amdgpu_bo_kmap(pt, NULL); - if (r) { - amdgpu_bo_unref(&pt->shadow); - amdgpu_bo_unref(&pt); - return r; - } - } - - /* Keep a reference to the root directory to avoid - * freeing them up in the wrong order. - */ - pt->parent = amdgpu_bo_ref(parent->base.bo); - - amdgpu_vm_bo_base_init(&entry->base, vm, pt); - } - - if (level < AMDGPU_VM_PTB) { - uint64_t sub_saddr = (pt_idx == from) ? saddr : 0; - uint64_t sub_eaddr = (pt_idx == to) ? eaddr : - ((1 << shift) - 1); - r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr, - sub_eaddr, level, ats); - if (r) - return r; - } - } - - return 0; + bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + else if (!vm->root.base.bo || vm->root.base.bo->shadow) + bp->flags |= AMDGPU_GEM_CREATE_SHADOW; + bp->type = ttm_bo_type_kernel; + if (vm->root.base.bo) + bp->resv = vm->root.base.bo->tbo.resv; } /** @@ -569,7 +872,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, * @saddr: Start address which needs to be allocated * @size: Size from start address we need. 
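/*
 * Illustrative sketch, not part of this patch: amdgpu_vm_bo_param() now
 * centralizes the allocation parameters for PD/PT BOs, so creating one
 * collapses to filling the params and calling amdgpu_bo_create(), as both
 * amdgpu_vm_alloc_pts() and amdgpu_vm_init() do below.  The helper name is
 * hypothetical.
 */
static int example_create_pt(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                             int level, struct amdgpu_bo **pt)
{
        struct amdgpu_bo_param bp;

        amdgpu_vm_bo_param(adev, vm, level, &bp);
        return amdgpu_bo_create(adev, &bp, pt);
}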
* - * Make sure the page tables are allocated. + * Make sure the page directories and page tables are allocated * * Returns: * 0 on success, errno otherwise. @@ -578,8 +881,11 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t saddr, uint64_t size) { - uint64_t eaddr; + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_bo *pt; bool ats = false; + uint64_t eaddr; + int r; /* validate the parameters */ if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) @@ -588,7 +894,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, eaddr = saddr + size - 1; if (vm->pte_support_ats) - ats = saddr < AMDGPU_VA_HOLE_START; + ats = saddr < AMDGPU_GMC_HOLE_START; saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; @@ -599,8 +905,84 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, return -EINVAL; } - return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, - adev->vm_manager.root_level, ats); + for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) { + struct amdgpu_vm_pt *entry = cursor.entry; + struct amdgpu_bo_param bp; + + if (cursor.level < AMDGPU_VM_PTB) { + unsigned num_entries; + + num_entries = amdgpu_vm_num_entries(adev, cursor.level); + entry->entries = kvmalloc_array(num_entries, + sizeof(*entry->entries), + GFP_KERNEL | + __GFP_ZERO); + if (!entry->entries) + return -ENOMEM; + } + + + if (entry->base.bo) + continue; + + amdgpu_vm_bo_param(adev, vm, cursor.level, &bp); + + r = amdgpu_bo_create(adev, &bp, &pt); + if (r) + return r; + + r = amdgpu_vm_clear_bo(adev, vm, pt, cursor.level, ats); + if (r) + goto error_free_pt; + + if (vm->use_cpu_for_update) { + r = amdgpu_bo_kmap(pt, NULL); + if (r) + goto error_free_pt; + } + + /* Keep a reference to the root directory to avoid + * freeing them up in the wrong order. + */ + pt->parent = amdgpu_bo_ref(cursor.parent->base.bo); + + amdgpu_vm_bo_base_init(&entry->base, vm, pt); + } + + return 0; + +error_free_pt: + amdgpu_bo_unref(&pt->shadow); + amdgpu_bo_unref(&pt); + return r; +} + +/** + * amdgpu_vm_free_pts - free PD/PT levels + * + * @adev: amdgpu device structure + * @vm: amdgpu vm structure + * + * Free the page directory or page table level and all sub levels. 
+ */ +static void amdgpu_vm_free_pts(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_pt *entry; + + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) { + + if (entry->base.bo) { + entry->base.bo->vm_bo = NULL; + list_del(&entry->base.vm_status); + amdgpu_bo_unref(&entry->base.bo->shadow); + amdgpu_bo_unref(&entry->base.bo); + } + kvfree(entry->entries); + } + + BUG_ON(vm->root.base.bo); } /** @@ -714,7 +1096,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ } gds_switch_needed &= !!ring->funcs->emit_gds_switch; - vm_flush_needed &= !!ring->funcs->emit_vm_flush; + vm_flush_needed &= !!ring->funcs->emit_vm_flush && + job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET; pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; @@ -799,12 +1182,13 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, struct amdgpu_bo *bo) { - struct amdgpu_bo_va *bo_va; + struct amdgpu_vm_bo_base *base; - list_for_each_entry(bo_va, &bo->va, base.bo_list) { - if (bo_va->base.vm == vm) { - return bo_va; - } + for (base = bo->vm_bo; base; base = base->next) { + if (base->vm != vm) + continue; + + return container_of(base, struct amdgpu_bo_va, base); } return NULL; } @@ -957,6 +1341,22 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, return r; } +/** + * amdgpu_vm_update_func - helper to call update function + * + * Calls the update function for both the given BO as well as its shadow. + */ +static void amdgpu_vm_update_func(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint64_t flags) +{ + if (bo->shadow) + params->func(params, bo->shadow, pe, addr, count, incr, flags); + params->func(params, bo, pe, addr, count, incr, flags); +} + /* * amdgpu_vm_update_pde - update a single level in the hierarchy * @@ -984,47 +1384,28 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, pbo = pbo->parent; level += params->adev->vm_manager.root_level; - pt = amdgpu_bo_gpu_offset(entry->base.bo); - flags = AMDGPU_PTE_VALID; - amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags); + amdgpu_gmc_get_pde_for_bo(entry->base.bo, level, &pt, &flags); pde = (entry - parent->entries) * 8; - if (bo->shadow) - params->func(params, bo->shadow, pde, pt, 1, 0, flags); - params->func(params, bo, pde, pt, 1, 0, flags); + amdgpu_vm_update_func(params, bo, pde, pt, 1, 0, flags); } /* - * amdgpu_vm_invalidate_level - mark all PD levels as invalid + * amdgpu_vm_invalidate_pds - mark all PDs as invalid * * @adev: amdgpu_device pointer * @vm: related vm - * @parent: parent PD - * @level: VMPT level * * Mark all PD level as invalid after an error. */ -static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt *parent, - unsigned level) +static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, + struct amdgpu_vm *vm) { - unsigned pt_idx, num_entries; - - /* - * Recurse into the subdirectories. This recursion is harmless because - * we only have a maximum of 5 layers. 
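/*
 * Note, not part of this patch: for_each_amdgpu_vm_pt_dfs_safe() yields
 * entries in post order and advances the cursor before the loop body runs,
 * so every child is visited before its parent and the body may free the
 * current entry's BO and entries[] array, which is exactly what
 * amdgpu_vm_free_pts() above relies on.
 */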
- */ - num_entries = amdgpu_vm_num_entries(adev, level); - for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) { - struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; - - if (!entry->base.bo) - continue; + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_pt *entry; - if (!entry->base.moved) - list_move(&entry->base.vm_status, &vm->relocated); - amdgpu_vm_invalidate_level(adev, vm, entry, level + 1); - } + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) + if (entry->base.bo && !entry->base.moved) + amdgpu_vm_bo_relocated(&entry->base); } /* @@ -1054,14 +1435,6 @@ restart: params.adev = adev; if (vm->use_cpu_for_update) { - struct amdgpu_vm_bo_base *bo_base; - - list_for_each_entry(bo_base, &vm->relocated, vm_status) { - r = amdgpu_bo_kmap(bo_base->bo, NULL); - if (unlikely(r)) - return r; - } - r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); if (unlikely(r)) return r; @@ -1078,25 +1451,16 @@ restart: } while (!list_empty(&vm->relocated)) { - struct amdgpu_vm_bo_base *bo_base, *parent; struct amdgpu_vm_pt *pt, *entry; - struct amdgpu_bo *bo; - bo_base = list_first_entry(&vm->relocated, - struct amdgpu_vm_bo_base, - vm_status); - bo_base->moved = false; - list_del_init(&bo_base->vm_status); + entry = list_first_entry(&vm->relocated, struct amdgpu_vm_pt, + base.vm_status); + amdgpu_vm_bo_idle(&entry->base); - bo = bo_base->bo->parent; - if (!bo) + pt = amdgpu_vm_pt_parent(entry); + if (!pt) continue; - parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base, - bo_list); - pt = container_of(parent, struct amdgpu_vm_pt, base); - entry = container_of(bo_base, struct amdgpu_vm_pt, base); - amdgpu_vm_update_pde(¶ms, vm, pt, entry); if (!vm->use_cpu_for_update && @@ -1138,85 +1502,90 @@ restart: return 0; error: - amdgpu_vm_invalidate_level(adev, vm, &vm->root, - adev->vm_manager.root_level); + amdgpu_vm_invalidate_pds(adev, vm); amdgpu_job_free(job); return r; } /** - * amdgpu_vm_find_entry - find the entry for an address + * amdgpu_vm_update_huge - figure out parameters for PTE updates * - * @p: see amdgpu_pte_update_params definition - * @addr: virtual address in question - * @entry: resulting entry or NULL - * @parent: parent entry - * - * Find the vm_pt entry and it's parent for the given address. + * Make sure to set the right flags for the PTEs at the desired level. 
*/ -void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, - struct amdgpu_vm_pt **entry, - struct amdgpu_vm_pt **parent) -{ - unsigned level = p->adev->vm_manager.root_level; - - *parent = NULL; - *entry = &p->vm->root; - while ((*entry)->entries) { - unsigned shift = amdgpu_vm_level_shift(p->adev, level++); +static void amdgpu_vm_update_huge(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, unsigned level, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint64_t flags) - *parent = *entry; - *entry = &(*entry)->entries[addr >> shift]; - addr &= (1ULL << shift) - 1; +{ + if (level != AMDGPU_VM_PTB) { + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); } - if (level != AMDGPU_VM_PTB) - *entry = NULL; + amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags); } /** - * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages + * amdgpu_vm_fragment - get fragment for PTEs * - * @p: see amdgpu_pte_update_params definition - * @entry: vm_pt entry to check - * @parent: parent entry - * @nptes: number of PTEs updated with this operation - * @dst: destination address where the PTEs should point to - * @flags: access flags fro the PTEs + * @params: see amdgpu_pte_update_params definition + * @start: first PTE to handle + * @end: last PTE to handle + * @flags: hw mapping flags + * @frag: resulting fragment size + * @frag_end: end of this fragment * - * Check if we can update the PD with a huge page. + * Returns the first possible fragment for the start and end address. */ -static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, - struct amdgpu_vm_pt *entry, - struct amdgpu_vm_pt *parent, - unsigned nptes, uint64_t dst, - uint64_t flags) +static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, + uint64_t start, uint64_t end, uint64_t flags, + unsigned int *frag, uint64_t *frag_end) { - uint64_t pde; + /** + * The MC L1 TLB supports variable sized pages, based on a fragment + * field in the PTE. When this field is set to a non-zero value, page + * granularity is increased from 4KB to (1 << (12 + frag)). The PTE + * flags are considered valid for all PTEs within the fragment range + * and corresponding mappings are assumed to be physically contiguous. + * + * The L1 TLB can store a single PTE for the whole fragment, + * significantly increasing the space available for translation + * caching. This leads to large improvements in throughput when the + * TLB is under pressure. + * + * The L2 TLB distributes small and large fragments into two + * asymmetric partitions. The large fragment cache is significantly + * larger. Thus, we try to use large fragments wherever possible. + * Userspace can support this by aligning virtual base address and + * allocation size to the fragment size. + * + * Starting with Vega10 the fragment size only controls the L1. The L2 + * is now directly feed with small/huge/giant pages from the walker. + */ + unsigned max_frag; - /* In the case of a mixed PT the PDE must point to it*/ - if (p->adev->asic_type >= CHIP_VEGA10 && !p->src && - nptes == AMDGPU_VM_PTE_COUNT(p->adev)) { - /* Set the huge page flag to stop scanning at this PDE */ - flags |= AMDGPU_PDE_PTE; - } + if (params->adev->asic_type < CHIP_VEGA10) + max_frag = params->adev->vm_manager.fragment_size; + else + max_frag = 31; - if (!(flags & AMDGPU_PDE_PTE)) { - if (entry->huge) { - /* Add the entry to the relocated list to update it. 
*/ - entry->huge = false; - list_move(&entry->base.vm_status, &p->vm->relocated); - } + /* system pages are non continuously */ + if (params->src) { + *frag = 0; + *frag_end = end; return; } - entry->huge = true; - amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags); - - pde = (entry - parent->entries) * 8; - if (parent->base.bo->shadow) - p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags); - p->func(p, parent->base.bo, pde, dst, 1, 0, flags); + /* This intentionally wraps around if no bit is set */ + *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1); + if (*frag >= max_frag) { + *frag = max_frag; + *frag_end = end & ~((1ULL << max_frag) - 1); + } else { + *frag_end = start + (1 << *frag); + } } /** @@ -1234,112 +1603,105 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, * 0 for success, -EINVAL for failure. */ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, - uint64_t start, uint64_t end, - uint64_t dst, uint64_t flags) + uint64_t start, uint64_t end, + uint64_t dst, uint64_t flags) { struct amdgpu_device *adev = params->adev; - const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; + struct amdgpu_vm_pt_cursor cursor; + uint64_t frag_start = start, frag_end; + unsigned int frag; - uint64_t addr, pe_start; - struct amdgpu_bo *pt; - unsigned nptes; + /* figure out the initial fragment */ + amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); - /* walk over the address space and update the page tables */ - for (addr = start; addr < end; addr += nptes, - dst += nptes * AMDGPU_GPU_PAGE_SIZE) { - struct amdgpu_vm_pt *entry, *parent; + /* walk over the address space and update the PTs */ + amdgpu_vm_pt_start(adev, params->vm, start, &cursor); + while (cursor.pfn < end) { + struct amdgpu_bo *pt = cursor.entry->base.bo; + unsigned shift, parent_shift, mask; + uint64_t incr, entry_end, pe_start; - amdgpu_vm_get_entry(params, addr, &entry, &parent); - if (!entry) + if (!pt) return -ENOENT; - if ((addr & ~mask) == (end & ~mask)) - nptes = end - addr; - else - nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); - - amdgpu_vm_handle_huge_pages(params, entry, parent, - nptes, dst, flags); - /* We don't need to update PTEs for huge pages */ - if (entry->huge) + /* The root level can't be a huge page */ + if (cursor.level == adev->vm_manager.root_level) { + if (!amdgpu_vm_pt_descendant(adev, &cursor)) + return -ENOENT; continue; + } - pt = entry->base.bo; - pe_start = (addr & mask) * 8; - if (pt->shadow) - params->func(params, pt->shadow, pe_start, dst, nptes, - AMDGPU_GPU_PAGE_SIZE, flags); - params->func(params, pt, pe_start, dst, nptes, - AMDGPU_GPU_PAGE_SIZE, flags); - } - - return 0; -} + /* First check if the entry is already handled */ + if (cursor.pfn < frag_start) { + cursor.entry->huge = true; + amdgpu_vm_pt_next(adev, &cursor); + continue; + } -/* - * amdgpu_vm_frag_ptes - add fragment information to PTEs - * - * @params: see amdgpu_pte_update_params definition - * @vm: requested vm - * @start: first PTE to handle - * @end: last PTE to handle - * @dst: addr those PTEs should point to - * @flags: hw mapping flags - * - * Returns: - * 0 for success, -EINVAL for failure. - */ -static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, - uint64_t start, uint64_t end, - uint64_t dst, uint64_t flags) -{ - /** - * The MC L1 TLB supports variable sized pages, based on a fragment - * field in the PTE. 
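/*
 * Worked example, not part of this patch, assuming a GMC9 part where
 * max_frag is 31: for start = 0x200 and end = 0x260 (GPU pfns),
 * ffs(0x200) - 1 = 9 and fls64(0x60) - 1 = 6, so amdgpu_vm_fragment()
 * returns frag = 6 and frag_end = 0x200 + (1 << 6) = 0x240.  The first 64
 * PTEs are therefore written with AMDGPU_PTE_FRAG(6), and the walk then
 * recomputes the fragment for the remaining 0x240..0x260 range.
 */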
When this field is set to a non-zero value, page - * granularity is increased from 4KB to (1 << (12 + frag)). The PTE - * flags are considered valid for all PTEs within the fragment range - * and corresponding mappings are assumed to be physically contiguous. - * - * The L1 TLB can store a single PTE for the whole fragment, - * significantly increasing the space available for translation - * caching. This leads to large improvements in throughput when the - * TLB is under pressure. - * - * The L2 TLB distributes small and large fragments into two - * asymmetric partitions. The large fragment cache is significantly - * larger. Thus, we try to use large fragments wherever possible. - * Userspace can support this by aligning virtual base address and - * allocation size to the fragment size. - */ - unsigned max_frag = params->adev->vm_manager.fragment_size; - int r; + /* If it isn't already handled it can't be a huge page */ + if (cursor.entry->huge) { + /* Add the entry to the relocated list to update it. */ + cursor.entry->huge = false; + amdgpu_vm_bo_relocated(&cursor.entry->base); + } - /* system pages are non continuously */ - if (params->src || !(flags & AMDGPU_PTE_VALID)) - return amdgpu_vm_update_ptes(params, start, end, dst, flags); - - while (start != end) { - uint64_t frag_flags, frag_end; - unsigned frag; - - /* This intentionally wraps around if no bit is set */ - frag = min((unsigned)ffs(start) - 1, - (unsigned)fls64(end - start) - 1); - if (frag >= max_frag) { - frag_flags = AMDGPU_PTE_FRAG(max_frag); - frag_end = end & ~((1ULL << max_frag) - 1); - } else { - frag_flags = AMDGPU_PTE_FRAG(frag); - frag_end = start + (1 << frag); + shift = amdgpu_vm_level_shift(adev, cursor.level); + parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1); + if (adev->asic_type < CHIP_VEGA10) { + /* No huge page support before GMC v9 */ + if (cursor.level != AMDGPU_VM_PTB) { + if (!amdgpu_vm_pt_descendant(adev, &cursor)) + return -ENOENT; + continue; + } + } else if (frag < shift) { + /* We can't use this level when the fragment size is + * smaller than the address shift. Go to the next + * child entry and try again. + */ + if (!amdgpu_vm_pt_descendant(adev, &cursor)) + return -ENOENT; + continue; + } else if (frag >= parent_shift) { + /* If the fragment size is even larger than the parent + * shift we should go up one level and check it again. 
+ */ + if (!amdgpu_vm_pt_ancestor(&cursor)) + return -ENOENT; + continue; } - r = amdgpu_vm_update_ptes(params, start, frag_end, dst, - flags | frag_flags); - if (r) - return r; + /* Looks good so far, calculate parameters for the update */ + incr = AMDGPU_GPU_PAGE_SIZE << shift; + mask = amdgpu_vm_entries_mask(adev, cursor.level); + pe_start = ((cursor.pfn >> shift) & mask) * 8; + entry_end = (mask + 1) << shift; + entry_end += cursor.pfn & ~(entry_end - 1); + entry_end = min(entry_end, end); + + do { + uint64_t upd_end = min(entry_end, frag_end); + unsigned nptes = (upd_end - frag_start) >> shift; + + amdgpu_vm_update_huge(params, pt, cursor.level, + pe_start, dst, nptes, incr, + flags | AMDGPU_PTE_FRAG(frag)); + + pe_start += nptes * 8; + dst += nptes * AMDGPU_GPU_PAGE_SIZE << shift; + + frag_start = upd_end; + if (frag_start >= frag_end) { + /* figure out the next fragment */ + amdgpu_vm_fragment(params, frag_start, end, + flags, &frag, &frag_end); + if (frag < shift) + break; + } + } while (frag_start < entry_end); - dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE; - start = frag_end; + if (frag >= shift) + amdgpu_vm_pt_next(adev, &cursor); } return 0; @@ -1401,8 +1763,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, params.func = amdgpu_vm_cpu_set_ptes; params.pages_addr = pages_addr; - return amdgpu_vm_frag_ptes(¶ms, start, last + 1, - addr, flags); + return amdgpu_vm_update_ptes(¶ms, start, last + 1, + addr, flags); } ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); @@ -1481,7 +1843,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - r = amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); + r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags); if (r) goto error_free; @@ -1696,10 +2058,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, amdgpu_asic_flush_hdp(adev, NULL); } - spin_lock(&vm->moved_lock); - list_del_init(&bo_va->base.vm_status); - spin_unlock(&vm->moved_lock); - /* If the BO is not in its preferred location add it back to * the evicted list so that it gets validated again on the * next command submission. 
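/*
 * Illustrative note, not part of this patch, assuming a GMC9 layout with a
 * 9-bit block size: at the PTB level the shift is 0, so incr is
 * AMDGPU_GPU_PAGE_SIZE and each step of the update loop writes one PTE per
 * 4KiB page.  When the walk instead stays one level up at PDB0 because
 * frag >= 9, the shift is 9, incr becomes 4KiB << 9 = 2MiB and
 * amdgpu_vm_update_huge() sets AMDGPU_PDE_PTE so a single entry maps a whole
 * 2MiB huge page.
 */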
@@ -1708,9 +2066,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, uint32_t mem_type = bo->tbo.mem.mem_type; if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) - list_add_tail(&bo_va->base.vm_status, &vm->evicted); + amdgpu_vm_bo_evicted(&bo_va->base); else - list_add(&bo_va->base.vm_status, &vm->idle); + amdgpu_vm_bo_idle(&bo_va->base); + } else { + amdgpu_vm_bo_done(&bo_va->base); } list_splice_init(&bo_va->invalids, &bo_va->valids); @@ -1895,7 +2255,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START) + if (vm->pte_support_ats && + mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, @@ -1936,40 +2297,40 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va *bo_va, *tmp; - struct list_head moved; + struct reservation_object *resv; bool clear; int r; - INIT_LIST_HEAD(&moved); - spin_lock(&vm->moved_lock); - list_splice_init(&vm->moved, &moved); - spin_unlock(&vm->moved_lock); + list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { + /* Per VM BOs never need to bo cleared in the page tables */ + r = amdgpu_vm_bo_update(adev, bo_va, false); + if (r) + return r; + } - list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) { - struct reservation_object *resv = bo_va->base.bo->tbo.resv; + spin_lock(&vm->invalidated_lock); + while (!list_empty(&vm->invalidated)) { + bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, + base.vm_status); + resv = bo_va->base.bo->tbo.resv; + spin_unlock(&vm->invalidated_lock); - /* Per VM BOs never need to bo cleared in the page tables */ - if (resv == vm->root.base.bo->tbo.resv) - clear = false; /* Try to reserve the BO to avoid clearing its ptes */ - else if (!amdgpu_vm_debug && reservation_object_trylock(resv)) + if (!amdgpu_vm_debug && reservation_object_trylock(resv)) clear = false; /* Somebody else is using the BO right now */ else clear = true; r = amdgpu_vm_bo_update(adev, bo_va, clear); - if (r) { - spin_lock(&vm->moved_lock); - list_splice(&moved, &vm->moved); - spin_unlock(&vm->moved_lock); + if (r) return r; - } - if (!clear && resv != vm->root.base.bo->tbo.resv) + if (!clear) reservation_object_unlock(resv); - + spin_lock(&vm->invalidated_lock); } + spin_unlock(&vm->invalidated_lock); return 0; } @@ -2034,9 +2395,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv && !bo_va->base.moved) { - spin_lock(&vm->moved_lock); list_move(&bo_va->base.vm_status, &vm->moved); - spin_unlock(&vm->moved_lock); } trace_amdgpu_vm_bo_map(bo_va, mapping); } @@ -2388,13 +2747,27 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va) { struct amdgpu_bo_va_mapping *mapping, *next; + struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; + struct amdgpu_vm_bo_base **base; + + if (bo) { + if (bo->tbo.resv == vm->root.base.bo->tbo.resv) + vm->bulk_moveable = false; - list_del(&bo_va->base.bo_list); + for (base = &bo_va->base.bo->vm_bo; *base; + base = &(*base)->next) { + if (*base != &bo_va->base) + continue; + + *base = bo_va->base.next; + break; + } + } - spin_lock(&vm->moved_lock); + spin_lock(&vm->invalidated_lock); list_del(&bo_va->base.vm_status); - spin_unlock(&vm->moved_lock); + spin_unlock(&vm->invalidated_lock); 
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); @@ -2432,30 +2805,24 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, if (bo->parent && bo->parent->shadow == bo) bo = bo->parent; - list_for_each_entry(bo_base, &bo->va, bo_list) { + for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - bool was_moved = bo_base->moved; - bo_base->moved = true; if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { - if (bo->tbo.type == ttm_bo_type_kernel) - list_move(&bo_base->vm_status, &vm->evicted); - else - list_move_tail(&bo_base->vm_status, - &vm->evicted); + amdgpu_vm_bo_evicted(bo_base); continue; } - if (was_moved) + if (bo_base->moved) continue; + bo_base->moved = true; - if (bo->tbo.type == ttm_bo_type_kernel) { - list_move(&bo_base->vm_status, &vm->relocated); - } else { - spin_lock(&bo_base->vm->moved_lock); - list_move(&bo_base->vm_status, &vm->moved); - spin_unlock(&bo_base->vm->moved_lock); - } + if (bo->tbo.type == ttm_bo_type_kernel) + amdgpu_vm_bo_relocated(bo_base); + else if (bo->tbo.resv == vm->root.base.bo->tbo.resv) + amdgpu_vm_bo_moved(bo_base); + else + amdgpu_vm_bo_invalidated(bo_base); } } @@ -2574,6 +2941,22 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, adev->vm_manager.fragment_size); } +static struct amdgpu_retryfault_hashtable *init_fault_hash(void) +{ + struct amdgpu_retryfault_hashtable *fault_hash; + + fault_hash = kmalloc(sizeof(*fault_hash), GFP_KERNEL); + if (!fault_hash) + return fault_hash; + + INIT_CHASH_TABLE(fault_hash->hash, + AMDGPU_PAGEFAULT_HASH_BITS, 8, 0); + spin_lock_init(&fault_hash->lock); + fault_hash->count = 0; + + return fault_hash; +} + /** * amdgpu_vm_init - initialize a vm instance * @@ -2592,13 +2975,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, { struct amdgpu_bo_param bp; struct amdgpu_bo *root; - const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, - AMDGPU_VM_PTE_COUNT(adev) * 8); - unsigned ring_instance; - struct amdgpu_ring *ring; - struct drm_sched_rq *rq; - unsigned long size; - uint64_t flags; int r, i; vm->va = RB_ROOT_CACHED; @@ -2606,18 +2982,15 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->reserved_vmid[i] = NULL; INIT_LIST_HEAD(&vm->evicted); INIT_LIST_HEAD(&vm->relocated); - spin_lock_init(&vm->moved_lock); INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->idle); + INIT_LIST_HEAD(&vm->invalidated); + spin_lock_init(&vm->invalidated_lock); INIT_LIST_HEAD(&vm->freed); /* create scheduler entity for page table updates */ - - ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring); - ring_instance %= adev->vm_manager.vm_pte_num_rings; - ring = adev->vm_manager.vm_pte_rings[ring_instance]; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL); + r = drm_sched_entity_init(&vm->entity, adev->vm_manager.vm_pte_rqs, + adev->vm_manager.vm_pte_num_rqs, NULL); if (r) return r; @@ -2639,20 +3012,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, "CPU update of VM recommended only for large BAR system\n"); vm->last_update = NULL; - flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - if (vm->use_cpu_for_update) - flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE) - flags |= AMDGPU_GEM_CREATE_SHADOW; - - size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); - memset(&bp, 0, sizeof(bp)); - bp.size = size; - 
bp.byte_align = align; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = flags; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; + amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp); + if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) + bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW; r = amdgpu_bo_create(adev, &bp, &root); if (r) goto error_free_sched_entity; @@ -2683,6 +3045,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->pasid = pasid; } + vm->fault_hash = init_fault_hash(); + if (!vm->fault_hash) { + r = -ENOMEM; + goto error_free_root; + } + INIT_KFIFO(vm->faults); vm->fault_credit = 16; @@ -2722,7 +3090,7 @@ error_free_sched_entity: * Returns: * 0 for success, -errno for errors. */ -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid) { bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); int r; @@ -2734,7 +3102,20 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) /* Sanity checks */ if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) { r = -EINVAL; - goto error; + goto unreserve_bo; + } + + if (pasid) { + unsigned long flags; + + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); + r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, + GFP_ATOMIC); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + + if (r == -ENOSPC) + goto unreserve_bo; + r = 0; } /* Check if PD needs to be reinitialized and do it before @@ -2745,7 +3126,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) adev->vm_manager.root_level, pte_support_ats); if (r) - goto error; + goto free_idr; } /* Update VM state */ @@ -2764,45 +3145,52 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + /* Free the original amdgpu allocated pasid + * Will be replaced with kfd allocated pasid + */ + amdgpu_pasid_free(vm->pasid); vm->pasid = 0; } /* Free the shadow bo for compute VM */ amdgpu_bo_unref(&vm->root.base.bo->shadow); -error: + if (pasid) + vm->pasid = pasid; + + goto unreserve_bo; + +free_idr: + if (pasid) { + unsigned long flags; + + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); + idr_remove(&adev->vm_manager.pasid_idr, pasid); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + } +unreserve_bo: amdgpu_bo_unreserve(vm->root.base.bo); return r; } /** - * amdgpu_vm_free_levels - free PD/PT levels - * - * @adev: amdgpu device structure - * @parent: PD/PT starting level to free - * @level: level of parent structure + * amdgpu_vm_release_compute - release a compute vm + * @adev: amdgpu_device pointer + * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute * - * Free the page directory or page table level and all sub levels. + * This is a correspondant of amdgpu_vm_make_compute. It decouples compute + * pasid from vm. Compute should stop use of vm after this call. 
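/*
 * Illustrative sketch, not part of this patch: with the new pasid parameter a
 * KFD process hands its own pasid to amdgpu_vm_make_compute() and detaches it
 * again with amdgpu_vm_release_compute() before the VM is torn down by the
 * graphics side.  The helper name and the exact caller are assumptions.
 */
static int example_bind_vm_to_kfd(struct amdgpu_device *adev,
                                  struct amdgpu_vm *vm, unsigned int kfd_pasid)
{
        int r;

        /* reuse an existing GFX VM for compute and install the KFD pasid */
        r = amdgpu_vm_make_compute(adev, vm, kfd_pasid);
        if (r)
                return r;

        /* ... compute work ... */

        /* drop the compute pasid again; amdgpu_vm_fini() still frees the VM */
        amdgpu_vm_release_compute(adev, vm);
        return 0;
}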
*/ -static void amdgpu_vm_free_levels(struct amdgpu_device *adev, - struct amdgpu_vm_pt *parent, - unsigned level) +void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - unsigned i, num_entries = amdgpu_vm_num_entries(adev, level); + if (vm->pasid) { + unsigned long flags; - if (parent->base.bo) { - list_del(&parent->base.bo_list); - list_del(&parent->base.vm_status); - amdgpu_bo_unref(&parent->base.bo->shadow); - amdgpu_bo_unref(&parent->base.bo); + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); + idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } - - if (parent->entries) - for (i = 0; i < num_entries; i++) - amdgpu_vm_free_levels(adev, &parent->entries[i], - level + 1); - - kvfree(parent->entries); + vm->pasid = 0; } /** @@ -2826,7 +3214,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) /* Clear pending page faults from IH when the VM is destroyed */ while (kfifo_get(&vm->faults, &fault)) - amdgpu_ih_clear_fault(adev, fault); + amdgpu_vm_clear_fault(vm->fault_hash, fault); if (vm->pasid) { unsigned long flags; @@ -2836,6 +3224,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } + kfree(vm->fault_hash); + vm->fault_hash = NULL; + drm_sched_entity_destroy(&vm->entity); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { @@ -2862,8 +3253,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) { dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); } else { - amdgpu_vm_free_levels(adev, &vm->root, - adev->vm_manager.root_level); + amdgpu_vm_free_pts(adev, vm); amdgpu_bo_unreserve(root); } amdgpu_bo_unref(&root); @@ -2926,7 +3316,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) adev->vm_manager.seqno[i] = 0; - atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); spin_lock_init(&adev->vm_manager.prt_lock); atomic_set(&adev->vm_manager.num_prt_users, 0); @@ -3002,7 +3391,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) /** * amdgpu_vm_get_task_info - Extracts task info for a PASID. * - * @dev: drm device pointer + * @adev: drm device pointer * @pasid: PASID identifier for VM * @task_info: task_info to fill. */ @@ -3037,3 +3426,78 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) } } } + +/** + * amdgpu_vm_add_fault - Add a page fault record to fault hash table + * + * @fault_hash: fault hash table + * @key: 64-bit encoding of PASID and address + * + * This should be called when a retry page fault interrupt is + * received. If this is a new page fault, it will be added to a hash + * table. The return value indicates whether this is a new fault, or + * a fault that was already known and is already being handled. + * + * If there are too many pending page faults, this will fail. Retry + * interrupts should be ignored in this case until there is enough + * free space. + * + * Returns 0 if the fault was added, 1 if the fault was already known, + * -ENOSPC if there are too many pending faults. 
+ */ +int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key) +{ + unsigned long flags; + int r = -ENOSPC; + + if (WARN_ON_ONCE(!fault_hash)) + /* Should be allocated in amdgpu_vm_init + */ + return r; + + spin_lock_irqsave(&fault_hash->lock, flags); + + /* Only let the hash table fill up to 50% for best performance */ + if (fault_hash->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1))) + goto unlock_out; + + r = chash_table_copy_in(&fault_hash->hash, key, NULL); + if (!r) + fault_hash->count++; + + /* chash_table_copy_in should never fail unless we're losing count */ + WARN_ON_ONCE(r < 0); + +unlock_out: + spin_unlock_irqrestore(&fault_hash->lock, flags); + return r; +} + +/** + * amdgpu_vm_clear_fault - Remove a page fault record + * + * @fault_hash: fault hash table + * @key: 64-bit encoding of PASID and address + * + * This should be called when a page fault has been handled. Any + * future interrupt with this key will be processed as a new + * page fault. + */ +void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key) +{ + unsigned long flags; + int r; + + if (!fault_hash) + return; + + spin_lock_irqsave(&fault_hash->lock, flags); + + r = chash_table_remove(&fault_hash->hash, key, NULL); + if (!WARN_ON_ONCE(r < 0)) { + fault_hash->count--; + WARN_ON_ONCE(fault_hash->count < 0); + } + + spin_unlock_irqrestore(&fault_hash->lock, flags); +} |
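/*
 * Illustrative sketch, not part of this patch: the retry-fault filter is
 * meant to be driven from the interrupt handler.  The helper names and the
 * key layout below (pasid in the high bits, page address in the low bits)
 * are assumptions; the real encoding is up to the IH code.
 */
static bool example_filter_retry_fault(struct amdgpu_vm *vm, u16 pasid,
                                       u64 fault_addr)
{
        u64 key = ((u64)pasid << 48) | (fault_addr >> AMDGPU_GPU_PAGE_SHIFT);

        /* 0: new fault, handle it; 1: already pending; -ENOSPC: drop it */
        return amdgpu_vm_add_fault(vm->fault_hash, key) == 0;
}

static void example_retry_fault_done(struct amdgpu_vm *vm, u64 key)
{
        /* once the fault is resolved, allow it to be reported again */
        amdgpu_vm_clear_fault(vm->fault_hash, key);
}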