Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/engine')
179 files changed, 9108 insertions(+), 7701 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
index ba88613e1e46..8bf1635ffabc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
@@ -8,3 +8,5 @@ nvkm-y += nvkm/engine/ce/gp100.o
 nvkm-y += nvkm/engine/ce/gp102.o
 nvkm-y += nvkm/engine/ce/gv100.o
 nvkm-y += nvkm/engine/ce/tu102.o
+nvkm-y += nvkm/engine/ce/ga100.o
+nvkm-y += nvkm/engine/ce/ga102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga100.c
new file mode 100644
index 000000000000..6648ed62daa6
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga100.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <subdev/vfn.h>
+
+#include <nvif/class.h>
+
+static irqreturn_t
+ga100_ce_intr(struct nvkm_inth *inth)
+{
+	struct nvkm_subdev *subdev = container_of(inth, typeof(*subdev), inth);
+
+	/*TODO*/
+	nvkm_error(subdev, "intr\n");
+	return IRQ_NONE;
+}
+
+int
+ga100_ce_fini(struct nvkm_engine *engine, bool suspend)
+{
+	nvkm_inth_block(&engine->subdev.inth);
+	return 0;
+}
+
+int
+ga100_ce_init(struct nvkm_engine *engine)
+{
+	nvkm_inth_allow(&engine->subdev.inth);
+	return 0;
+}
+
+int
+ga100_ce_oneinit(struct nvkm_engine *engine)
+{
+	struct nvkm_subdev *subdev = &engine->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 vector;
+
+	vector = nvkm_rd32(device, 0x10442c + (subdev->inst * 0x80)) & 0x00000fff;
+
+	return nvkm_inth_add(&device->vfn->intr, vector, NVKM_INTR_PRIO_NORMAL,
+			     subdev, ga100_ce_intr, &subdev->inth);
+}
+
+static const struct nvkm_engine_func
+ga100_ce = {
+	.oneinit = ga100_ce_oneinit,
+	.init = ga100_ce_init,
+	.fini = ga100_ce_fini,
+	.cclass = &gv100_ce_cclass,
+	.sclass = {
+		{ -1, -1, AMPERE_DMA_COPY_A },
+		{}
+	}
+};
+
+int
+ga100_ce_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+	     struct nvkm_engine **pengine)
+{
+	return nvkm_engine_new_(&ga100_ce, device, type, inst, true, pengine);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usergv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga102.c
index 3dc3b8b312de..9f3448ad625f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usergv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga102.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2018 Red Hat Inc.
+ * Copyright 2021 Red Hat Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -19,27 +19,26 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-#include "user.h"
+#include "priv.h"
 
-static int
-gv100_fifo_user_map(struct nvkm_object *object, void *argv, u32 argc,
-		    enum nvkm_object_map *type, u64 *addr, u64 *size)
-{
-	struct nvkm_device *device = object->engine->subdev.device;
-	*addr = 0x810000 + device->func->resource_addr(device, 0);
-	*size = 0x010000;
-	*type = NVKM_OBJECT_MAP_IO;
-	return 0;
-}
+#include <nvif/class.h>
 
-static const struct nvkm_object_func
-gv100_fifo_user = {
-	.map = gv100_fifo_user_map,
+static const struct nvkm_engine_func
+ga102_ce = {
+	.oneinit = ga100_ce_oneinit,
+	.init = ga100_ce_init,
+	.fini = ga100_ce_fini,
+	.cclass = &gv100_ce_cclass,
+	.sclass = {
+		{ -1, -1, AMPERE_DMA_COPY_A },
+		{ -1, -1, AMPERE_DMA_COPY_B },
+		{}
+	}
 };
 
 int
-gv100_fifo_user_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
-		    struct nvkm_object **pobject)
+ga102_ce_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+	     struct nvkm_engine **pengine)
 {
-	return nvkm_object_new_(&gv100_fifo_user, oclass, argv, argc, pobject);
+	return nvkm_engine_new_(&ga102_ce, device, type, inst, true, pengine);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c
index 09a112af2f89..c9bf6305c3ec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c
@@ -40,7 +40,7 @@ gt215_ce_isr_error_name[] = {
 };
 
 void
-gt215_ce_intr(struct nvkm_falcon *ce, struct nvkm_fifo_chan *chan)
+gt215_ce_intr(struct nvkm_falcon *ce, struct nvkm_chan *chan)
 {
 	struct nvkm_subdev *subdev = &ce->engine.subdev;
 	struct nvkm_device *device = subdev->device;
@@ -55,9 +55,9 @@ gt215_ce_intr(struct nvkm_falcon *ce, struct nvkm_fifo_chan *chan)
 		nvkm_error(subdev, "DISPATCH_ERROR %04x [%s] ch %d [%010llx %s] "
				   "subc %d mthd %04x data %08x\n", ssta,
-			   en ? en->name : "", chan ? chan->chid : -1,
+			   en ? en->name : "", chan ? chan->id : -1,
 			   chan ? chan->inst->addr : 0,
-			   chan ? chan->object.client->name : "unknown",
+			   chan ?
chan->name : "unknown", subc, mthd, data); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h index cd53b93664d6..c4c046916fa6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h @@ -8,4 +8,8 @@ void gk104_ce_intr(struct nvkm_engine *); void gp100_ce_intr(struct nvkm_engine *); extern const struct nvkm_object_func gv100_ce_cclass; + +int ga100_ce_oneinit(struct nvkm_engine *); +int ga100_ce_init(struct nvkm_engine *); +int ga100_ce_fini(struct nvkm_engine *, bool); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/cipher/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/cipher/g84.c index be2a7181dc15..caca4f639895 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/cipher/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/cipher/g84.c @@ -81,8 +81,7 @@ g84_cipher_intr(struct nvkm_engine *cipher) { struct nvkm_subdev *subdev = &cipher->subdev; struct nvkm_device *device = subdev->device; - struct nvkm_fifo *fifo = device->fifo; - struct nvkm_fifo_chan *chan; + struct nvkm_chan *chan; u32 stat = nvkm_rd32(device, 0x102130); u32 mthd = nvkm_rd32(device, 0x102190); u32 data = nvkm_rd32(device, 0x102194); @@ -90,16 +89,16 @@ g84_cipher_intr(struct nvkm_engine *cipher) unsigned long flags; char msg[128]; - chan = nvkm_fifo_chan_inst(fifo, (u64)inst << 12, &flags); + chan = nvkm_chan_get_inst(cipher, (u64)inst << 12, &flags); if (stat) { nvkm_snprintbf(msg, sizeof(msg), g84_cipher_intr_mask, stat); nvkm_error(subdev, "%08x [%s] ch %d [%010llx %s] " "mthd %04x data %08x\n", stat, msg, - chan ? chan->chid : -1, (u64)inst << 12, - chan ? chan->object.client->name : "unknown", + chan ? chan->id : -1, (u64)inst << 12, + chan ? chan->name : "unknown", mthd, data); } - nvkm_fifo_chan_put(fifo, flags, &chan); + nvkm_chan_put(&chan, flags); nvkm_wr32(device, 0x102130, stat); nvkm_wr32(device, 0x10200c, 0x10); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index d8cf71fb0512..364fea320cb3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1095,7 +1095,7 @@ nv98_chipset = { .volt = { 0x00000001, nv40_volt_new }, .disp = { 0x00000001, g94_disp_new }, .dma = { 0x00000001, nv50_dma_new }, - .fifo = { 0x00000001, g84_fifo_new }, + .fifo = { 0x00000001, g98_fifo_new }, .gr = { 0x00000001, g84_gr_new }, .mspdec = { 0x00000001, g98_mspdec_new }, .msppp = { 0x00000001, g98_msppp_new }, @@ -1161,7 +1161,7 @@ nva3_chipset = { .ce = { 0x00000001, gt215_ce_new }, .disp = { 0x00000001, gt215_disp_new }, .dma = { 0x00000001, nv50_dma_new }, - .fifo = { 0x00000001, g84_fifo_new }, + .fifo = { 0x00000001, g98_fifo_new }, .gr = { 0x00000001, gt215_gr_new }, .mpeg = { 0x00000001, g84_mpeg_new }, .mspdec = { 0x00000001, gt215_mspdec_new }, @@ -1195,7 +1195,7 @@ nva5_chipset = { .ce = { 0x00000001, gt215_ce_new }, .disp = { 0x00000001, gt215_disp_new }, .dma = { 0x00000001, nv50_dma_new }, - .fifo = { 0x00000001, g84_fifo_new }, + .fifo = { 0x00000001, g98_fifo_new }, .gr = { 0x00000001, gt215_gr_new }, .mspdec = { 0x00000001, gt215_mspdec_new }, .msppp = { 0x00000001, gt215_msppp_new }, @@ -1228,7 +1228,7 @@ nva8_chipset = { .ce = { 0x00000001, gt215_ce_new }, .disp = { 0x00000001, gt215_disp_new }, .dma = { 0x00000001, nv50_dma_new }, - .fifo = { 0x00000001, g84_fifo_new }, + .fifo = { 0x00000001, g98_fifo_new }, .gr = { 0x00000001, gt215_gr_new }, .mspdec = { 0x00000001, 
gt215_mspdec_new }, .msppp = { 0x00000001, gt215_msppp_new }, @@ -1259,7 +1259,7 @@ nvaa_chipset = { .volt = { 0x00000001, nv40_volt_new }, .disp = { 0x00000001, mcp77_disp_new }, .dma = { 0x00000001, nv50_dma_new }, - .fifo = { 0x00000001, g84_fifo_new }, + .fifo = { 0x00000001, g98_fifo_new }, .gr = { 0x00000001, gt200_gr_new }, .mspdec = { 0x00000001, g98_mspdec_new }, .msppp = { 0x00000001, g98_msppp_new }, @@ -1291,7 +1291,7 @@ nvac_chipset = { .volt = { 0x00000001, nv40_volt_new }, .disp = { 0x00000001, mcp77_disp_new }, .dma = { 0x00000001, nv50_dma_new }, - .fifo = { 0x00000001, g84_fifo_new }, + .fifo = { 0x00000001, g98_fifo_new }, .gr = { 0x00000001, mcp79_gr_new }, .mspdec = { 0x00000001, g98_mspdec_new }, .msppp = { 0x00000001, g98_msppp_new }, @@ -1325,7 +1325,7 @@ nvaf_chipset = { .ce = { 0x00000001, gt215_ce_new }, .disp = { 0x00000001, mcp89_disp_new }, .dma = { 0x00000001, nv50_dma_new }, - .fifo = { 0x00000001, g84_fifo_new }, + .fifo = { 0x00000001, g98_fifo_new }, .gr = { 0x00000001, mcp89_gr_new }, .mspdec = { 0x00000001, gt215_mspdec_new }, .msppp = { 0x00000001, gt215_msppp_new }, @@ -2130,7 +2130,7 @@ nv12b_chipset = { .volt = { 0x00000001, gm20b_volt_new }, .ce = { 0x00000004, gm200_ce_new }, .dma = { 0x00000001, gf119_dma_new }, - .fifo = { 0x00000001, gm20b_fifo_new }, + .fifo = { 0x00000001, gm200_fifo_new }, .gr = { 0x00000001, gm20b_gr_new }, .sw = { 0x00000001, gf100_sw_new }, }; @@ -2356,7 +2356,7 @@ nv13b_chipset = { .top = { 0x00000001, gk104_top_new }, .ce = { 0x00000001, gp100_ce_new }, .dma = { 0x00000001, gf119_dma_new }, - .fifo = { 0x00000001, gp10b_fifo_new }, + .fifo = { 0x00000001, gp100_fifo_new }, .gr = { 0x00000001, gp10b_gr_new }, .sw = { 0x00000001, gf100_sw_new }, }; @@ -2364,7 +2364,7 @@ nv13b_chipset = { static const struct nvkm_device_chip nv140_chipset = { .name = "GV100", - .acr = { 0x00000001, gp108_acr_new }, + .acr = { 0x00000001, gv100_acr_new }, .bar = { 0x00000001, gm107_bar_new }, .bios = { 0x00000001, nvkm_bios_new }, .bus = { 0x00000001, gf100_bus_new }, @@ -2385,6 +2385,7 @@ nv140_chipset = { .therm = { 0x00000001, gp100_therm_new }, .timer = { 0x00000001, gk20a_timer_new }, .top = { 0x00000001, gk104_top_new }, + .vfn = { 0x00000001, gv100_vfn_new }, .ce = { 0x000001ff, gv100_ce_new }, .disp = { 0x00000001, gv100_disp_new }, .dma = { 0x00000001, gv100_dma_new }, @@ -2411,7 +2412,7 @@ nv162_chipset = { .i2c = { 0x00000001, gm200_i2c_new }, .imem = { 0x00000001, nv50_instmem_new }, .ltc = { 0x00000001, gp102_ltc_new }, - .mc = { 0x00000001, tu102_mc_new }, + .mc = { 0x00000001, gp100_mc_new }, .mmu = { 0x00000001, tu102_mmu_new }, .pci = { 0x00000001, gp100_pci_new }, .pmu = { 0x00000001, gp102_pmu_new }, @@ -2419,6 +2420,7 @@ nv162_chipset = { .therm = { 0x00000001, gp100_therm_new }, .timer = { 0x00000001, gk20a_timer_new }, .top = { 0x00000001, gk104_top_new }, + .vfn = { 0x00000001, tu102_vfn_new }, .ce = { 0x0000001f, tu102_ce_new }, .disp = { 0x00000001, tu102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, @@ -2445,7 +2447,7 @@ nv164_chipset = { .i2c = { 0x00000001, gm200_i2c_new }, .imem = { 0x00000001, nv50_instmem_new }, .ltc = { 0x00000001, gp102_ltc_new }, - .mc = { 0x00000001, tu102_mc_new }, + .mc = { 0x00000001, gp100_mc_new }, .mmu = { 0x00000001, tu102_mmu_new }, .pci = { 0x00000001, gp100_pci_new }, .pmu = { 0x00000001, gp102_pmu_new }, @@ -2453,6 +2455,7 @@ nv164_chipset = { .therm = { 0x00000001, gp100_therm_new }, .timer = { 0x00000001, gk20a_timer_new }, .top = { 0x00000001, gk104_top_new }, + .vfn = { 
0x00000001, tu102_vfn_new }, .ce = { 0x0000001f, tu102_ce_new }, .disp = { 0x00000001, tu102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, @@ -2479,7 +2482,7 @@ nv166_chipset = { .i2c = { 0x00000001, gm200_i2c_new }, .imem = { 0x00000001, nv50_instmem_new }, .ltc = { 0x00000001, gp102_ltc_new }, - .mc = { 0x00000001, tu102_mc_new }, + .mc = { 0x00000001, gp100_mc_new }, .mmu = { 0x00000001, tu102_mmu_new }, .pci = { 0x00000001, gp100_pci_new }, .pmu = { 0x00000001, gp102_pmu_new }, @@ -2487,6 +2490,7 @@ nv166_chipset = { .therm = { 0x00000001, gp100_therm_new }, .timer = { 0x00000001, gk20a_timer_new }, .top = { 0x00000001, gk104_top_new }, + .vfn = { 0x00000001, tu102_vfn_new }, .ce = { 0x0000001f, tu102_ce_new }, .disp = { 0x00000001, tu102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, @@ -2513,7 +2517,7 @@ nv167_chipset = { .i2c = { 0x00000001, gm200_i2c_new }, .imem = { 0x00000001, nv50_instmem_new }, .ltc = { 0x00000001, gp102_ltc_new }, - .mc = { 0x00000001, tu102_mc_new }, + .mc = { 0x00000001, gp100_mc_new }, .mmu = { 0x00000001, tu102_mmu_new }, .pci = { 0x00000001, gp100_pci_new }, .pmu = { 0x00000001, gp102_pmu_new }, @@ -2521,6 +2525,7 @@ nv167_chipset = { .therm = { 0x00000001, gp100_therm_new }, .timer = { 0x00000001, gk20a_timer_new }, .top = { 0x00000001, gk104_top_new }, + .vfn = { 0x00000001, tu102_vfn_new }, .ce = { 0x0000001f, tu102_ce_new }, .disp = { 0x00000001, tu102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, @@ -2547,7 +2552,7 @@ nv168_chipset = { .i2c = { 0x00000001, gm200_i2c_new }, .imem = { 0x00000001, nv50_instmem_new }, .ltc = { 0x00000001, gp102_ltc_new }, - .mc = { 0x00000001, tu102_mc_new }, + .mc = { 0x00000001, gp100_mc_new }, .mmu = { 0x00000001, tu102_mmu_new }, .pci = { 0x00000001, gp100_pci_new }, .pmu = { 0x00000001, gp102_pmu_new }, @@ -2555,6 +2560,7 @@ nv168_chipset = { .therm = { 0x00000001, gp100_therm_new }, .timer = { 0x00000001, gk20a_timer_new }, .top = { 0x00000001, gk104_top_new }, + .vfn = { 0x00000001, tu102_vfn_new }, .ce = { 0x0000001f, tu102_ce_new }, .disp = { 0x00000001, tu102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, @@ -2571,6 +2577,7 @@ nv170_chipset = { .bar = { 0x00000001, tu102_bar_new }, .bios = { 0x00000001, nvkm_bios_new }, .devinit = { 0x00000001, ga100_devinit_new }, + .fault = { 0x00000001, tu102_fault_new }, .fb = { 0x00000001, ga100_fb_new }, .gpio = { 0x00000001, gk104_gpio_new }, .i2c = { 0x00000001, gm200_i2c_new }, @@ -2581,111 +2588,159 @@ nv170_chipset = { .privring = { 0x00000001, gm200_privring_new }, .timer = { 0x00000001, gk20a_timer_new }, .top = { 0x00000001, ga100_top_new }, + .vfn = { 0x00000001, ga100_vfn_new }, + .ce = { 0x000003ff, ga100_ce_new }, + .fifo = { 0x00000001, ga100_fifo_new }, }; static const struct nvkm_device_chip nv172_chipset = { .name = "GA102", + .acr = { 0x00000001, ga102_acr_new }, .bar = { 0x00000001, tu102_bar_new }, .bios = { 0x00000001, nvkm_bios_new }, .devinit = { 0x00000001, ga100_devinit_new }, + .fault = { 0x00000001, tu102_fault_new }, .fb = { 0x00000001, ga102_fb_new }, .gpio = { 0x00000001, ga102_gpio_new }, + .gsp = { 0x00000001, ga102_gsp_new }, .i2c = { 0x00000001, gm200_i2c_new }, .imem = { 0x00000001, nv50_instmem_new }, + .ltc = { 0x00000001, ga102_ltc_new }, .mc = { 0x00000001, ga100_mc_new }, .mmu = { 0x00000001, tu102_mmu_new }, .pci = { 0x00000001, gp100_pci_new }, .privring = { 0x00000001, gm200_privring_new }, .timer = { 0x00000001, gk20a_timer_new }, .top = { 0x00000001, ga100_top_new }, + .vfn = { 0x00000001, ga100_vfn_new }, + .ce = 
{ 0x0000001f, ga102_ce_new }, .disp = { 0x00000001, ga102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, .fifo = { 0x00000001, ga102_fifo_new }, + .gr = { 0x00000001, ga102_gr_new }, + .nvdec = { 0x00000001, ga102_nvdec_new }, + .sec2 = { 0x00000001, ga102_sec2_new }, }; static const struct nvkm_device_chip nv173_chipset = { .name = "GA103", + .acr = { 0x00000001, ga102_acr_new }, .bar = { 0x00000001, tu102_bar_new }, .bios = { 0x00000001, nvkm_bios_new }, .devinit = { 0x00000001, ga100_devinit_new }, + .fault = { 0x00000001, tu102_fault_new }, .fb = { 0x00000001, ga102_fb_new }, .gpio = { 0x00000001, ga102_gpio_new }, + .gsp = { 0x00000001, ga102_gsp_new }, .i2c = { 0x00000001, gm200_i2c_new }, .imem = { 0x00000001, nv50_instmem_new }, + .ltc = { 0x00000001, ga102_ltc_new }, .mc = { 0x00000001, ga100_mc_new }, .mmu = { 0x00000001, tu102_mmu_new }, .pci = { 0x00000001, gp100_pci_new }, .privring = { 0x00000001, gm200_privring_new }, .timer = { 0x00000001, gk20a_timer_new }, .top = { 0x00000001, ga100_top_new }, + .vfn = { 0x00000001, ga100_vfn_new }, + .ce = { 0x0000001f, ga102_ce_new }, .disp = { 0x00000001, ga102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, .fifo = { 0x00000001, ga102_fifo_new }, + .gr = { 0x00000001, ga102_gr_new }, + .nvdec = { 0x00000001, ga102_nvdec_new }, + .sec2 = { 0x00000001, ga102_sec2_new }, }; static const struct nvkm_device_chip nv174_chipset = { .name = "GA104", + .acr = { 0x00000001, ga102_acr_new }, .bar = { 0x00000001, tu102_bar_new }, .bios = { 0x00000001, nvkm_bios_new }, .devinit = { 0x00000001, ga100_devinit_new }, + .fault = { 0x00000001, tu102_fault_new }, .fb = { 0x00000001, ga102_fb_new }, .gpio = { 0x00000001, ga102_gpio_new }, + .gsp = { 0x00000001, ga102_gsp_new }, .i2c = { 0x00000001, gm200_i2c_new }, .imem = { 0x00000001, nv50_instmem_new }, + .ltc = { 0x00000001, ga102_ltc_new }, .mc = { 0x00000001, ga100_mc_new }, .mmu = { 0x00000001, tu102_mmu_new }, .pci = { 0x00000001, gp100_pci_new }, .privring = { 0x00000001, gm200_privring_new }, .timer = { 0x00000001, gk20a_timer_new }, .top = { 0x00000001, ga100_top_new }, + .vfn = { 0x00000001, ga100_vfn_new }, + .ce = { 0x0000001f, ga102_ce_new }, .disp = { 0x00000001, ga102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, .fifo = { 0x00000001, ga102_fifo_new }, + .gr = { 0x00000001, ga102_gr_new }, + .nvdec = { 0x00000001, ga102_nvdec_new }, + .sec2 = { 0x00000001, ga102_sec2_new }, }; static const struct nvkm_device_chip nv176_chipset = { .name = "GA106", + .acr = { 0x00000001, ga102_acr_new }, .bar = { 0x00000001, tu102_bar_new }, .bios = { 0x00000001, nvkm_bios_new }, .devinit = { 0x00000001, ga100_devinit_new }, + .fault = { 0x00000001, tu102_fault_new }, .fb = { 0x00000001, ga102_fb_new }, .gpio = { 0x00000001, ga102_gpio_new }, + .gsp = { 0x00000001, ga102_gsp_new }, .i2c = { 0x00000001, gm200_i2c_new }, .imem = { 0x00000001, nv50_instmem_new }, + .ltc = { 0x00000001, ga102_ltc_new }, .mc = { 0x00000001, ga100_mc_new }, .mmu = { 0x00000001, tu102_mmu_new }, .pci = { 0x00000001, gp100_pci_new }, .privring = { 0x00000001, gm200_privring_new }, .timer = { 0x00000001, gk20a_timer_new }, .top = { 0x00000001, ga100_top_new }, + .vfn = { 0x00000001, ga100_vfn_new }, + .ce = { 0x0000001f, ga102_ce_new }, .disp = { 0x00000001, ga102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, .fifo = { 0x00000001, ga102_fifo_new }, + .gr = { 0x00000001, ga102_gr_new }, + .nvdec = { 0x00000001, ga102_nvdec_new }, + .sec2 = { 0x00000001, ga102_sec2_new }, }; static const struct nvkm_device_chip 
nv177_chipset = { .name = "GA107", + .acr = { 0x00000001, ga102_acr_new }, .bar = { 0x00000001, tu102_bar_new }, .bios = { 0x00000001, nvkm_bios_new }, .devinit = { 0x00000001, ga100_devinit_new }, + .fault = { 0x00000001, tu102_fault_new }, .fb = { 0x00000001, ga102_fb_new }, .gpio = { 0x00000001, ga102_gpio_new }, + .gsp = { 0x00000001, ga102_gsp_new }, .i2c = { 0x00000001, gm200_i2c_new }, .imem = { 0x00000001, nv50_instmem_new }, + .ltc = { 0x00000001, ga102_ltc_new }, .mc = { 0x00000001, ga100_mc_new }, .mmu = { 0x00000001, tu102_mmu_new }, .pci = { 0x00000001, gp100_pci_new }, .privring = { 0x00000001, gm200_privring_new }, .timer = { 0x00000001, gk20a_timer_new }, .top = { 0x00000001, ga100_top_new }, + .vfn = { 0x00000001, ga100_vfn_new }, + .ce = { 0x0000001f, ga102_ce_new }, .disp = { 0x00000001, ga102_disp_new }, .dma = { 0x00000001, gv100_dma_new }, .fifo = { 0x00000001, ga102_fifo_new }, + .gr = { 0x00000001, ga102_gr_new }, + .nvdec = { 0x00000001, ga102_nvdec_new }, + .sec2 = { 0x00000001, ga102_sec2_new }, }; struct nvkm_subdev * @@ -2734,6 +2789,8 @@ nvkm_device_fini(struct nvkm_device *device, bool suspend) if (device->func->fini) device->func->fini(device, suspend); + nvkm_intr_unarm(device); + time = ktime_to_us(ktime_get()) - time; nvdev_trace(device, "%s completed in %lldus...\n", action, time); return 0; @@ -2759,6 +2816,8 @@ nvkm_device_preinit(struct nvkm_device *device) nvdev_trace(device, "preinit running...\n"); time = ktime_to_us(ktime_get()); + nvkm_intr_unarm(device); + if (device->func->preinit) { ret = device->func->preinit(device); if (ret) @@ -2775,6 +2834,14 @@ nvkm_device_preinit(struct nvkm_device *device) if (ret) goto fail; + ret = nvkm_top_parse(device); + if (ret) + goto fail; + + ret = nvkm_fb_mem_unlock(device->fb); + if (ret) + goto fail; + time = ktime_to_us(ktime_get()) - time; nvdev_trace(device, "preinit completed in %lldus\n", time); return 0; @@ -2800,6 +2867,8 @@ nvkm_device_init(struct nvkm_device *device) nvdev_trace(device, "init running...\n"); time = ktime_to_us(ktime_get()); + nvkm_intr_rearm(device); + if (device->func->init) { ret = device->func->init(device); if (ret) @@ -2837,6 +2906,8 @@ nvkm_device_del(struct nvkm_device **pdevice) if (device) { mutex_lock(&nv_devices_mutex); + nvkm_intr_dtor(device); + list_for_each_entry_safe_reverse(subdev, subtmp, &device->subdev, head) nvkm_subdev_del(&subdev); @@ -3144,6 +3215,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, device->name = device->chip->name; mutex_init(&device->mutex); + nvkm_intr_ctor(device); #define NVKM_LAYOUT_ONCE(type,data,ptr) \ if (device->chip->ptr.inst && (subdev_mask & (BIT_ULL(type)))) { \ @@ -3185,7 +3257,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, #undef NVKM_LAYOUT_INST #undef NVKM_LAYOUT_ONCE - ret = 0; + ret = nvkm_intr_install(device); done: if (device->pri && (!mmio || ret)) { iounmap(device->pri); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c index f302d2b5782a..abccb2bb68a6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c @@ -1574,6 +1574,12 @@ nvkm_device_pci_resource_size(struct nvkm_device *device, unsigned bar) return pci_resource_len(pdev->pdev, bar); } +static int +nvkm_device_pci_irq(struct nvkm_device *device) +{ + return nvkm_device_pci(device)->pdev->irq; +} + static void nvkm_device_pci_fini(struct nvkm_device *device, bool suspend) { @@ -1612,6 +1618,7 @@ nvkm_device_pci_func = { 
.dtor = nvkm_device_pci_dtor, .preinit = nvkm_device_pci_preinit, .fini = nvkm_device_pci_fini, + .irq = nvkm_device_pci_irq, .resource_addr = nvkm_device_pci_resource_addr, .resource_size = nvkm_device_pci_resource_size, .cpu_coherent = !IS_ENABLED(CONFIG_ARM), diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h index 93949b3c7214..24faaac15891 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h @@ -27,6 +27,7 @@ #include <subdev/therm.h> #include <subdev/timer.h> #include <subdev/top.h> +#include <subdev/vfn.h> #include <subdev/volt.h> #include <engine/bsp.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index ac9e122586bc..87caa4a72921 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -206,45 +206,12 @@ nvkm_device_tegra_resource_size(struct nvkm_device *device, unsigned bar) return res ? resource_size(res) : 0; } -static irqreturn_t -nvkm_device_tegra_intr(int irq, void *arg) -{ - struct nvkm_device_tegra *tdev = arg; - struct nvkm_device *device = &tdev->device; - bool handled = false; - nvkm_mc_intr_unarm(device); - nvkm_mc_intr(device, &handled); - nvkm_mc_intr_rearm(device); - return handled ? IRQ_HANDLED : IRQ_NONE; -} - -static void -nvkm_device_tegra_fini(struct nvkm_device *device, bool suspend) -{ - struct nvkm_device_tegra *tdev = nvkm_device_tegra(device); - if (tdev->irq) { - free_irq(tdev->irq, tdev); - tdev->irq = 0; - } -} - static int -nvkm_device_tegra_init(struct nvkm_device *device) +nvkm_device_tegra_irq(struct nvkm_device *device) { struct nvkm_device_tegra *tdev = nvkm_device_tegra(device); - int irq, ret; - - irq = platform_get_irq_byname(tdev->pdev, "stall"); - if (irq < 0) - return irq; - ret = request_irq(irq, nvkm_device_tegra_intr, - IRQF_SHARED, "nvkm", tdev); - if (ret) - return ret; - - tdev->irq = irq; - return 0; + return platform_get_irq_byname(tdev->pdev, "stall"); } static void * @@ -260,8 +227,7 @@ static const struct nvkm_device_func nvkm_device_tegra_func = { .tegra = nvkm_device_tegra, .dtor = nvkm_device_tegra_dtor, - .init = nvkm_device_tegra_init, - .fini = nvkm_device_tegra_fini, + .irq = nvkm_device_tegra_irq, .resource_addr = nvkm_device_tegra_resource_addr, .resource_size = nvkm_device_tegra_resource_size, .cpu_coherent = false, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c index 45f509c11c36..9b39ec341615 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c @@ -342,6 +342,8 @@ nvkm_udevice_child_get(struct nvkm_object *object, int index, sclass = &device->mmu->user; else if (device->fault && index-- == 0) sclass = &device->fault->user; + else if (device->vfn && index-- == 0) + sclass = &device->vfn->user; else return -EINVAL; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild index 600072a904be..e1aecd3fe96c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild @@ -28,9 +28,7 @@ nvkm-y += nvkm/engine/disp/gv100.o nvkm-y += nvkm/engine/disp/tu102.o nvkm-y += nvkm/engine/disp/ga102.o -nvkm-y += nvkm/engine/disp/rootnv04.o -nvkm-y += nvkm/engine/disp/rootnv50.o - nvkm-y += nvkm/engine/disp/udisp.o nvkm-y += 
nvkm/engine/disp/uconn.o nvkm-y += nvkm/engine/disp/uoutp.o +nvkm-y += nvkm/engine/disp/uhead.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c index 65c99d948b68..73104b59f97f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c @@ -29,7 +29,6 @@ #include "outp.h" #include <core/client.h> -#include <core/notify.h> #include <core/ramht.h> #include <subdev/bios.h> #include <subdev/bios/dcb.h> @@ -57,32 +56,8 @@ nvkm_disp_vblank_init(struct nvkm_event *event, int type, int id) head->func->vblank_get(head); } -static int -nvkm_disp_vblank_ctor(struct nvkm_object *object, void *data, u32 size, - struct nvkm_notify *notify) -{ - struct nvkm_disp *disp = - container_of(notify->event, typeof(*disp), vblank); - union { - struct nvif_notify_head_req_v0 v0; - } *req = data; - int ret = -ENOSYS; - - if (!(ret = nvif_unpack(ret, &data, &size, req->v0, 0, 0, false))) { - notify->size = sizeof(struct nvif_notify_head_rep_v0); - if (ret = -ENXIO, req->v0.head <= disp->vblank.index_nr) { - notify->types = 1; - notify->index = req->v0.head; - return 0; - } - } - - return ret; -} - static const struct nvkm_event_func nvkm_disp_vblank_func = { - .ctor = nvkm_disp_vblank_ctor, .init = nvkm_disp_vblank_init, .fini = nvkm_disp_vblank_fini, }; @@ -90,59 +65,7 @@ nvkm_disp_vblank_func = { void nvkm_disp_vblank(struct nvkm_disp *disp, int head) { - struct nvif_notify_head_rep_v0 rep = {}; - nvkm_event_send(&disp->vblank, 1, head, &rep, sizeof(rep)); -} - -static int -nvkm_disp_hpd_ctor(struct nvkm_object *object, void *data, u32 size, - struct nvkm_notify *notify) -{ - struct nvkm_disp *disp = - container_of(notify->event, typeof(*disp), hpd); - union { - struct nvif_notify_conn_req_v0 v0; - } *req = data; - struct nvkm_outp *outp; - int ret = -ENOSYS; - - if (!(ret = nvif_unpack(ret, &data, &size, req->v0, 0, 0, false))) { - notify->size = sizeof(struct nvif_notify_conn_rep_v0); - list_for_each_entry(outp, &disp->outps, head) { - if (ret = -ENXIO, outp->conn->index == req->v0.conn) { - if (ret = -ENODEV, outp->conn->hpd.event) { - notify->types = req->v0.mask; - notify->index = req->v0.conn; - ret = 0; - } - break; - } - } - } - - return ret; -} - -static const struct nvkm_event_func -nvkm_disp_hpd_func = { - .ctor = nvkm_disp_hpd_ctor -}; - -int -nvkm_disp_ntfy(struct nvkm_object *object, u32 type, struct nvkm_event **event) -{ - struct nvkm_disp *disp = nvkm_disp(object->engine); - switch (type) { - case NV04_DISP_NTFY_VBLANK: - *event = &disp->vblank; - return 0; - case NV04_DISP_NTFY_CONN: - *event = &disp->hpd; - return 0; - default: - break; - } - return -EINVAL; + nvkm_event_ntfy(&disp->vblank, head, NVKM_DISP_HEAD_EVENT_VBLANK); } static int @@ -343,9 +266,7 @@ nvkm_disp_oneinit(struct nvkm_engine *engine) /* Apparently we need to create a new one! 
*/ ret = nvkm_conn_new(disp, i, &connE, &outp->conn); if (ret) { - nvkm_error(&disp->engine.subdev, - "failed to create outp %d conn: %d\n", - outp->index, ret); + nvkm_error(subdev, "failed to create outp %d conn: %d\n", outp->index, ret); nvkm_conn_del(&outp->conn); list_del(&outp->head); nvkm_outp_del(&outp); @@ -355,10 +276,6 @@ nvkm_disp_oneinit(struct nvkm_engine *engine) list_add_tail(&outp->conn->head, &disp->conns); } - ret = nvkm_event_init(&nvkm_disp_hpd_func, 3, hpd, &disp->hpd); - if (ret) - return ret; - if (disp->func->oneinit) { ret = disp->func->oneinit(disp); if (ret) @@ -382,7 +299,7 @@ nvkm_disp_oneinit(struct nvkm_engine *engine) list_for_each_entry(head, &disp->heads, head) i = max(i, head->id + 1); - return nvkm_event_init(&nvkm_disp_vblank_func, 1, i, &disp->vblank); + return nvkm_event_init(&nvkm_disp_vblank_func, subdev, 1, i, &disp->vblank); } static void * @@ -406,7 +323,6 @@ nvkm_disp_dtor(struct nvkm_engine *engine) } nvkm_event_fini(&disp->vblank); - nvkm_event_fini(&disp->hpd); while (!list_empty(&disp->conns)) { conn = list_first_entry(&disp->conns, typeof(*conn), head); @@ -473,5 +389,6 @@ nvkm_disp_new_(const struct nvkm_disp_func *func, struct nvkm_device *device, mutex_init(&disp->super.mutex); } - return nvkm_event_init(func->uevent, 1, ARRAY_SIZE(disp->chan), &disp->uevent); + return nvkm_event_init(func->uevent, &disp->engine.subdev, 1, ARRAY_SIZE(disp->chan), + &disp->uevent); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.c index 7ed11801a3ae..fbdae1137864 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.c @@ -29,38 +29,14 @@ #include <nvif/event.h> -static int -nvkm_conn_hpd(struct nvkm_notify *notify) -{ - struct nvkm_conn *conn = container_of(notify, typeof(*conn), hpd); - struct nvkm_disp *disp = conn->disp; - struct nvkm_gpio *gpio = disp->engine.subdev.device->gpio; - const struct nvkm_gpio_ntfy_rep *line = notify->data; - struct nvif_notify_conn_rep_v0 rep; - int index = conn->index; - - CONN_DBG(conn, "HPD: %d", line->mask); - - if (!nvkm_gpio_get(gpio, 0, DCB_GPIO_UNUSED, conn->hpd.index)) - rep.mask = NVIF_NOTIFY_CONN_V0_UNPLUG; - else - rep.mask = NVIF_NOTIFY_CONN_V0_PLUG; - rep.version = 0; - - nvkm_event_send(&disp->hpd, rep.mask, index, &rep, sizeof(rep)); - return NVKM_NOTIFY_KEEP; -} - void nvkm_conn_fini(struct nvkm_conn *conn) { - nvkm_notify_put(&conn->hpd); } void nvkm_conn_init(struct nvkm_conn *conn) { - nvkm_notify_get(&conn->hpd); } void @@ -68,7 +44,6 @@ nvkm_conn_del(struct nvkm_conn **pconn) { struct nvkm_conn *conn = *pconn; if (conn) { - nvkm_notify_fini(&conn->hpd); kfree(*pconn); *pconn = NULL; } @@ -106,20 +81,6 @@ nvkm_conn_ctor(struct nvkm_disp *disp, int index, struct nvbios_connE *info, } conn->info.hpd = func.line; - - ret = nvkm_notify_init(NULL, &gpio->event, nvkm_conn_hpd, - true, &(struct nvkm_gpio_ntfy_req) { - .mask = NVKM_GPIO_TOGGLED, - .line = func.line, - }, - sizeof(struct nvkm_gpio_ntfy_req), - sizeof(struct nvkm_gpio_ntfy_rep), - &conn->hpd); - if (ret) { - CONN_ERR(conn, "func %02x failed, %d", info->hpd, ret); - } else { - CONN_DBG(conn, "func %02x (HPD)", info->hpd); - } } } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h index f109634ce5ca..a0600e72b0ec 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h @@ -3,7 +3,6 @@ #define 
__NVKM_DISP_CONN_H__ #include "priv.h" -#include <core/notify.h> #include <subdev/bios.h> #include <subdev/bios/conn.h> @@ -12,8 +11,6 @@ struct nvkm_conn { int index; struct nvbios_connE info; - struct nvkm_notify hpd; - struct list_head head; struct nvkm_object object; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c index c1b3206f27e6..40c8ea43c42f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c @@ -274,70 +274,17 @@ nvkm_dp_train_cr(struct lt_state *lt) } static int -nvkm_dp_train_links(struct nvkm_outp *outp, int rate) +nvkm_dp_train_link(struct nvkm_outp *outp, int rate) { struct nvkm_ior *ior = outp->ior; - struct nvkm_disp *disp = outp->disp; - struct nvkm_subdev *subdev = &disp->engine.subdev; - struct nvkm_bios *bios = subdev->device->bios; struct lt_state lt = { .outp = outp, + .pc2 = outp->dp.dpcd[DPCD_RC02] & DPCD_RC02_TPS3_SUPPORTED, }; - u32 lnkcmp; u8 sink[2], data; int ret; - OUTP_DBG(outp, "training %d x %d MB/s", ior->dp.nr, ior->dp.bw * 27); - - /* Intersect misc. capabilities of the OR and sink. */ - if (disp->engine.subdev.device->chipset < 0x110) - outp->dp.dpcd[DPCD_RC03] &= ~DPCD_RC03_TPS4_SUPPORTED; - if (disp->engine.subdev.device->chipset < 0xd0) - outp->dp.dpcd[DPCD_RC02] &= ~DPCD_RC02_TPS3_SUPPORTED; - lt.pc2 = outp->dp.dpcd[DPCD_RC02] & DPCD_RC02_TPS3_SUPPORTED; - - if (AMPERE_IED_HACK(disp) && (lnkcmp = lt.outp->dp.info.script[0])) { - /* Execute BeforeLinkTraining script from DP Info table. */ - while (ior->dp.bw < nvbios_rd08(bios, lnkcmp)) - lnkcmp += 3; - lnkcmp = nvbios_rd16(bios, lnkcmp + 1); - - nvbios_init(&outp->disp->engine.subdev, lnkcmp, - init.outp = &outp->info; - init.or = ior->id; - init.link = ior->asy.link; - ); - } - - /* Set desired link configuration on the source. */ - if ((lnkcmp = lt.outp->dp.info.lnkcmp)) { - if (outp->dp.version < 0x30) { - while ((ior->dp.bw * 2700) < nvbios_rd16(bios, lnkcmp)) - lnkcmp += 4; - lnkcmp = nvbios_rd16(bios, lnkcmp + 2); - } else { - while (ior->dp.bw < nvbios_rd08(bios, lnkcmp)) - lnkcmp += 3; - lnkcmp = nvbios_rd16(bios, lnkcmp + 1); - } - - nvbios_init(subdev, lnkcmp, - init.outp = &outp->info; - init.or = ior->id; - init.link = ior->asy.link; - ); - } - - ret = ior->func->dp->links(ior, outp->dp.aux); - if (ret) { - if (ret < 0) { - OUTP_ERR(outp, "train failed with %d", ret); - return ret; - } - return 0; - } - - ior->func->dp->power(ior, ior->dp.nr); + OUTP_DBG(outp, "training %dx%02x", ior->dp.nr, ior->dp.bw); /* Select LTTPR non-transparent mode if we have a valid configuration, * use transparent mode otherwise. @@ -393,6 +340,71 @@ nvkm_dp_train_links(struct nvkm_outp *outp, int rate) return ret; } +static int +nvkm_dp_train_links(struct nvkm_outp *outp, int rate) +{ + struct nvkm_ior *ior = outp->ior; + struct nvkm_disp *disp = outp->disp; + struct nvkm_subdev *subdev = &disp->engine.subdev; + struct nvkm_bios *bios = subdev->device->bios; + u32 lnkcmp; + int ret; + + OUTP_DBG(outp, "programming link for %dx%02x", ior->dp.nr, ior->dp.bw); + + /* Intersect misc. capabilities of the OR and sink. */ + if (disp->engine.subdev.device->chipset < 0x110) + outp->dp.dpcd[DPCD_RC03] &= ~DPCD_RC03_TPS4_SUPPORTED; + if (disp->engine.subdev.device->chipset < 0xd0) + outp->dp.dpcd[DPCD_RC02] &= ~DPCD_RC02_TPS3_SUPPORTED; + + if (AMPERE_IED_HACK(disp) && (lnkcmp = outp->dp.info.script[0])) { + /* Execute BeforeLinkTraining script from DP Info table. 
*/ + while (ior->dp.bw < nvbios_rd08(bios, lnkcmp)) + lnkcmp += 3; + lnkcmp = nvbios_rd16(bios, lnkcmp + 1); + + nvbios_init(&outp->disp->engine.subdev, lnkcmp, + init.outp = &outp->info; + init.or = ior->id; + init.link = ior->asy.link; + ); + } + + /* Set desired link configuration on the source. */ + if ((lnkcmp = outp->dp.info.lnkcmp)) { + if (outp->dp.version < 0x30) { + while ((ior->dp.bw * 2700) < nvbios_rd16(bios, lnkcmp)) + lnkcmp += 4; + lnkcmp = nvbios_rd16(bios, lnkcmp + 2); + } else { + while (ior->dp.bw < nvbios_rd08(bios, lnkcmp)) + lnkcmp += 3; + lnkcmp = nvbios_rd16(bios, lnkcmp + 1); + } + + nvbios_init(subdev, lnkcmp, + init.outp = &outp->info; + init.or = ior->id; + init.link = ior->asy.link; + ); + } + + ret = ior->func->dp->links(ior, outp->dp.aux); + if (ret) { + if (ret < 0) { + OUTP_ERR(outp, "train failed with %d", ret); + return ret; + } + return 0; + } + + ior->func->dp->power(ior, ior->dp.nr); + + /* Attempt to train the link in this configuration. */ + return nvkm_dp_train_link(outp, rate); +} + static void nvkm_dp_train_fini(struct nvkm_outp *outp) { @@ -439,6 +451,16 @@ nvkm_dp_train(struct nvkm_outp *outp, u32 dataKBps) int ret = -EINVAL, nr, rate; u8 pwr; + /* Retraining link? Skip source configuration, it can mess up the active modeset. */ + if (atomic_read(&outp->dp.lt.done)) { + for (rate = 0; rate < outp->dp.rates; rate++) { + if (outp->dp.rate[rate].rate == ior->dp.bw * 27000) + return nvkm_dp_train_link(outp, ret); + } + WARN_ON(1); + return -EINVAL; + } + /* Ensure sink is not in a low-power state. */ if (!nvkm_rdaux(outp->dp.aux, DPCD_SC00, &pwr, 1)) { if ((pwr & DPCD_SC00_SET_POWER) != DPCD_SC00_SET_POWER_D0) { @@ -455,6 +477,21 @@ nvkm_dp_train(struct nvkm_outp *outp, u32 dataKBps) /* Link training. */ OUTP_DBG(outp, "training"); nvkm_dp_train_init(outp); + + /* Validate and train at configuration requested (if any) on ACQUIRE. */ + if (outp->dp.lt.nr) { + for (nr = outp->dp.links; ret < 0 && nr; nr >>= 1) { + for (rate = 0; nr == outp->dp.lt.nr && rate < outp->dp.rates; rate++) { + if (outp->dp.rate[rate].rate / 27000 == outp->dp.lt.bw) { + ior->dp.bw = outp->dp.rate[rate].rate / 27000; + ior->dp.nr = nr; + ret = nvkm_dp_train_links(outp, rate); + } + } + } + } + + /* Otherwise, loop through all valid link configurations that support the data rate. */ for (nr = outp->dp.links; ret < 0 && nr; nr >>= 1) { for (rate = 0; ret < 0 && rate < outp->dp.rates; rate++) { if (outp->dp.rate[rate].rate * nr >= dataKBps || WARN_ON(!ior->dp.nr)) { @@ -465,6 +502,8 @@ nvkm_dp_train(struct nvkm_outp *outp, u32 dataKBps) } } } + + /* Finish up. */ nvkm_dp_train_fini(outp); if (ret < 0) OUTP_ERR(outp, "training failed"); @@ -595,18 +634,38 @@ nvkm_dp_enable_supported_link_rates(struct nvkm_outp *outp) return outp->dp.rates != 0; } -static bool -nvkm_dp_enable(struct nvkm_outp *outp, bool enable) +void +nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr) { + struct nvkm_gpio *gpio = outp->disp->engine.subdev.device->gpio; struct nvkm_i2c_aux *aux = outp->dp.aux; - if (enable) { - if (!outp->dp.present) { - OUTP_DBG(outp, "aux power -> always"); - nvkm_i2c_aux_monitor(aux, true); - outp->dp.present = true; + if (auxpwr && !outp->dp.aux_pwr) { + /* eDP panels need powering on by us (if the VBIOS doesn't default it + * to on) before doing any AUX channel transactions. LVDS panel power + * is handled by the SOR itself, and not required for LVDS DDC. 
+ */ + if (outp->conn->info.type == DCB_CONNECTOR_eDP) { + int power = nvkm_gpio_get(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff); + if (power == 0) { + nvkm_gpio_set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, 1); + outp->dp.aux_pwr_pu = true; + } + + /* We delay here unconditionally, even if already powered, + * because some laptop panels having a significant resume + * delay before the panel begins responding. + * + * This is likely a bit of a hack, but no better idea for + * handling this at the moment. + */ + msleep(300); } + OUTP_DBG(outp, "aux power -> always"); + nvkm_i2c_aux_monitor(aux, true); + outp->dp.aux_pwr = true; + /* Detect any LTTPRs before reading DPCD receiver caps. */ if (!nvkm_rdaux(aux, DPCD_LTTPR_REV, outp->dp.lttpr, sizeof(outp->dp.lttpr)) && outp->dp.lttpr[0] >= 0x14 && outp->dp.lttpr[2]) { @@ -659,96 +718,41 @@ nvkm_dp_enable(struct nvkm_outp *outp, bool enable) outp->dp.rates++; } } - - return true; } - } - - if (outp->dp.present) { + } else + if (!auxpwr && outp->dp.aux_pwr) { OUTP_DBG(outp, "aux power -> demand"); nvkm_i2c_aux_monitor(aux, false); - outp->dp.present = false; - } - - atomic_set(&outp->dp.lt.done, 0); - return false; -} - -static int -nvkm_dp_hpd(struct nvkm_notify *notify) -{ - const struct nvkm_i2c_ntfy_rep *line = notify->data; - struct nvkm_outp *outp = container_of(notify, typeof(*outp), dp.hpd); - struct nvkm_conn *conn = outp->conn; - struct nvkm_disp *disp = outp->disp; - struct nvif_notify_conn_rep_v0 rep = {}; + outp->dp.aux_pwr = false; + atomic_set(&outp->dp.lt.done, 0); - OUTP_DBG(outp, "HPD: %d", line->mask); - if (line->mask & NVKM_I2C_IRQ) { - if (atomic_read(&outp->dp.lt.done)) - outp->func->acquire(outp); - rep.mask |= NVIF_NOTIFY_CONN_V0_IRQ; - } else { - nvkm_dp_enable(outp, true); + /* Restore eDP panel GPIO to its prior state if we changed it, as + * it could potentially interfere with other outputs. + */ + if (outp->conn->info.type == DCB_CONNECTOR_eDP) { + if (outp->dp.aux_pwr_pu) { + nvkm_gpio_set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, 0); + outp->dp.aux_pwr_pu = false; + } + } } - - if (line->mask & NVKM_I2C_UNPLUG) - rep.mask |= NVIF_NOTIFY_CONN_V0_UNPLUG; - if (line->mask & NVKM_I2C_PLUG) - rep.mask |= NVIF_NOTIFY_CONN_V0_PLUG; - - nvkm_event_send(&disp->hpd, rep.mask, conn->index, &rep, sizeof(rep)); - return NVKM_NOTIFY_KEEP; } static void nvkm_dp_fini(struct nvkm_outp *outp) { - nvkm_notify_put(&outp->dp.hpd); nvkm_dp_enable(outp, false); } static void nvkm_dp_init(struct nvkm_outp *outp) { - struct nvkm_gpio *gpio = outp->disp->engine.subdev.device->gpio; - - nvkm_notify_put(&outp->conn->hpd); - - /* eDP panels need powering on by us (if the VBIOS doesn't default it - * to on) before doing any AUX channel transactions. LVDS panel power - * is handled by the SOR itself, and not required for LVDS DDC. - */ - if (outp->conn->info.type == DCB_CONNECTOR_eDP) { - int power = nvkm_gpio_get(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff); - if (power == 0) - nvkm_gpio_set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, 1); - - /* We delay here unconditionally, even if already powered, - * because some laptop panels having a significant resume - * delay before the panel begins responding. - * - * This is likely a bit of a hack, but no better idea for - * handling this at the moment. - */ - msleep(300); - - /* If the eDP panel can't be detected, we need to restore - * the panel power GPIO to avoid breaking another output. 
- */ - if (!nvkm_dp_enable(outp, true) && power == 0) - nvkm_gpio_set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, 0); - } else { - nvkm_dp_enable(outp, true); - } - - nvkm_notify_get(&outp->dp.hpd); + nvkm_dp_enable(outp, outp->dp.enabled); } static void * nvkm_dp_dtor(struct nvkm_outp *outp) { - nvkm_notify_fini(&outp->dp.hpd); return outp; } @@ -797,21 +801,6 @@ nvkm_dp_new(struct nvkm_disp *disp, int index, struct dcb_output *dcbE, struct n OUTP_DBG(outp, "bios dp %02x %02x %02x %02x", outp->dp.version, hdr, cnt, len); - /* hotplug detect, replaces gpio-based mechanism with aux events */ - ret = nvkm_notify_init(NULL, &i2c->event, nvkm_dp_hpd, true, - &(struct nvkm_i2c_ntfy_req) { - .mask = NVKM_I2C_PLUG | NVKM_I2C_UNPLUG | - NVKM_I2C_IRQ, - .port = outp->dp.aux->id, - }, - sizeof(struct nvkm_i2c_ntfy_req), - sizeof(struct nvkm_i2c_ntfy_rep), - &outp->dp.hpd); - if (ret) { - OUTP_ERR(outp, "error monitoring aux hpd: %d", ret); - return ret; - } - mutex_init(&outp->dp.mutex); atomic_set(&outp->dp.lt.done, 0); return 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h index 1d86baa6a424..9a6be43916bc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h @@ -6,6 +6,7 @@ int nvkm_dp_new(struct nvkm_disp *, int index, struct dcb_output *, struct nvkm_outp **); void nvkm_dp_disable(struct nvkm_outp *, struct nvkm_ior *); +void nvkm_dp_enable(struct nvkm_outp *, bool auxpwr); /* DPCD Receiver Capabilities */ #define DPCD_RC00_DPCD_REV 0x00000 diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c index 4966a51af3d7..23ae451ba473 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c @@ -29,9 +29,54 @@ #include <nvif/class.h> -void -g84_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, - u8 rekey, u8 *avi, u8 avi_size, u8 *vendor, u8 vendor_size) +static void +g84_sor_hdmi_infoframe_vsi(struct nvkm_ior *ior, int head, void *data, u32 size) +{ + struct nvkm_device *device = ior->disp->engine.subdev.device; + struct packed_hdmi_infoframe vsi; + const u32 hoff = head * 0x800; + + nvkm_mask(device, 0x61653c + hoff, 0x00010001, 0x00010000); + if (!size) + return; + + pack_hdmi_infoframe(&vsi, data, size); + + nvkm_wr32(device, 0x616544 + hoff, vsi.header); + nvkm_wr32(device, 0x616548 + hoff, vsi.subpack0_low); + nvkm_wr32(device, 0x61654c + hoff, vsi.subpack0_high); + /* Is there a second (or up to fourth?) set of subpack registers here? 
*/ + /* nvkm_wr32(device, 0x616550 + hoff, vsi.subpack1_low); */ + /* nvkm_wr32(device, 0x616554 + hoff, vsi.subpack1_high); */ + + nvkm_mask(device, 0x61653c + hoff, 0x00010001, 0x00010001); +} + +static void +g84_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 size) +{ + struct nvkm_device *device = ior->disp->engine.subdev.device; + struct packed_hdmi_infoframe avi; + const u32 hoff = head * 0x800; + + pack_hdmi_infoframe(&avi, data, size); + + nvkm_mask(device, 0x616520 + hoff, 0x00000001, 0x00000000); + if (!size) + return; + + nvkm_wr32(device, 0x616528 + hoff, avi.header); + nvkm_wr32(device, 0x61652c + hoff, avi.subpack0_low); + nvkm_wr32(device, 0x616530 + hoff, avi.subpack0_high); + nvkm_wr32(device, 0x616534 + hoff, avi.subpack1_low); + nvkm_wr32(device, 0x616538 + hoff, avi.subpack1_high); + + nvkm_mask(device, 0x616520 + hoff, 0x00000001, 0x00000001); +} + + +static void +g84_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, u8 rekey) { struct nvkm_device *device = ior->disp->engine.subdev.device; const u32 ctrl = 0x40000000 * enable | @@ -39,31 +84,13 @@ g84_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, max_ac_packet << 16 | rekey; const u32 hoff = head * 0x800; - struct packed_hdmi_infoframe avi_infoframe; - struct packed_hdmi_infoframe vendor_infoframe; - - pack_hdmi_infoframe(&avi_infoframe, avi, avi_size); - pack_hdmi_infoframe(&vendor_infoframe, vendor, vendor_size); if (!(ctrl & 0x40000000)) { nvkm_mask(device, 0x6165a4 + hoff, 0x40000000, 0x00000000); - nvkm_mask(device, 0x61653c + hoff, 0x00000001, 0x00000000); - nvkm_mask(device, 0x616520 + hoff, 0x00000001, 0x00000000); nvkm_mask(device, 0x616500 + hoff, 0x00000001, 0x00000000); return; } - /* AVI InfoFrame */ - nvkm_mask(device, 0x616520 + hoff, 0x00000001, 0x00000000); - if (avi_size) { - nvkm_wr32(device, 0x616528 + hoff, avi_infoframe.header); - nvkm_wr32(device, 0x61652c + hoff, avi_infoframe.subpack0_low); - nvkm_wr32(device, 0x616530 + hoff, avi_infoframe.subpack0_high); - nvkm_wr32(device, 0x616534 + hoff, avi_infoframe.subpack1_low); - nvkm_wr32(device, 0x616538 + hoff, avi_infoframe.subpack1_high); - nvkm_mask(device, 0x616520 + hoff, 0x00000001, 0x00000001); - } - /* Audio InfoFrame */ nvkm_mask(device, 0x616500 + hoff, 0x00000001, 0x00000000); nvkm_wr32(device, 0x616508 + hoff, 0x000a0184); @@ -71,17 +98,6 @@ g84_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, nvkm_wr32(device, 0x616510 + hoff, 0x00000000); nvkm_mask(device, 0x616500 + hoff, 0x00000001, 0x00000001); - /* Vendor InfoFrame */ - nvkm_mask(device, 0x61653c + hoff, 0x00010001, 0x00010000); - if (vendor_size) { - nvkm_wr32(device, 0x616544 + hoff, vendor_infoframe.header); - nvkm_wr32(device, 0x616548 + hoff, vendor_infoframe.subpack0_low); - nvkm_wr32(device, 0x61654c + hoff, vendor_infoframe.subpack0_high); - /* Is there a second (or up to fourth?) set of subpack registers here? */ - /* nvkm_wr32(device, 0x616550 + hoff, vendor_infoframe->subpack1_low); */ - /* nvkm_wr32(device, 0x616554 + hoff, vendor_infoframe->subpack1_high); */ - nvkm_mask(device, 0x61653c + hoff, 0x00010001, 0x00010001); - } nvkm_mask(device, 0x6165d0 + hoff, 0x00070001, 0x00010001); /* SPARE, HW_CTS */ nvkm_mask(device, 0x616568 + hoff, 0x00010101, 0x00000000); /* ACR_CTRL, ?? 
*/ @@ -96,14 +112,19 @@ g84_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, nvkm_mask(device, 0x6165a4 + hoff, 0x5f1f007f, ctrl); } +const struct nvkm_ior_func_hdmi +g84_sor_hdmi = { + .ctrl = g84_sor_hdmi_ctrl, + .infoframe_avi = g84_sor_hdmi_infoframe_avi, + .infoframe_vsi = g84_sor_hdmi_infoframe_vsi, +}; + static const struct nvkm_ior_func g84_sor = { .state = nv50_sor_state, .power = nv50_sor_power, .clock = nv50_sor_clock, - .hdmi = { - .ctrl = g84_sor_hdmi_ctrl, - }, + .hdmi = &g84_sor_hdmi, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c index 7489d0d7fce0..52099b75f52a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c @@ -105,10 +105,7 @@ ga102_sor = { .state = gv100_sor_state, .power = nv50_sor_power, .clock = ga102_sor_clock, - .hdmi = { - .ctrl = gv100_sor_hdmi_ctrl, - .scdc = gm200_sor_hdmi_scdc, - }, + .hdmi = &gv100_sor_hdmi, .dp = &ga102_sor_dp, .hda = &gv100_sor_hda, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c index 39822f1b5b95..a48e9bdf4cd0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c @@ -202,19 +202,61 @@ gf119_sor_dp = { }; static void -gf119_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, - u8 rekey, u8 *avi, u8 avi_size, u8 *vendor, u8 vendor_size) +gf119_sor_hdmi_infoframe_vsi(struct nvkm_ior *ior, int head, void *data, u32 size) +{ + struct nvkm_device *device = ior->disp->engine.subdev.device; + struct packed_hdmi_infoframe vsi; + const u32 hoff = head * 0x800; + + pack_hdmi_infoframe(&vsi, data, size); + + nvkm_mask(device, 0x616730 + hoff, 0x00010001, 0x00010000); + if (!size) + return; + + /* + * These appear to be the audio infoframe registers, + * but no other set of infoframe registers has yet + * been found. + */ + nvkm_wr32(device, 0x616738 + hoff, vsi.header); + nvkm_wr32(device, 0x61673c + hoff, vsi.subpack0_low); + nvkm_wr32(device, 0x616740 + hoff, vsi.subpack0_high); + /* Is there a second (or further?) set of subpack registers here? 
*/ + + nvkm_mask(device, 0x616730 + hoff, 0x00000001, 0x00000001); +} + +static void +gf119_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 size) +{ + struct nvkm_device *device = ior->disp->engine.subdev.device; + struct packed_hdmi_infoframe avi; + const u32 hoff = head * 0x800; + + pack_hdmi_infoframe(&avi, data, size); + + nvkm_mask(device, 0x616714 + hoff, 0x00000001, 0x00000000); + if (!size) + return; + + nvkm_wr32(device, 0x61671c + hoff, avi.header); + nvkm_wr32(device, 0x616720 + hoff, avi.subpack0_low); + nvkm_wr32(device, 0x616724 + hoff, avi.subpack0_high); + nvkm_wr32(device, 0x616728 + hoff, avi.subpack1_low); + nvkm_wr32(device, 0x61672c + hoff, avi.subpack1_high); + + nvkm_mask(device, 0x616714 + hoff, 0x00000001, 0x00000001); +} + +static void +gf119_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, u8 rekey) { struct nvkm_device *device = ior->disp->engine.subdev.device; const u32 ctrl = 0x40000000 * enable | max_ac_packet << 16 | rekey; const u32 hoff = head * 0x800; - struct packed_hdmi_infoframe avi_infoframe; - struct packed_hdmi_infoframe vendor_infoframe; - - pack_hdmi_infoframe(&avi_infoframe, avi, avi_size); - pack_hdmi_infoframe(&vendor_infoframe, vendor, vendor_size); if (!(ctrl & 0x40000000)) { nvkm_mask(device, 0x616798 + hoff, 0x40000000, 0x00000000); @@ -224,32 +266,6 @@ gf119_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe return; } - /* AVI InfoFrame */ - nvkm_mask(device, 0x616714 + hoff, 0x00000001, 0x00000000); - if (avi_size) { - nvkm_wr32(device, 0x61671c + hoff, avi_infoframe.header); - nvkm_wr32(device, 0x616720 + hoff, avi_infoframe.subpack0_low); - nvkm_wr32(device, 0x616724 + hoff, avi_infoframe.subpack0_high); - nvkm_wr32(device, 0x616728 + hoff, avi_infoframe.subpack1_low); - nvkm_wr32(device, 0x61672c + hoff, avi_infoframe.subpack1_high); - nvkm_mask(device, 0x616714 + hoff, 0x00000001, 0x00000001); - } - - /* GENERIC(?) / Vendor InfoFrame? */ - nvkm_mask(device, 0x616730 + hoff, 0x00010001, 0x00010000); - if (vendor_size) { - /* - * These appear to be the audio infoframe registers, - * but no other set of infoframe registers has yet - * been found. - */ - nvkm_wr32(device, 0x616738 + hoff, vendor_infoframe.header); - nvkm_wr32(device, 0x61673c + hoff, vendor_infoframe.subpack0_low); - nvkm_wr32(device, 0x616740 + hoff, vendor_infoframe.subpack0_high); - /* Is there a second (or further?) set of subpack registers here? */ - nvkm_mask(device, 0x616730 + hoff, 0x00000001, 0x00000001); - } - /* ??? InfoFrame? 
*/ nvkm_mask(device, 0x6167a4 + hoff, 0x00000001, 0x00000000); nvkm_wr32(device, 0x6167ac + hoff, 0x00000010); @@ -259,6 +275,13 @@ gf119_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe nvkm_mask(device, 0x616798 + hoff, 0x401f007f, ctrl); } +static const struct nvkm_ior_func_hdmi +gf119_sor_hdmi = { + .ctrl = gf119_sor_hdmi_ctrl, + .infoframe_avi = gf119_sor_hdmi_infoframe_avi, + .infoframe_vsi = gf119_sor_hdmi_infoframe_vsi, +}; + void gf119_sor_clock(struct nvkm_ior *sor) { @@ -305,9 +328,7 @@ gf119_sor = { .state = gf119_sor_state, .power = nv50_sor_power, .clock = gf119_sor_clock, - .hdmi = { - .ctrl = gf119_sor_hdmi_ctrl, - }, + .hdmi = &gf119_sor_hdmi, .dp = &gf119_sor_dp, .hda = &gf119_sor_hda, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c index 7248e9ec835e..876a21a0cebb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c @@ -30,8 +30,51 @@ #include <nvif/class.h> void -gk104_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, - u8 rekey, u8 *avi, u8 avi_size, u8 *vendor, u8 vendor_size) +gk104_sor_hdmi_infoframe_vsi(struct nvkm_ior *ior, int head, void *data, u32 size) +{ + struct nvkm_device *device = ior->disp->engine.subdev.device; + struct packed_hdmi_infoframe vsi; + const u32 hoff = head * 0x400; + + pack_hdmi_infoframe(&vsi, data, size); + + /* GENERIC(?) / Vendor InfoFrame? */ + nvkm_mask(device, 0x690100 + hoff, 0x00010001, 0x00000000); + if (!size) + return; + + nvkm_wr32(device, 0x690108 + hoff, vsi.header); + nvkm_wr32(device, 0x69010c + hoff, vsi.subpack0_low); + nvkm_wr32(device, 0x690110 + hoff, vsi.subpack0_high); + /* Is there a second (or further?) set of subpack registers here? 
*/ + nvkm_mask(device, 0x690100 + hoff, 0x00000001, 0x00000001); +} + +void +gk104_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 size) +{ + struct nvkm_device *device = ior->disp->engine.subdev.device; + struct packed_hdmi_infoframe avi; + const u32 hoff = head * 0x400; + + pack_hdmi_infoframe(&avi, data, size); + + /* AVI InfoFrame */ + nvkm_mask(device, 0x690000 + hoff, 0x00000001, 0x00000000); + if (!size) + return; + + nvkm_wr32(device, 0x690008 + hoff, avi.header); + nvkm_wr32(device, 0x69000c + hoff, avi.subpack0_low); + nvkm_wr32(device, 0x690010 + hoff, avi.subpack0_high); + nvkm_wr32(device, 0x690014 + hoff, avi.subpack1_low); + nvkm_wr32(device, 0x690018 + hoff, avi.subpack1_high); + + nvkm_mask(device, 0x690000 + hoff, 0x00000001, 0x00000001); +} + +void +gk104_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, u8 rekey) { struct nvkm_device *device = ior->disp->engine.subdev.device; const u32 ctrl = 0x40000000 * enable | @@ -39,11 +82,6 @@ gk104_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe rekey; const u32 hoff = head * 0x800; const u32 hdmi = head * 0x400; - struct packed_hdmi_infoframe avi_infoframe; - struct packed_hdmi_infoframe vendor_infoframe; - - pack_hdmi_infoframe(&avi_infoframe, avi, avi_size); - pack_hdmi_infoframe(&vendor_infoframe, vendor, vendor_size); if (!(ctrl & 0x40000000)) { nvkm_mask(device, 0x616798 + hoff, 0x40000000, 0x00000000); @@ -53,28 +91,6 @@ gk104_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe return; } - /* AVI InfoFrame */ - nvkm_mask(device, 0x690000 + hdmi, 0x00000001, 0x00000000); - if (avi_size) { - nvkm_wr32(device, 0x690008 + hdmi, avi_infoframe.header); - nvkm_wr32(device, 0x69000c + hdmi, avi_infoframe.subpack0_low); - nvkm_wr32(device, 0x690010 + hdmi, avi_infoframe.subpack0_high); - nvkm_wr32(device, 0x690014 + hdmi, avi_infoframe.subpack1_low); - nvkm_wr32(device, 0x690018 + hdmi, avi_infoframe.subpack1_high); - nvkm_mask(device, 0x690000 + hdmi, 0x00000001, 0x00000001); - } - - /* GENERIC(?) / Vendor InfoFrame? */ - nvkm_mask(device, 0x690100 + hdmi, 0x00010001, 0x00000000); - if (vendor_size) { - nvkm_wr32(device, 0x690108 + hdmi, vendor_infoframe.header); - nvkm_wr32(device, 0x69010c + hdmi, vendor_infoframe.subpack0_low); - nvkm_wr32(device, 0x690110 + hdmi, vendor_infoframe.subpack0_high); - /* Is there a second (or further?) set of subpack registers here? */ - nvkm_mask(device, 0x690100 + hdmi, 0x00000001, 0x00000001); - } - - /* ??? InfoFrame? 
*/
 nvkm_mask(device, 0x6900c0 + hdmi, 0x00000001, 0x00000000);
 nvkm_wr32(device, 0x6900cc + hdmi, 0x00000010);
@@ -87,14 +103,19 @@ gk104_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 nvkm_mask(device, 0x616798 + hoff, 0x401f007f, ctrl);
 }
+const struct nvkm_ior_func_hdmi
+gk104_sor_hdmi = {
+ .ctrl = gk104_sor_hdmi_ctrl,
+ .infoframe_avi = gk104_sor_hdmi_infoframe_avi,
+ .infoframe_vsi = gk104_sor_hdmi_infoframe_vsi,
+};
+
 static const struct nvkm_ior_func
 gk104_sor = {
 .state = gf119_sor_state,
 .power = nv50_sor_power,
 .clock = gf119_sor_clock,
- .hdmi = {
- .ctrl = gk104_sor_hdmi_ctrl,
- },
+ .hdmi = &gk104_sor_hdmi,
 .dp = &gf119_sor_dp,
 .hda = &gf119_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
index 9e9ef49bd8ac..b4d8e868616f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
@@ -70,9 +70,7 @@ gm107_sor = {
 .state = gf119_sor_state,
 .power = nv50_sor_power,
 .clock = gf119_sor_clock,
- .hdmi = {
- .ctrl = gk104_sor_hdmi_ctrl,
- },
+ .hdmi = &gk104_sor_hdmi,
 .dp = &gm107_sor_dp,
 .hda = &gf119_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
index 4ecc8f98af6e..562ebae57d44 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
@@ -79,6 +79,14 @@ gm200_sor_hdmi_scdc(struct nvkm_ior *ior, u8 scdc)
 ior->tmds.high_speed = !!(scdc & 0x2);
 }
+const struct nvkm_ior_func_hdmi
+gm200_sor_hdmi = {
+ .ctrl = gk104_sor_hdmi_ctrl,
+ .scdc = gm200_sor_hdmi_scdc,
+ .infoframe_avi = gk104_sor_hdmi_infoframe_avi,
+ .infoframe_vsi = gk104_sor_hdmi_infoframe_vsi,
+};
+
 void
 gm200_sor_route_set(struct nvkm_outp *outp, struct nvkm_ior *ior)
 {
@@ -131,10 +139,7 @@ gm200_sor = {
 .state = gf119_sor_state,
 .power = nv50_sor_power,
 .clock = gf119_sor_clock,
- .hdmi = {
- .ctrl = gk104_sor_hdmi_ctrl,
- .scdc = gm200_sor_hdmi_scdc,
- },
+ .hdmi = &gm200_sor_hdmi,
 .dp = &gm200_sor_dp,
 .hda = &gf119_sor_hda,
 };
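With gm200 the HDMI hooks gain an .scdc entry alongside .ctrl and the two infoframe writers, and the gp100 diff below simply reuses the gm200 table. For orientation, a minimal sketch of how the split hooks end up being driven at acquire time, mirroring nvkm_uoutp_mthd_acquire_tmds() later in this patch; the example_ name is hypothetical, and bit 0x2 of the scdc byte is the one gm200_sor_hdmi_scdc() latches as high-speed TMDS:

/* Hypothetical caller, not part of the patch: bring up HDMI on a head
 * through the new const nvkm_ior_func_hdmi table.
 */
static int
example_hdmi_enable(struct nvkm_ior *ior, int head, u8 max_ac_packet, u8 rekey, u8 scdc)
{
	const struct nvkm_ior_func_hdmi *hdmi = ior->func->hdmi;

	if (!hdmi)
		return -ENODEV;

	/* Program rekey/max_ac_packet and switch the SOR into HDMI mode. */
	hdmi->ctrl(ior, head, true, max_ac_packet, rekey);

	/* SCDC (scrambling/high-rate TMDS) only exists from gm200 onwards. */
	if (hdmi->scdc)
		hdmi->scdc(ior, scdc);

	return 0;
}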
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
index 7172a9dfd89b..7f1eb4332040 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
@@ -37,10 +37,7 @@ gp100_sor = {
 .state = gf119_sor_state,
 .power = nv50_sor_power,
 .clock = gf119_sor_clock,
- .hdmi = {
- .ctrl = gk104_sor_hdmi_ctrl,
- .scdc = gm200_sor_hdmi_scdc,
- },
+ .hdmi = &gm200_sor_hdmi,
 .dp = &gm200_sor_dp,
 .hda = &gf119_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
index 70c49e7af9cf..a2c7c6f83dcd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
@@ -92,9 +92,53 @@ gt215_sor_dp = {
 .watermark = g94_sor_dp_watermark,
 };
-void
-gt215_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet,
- u8 rekey, u8 *avi, u8 avi_size, u8 *vendor, u8 vendor_size)
+static void
+gt215_sor_hdmi_infoframe_vsi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+ struct nvkm_device *device = ior->disp->engine.subdev.device;
+ struct packed_hdmi_infoframe vsi;
+ const u32 soff = nv50_ior_base(ior);
+
+ pack_hdmi_infoframe(&vsi, data, size);
+
+ nvkm_mask(device, 0x61c53c + soff, 0x00010001, 0x00010000);
+ if (!size)
+ return;
+
+ nvkm_wr32(device, 0x61c544 + soff, vsi.header);
+ nvkm_wr32(device, 0x61c548 + soff, vsi.subpack0_low);
+ nvkm_wr32(device, 0x61c54c + soff, vsi.subpack0_high);
+ /* Is there a second (or up to fourth?) set of subpack registers here? */
+ /* nvkm_wr32(device, 0x61c550 + soff, vsi.subpack1_low); */
+ /* nvkm_wr32(device, 0x61c554 + soff, vsi.subpack1_high); */
+
+ nvkm_mask(device, 0x61c53c + soff, 0x00010001, 0x00010001);
+}
+
+static void
+gt215_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+ struct nvkm_device *device = ior->disp->engine.subdev.device;
+ struct packed_hdmi_infoframe avi;
+ const u32 soff = nv50_ior_base(ior);
+
+ pack_hdmi_infoframe(&avi, data, size);
+
+ nvkm_mask(device, 0x61c520 + soff, 0x00000001, 0x00000000);
+ if (!size)
+ return;
+
+ nvkm_wr32(device, 0x61c528 + soff, avi.header);
+ nvkm_wr32(device, 0x61c52c + soff, avi.subpack0_low);
+ nvkm_wr32(device, 0x61c530 + soff, avi.subpack0_high);
+ nvkm_wr32(device, 0x61c534 + soff, avi.subpack1_low);
+ nvkm_wr32(device, 0x61c538 + soff, avi.subpack1_high);
+
+ nvkm_mask(device, 0x61c520 + soff, 0x00000001, 0x00000001);
+}
+
+static void
+gt215_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, u8 rekey)
 {
 struct nvkm_device *device = ior->disp->engine.subdev.device;
 const u32 ctrl = 0x40000000 * enable |
@@ -102,11 +146,6 @@ gt215_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 max_ac_packet << 16 |
 rekey;
 const u32 soff = nv50_ior_base(ior);
- struct packed_hdmi_infoframe avi_infoframe;
- struct packed_hdmi_infoframe vendor_infoframe;
-
- pack_hdmi_infoframe(&avi_infoframe, avi, avi_size);
- pack_hdmi_infoframe(&vendor_infoframe, vendor, vendor_size);
 if (!(ctrl & 0x40000000)) {
 nvkm_mask(device, 0x61c5a4 + soff, 0x40000000, 0x00000000);
@@ -116,17 +155,6 @@ gt215_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 return;
 }
- /* AVI InfoFrame */
- nvkm_mask(device, 0x61c520 + soff, 0x00000001, 0x00000000);
- if (avi_size) {
- nvkm_wr32(device, 0x61c528 + soff, avi_infoframe.header);
- nvkm_wr32(device, 0x61c52c + soff, avi_infoframe.subpack0_low);
- nvkm_wr32(device, 0x61c530 + soff, avi_infoframe.subpack0_high);
- nvkm_wr32(device, 0x61c534 + soff, avi_infoframe.subpack1_low);
- nvkm_wr32(device, 0x61c538 + soff, avi_infoframe.subpack1_high);
- nvkm_mask(device, 0x61c520 + soff, 0x00000001, 0x00000001);
- }
-
 /* Audio InfoFrame */
 nvkm_mask(device, 0x61c500 + soff, 0x00000001, 0x00000000);
 nvkm_wr32(device, 0x61c508 + soff, 0x000a0184);
@@ -134,18 +162,6 @@ gt215_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 nvkm_wr32(device, 0x61c510 + soff, 0x00000000);
 nvkm_mask(device, 0x61c500 + soff, 0x00000001, 0x00000001);
- /* Vendor InfoFrame */
- nvkm_mask(device, 0x61c53c + soff, 0x00010001, 0x00010000);
- if (vendor_size) {
- nvkm_wr32(device, 0x61c544 + soff, vendor_infoframe.header);
- nvkm_wr32(device, 0x61c548 + soff, vendor_infoframe.subpack0_low);
- nvkm_wr32(device, 0x61c54c + soff, vendor_infoframe.subpack0_high);
- /* Is there a second (or up to fourth?) set of subpack registers here? */
- /* nvkm_wr32(device, 0x61c550 + soff, vendor_infoframe.subpack1_low); */
- /* nvkm_wr32(device, 0x61c554 + soff, vendor_infoframe.subpack1_high); */
- nvkm_mask(device, 0x61c53c + soff, 0x00010001, 0x00010001);
- }
-
 nvkm_mask(device, 0x61c5d0 + soff, 0x00070001, 0x00010001); /* SPARE, HW_CTS */
 nvkm_mask(device, 0x61c568 + soff, 0x00010101, 0x00000000); /* ACR_CTRL, ?? */
 nvkm_mask(device, 0x61c578 + soff, 0x80000000, 0x80000000); /* ACR_0441_ENABLE */
@@ -159,14 +175,19 @@ gt215_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 nvkm_mask(device, 0x61c5a4 + soff, 0x5f1f007f, ctrl);
 }
+const struct nvkm_ior_func_hdmi
+gt215_sor_hdmi = {
+ .ctrl = gt215_sor_hdmi_ctrl,
+ .infoframe_avi = gt215_sor_hdmi_infoframe_avi,
+ .infoframe_vsi = gt215_sor_hdmi_infoframe_vsi,
+};
+
 static const struct nvkm_ior_func
 gt215_sor = {
 .state = g94_sor_state,
 .power = nv50_sor_power,
 .clock = nv50_sor_clock,
- .hdmi = {
- .ctrl = gt215_sor_hdmi_ctrl,
- },
+ .hdmi = &gt215_sor_hdmi,
 .dp = &gt215_sor_dp,
 .hda = &gt215_sor_hda,
 };
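All of the infoframe hooks above share one protocol: disarm the packet, bail out if no new frame was supplied, otherwise reprogram the packed words and re-arm. That ordering is what lets callers disable a packet by writing a zero-sized frame (the release path later in this patch does exactly that with NULL/0). A sketch of the common shape, borrowing the gv100 AVI offsets; register addresses and subpack counts vary per generation:

/* Illustrative only: the disable/program/re-enable pattern behind the
 * per-generation infoframe_avi()/infoframe_vsi() implementations.
 */
static void
example_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 size)
{
	struct nvkm_device *device = ior->disp->engine.subdev.device;
	struct packed_hdmi_infoframe avi;
	const u32 hoff = head * 0x400;

	pack_hdmi_infoframe(&avi, data, size);

	/* Disarm unconditionally; a zero-sized write stops here. */
	nvkm_mask(device, 0x6f0000 + hoff, 0x00000001, 0x00000000);
	if (!size)
		return;

	/* Reprogram the packed header/subpack words, then re-arm. */
	nvkm_wr32(device, 0x6f0008 + hoff, avi.header);
	nvkm_wr32(device, 0x6f000c + hoff, avi.subpack0_low);
	nvkm_wr32(device, 0x6f0010 + hoff, avi.subpack0_high);
	nvkm_wr32(device, 0x6f0014 + hoff, avi.subpack1_low);
	nvkm_wr32(device, 0x6f0018 + hoff, avi.subpack1_high);
	nvkm_mask(device, 0x6f0000 + hoff, 0x00000001, 0x00000001);
}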
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
index 6b9d49270fa7..115d0997fd62 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
@@ -96,9 +96,54 @@ gv100_sor_dp = {
 .watermark = gv100_sor_dp_watermark,
 };
-void
-gv100_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet,
- u8 rekey, u8 *avi, u8 avi_size, u8 *vendor, u8 vendor_size)
+static void
+gv100_sor_hdmi_infoframe_vsi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+ struct nvkm_device *device = ior->disp->engine.subdev.device;
+ struct packed_hdmi_infoframe vsi;
+ const u32 hoff = head * 0x400;
+
+ pack_hdmi_infoframe(&vsi, data, size);
+
+ nvkm_mask(device, 0x6f0100 + hoff, 0x00010001, 0x00000000);
+ if (!size)
+ return;
+
+ nvkm_wr32(device, 0x6f0108 + hoff, vsi.header);
+ nvkm_wr32(device, 0x6f010c + hoff, vsi.subpack0_low);
+ nvkm_wr32(device, 0x6f0110 + hoff, vsi.subpack0_high);
+ nvkm_wr32(device, 0x6f0114 + hoff, 0x00000000);
+ nvkm_wr32(device, 0x6f0118 + hoff, 0x00000000);
+ nvkm_wr32(device, 0x6f011c + hoff, 0x00000000);
+ nvkm_wr32(device, 0x6f0120 + hoff, 0x00000000);
+ nvkm_wr32(device, 0x6f0124 + hoff, 0x00000000);
+ nvkm_mask(device, 0x6f0100 + hoff, 0x00000001, 0x00000001);
+}
+
+static void
+gv100_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+ struct nvkm_device *device = ior->disp->engine.subdev.device;
+ struct packed_hdmi_infoframe avi;
+ const u32 hoff = head * 0x400;
+
+ pack_hdmi_infoframe(&avi, data, size);
+
+ nvkm_mask(device, 0x6f0000 + hoff, 0x00000001, 0x00000000);
+ if (!size)
+ return;
+
+ nvkm_wr32(device, 0x6f0008 + hoff, avi.header);
+ nvkm_wr32(device, 0x6f000c + hoff, avi.subpack0_low);
+ nvkm_wr32(device, 0x6f0010 + hoff, avi.subpack0_high);
+ nvkm_wr32(device, 0x6f0014 + hoff, avi.subpack1_low);
+ nvkm_wr32(device, 0x6f0018 + hoff, avi.subpack1_high);
+
+ nvkm_mask(device, 0x6f0000 + hoff, 0x00000001, 0x00000001);
+}
+
+static void
+gv100_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, u8 rekey)
 {
 struct nvkm_device *device = ior->disp->engine.subdev.device;
 const u32 ctrl = 0x40000000 * enable |
@@ -106,11 +151,6 @@ gv100_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 rekey;
 const u32 hoff = head * 0x800;
 const u32 hdmi = head * 0x400;
- struct packed_hdmi_infoframe avi_infoframe;
- struct packed_hdmi_infoframe vendor_infoframe;
-
- pack_hdmi_infoframe(&avi_infoframe, avi, avi_size);
- pack_hdmi_infoframe(&vendor_infoframe, vendor, vendor_size);
 if (!(ctrl & 0x40000000)) {
 nvkm_mask(device, 0x6165c0 + hoff, 0x40000000, 0x00000000);
@@ -120,32 +160,6 @@ gv100_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 return;
 }
- /* AVI InfoFrame (AVI).
*/ - nvkm_mask(device, 0x6f0000 + hdmi, 0x00000001, 0x00000000); - if (avi_size) { - nvkm_wr32(device, 0x6f0008 + hdmi, avi_infoframe.header); - nvkm_wr32(device, 0x6f000c + hdmi, avi_infoframe.subpack0_low); - nvkm_wr32(device, 0x6f0010 + hdmi, avi_infoframe.subpack0_high); - nvkm_wr32(device, 0x6f0014 + hdmi, avi_infoframe.subpack1_low); - nvkm_wr32(device, 0x6f0018 + hdmi, avi_infoframe.subpack1_high); - nvkm_mask(device, 0x6f0000 + hdmi, 0x00000001, 0x00000001); - } - - /* Vendor-specific InfoFrame (VSI). */ - nvkm_mask(device, 0x6f0100 + hdmi, 0x00010001, 0x00000000); - if (vendor_size) { - nvkm_wr32(device, 0x6f0108 + hdmi, vendor_infoframe.header); - nvkm_wr32(device, 0x6f010c + hdmi, vendor_infoframe.subpack0_low); - nvkm_wr32(device, 0x6f0110 + hdmi, vendor_infoframe.subpack0_high); - nvkm_wr32(device, 0x6f0114 + hdmi, 0x00000000); - nvkm_wr32(device, 0x6f0118 + hdmi, 0x00000000); - nvkm_wr32(device, 0x6f011c + hdmi, 0x00000000); - nvkm_wr32(device, 0x6f0120 + hdmi, 0x00000000); - nvkm_wr32(device, 0x6f0124 + hdmi, 0x00000000); - nvkm_mask(device, 0x6f0100 + hdmi, 0x00000001, 0x00000001); - } - - /* General Control (GCP). */ nvkm_mask(device, 0x6f00c0 + hdmi, 0x00000001, 0x00000000); nvkm_wr32(device, 0x6f00cc + hdmi, 0x00000010); @@ -158,6 +172,14 @@ gv100_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe nvkm_mask(device, 0x6165c0 + hoff, 0x401f007f, ctrl); } +const struct nvkm_ior_func_hdmi +gv100_sor_hdmi = { + .ctrl = gv100_sor_hdmi_ctrl, + .scdc = gm200_sor_hdmi_scdc, + .infoframe_avi = gv100_sor_hdmi_infoframe_avi, + .infoframe_vsi = gv100_sor_hdmi_infoframe_vsi, +}; + void gv100_sor_state(struct nvkm_ior *sor, struct nvkm_ior_state *state) { @@ -190,10 +212,7 @@ gv100_sor = { .state = gv100_sor_state, .power = nv50_sor_power, .clock = gf119_sor_clock, - .hdmi = { - .ctrl = gv100_sor_hdmi_ctrl, - .scdc = gm200_sor_hdmi_scdc, - }, + .hdmi = &gv100_sor_hdmi, .dp = &gv100_sor_dp, .hda = &gv100_sor_hda, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.c index 83152c26fe3e..7f5d13d13c94 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.c @@ -39,44 +39,6 @@ nvkm_head_find(struct nvkm_disp *disp, int id) return NULL; } -int -nvkm_head_mthd_scanoutpos(struct nvkm_object *object, - struct nvkm_head *head, void *data, u32 size) -{ - union { - struct nv04_disp_scanoutpos_v0 v0; - } *args = data; - int ret = -ENOSYS; - - nvif_ioctl(object, "head scanoutpos size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(object, "head scanoutpos vers %d\n", - args->v0.version); - - head->func->state(head, &head->arm); - args->v0.vtotal = head->arm.vtotal; - args->v0.vblanks = head->arm.vblanks; - args->v0.vblanke = head->arm.vblanke; - args->v0.htotal = head->arm.htotal; - args->v0.hblanks = head->arm.hblanks; - args->v0.hblanke = head->arm.hblanke; - - /* We don't support reading htotal/vtotal on pre-NV50 VGA, - * so we have to give up and trigger the timestamping - * fallback in the drm core. 
- */ - if (!args->v0.vtotal || !args->v0.htotal) - return -ENOTSUPP; - - args->v0.time[0] = ktime_to_ns(ktime_get()); - head->func->rgpos(head, &args->v0.hline, &args->v0.vline); - args->v0.time[1] = ktime_to_ns(ktime_get()); - } else - return ret; - - return 0; -} - void nvkm_head_del(struct nvkm_head **phead) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h index 84a2989193cf..856252bf559a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: MIT */ #ifndef __NVKM_DISP_HEAD_H__ #define __NVKM_DISP_HEAD_H__ +#include <nvif/object.h> #include "priv.h" struct nvkm_head { @@ -26,12 +27,12 @@ struct nvkm_head { u8 depth; } or; } arm, asy; + + struct nvkm_object object; }; int nvkm_head_new_(const struct nvkm_head_func *, struct nvkm_disp *, int id); void nvkm_head_del(struct nvkm_head **); -int nvkm_head_mthd_scanoutpos(struct nvkm_object *, - struct nvkm_head *, void *, u32); struct nvkm_head *nvkm_head_find(struct nvkm_disp *, int id); struct nvkm_head_func { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h index 671c4674ffcc..da1b1a626ef2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h @@ -63,12 +63,12 @@ struct nvkm_ior_func { void (*war_2)(struct nvkm_ior *); void (*war_3)(struct nvkm_ior *); - struct { - void (*ctrl)(struct nvkm_ior *, int head, bool enable, - u8 max_ac_packet, u8 rekey, u8 *avi, u8 avi_size, - u8 *vendor, u8 vendor_size); + const struct nvkm_ior_func_hdmi { + void (*ctrl)(struct nvkm_ior *, int head, bool enable, u8 max_ac_packet, u8 rekey); void (*scdc)(struct nvkm_ior *, u8 scdc); - } hdmi; + void (*infoframe_avi)(struct nvkm_ior *, int head, void *data, u32 size); + void (*infoframe_vsi)(struct nvkm_ior *, int head, void *data, u32 size); + } *hdmi; const struct nvkm_ior_func_dp { u8 lanes[4]; @@ -124,9 +124,10 @@ void nv50_sor_power(struct nvkm_ior *, bool, bool, bool, bool, bool); void nv50_sor_clock(struct nvkm_ior *); int g84_sor_new(struct nvkm_disp *, int); -void g84_sor_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); +extern const struct nvkm_ior_func_hdmi g84_sor_hdmi; int g94_sor_cnt(struct nvkm_disp *, unsigned long *); + void g94_sor_state(struct nvkm_ior *, struct nvkm_ior_state *); extern const struct nvkm_ior_func_dp g94_sor_dp; int g94_sor_dp_links(struct nvkm_ior *, struct nvkm_i2c_aux *); @@ -137,7 +138,7 @@ void g94_sor_dp_audio_sym(struct nvkm_ior *, int, u16, u32); void g94_sor_dp_activesym(struct nvkm_ior *, int, u8, u8, u8, u8); void g94_sor_dp_watermark(struct nvkm_ior *, int, u8); -void gt215_sor_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); +extern const struct nvkm_ior_func_hdmi gt215_sor_hdmi; void gt215_sor_dp_audio(struct nvkm_ior *, int, bool); extern const struct nvkm_ior_func_hda gt215_sor_hda; @@ -156,12 +157,16 @@ void gf119_sor_hda_hpd(struct nvkm_ior *, int, bool); void gf119_sor_hda_eld(struct nvkm_ior *, int, u8 *, u8); int gk104_sor_new(struct nvkm_disp *, int); -void gk104_sor_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); +extern const struct nvkm_ior_func_hdmi gk104_sor_hdmi; +void gk104_sor_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8); +void gk104_sor_hdmi_infoframe_avi(struct nvkm_ior *, int, void *, u32); +void gk104_sor_hdmi_infoframe_vsi(struct nvkm_ior *, int, 
void *, u32);
 void gm107_sor_dp_pattern(struct nvkm_ior *, int);
 void gm200_sor_route_set(struct nvkm_outp *, struct nvkm_ior *);
 int gm200_sor_route_get(struct nvkm_outp *, int *);
+extern const struct nvkm_ior_func_hdmi gm200_sor_hdmi;
 void gm200_sor_hdmi_scdc(struct nvkm_ior *, u8);
 extern const struct nvkm_ior_func_dp gm200_sor_dp;
 void gm200_sor_dp_drive(struct nvkm_ior *, int, int, int, int, int);
@@ -170,7 +175,7 @@ int gp100_sor_new(struct nvkm_disp *, int);
 int gv100_sor_cnt(struct nvkm_disp *, unsigned long *);
 void gv100_sor_state(struct nvkm_ior *, struct nvkm_ior_state *);
-void gv100_sor_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8);
+extern const struct nvkm_ior_func_hdmi gv100_sor_hdmi;
 void gv100_sor_dp_audio(struct nvkm_ior *, int, bool);
 void gv100_sor_dp_audio_sym(struct nvkm_ior *, int, u16, u32);
 void gv100_sor_dp_watermark(struct nvkm_ior *, int, u8);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c
index 916b1d477b0b..841e3b69fcaf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c
@@ -31,9 +31,7 @@ mcp77_sor = {
 .state = g94_sor_state,
 .power = nv50_sor_power,
 .clock = nv50_sor_clock,
- .hdmi = {
- .ctrl = g84_sor_hdmi_ctrl,
- },
+ .hdmi = &g84_sor_hdmi,
 .dp = &g94_sor_dp,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c
index a5a0b9439374..f96ba4752655 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c
@@ -44,9 +44,7 @@ mcp89_sor = {
 .state = g94_sor_state,
 .power = nv50_sor_power,
 .clock = nv50_sor_clock,
- .hdmi = {
- .ctrl = gt215_sor_hdmi_ctrl,
- },
+ .hdmi = &gt215_sor_hdmi,
 .dp = &mcp89_sor_dp,
 .hda = &gt215_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
index a46e13cc9ff1..be8116802960 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -503,7 +503,7 @@ nv50_disp_chan_uevent_init(struct nvkm_event *event, int types, int index)
 void
 nv50_disp_chan_uevent_send(struct nvkm_disp *disp, int chid)
 {
- nvkm_event_send(&disp->uevent, NVKM_DISP_EVENT_CHAN_AWAKEN, chid, NULL, 0);
+ nvkm_event_ntfy(&disp->uevent, chid, NVKM_DISP_EVENT_CHAN_AWAKEN);
 }
 const struct nvkm_event_func
@@ -1238,6 +1238,8 @@ nv50_disp_super_2_2(struct nvkm_disp *disp, struct nvkm_head *head)
 if (!ior)
 return;
+ outp = ior->asy.outp;
+
 /* For some reason, NVIDIA decided not to:
 *
 * A) Give dual-link LVDS a separate EVO protocol, like for TMDS.
@@ -1247,13 +1249,13 @@ nv50_disp_super_2_2(struct nvkm_disp *disp, struct nvkm_head *head)
 * Override the values we usually read from HW with the same
 * data we pass though an ioctl instead.
 */
- if (ior->type == SOR && ior->asy.proto == LVDS) {
- head->asy.or.depth = (disp->sor.lvdsconf & 0x0200) ? 24 : 18;
- ior->asy.link = (disp->sor.lvdsconf & 0x0100) ? 3 : 1;
+ if (outp && ior->type == SOR && ior->asy.proto == LVDS) {
+ head->asy.or.depth = outp->lvds.bpc8 ? 24 : 18;
+ ior->asy.link = outp->lvds.dual ? 3 : 1;
 }
 /* Handle any link training, etc. */
- if ((outp = ior->asy.outp) && outp->func->acquire)
+ if (outp && outp->func->acquire)
 outp->func->acquire(outp);
 /* Execute OnInt2 IED script. */
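The nv50.c hunk above is the consumer side of a new piece of per-output state: instead of decoding the old disp->sor.lvdsconf script word, the supervisor reads dual-link and panel-depth flags straight off the nvkm_outp, which outp.h below adds. Roughly how they get there, per the acquire path later in this patch (the example_ wrapper is made up):

/* Hypothetical trace of the new LVDS configuration flow. */
static int
example_acquire_lvds(struct nvkm_outp *outp, bool dual, bool bpc8)
{
	if (outp->info.type != DCB_OUTPUT_LVDS)
		return -EINVAL;

	/* Latched on the output; nv50_disp_super_2_2() reads these back
	 * to pick the sublink mask (3 vs 1) and depth (24 vs 18 bpp).
	 */
	outp->lvds.dual = dual;
	outp->lvds.bpc8 = bpc8;

	return nvkm_outp_acquire(outp, NVKM_OUTP_USER, false);
}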
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h
index 3f3924c41957..b7631c1ab242 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h
@@ -2,7 +2,6 @@
 #ifndef __NVKM_DISP_OUTP_H__
 #define __NVKM_DISP_OUTP_H__
 #include "priv.h"
-#include <core/notify.h>
 #include <subdev/bios.h>
 #include <subdev/bios/dcb.h>
@@ -28,13 +27,19 @@ struct nvkm_outp {
 union {
 struct {
+ bool dual;
+ bool bpc8;
+ } lvds;
+
+ struct {
 struct nvbios_dpout info;
 u8 version;
 struct nvkm_i2c_aux *aux;
- struct nvkm_notify hpd;
- bool present;
+ bool enabled;
+ bool aux_pwr;
+ bool aux_pwr_pu;
 u8 lttpr[6];
 u8 lttprs;
 u8 dpcd[16];
@@ -49,12 +54,17 @@ struct nvkm_outp {
 struct mutex mutex;
 struct {
 atomic_t done;
+ u8 nr;
+ u8 bw;
 bool mst;
 } lt;
 } dp;
 };
 struct nvkm_object object;
+ struct {
+ struct nvkm_head *head;
+ } asy;
 };
 int nvkm_outp_new_(const struct nvkm_outp_func *, struct nvkm_disp *, int index,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h
index cb25dfe849f0..ec5292a8f3c8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h
@@ -42,10 +42,6 @@ struct nvkm_disp_func {
 } user[];
 };
-int nvkm_disp_ntfy(struct nvkm_object *, u32, struct nvkm_event **);
-int nv04_disp_mthd(struct nvkm_object *, u32, void *, u32);
-int nv50_disp_root_mthd_(struct nvkm_object *, u32, void *, u32);
-
 int nv50_disp_oneinit(struct nvkm_disp *);
 int nv50_disp_init(struct nvkm_disp *);
 void nv50_disp_fini(struct nvkm_disp *);
@@ -86,4 +82,5 @@ extern const struct nvkm_event_func gv100_disp_chan_uevent;
 int nvkm_udisp_new(const struct nvkm_oclass *, void *, u32, struct nvkm_object **);
 int nvkm_uconn_new(const struct nvkm_oclass *, void *, u32, struct nvkm_object **);
 int nvkm_uoutp_new(const struct nvkm_oclass *, void *, u32, struct nvkm_object **);
+int nvkm_uhead_new(const struct nvkm_oclass *, void *, u32, struct nvkm_object **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c
deleted file mode 100644
index 0af45ccd140c..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- * - * Authors: Ben Skeggs - */ -#include "chan.h" -#include "head.h" -#include "ior.h" -#include "outp.h" - -#include <core/client.h> - -#include <nvif/class.h> -#include <nvif/cl5070.h> -#include <nvif/unpack.h> - -int -nv50_disp_root_mthd_(struct nvkm_object *object, u32 mthd, void *data, u32 size) -{ - union { - struct nv50_disp_mthd_v0 v0; - struct nv50_disp_mthd_v1 v1; - } *args = data; - struct nvkm_disp *disp = nvkm_udisp(object); - struct nvkm_outp *temp, *outp = NULL; - struct nvkm_head *head; - u16 type, mask = 0; - int hidx, ret = -ENOSYS; - - if (mthd != NV50_DISP_MTHD) - return -EINVAL; - - nvif_ioctl(object, "disp mthd size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) { - nvif_ioctl(object, "disp mthd vers %d mthd %02x head %d\n", - args->v0.version, args->v0.method, args->v0.head); - mthd = args->v0.method; - hidx = args->v0.head; - } else - if (!(ret = nvif_unpack(ret, &data, &size, args->v1, 1, 1, true))) { - nvif_ioctl(object, "disp mthd vers %d mthd %02x " - "type %04x mask %04x\n", - args->v1.version, args->v1.method, - args->v1.hasht, args->v1.hashm); - mthd = args->v1.method; - type = args->v1.hasht; - mask = args->v1.hashm; - hidx = ffs((mask >> 8) & 0x0f) - 1; - } else - return ret; - - if (!(head = nvkm_head_find(disp, hidx))) - return -ENXIO; - - if (mask) { - list_for_each_entry(temp, &disp->outps, head) { - if ((temp->info.hasht == type) && - (temp->info.hashm & mask) == mask) { - outp = temp; - break; - } - } - if (outp == NULL) - return -ENXIO; - } - - switch (mthd) { - case NV50_DISP_SCANOUTPOS: { - return nvkm_head_mthd_scanoutpos(object, head, data, size); - } - default: - break; - } - - switch (mthd * !!outp) { - case NV50_DISP_MTHD_V1_ACQUIRE: { - union { - struct nv50_disp_acquire_v0 v0; - } *args = data; - int ret = -ENOSYS; - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - ret = nvkm_outp_acquire(outp, NVKM_OUTP_USER, args->v0.hda); - if (ret == 0) { - args->v0.or = outp->ior->id; - args->v0.link = outp->ior->asy.link; - } - } - return ret; - } - break; - case NV50_DISP_MTHD_V1_RELEASE: - nvkm_outp_release(outp, NVKM_OUTP_USER); - return 0; - case NV50_DISP_MTHD_V1_SOR_HDA_ELD: { - union { - struct nv50_disp_sor_hda_eld_v0 v0; - } *args = data; - struct nvkm_ior *ior = outp->ior; - int ret = -ENOSYS; - - nvif_ioctl(object, "disp sor hda eld size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) { - nvif_ioctl(object, "disp sor hda eld vers %d\n", - args->v0.version); - if (size > 0x60) - return -E2BIG; - } else - return ret; - - if (!ior->hda) - return -ENODEV; - - if (size && args->v0.data[0]) { - if (outp->info.type == DCB_OUTPUT_DP) - ior->func->dp->audio(ior, hidx, true); - ior->func->hda->hpd(ior, hidx, true); - ior->func->hda->eld(ior, hidx, data, size); - } else { - if (outp->info.type == DCB_OUTPUT_DP) - ior->func->dp->audio(ior, hidx, false); - ior->func->hda->hpd(ior, hidx, false); - } - - return 0; - } - break; - case NV50_DISP_MTHD_V1_SOR_HDMI_PWR: { - union { - struct nv50_disp_sor_hdmi_pwr_v0 v0; - } *args = data; - u8 *vendor, vendor_size; - u8 *avi, avi_size; - int ret = -ENOSYS; - - nvif_ioctl(object, "disp sor hdmi ctrl size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) { - nvif_ioctl(object, "disp sor hdmi ctrl vers %d state %d " - "max_ac_packet %d rekey %d scdc %d\n", - args->v0.version, args->v0.state, - args->v0.max_ac_packet, args->v0.rekey, - args->v0.scdc); - if (args->v0.max_ac_packet > 0x1f || 
args->v0.rekey > 0x7f) - return -EINVAL; - if ((args->v0.avi_infoframe_length - + args->v0.vendor_infoframe_length) > size) - return -EINVAL; - else - if ((args->v0.avi_infoframe_length - + args->v0.vendor_infoframe_length) < size) - return -E2BIG; - avi = data; - avi_size = args->v0.avi_infoframe_length; - vendor = avi + avi_size; - vendor_size = args->v0.vendor_infoframe_length; - } else - return ret; - - if (!outp->ior->func->hdmi.ctrl) - return -ENODEV; - - outp->ior->func->hdmi.ctrl(outp->ior, hidx, args->v0.state, - args->v0.max_ac_packet, - args->v0.rekey, avi, avi_size, - vendor, vendor_size); - - if (outp->ior->func->hdmi.scdc) - outp->ior->func->hdmi.scdc(outp->ior, args->v0.scdc); - - return 0; - } - break; - case NV50_DISP_MTHD_V1_SOR_LVDS_SCRIPT: { - union { - struct nv50_disp_sor_lvds_script_v0 v0; - } *args = data; - int ret = -ENOSYS; - nvif_ioctl(object, "disp sor lvds script size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(object, "disp sor lvds script " - "vers %d name %04x\n", - args->v0.version, args->v0.script); - disp->sor.lvdsconf = args->v0.script; - return 0; - } else - return ret; - } - break; - case NV50_DISP_MTHD_V1_SOR_DP_MST_LINK: { - union { - struct nv50_disp_sor_dp_mst_link_v0 v0; - } *args = data; - int ret = -ENOSYS; - nvif_ioctl(object, "disp sor dp mst link size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(object, "disp sor dp mst link vers %d state %d\n", - args->v0.version, args->v0.state); - outp->dp.lt.mst = !!args->v0.state; - return 0; - } else - return ret; - } - break; - case NV50_DISP_MTHD_V1_SOR_DP_MST_VCPI: { - union { - struct nv50_disp_sor_dp_mst_vcpi_v0 v0; - } *args = data; - int ret = -ENOSYS; - nvif_ioctl(object, "disp sor dp mst vcpi size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(object, "disp sor dp mst vcpi vers %d " - "slot %02x/%02x pbn %04x/%04x\n", - args->v0.version, args->v0.start_slot, - args->v0.num_slots, args->v0.pbn, - args->v0.aligned_pbn); - if (!outp->ior->func->dp->vcpi) - return -ENODEV; - outp->ior->func->dp->vcpi(outp->ior, hidx, - args->v0.start_slot, - args->v0.num_slots, - args->v0.pbn, - args->v0.aligned_pbn); - return 0; - } else - return ret; - } - break; - default: - break; - } - - return -EINVAL; -} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c index e4ad1a6f6c88..f5242a672279 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c @@ -88,10 +88,7 @@ tu102_sor = { .state = gv100_sor_state, .power = nv50_sor_power, .clock = gf119_sor_clock, - .hdmi = { - .ctrl = gv100_sor_hdmi_ctrl, - .scdc = gm200_sor_hdmi_scdc, - }, + .hdmi = &gv100_sor_hdmi, .dp = &tu102_sor_dp, .hda = &gv100_sor_hda, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c index fd9f18144c26..dad942be6679 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c @@ -21,12 +21,86 @@ */ #define nvkm_uconn(p) container_of((p), struct nvkm_conn, object) #include "conn.h" +#include "outp.h" +#include <core/client.h> +#include <core/event.h> #include <subdev/gpio.h> +#include <subdev/i2c.h> #include <nvif/if0011.h> static int +nvkm_uconn_uevent_aux(struct nvkm_object *object, u64 token, u32 bits) +{ + union nvif_conn_event_args args; + + 
args.v0.version = 0; + args.v0.types = 0; + if (bits & NVKM_I2C_PLUG) + args.v0.types |= NVIF_CONN_EVENT_V0_PLUG; + if (bits & NVKM_I2C_UNPLUG) + args.v0.types |= NVIF_CONN_EVENT_V0_UNPLUG; + if (bits & NVKM_I2C_IRQ) + args.v0.types |= NVIF_CONN_EVENT_V0_IRQ; + + return object->client->event(token, &args, sizeof(args.v0)); +} + +static int +nvkm_uconn_uevent_gpio(struct nvkm_object *object, u64 token, u32 bits) +{ + union nvif_conn_event_args args; + + args.v0.version = 0; + args.v0.types = 0; + if (bits & NVKM_GPIO_HI) + args.v0.types |= NVIF_CONN_EVENT_V0_PLUG; + if (bits & NVKM_GPIO_LO) + args.v0.types |= NVIF_CONN_EVENT_V0_UNPLUG; + + return object->client->event(token, &args, sizeof(args.v0)); +} + +static int +nvkm_uconn_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_uevent *uevent) +{ + struct nvkm_conn *conn = nvkm_uconn(object); + struct nvkm_device *device = conn->disp->engine.subdev.device; + struct nvkm_outp *outp; + union nvif_conn_event_args *args = argv; + u64 bits = 0; + + if (!uevent) { + if (conn->info.hpd == DCB_GPIO_UNUSED) + return -ENOSYS; + return 0; + } + + if (argc != sizeof(args->v0) || args->v0.version != 0) + return -ENOSYS; + + list_for_each_entry(outp, &conn->disp->outps, head) { + if (outp->info.connector == conn->index && outp->dp.aux) { + if (args->v0.types & NVIF_CONN_EVENT_V0_PLUG ) bits |= NVKM_I2C_PLUG; + if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_I2C_UNPLUG; + if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ ) bits |= NVKM_I2C_IRQ; + + return nvkm_uevent_add(uevent, &device->i2c->event, outp->dp.aux->id, bits, + nvkm_uconn_uevent_aux); + } + } + + if (args->v0.types & NVIF_CONN_EVENT_V0_PLUG ) bits |= NVKM_GPIO_HI; + if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_GPIO_LO; + if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ) + return -EINVAL; + + return nvkm_uevent_add(uevent, &device->gpio->event, conn->info.hpd, bits, + nvkm_uconn_uevent_gpio); +} + +static int nvkm_uconn_mthd_hpd_status(struct nvkm_conn *conn, void *argv, u32 argc) { struct nvkm_gpio *gpio = conn->disp->engine.subdev.device->gpio; @@ -82,6 +156,7 @@ static const struct nvkm_object_func nvkm_uconn = { .dtor = nvkm_uconn_dtor, .mthd = nvkm_uconn_mthd, + .uevent = nvkm_uconn_uevent, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/udisp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/udisp.c index 0841e7ce0343..0268d1d75805 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/udisp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/udisp.c @@ -21,6 +21,7 @@ */ #include "priv.h" #include "conn.h" +#include "head.h" #include "outp.h" #include <nvif/class.h> @@ -43,6 +44,12 @@ nvkm_udisp_sclass(struct nvkm_object *object, int index, struct nvkm_oclass *scl return 0; } + if (index-- == 0) { + sclass->base = (struct nvkm_sclass) { 0, 0, NVIF_CLASS_HEAD }; + sclass->ctor = nvkm_uhead_new; + return 0; + } + if (disp->func->user[index].ctor) { sclass->base = disp->func->user[index].base; sclass->ctor = disp->func->user[index].ctor; @@ -52,17 +59,6 @@ nvkm_udisp_sclass(struct nvkm_object *object, int index, struct nvkm_oclass *scl return -EINVAL; } -static int -nvkm_udisp_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc) -{ - struct nvkm_disp *disp = nvkm_udisp(object); - - if (disp->engine.subdev.device->card_type >= NV_50) - return nv50_disp_root_mthd_(object, mthd, argv, argc); - - return nv04_disp_mthd(object, mthd, argv, argc); -} - static void * nvkm_udisp_dtor(struct nvkm_object *object) { @@ -78,8 +74,6 @@ 
nvkm_udisp_dtor(struct nvkm_object *object) static const struct nvkm_object_func nvkm_udisp = { .dtor = nvkm_udisp_dtor, - .mthd = nvkm_udisp_mthd, - .ntfy = nvkm_disp_ntfy, .sclass = nvkm_udisp_sclass, }; @@ -89,6 +83,7 @@ nvkm_udisp_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nv struct nvkm_disp *disp = nvkm_disp(oclass->engine); struct nvkm_conn *conn; struct nvkm_outp *outp; + struct nvkm_head *head; union nvif_disp_args *args = argv; if (argc != sizeof(args->v0) || args->v0.version != 0) @@ -111,5 +106,9 @@ nvkm_udisp_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nv list_for_each_entry(outp, &disp->outps, head) args->v0.outp_mask |= BIT(outp->index); + args->v0.head_mask = 0; + list_for_each_entry(head, &disp->heads, head) + args->v0.head_mask |= BIT(head->id); + return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uhead.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uhead.c new file mode 100644 index 000000000000..f072cec16040 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uhead.c @@ -0,0 +1,127 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#define nvkm_uhead(p) container_of((p), struct nvkm_head, object) +#include "head.h" +#include <core/event.h> + +#include <nvif/if0013.h> + +#include <nvif/event.h> + +static int +nvkm_uhead_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_uevent *uevent) +{ + struct nvkm_head *head = nvkm_uhead(object); + union nvif_head_event_args *args = argv; + + if (!uevent) + return 0; + if (argc != sizeof(args->vn)) + return -ENOSYS; + + return nvkm_uevent_add(uevent, &head->disp->vblank, head->id, + NVKM_DISP_HEAD_EVENT_VBLANK, NULL); +} + +static int +nvkm_uhead_mthd_scanoutpos(struct nvkm_head *head, void *argv, u32 argc) +{ + union nvif_head_scanoutpos_args *args = argv; + + if (argc != sizeof(args->v0) || args->v0.version != 0) + return -ENOSYS; + + head->func->state(head, &head->arm); + args->v0.vtotal = head->arm.vtotal; + args->v0.vblanks = head->arm.vblanks; + args->v0.vblanke = head->arm.vblanke; + args->v0.htotal = head->arm.htotal; + args->v0.hblanks = head->arm.hblanks; + args->v0.hblanke = head->arm.hblanke; + + /* We don't support reading htotal/vtotal on pre-NV50 VGA, + * so we have to give up and trigger the timestamping + * fallback in the drm core. 
+ */ + if (!args->v0.vtotal || !args->v0.htotal) + return -ENOTSUPP; + + args->v0.time[0] = ktime_to_ns(ktime_get()); + head->func->rgpos(head, &args->v0.hline, &args->v0.vline); + args->v0.time[1] = ktime_to_ns(ktime_get()); + return 0; +} + +static int +nvkm_uhead_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc) +{ + struct nvkm_head *head = nvkm_uhead(object); + + switch (mthd) { + case NVIF_HEAD_V0_SCANOUTPOS: return nvkm_uhead_mthd_scanoutpos(head, argv, argc); + default: + return -EINVAL; + } +} + +static void * +nvkm_uhead_dtor(struct nvkm_object *object) +{ + struct nvkm_head *head = nvkm_uhead(object); + struct nvkm_disp *disp = head->disp; + + spin_lock(&disp->client.lock); + head->object.func = NULL; + spin_unlock(&disp->client.lock); + return NULL; +} + +static const struct nvkm_object_func +nvkm_uhead = { + .dtor = nvkm_uhead_dtor, + .mthd = nvkm_uhead_mthd, + .uevent = nvkm_uhead_uevent, +}; + +int +nvkm_uhead_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject) +{ + struct nvkm_disp *disp = nvkm_udisp(oclass->parent); + struct nvkm_head *head; + union nvif_head_args *args = argv; + int ret; + + if (argc != sizeof(args->v0) || args->v0.version != 0) + return -ENOSYS; + if (!(head = nvkm_head_find(disp, args->v0.id))) + return -EINVAL; + + ret = -EBUSY; + spin_lock(&disp->client.lock); + if (!head->object.func) { + nvkm_object_ctor(&nvkm_uhead, oclass, &head->object); + *pobject = &head->object; + ret = 0; + } + spin_unlock(&disp->client.lock); + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c index abedb3e86361..4f0ca709c85a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c @@ -21,11 +21,238 @@ */ #define nvkm_uoutp(p) container_of((p), struct nvkm_outp, object) #include "outp.h" +#include "dp.h" +#include "head.h" #include "ior.h" #include <nvif/if0012.h> static int +nvkm_uoutp_mthd_dp_mst_vcpi(struct nvkm_outp *outp, void *argv, u32 argc) +{ + struct nvkm_ior *ior = outp->ior; + union nvif_outp_dp_mst_vcpi_args *args = argv; + + if (argc != sizeof(args->v0) || args->v0.version != 0) + return -ENOSYS; + if (!ior->func->dp || !ior->func->dp->vcpi || !nvkm_head_find(outp->disp, args->v0.head)) + return -EINVAL; + + ior->func->dp->vcpi(ior, args->v0.head, args->v0.start_slot, args->v0.num_slots, + args->v0.pbn, args->v0.aligned_pbn); + return 0; +} + +static int +nvkm_uoutp_mthd_dp_retrain(struct nvkm_outp *outp, void *argv, u32 argc) +{ + union nvif_outp_dp_retrain_args *args = argv; + + if (argc != sizeof(args->vn)) + return -ENOSYS; + + if (!atomic_read(&outp->dp.lt.done)) + return 0; + + return outp->func->acquire(outp); +} + +static int +nvkm_uoutp_mthd_dp_aux_pwr(struct nvkm_outp *outp, void *argv, u32 argc) +{ + union nvif_outp_dp_aux_pwr_args *args = argv; + + if (argc != sizeof(args->v0) || args->v0.version != 0) + return -ENOSYS; + + outp->dp.enabled = !!args->v0.state; + nvkm_dp_enable(outp, outp->dp.enabled); + return 0; +} + +static int +nvkm_uoutp_mthd_hda_eld(struct nvkm_outp *outp, void *argv, u32 argc) +{ + struct nvkm_ior *ior = outp->ior; + union nvif_outp_hda_eld_args *args = argv; + + if (argc < sizeof(args->v0) || args->v0.version != 0) + return -ENOSYS; + argc -= sizeof(args->v0); + + if (!ior->hda || !nvkm_head_find(outp->disp, args->v0.head)) + return -EINVAL; + if (argc > 0x60) + return -E2BIG; + + if (argc && args->v0.data[0]) { + if (outp->info.type == 
DCB_OUTPUT_DP) + ior->func->dp->audio(ior, args->v0.head, true); + ior->func->hda->hpd(ior, args->v0.head, true); + ior->func->hda->eld(ior, args->v0.head, args->v0.data, argc); + } else { + if (outp->info.type == DCB_OUTPUT_DP) + ior->func->dp->audio(ior, args->v0.head, false); + ior->func->hda->hpd(ior, args->v0.head, false); + } + + return 0; +} + +static int +nvkm_uoutp_mthd_infoframe(struct nvkm_outp *outp, void *argv, u32 argc) +{ + struct nvkm_ior *ior = outp->ior; + union nvif_outp_infoframe_args *args = argv; + ssize_t size = argc - sizeof(*args); + + if (argc < sizeof(args->v0) || args->v0.version != 0) + return -ENOSYS; + if (!nvkm_head_find(outp->disp, args->v0.head)) + return -EINVAL; + + switch (ior->func->hdmi ? args->v0.type : 0xff) { + case NVIF_OUTP_INFOFRAME_V0_AVI: + ior->func->hdmi->infoframe_avi(ior, args->v0.head, &args->v0.data, size); + return 0; + case NVIF_OUTP_INFOFRAME_V0_VSI: + ior->func->hdmi->infoframe_vsi(ior, args->v0.head, &args->v0.data, size); + return 0; + default: + break; + } + + return -EINVAL; +} + +static int +nvkm_uoutp_mthd_release(struct nvkm_outp *outp, void *argv, u32 argc) +{ + struct nvkm_head *head = outp->asy.head; + struct nvkm_ior *ior = outp->ior; + union nvif_outp_release_args *args = argv; + + if (argc != sizeof(args->vn)) + return -ENOSYS; + + if (ior->func->hdmi && head) { + ior->func->hdmi->infoframe_avi(ior, head->id, NULL, 0); + ior->func->hdmi->infoframe_vsi(ior, head->id, NULL, 0); + ior->func->hdmi->ctrl(ior, head->id, false, 0, 0); + } + + nvkm_outp_release(outp, NVKM_OUTP_USER); + return 0; +} + +static int +nvkm_uoutp_mthd_acquire_dp(struct nvkm_outp *outp, u8 dpcd[16], + u8 link_nr, u8 link_bw, bool hda, bool mst) +{ + int ret; + + ret = nvkm_outp_acquire(outp, NVKM_OUTP_USER, hda); + if (ret) + return ret; + + memcpy(outp->dp.dpcd, dpcd, sizeof(outp->dp.dpcd)); + outp->dp.lt.nr = link_nr; + outp->dp.lt.bw = link_bw; + outp->dp.lt.mst = mst; + return 0; +} + +static int +nvkm_uoutp_mthd_acquire_tmds(struct nvkm_outp *outp, u8 head, u8 hdmi, u8 hdmi_max_ac_packet, + u8 hdmi_rekey, u8 hdmi_scdc, u8 hdmi_hda) +{ + struct nvkm_ior *ior; + int ret; + + if (!(outp->asy.head = nvkm_head_find(outp->disp, head))) + return -EINVAL; + + ret = nvkm_outp_acquire(outp, NVKM_OUTP_USER, hdmi && hdmi_hda); + if (ret) + return ret; + + ior = outp->ior; + + if (hdmi) { + if (!ior->func->hdmi || + hdmi_max_ac_packet > 0x1f || hdmi_rekey > 0x7f || + (hdmi_scdc && !ior->func->hdmi->scdc)) { + nvkm_outp_release(outp, NVKM_OUTP_USER); + return -EINVAL; + } + + ior->func->hdmi->ctrl(ior, head, hdmi, hdmi_max_ac_packet, hdmi_rekey); + if (ior->func->hdmi->scdc) + ior->func->hdmi->scdc(ior, hdmi_scdc); + } + + return 0; +} + +static int +nvkm_uoutp_mthd_acquire_lvds(struct nvkm_outp *outp, bool dual, bool bpc8) +{ + if (outp->info.type != DCB_OUTPUT_LVDS) + return -EINVAL; + + outp->lvds.dual = dual; + outp->lvds.bpc8 = bpc8; + + return nvkm_outp_acquire(outp, NVKM_OUTP_USER, false); +} + +static int +nvkm_uoutp_mthd_acquire(struct nvkm_outp *outp, void *argv, u32 argc) +{ + union nvif_outp_acquire_args *args = argv; + int ret; + + if (argc != sizeof(args->v0) || args->v0.version != 0) + return -ENOSYS; + if (outp->ior) + return -EBUSY; + + switch (args->v0.proto) { + case NVIF_OUTP_ACQUIRE_V0_RGB_CRT: + ret = nvkm_outp_acquire(outp, NVKM_OUTP_USER, false); + break; + case NVIF_OUTP_ACQUIRE_V0_TMDS: + ret = nvkm_uoutp_mthd_acquire_tmds(outp, args->v0.tmds.head, + args->v0.tmds.hdmi, + args->v0.tmds.hdmi_max_ac_packet, + args->v0.tmds.hdmi_rekey, + 
args->v0.tmds.hdmi_scdc, + args->v0.tmds.hdmi_hda); + break; + case NVIF_OUTP_ACQUIRE_V0_LVDS: + ret = nvkm_uoutp_mthd_acquire_lvds(outp, args->v0.lvds.dual, args->v0.lvds.bpc8); + break; + case NVIF_OUTP_ACQUIRE_V0_DP: + ret = nvkm_uoutp_mthd_acquire_dp(outp, args->v0.dp.dpcd, + args->v0.dp.link_nr, + args->v0.dp.link_bw, + args->v0.dp.hda != 0, + args->v0.dp.mst != 0); + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + return ret; + + args->v0.or = outp->ior->id; + args->v0.link = outp->ior->asy.link; + return 0; +} + +static int nvkm_uoutp_mthd_load_detect(struct nvkm_outp *outp, void *argv, u32 argc) { union nvif_outp_load_detect_args *args = argv; @@ -49,10 +276,28 @@ nvkm_uoutp_mthd_load_detect(struct nvkm_outp *outp, void *argv, u32 argc) } static int +nvkm_uoutp_mthd_acquired(struct nvkm_outp *outp, u32 mthd, void *argv, u32 argc) +{ + switch (mthd) { + case NVIF_OUTP_V0_RELEASE : return nvkm_uoutp_mthd_release (outp, argv, argc); + case NVIF_OUTP_V0_INFOFRAME : return nvkm_uoutp_mthd_infoframe (outp, argv, argc); + case NVIF_OUTP_V0_HDA_ELD : return nvkm_uoutp_mthd_hda_eld (outp, argv, argc); + case NVIF_OUTP_V0_DP_RETRAIN : return nvkm_uoutp_mthd_dp_retrain (outp, argv, argc); + case NVIF_OUTP_V0_DP_MST_VCPI: return nvkm_uoutp_mthd_dp_mst_vcpi(outp, argv, argc); + default: + break; + } + + return -EINVAL; +} + +static int nvkm_uoutp_mthd_noacquire(struct nvkm_outp *outp, u32 mthd, void *argv, u32 argc) { switch (mthd) { case NVIF_OUTP_V0_LOAD_DETECT: return nvkm_uoutp_mthd_load_detect(outp, argv, argc); + case NVIF_OUTP_V0_ACQUIRE : return nvkm_uoutp_mthd_acquire (outp, argv, argc); + case NVIF_OUTP_V0_DP_AUX_PWR : return nvkm_uoutp_mthd_dp_aux_pwr (outp, argv, argc); default: break; } @@ -73,6 +318,11 @@ nvkm_uoutp_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc) if (ret <= 0) goto done; + if (outp->ior) + ret = nvkm_uoutp_mthd_acquired(outp, mthd, argv, argc); + else + ret = -EIO; + done: mutex_unlock(&disp->super.mutex); return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c index 43b7dec45179..d619b40a42c3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c @@ -65,10 +65,10 @@ nvkm_falcon_intr(struct nvkm_engine *engine) u32 dest = nvkm_rd32(device, base + 0x01c); u32 intr = nvkm_rd32(device, base + 0x008) & dest & ~(dest >> 16); u32 inst = nvkm_rd32(device, base + 0x050) & 0x3fffffff; - struct nvkm_fifo_chan *chan; + struct nvkm_chan *chan; unsigned long flags; - chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags); + chan = nvkm_chan_get_inst(engine, (u64)inst << 12, &flags); if (intr & 0x00000040) { if (falcon->func->intr) { @@ -89,7 +89,7 @@ nvkm_falcon_intr(struct nvkm_engine *engine) nvkm_wr32(device, base + 0x004, intr); } - nvkm_fifo_chan_put(device->fifo, flags, &chan); + nvkm_chan_put(&chan, flags); } static int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild index 5e831d347a95..5a074b9970ab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild @@ -1,11 +1,18 @@ # SPDX-License-Identifier: MIT nvkm-y += nvkm/engine/fifo/base.o +nvkm-y += nvkm/engine/fifo/cgrp.o +nvkm-y += nvkm/engine/fifo/chan.o +nvkm-y += nvkm/engine/fifo/chid.o +nvkm-y += nvkm/engine/fifo/runl.o +nvkm-y += nvkm/engine/fifo/runq.o + nvkm-y += nvkm/engine/fifo/nv04.o nvkm-y += nvkm/engine/fifo/nv10.o nvkm-y += 
nvkm/engine/fifo/nv17.o nvkm-y += nvkm/engine/fifo/nv40.o nvkm-y += nvkm/engine/fifo/nv50.o nvkm-y += nvkm/engine/fifo/g84.o +nvkm-y += nvkm/engine/fifo/g98.o nvkm-y += nvkm/engine/fifo/gf100.o nvkm-y += nvkm/engine/fifo/gk104.o nvkm-y += nvkm/engine/fifo/gk110.o @@ -13,28 +20,11 @@ nvkm-y += nvkm/engine/fifo/gk208.o nvkm-y += nvkm/engine/fifo/gk20a.o nvkm-y += nvkm/engine/fifo/gm107.o nvkm-y += nvkm/engine/fifo/gm200.o -nvkm-y += nvkm/engine/fifo/gm20b.o nvkm-y += nvkm/engine/fifo/gp100.o -nvkm-y += nvkm/engine/fifo/gp10b.o nvkm-y += nvkm/engine/fifo/gv100.o nvkm-y += nvkm/engine/fifo/tu102.o +nvkm-y += nvkm/engine/fifo/ga100.o nvkm-y += nvkm/engine/fifo/ga102.o -nvkm-y += nvkm/engine/fifo/chan.o -nvkm-y += nvkm/engine/fifo/channv50.o -nvkm-y += nvkm/engine/fifo/chang84.o - -nvkm-y += nvkm/engine/fifo/dmanv04.o -nvkm-y += nvkm/engine/fifo/dmanv10.o -nvkm-y += nvkm/engine/fifo/dmanv17.o -nvkm-y += nvkm/engine/fifo/dmanv40.o - -nvkm-y += nvkm/engine/fifo/gpfifonv50.o -nvkm-y += nvkm/engine/fifo/gpfifog84.o -nvkm-y += nvkm/engine/fifo/gpfifogf100.o -nvkm-y += nvkm/engine/fifo/gpfifogk104.o -nvkm-y += nvkm/engine/fifo/gpfifogv100.o -nvkm-y += nvkm/engine/fifo/gpfifotu102.o - -nvkm-y += nvkm/engine/fifo/usergv100.o -nvkm-y += nvkm/engine/fifo/usertu102.o +nvkm-y += nvkm/engine/fifo/ucgrp.o +nvkm-y += nvkm/engine/fifo/uchan.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c index 58b8df75fc40..5ea9a2ff0663 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c @@ -23,25 +23,32 @@ */ #include "priv.h" #include "chan.h" +#include "chid.h" +#include "runl.h" +#include "runq.h" -#include <core/client.h> #include <core/gpuobj.h> -#include <core/notify.h> +#include <subdev/bar.h> #include <subdev/mc.h> +#include <subdev/mmu.h> -#include <nvif/event.h> #include <nvif/cl0080.h> #include <nvif/unpack.h> -void -nvkm_fifo_recover_chan(struct nvkm_fifo *fifo, int chid) +bool +nvkm_fifo_ctxsw_in_progress(struct nvkm_engine *engine) { - unsigned long flags; - if (WARN_ON(!fifo->func->recover_chan)) - return; - spin_lock_irqsave(&fifo->lock, flags); - fifo->func->recover_chan(fifo, chid); - spin_unlock_irqrestore(&fifo->lock, flags); + struct nvkm_runl *runl; + struct nvkm_engn *engn; + + nvkm_runl_foreach(runl, engine->subdev.device->fifo) { + nvkm_runl_foreach_engn(engn, runl) { + if (engn->engine == engine) + return engn->func->chsw ? 
engn->func->chsw(engn) : false; + } + } + + return false; } void @@ -59,160 +66,23 @@ nvkm_fifo_start(struct nvkm_fifo *fifo, unsigned long *flags) void nvkm_fifo_fault(struct nvkm_fifo *fifo, struct nvkm_fault_data *info) { - return fifo->func->fault(fifo, info); -} - -void -nvkm_fifo_chan_put(struct nvkm_fifo *fifo, unsigned long flags, - struct nvkm_fifo_chan **pchan) -{ - struct nvkm_fifo_chan *chan = *pchan; - if (likely(chan)) { - *pchan = NULL; - spin_unlock_irqrestore(&fifo->lock, flags); - } -} - -struct nvkm_fifo_chan * -nvkm_fifo_chan_inst_locked(struct nvkm_fifo *fifo, u64 inst) -{ - struct nvkm_fifo_chan *chan; - list_for_each_entry(chan, &fifo->chan, head) { - if (chan->inst->addr == inst) { - list_del(&chan->head); - list_add(&chan->head, &fifo->chan); - return chan; - } - } - return NULL; -} - -struct nvkm_fifo_chan * -nvkm_fifo_chan_inst(struct nvkm_fifo *fifo, u64 inst, unsigned long *rflags) -{ - struct nvkm_fifo_chan *chan; - unsigned long flags; - spin_lock_irqsave(&fifo->lock, flags); - if ((chan = nvkm_fifo_chan_inst_locked(fifo, inst))) { - *rflags = flags; - return chan; - } - spin_unlock_irqrestore(&fifo->lock, flags); - return NULL; -} - -struct nvkm_fifo_chan * -nvkm_fifo_chan_chid(struct nvkm_fifo *fifo, int chid, unsigned long *rflags) -{ - struct nvkm_fifo_chan *chan; - unsigned long flags; - spin_lock_irqsave(&fifo->lock, flags); - list_for_each_entry(chan, &fifo->chan, head) { - if (chan->chid == chid) { - list_del(&chan->head); - list_add(&chan->head, &fifo->chan); - *rflags = flags; - return chan; - } - } - spin_unlock_irqrestore(&fifo->lock, flags); - return NULL; -} - -void -nvkm_fifo_kevent(struct nvkm_fifo *fifo, int chid) -{ - nvkm_event_send(&fifo->kevent, 1, chid, NULL, 0); -} - -static int -nvkm_fifo_kevent_ctor(struct nvkm_object *object, void *data, u32 size, - struct nvkm_notify *notify) -{ - struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object); - if (size == 0) { - notify->size = 0; - notify->types = 1; - notify->index = chan->chid; - return 0; - } - return -ENOSYS; -} - -static const struct nvkm_event_func -nvkm_fifo_kevent_func = { - .ctor = nvkm_fifo_kevent_ctor, -}; - -static void -nvkm_fifo_uevent_fini(struct nvkm_event *event, int type, int index) -{ - struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), uevent); - fifo->func->uevent_fini(fifo); -} - -static void -nvkm_fifo_uevent_init(struct nvkm_event *event, int type, int index) -{ - struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), uevent); - fifo->func->uevent_init(fifo); -} - -static int -nvkm_fifo_uevent_ctor(struct nvkm_object *object, void *data, u32 size, - struct nvkm_notify *notify) -{ - union { - struct nvif_notify_uevent_req none; - } *req = data; - int ret = -ENOSYS; - - if (!(ret = nvif_unvers(ret, &data, &size, req->none))) { - notify->size = sizeof(struct nvif_notify_uevent_rep); - notify->types = 1; - notify->index = 0; - } - - return ret; -} - -static const struct nvkm_event_func -nvkm_fifo_uevent_func = { - .ctor = nvkm_fifo_uevent_ctor, - .init = nvkm_fifo_uevent_init, - .fini = nvkm_fifo_uevent_fini, -}; - -void -nvkm_fifo_uevent(struct nvkm_fifo *fifo) -{ - struct nvif_notify_uevent_rep rep = { - }; - nvkm_event_send(&fifo->uevent, 1, 0, &rep, sizeof(rep)); + return fifo->func->mmu_fault->recover(fifo, info); } static int -nvkm_fifo_class_new_(struct nvkm_device *device, - const struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nvkm_fifo_class_new(struct nvkm_device *device, const struct nvkm_oclass *oclass, + void 
*argv, u32 argc, struct nvkm_object **pobject) { struct nvkm_fifo *fifo = nvkm_fifo(oclass->engine); - return fifo->func->class_new(fifo, oclass, data, size, pobject); -} -static const struct nvkm_device_oclass -nvkm_fifo_class_ = { - .ctor = nvkm_fifo_class_new_, -}; + if (oclass->engn == &fifo->func->cgrp.user) + return nvkm_ucgrp_new(fifo, oclass, argv, argc, pobject); -static int -nvkm_fifo_class_new(struct nvkm_device *device, - const struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) -{ - const struct nvkm_fifo_chan_oclass *sclass = oclass->engn; - struct nvkm_fifo *fifo = nvkm_fifo(oclass->engine); - return sclass->ctor(fifo, oclass, data, size, pobject); + if (oclass->engn == &fifo->func->chan.user) + return nvkm_uchan_new(fifo, NULL, oclass, argv, argc, pobject); + + WARN_ON(1); + return -ENOSYS; } static const struct nvkm_device_oclass @@ -221,24 +91,28 @@ nvkm_fifo_class = { }; static int -nvkm_fifo_class_get(struct nvkm_oclass *oclass, int index, - const struct nvkm_device_oclass **class) +nvkm_fifo_class_get(struct nvkm_oclass *oclass, int index, const struct nvkm_device_oclass **class) { struct nvkm_fifo *fifo = nvkm_fifo(oclass->engine); - const struct nvkm_fifo_chan_oclass *sclass; + const struct nvkm_fifo_func_cgrp *cgrp = &fifo->func->cgrp; + const struct nvkm_fifo_func_chan *chan = &fifo->func->chan; int c = 0; - if (fifo->func->class_get) { - int ret = fifo->func->class_get(fifo, index, oclass); - if (ret == 0) - *class = &nvkm_fifo_class_; - return ret; + /* *_CHANNEL_GROUP_* */ + if (cgrp->user.oclass) { + if (c++ == index) { + oclass->base = cgrp->user; + oclass->engn = &fifo->func->cgrp.user; + *class = &nvkm_fifo_class; + return 0; + } } - while ((sclass = fifo->func->chan[c])) { + /* *_CHANNEL_DMA, *_CHANNEL_GPFIFO_* */ + if (chan->user.oclass) { if (c++ == index) { - oclass->base = sclass->base; - oclass->engn = sclass; + oclass->base = chan->user; + oclass->engn = &fifo->func->chan.user; *class = &nvkm_fifo_class; return 0; } @@ -247,19 +121,47 @@ nvkm_fifo_class_get(struct nvkm_oclass *oclass, int index, return c; } -static void -nvkm_fifo_intr(struct nvkm_engine *engine) +static int +nvkm_fifo_fini(struct nvkm_engine *engine, bool suspend) { struct nvkm_fifo *fifo = nvkm_fifo(engine); - fifo->func->intr(fifo); + struct nvkm_runl *runl; + + nvkm_inth_block(&fifo->engine.subdev.inth); + + nvkm_runl_foreach(runl, fifo) + nvkm_runl_fini(runl); + + return 0; } static int -nvkm_fifo_fini(struct nvkm_engine *engine, bool suspend) +nvkm_fifo_init(struct nvkm_engine *engine) { struct nvkm_fifo *fifo = nvkm_fifo(engine); - if (fifo->func->fini) - fifo->func->fini(fifo); + struct nvkm_runq *runq; + struct nvkm_runl *runl; + u32 mask = 0; + + if (fifo->func->init_pbdmas) { + nvkm_runq_foreach(runq, fifo) + mask |= BIT(runq->id); + + fifo->func->init_pbdmas(fifo, mask); + + nvkm_runq_foreach(runq, fifo) + runq->func->init(runq); + } + + nvkm_runl_foreach(runl, fifo) { + if (runl->func->init) + runl->func->init(runl); + } + + if (fifo->func->init) + fifo->func->init(fifo); + + nvkm_inth_allow(&fifo->engine.subdev.inth); return 0; } @@ -267,22 +169,146 @@ static int nvkm_fifo_info(struct nvkm_engine *engine, u64 mthd, u64 *data) { struct nvkm_fifo *fifo = nvkm_fifo(engine); + struct nvkm_runl *runl; + struct nvkm_engn *engn; + int ret; + + ret = nvkm_subdev_oneinit(&fifo->engine.subdev); + if (ret) + return ret; + switch (mthd) { - case NV_DEVICE_HOST_CHANNELS: *data = fifo->nr; return 0; + case NV_DEVICE_HOST_CHANNELS: *data = fifo->chid ? 
fifo->chid->nr : 0; return 0; + case NV_DEVICE_HOST_RUNLISTS: + *data = 0; + nvkm_runl_foreach(runl, fifo) + *data |= BIT(runl->id); + return 0; + case NV_DEVICE_HOST_RUNLIST_ENGINES: + runl = nvkm_runl_get(fifo, *data, 0); + if (runl) { + *data = 0; + nvkm_runl_foreach_engn(engn, runl) { +#define CASE(n) case NVKM_ENGINE_##n: *data |= NV_DEVICE_HOST_RUNLIST_ENGINES_##n; break + switch (engn->engine->subdev.type) { + case NVKM_ENGINE_DMAOBJ: + break; + CASE(SW ); + CASE(GR ); + CASE(MPEG ); + CASE(ME ); + CASE(CIPHER); + CASE(BSP ); + CASE(VP ); + CASE(CE ); + CASE(SEC ); + CASE(MSVLD ); + CASE(MSPDEC); + CASE(MSPPP ); + CASE(MSENC ); + CASE(VIC ); + CASE(SEC2 ); + CASE(NVDEC ); + CASE(NVENC ); + default: + WARN_ON(1); + break; + } +#undef CASE + } + return 0; + } + return -EINVAL; + case NV_DEVICE_HOST_RUNLIST_CHANNELS: + if (!fifo->chid) { + runl = nvkm_runl_get(fifo, *data, 0); + if (runl) { + *data = runl->chid->nr; + return 0; + } + } + return -EINVAL; default: - if (fifo->func->info) - return fifo->func->info(fifo, mthd, data); break; } + return -ENOSYS; } static int nvkm_fifo_oneinit(struct nvkm_engine *engine) { + struct nvkm_subdev *subdev = &engine->subdev; + struct nvkm_device *device = subdev->device; struct nvkm_fifo *fifo = nvkm_fifo(engine); - if (fifo->func->oneinit) - return fifo->func->oneinit(fifo); + struct nvkm_runl *runl; + struct nvkm_engn *engn; + int ret, nr, i; + + /* Initialise CHID/CGID allocator(s) on GPUs where they aren't per-runlist. */ + if (fifo->func->chid_nr) { + ret = fifo->func->chid_ctor(fifo, fifo->func->chid_nr(fifo)); + if (ret) + return ret; + } + + /* Create runqueues for each PBDMA. */ + if (fifo->func->runq_nr) { + for (nr = fifo->func->runq_nr(fifo), i = 0; i < nr; i++) { + if (!nvkm_runq_new(fifo, i)) + return -ENOMEM; + } + } + + /* Create runlists. */ + ret = fifo->func->runl_ctor(fifo); + if (ret) + return ret; + + nvkm_runl_foreach(runl, fifo) { + RUNL_DEBUG(runl, "chan:%06x", runl->chan); + nvkm_runl_foreach_engn(engn, runl) { + ENGN_DEBUG(engn, ""); + } + } + + /* Register interrupt handler. */ + if (fifo->func->intr) { + ret = nvkm_inth_add(&device->mc->intr, NVKM_INTR_SUBDEV, NVKM_INTR_PRIO_NORMAL, + subdev, fifo->func->intr, &subdev->inth); + if (ret) { + nvkm_error(subdev, "intr %d\n", ret); + return ret; + } + } + + /* Initialise non-stall intr handling. */ + if (fifo->func->nonstall_ctor) { + ret = fifo->func->nonstall_ctor(fifo); + if (ret) { + nvkm_error(subdev, "nonstall %d\n", ret); + } + } + + /* Allocate USERD + BAR1 polling area. 
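+	 *
+	 * USERD is the small per-channel area holding GP GET/PUT and
+	 * related submission state.  Classes that expose it via BAR1 get
+	 * one FIFO-wide buffer here, indexed by CHID; classes with
+	 * userd->bar < 0 instead take client-provided memory per-channel
+	 * (see nvkm_chan_new_()).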
*/ + if (fifo->func->chan.func->userd->bar == 1) { + struct nvkm_vmm *bar1 = nvkm_bar_bar1_vmm(device); + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, fifo->chid->nr * + fifo->func->chan.func->userd->size, 0, true, + &fifo->userd.mem); + if (ret) + return ret; + + ret = nvkm_vmm_get(bar1, 12, nvkm_memory_size(fifo->userd.mem), &fifo->userd.bar1); + if (ret) + return ret; + + ret = nvkm_memory_map(fifo->userd.mem, 0, bar1, fifo->userd.bar1, NULL, 0); + if (ret) + return ret; + } + return 0; } @@ -292,25 +318,28 @@ nvkm_fifo_preinit(struct nvkm_engine *engine) nvkm_mc_reset(engine->subdev.device, NVKM_ENGINE_FIFO, 0); } -static int -nvkm_fifo_init(struct nvkm_engine *engine) -{ - struct nvkm_fifo *fifo = nvkm_fifo(engine); - fifo->func->init(fifo); - return 0; -} - static void * nvkm_fifo_dtor(struct nvkm_engine *engine) { struct nvkm_fifo *fifo = nvkm_fifo(engine); - void *data = fifo; - if (fifo->func->dtor) - data = fifo->func->dtor(fifo); - nvkm_event_fini(&fifo->kevent); - nvkm_event_fini(&fifo->uevent); + struct nvkm_runl *runl, *runt; + struct nvkm_runq *runq, *rtmp; + + if (fifo->userd.bar1) + nvkm_vmm_put(nvkm_bar_bar1_vmm(engine->subdev.device), &fifo->userd.bar1); + nvkm_memory_unref(&fifo->userd.mem); + + list_for_each_entry_safe(runl, runt, &fifo->runls, head) + nvkm_runl_del(runl); + list_for_each_entry_safe(runq, rtmp, &fifo->runqs, head) + nvkm_runq_del(runq); + + nvkm_chid_unref(&fifo->cgid); + nvkm_chid_unref(&fifo->chid); + + nvkm_event_fini(&fifo->nonstall.event); mutex_destroy(&fifo->mutex); - return data; + return fifo; } static const struct nvkm_engine_func @@ -321,37 +350,40 @@ nvkm_fifo = { .info = nvkm_fifo_info, .init = nvkm_fifo_init, .fini = nvkm_fifo_fini, - .intr = nvkm_fifo_intr, .base.sclass = nvkm_fifo_class_get, }; int -nvkm_fifo_ctor(const struct nvkm_fifo_func *func, struct nvkm_device *device, - enum nvkm_subdev_type type, int inst, int nr, struct nvkm_fifo *fifo) +nvkm_fifo_new_(const struct nvkm_fifo_func *func, struct nvkm_device *device, + enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { + struct nvkm_fifo *fifo; int ret; + if (!(fifo = *pfifo = kzalloc(sizeof(*fifo), GFP_KERNEL))) + return -ENOMEM; + fifo->func = func; - INIT_LIST_HEAD(&fifo->chan); + INIT_LIST_HEAD(&fifo->runqs); + INIT_LIST_HEAD(&fifo->runls); + /*TODO: Needs to be >CTXSW_TIMEOUT, so RC can recover before this is hit. + * CTXSW_TIMEOUT HW default seems to differ between GPUs, so just a + * large number for now until we support changing it. + */ + fifo->timeout.chan_msec = 10000; spin_lock_init(&fifo->lock); mutex_init(&fifo->mutex); - if (WARN_ON(fifo->nr > NVKM_FIFO_CHID_NR)) - fifo->nr = NVKM_FIFO_CHID_NR; - else - fifo->nr = nr; - bitmap_clear(fifo->mask, 0, fifo->nr); - ret = nvkm_engine_ctor(&nvkm_fifo, device, type, inst, true, &fifo->engine); if (ret) return ret; - if (func->uevent_init) { - ret = nvkm_event_init(&nvkm_fifo_uevent_func, 1, 1, - &fifo->uevent); + if (func->nonstall) { + ret = nvkm_event_init(func->nonstall, &fifo->engine.subdev, 1, 1, + &fifo->nonstall.event); if (ret) return ret; } - return nvkm_event_init(&nvkm_fifo_kevent_func, 1, nr, &fifo->kevent); + return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.c new file mode 100644 index 000000000000..ea53fb3d5d06 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.c @@ -0,0 +1,252 @@ +/* + * Copyright 2021 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "cgrp.h" +#include "chan.h" +#include "chid.h" +#include "runl.h" +#include "priv.h" + +#include <core/gpuobj.h> +#include <subdev/mmu.h> + +static void +nvkm_cgrp_ectx_put(struct nvkm_cgrp *cgrp, struct nvkm_ectx **pectx) +{ + struct nvkm_ectx *ectx = *pectx; + + if (ectx) { + struct nvkm_engn *engn = ectx->engn; + + if (refcount_dec_and_test(&ectx->refs)) { + CGRP_TRACE(cgrp, "dtor ectx %d[%s]", engn->id, engn->engine->subdev.name); + nvkm_object_del(&ectx->object); + list_del(&ectx->head); + kfree(ectx); + } + + *pectx = NULL; + } +} + +static int +nvkm_cgrp_ectx_get(struct nvkm_cgrp *cgrp, struct nvkm_engn *engn, struct nvkm_ectx **pectx, + struct nvkm_chan *chan, struct nvkm_client *client) +{ + struct nvkm_engine *engine = engn->engine; + struct nvkm_oclass cclass = { + .client = client, + .engine = engine, + }; + struct nvkm_ectx *ectx; + int ret = 0; + + /* Look for an existing context for this engine in the channel group. */ + ectx = nvkm_list_find(ectx, &cgrp->ectxs, head, ectx->engn == engn); + if (ectx) { + refcount_inc(&ectx->refs); + *pectx = ectx; + return 0; + } + + /* Nope - create a fresh one. */ + CGRP_TRACE(cgrp, "ctor ectx %d[%s]", engn->id, engn->engine->subdev.name); + if (!(ectx = *pectx = kzalloc(sizeof(*ectx), GFP_KERNEL))) + return -ENOMEM; + + ectx->engn = engn; + refcount_set(&ectx->refs, 1); + refcount_set(&ectx->uses, 0); + list_add_tail(&ectx->head, &cgrp->ectxs); + + /* Allocate the HW structures. 
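+	 *
+	 * An engine may provide a FIFO-specific context constructor
+	 * (fifo.cclass) or a generic one (cclass); either way, the object
+	 * created here is shared by the entire channel group, with the
+	 * refcount taken above keeping it alive until its last user goes.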
*/ + if (engine->func->fifo.cclass) + ret = engine->func->fifo.cclass(chan, &cclass, &ectx->object); + else if (engine->func->cclass) + ret = nvkm_object_new_(engine->func->cclass, &cclass, NULL, 0, &ectx->object); + + if (ret) + nvkm_cgrp_ectx_put(cgrp, pectx); + + return ret; +} + +void +nvkm_cgrp_vctx_put(struct nvkm_cgrp *cgrp, struct nvkm_vctx **pvctx) +{ + struct nvkm_vctx *vctx = *pvctx; + + if (vctx) { + struct nvkm_engn *engn = vctx->ectx->engn; + + if (refcount_dec_and_test(&vctx->refs)) { + CGRP_TRACE(cgrp, "dtor vctx %d[%s]", engn->id, engn->engine->subdev.name); + nvkm_vmm_put(vctx->vmm, &vctx->vma); + nvkm_gpuobj_del(&vctx->inst); + + nvkm_cgrp_ectx_put(cgrp, &vctx->ectx); + if (vctx->vmm) { + atomic_dec(&vctx->vmm->engref[engn->engine->subdev.type]); + nvkm_vmm_unref(&vctx->vmm); + } + list_del(&vctx->head); + kfree(vctx); + } + + *pvctx = NULL; + } +} + +int +nvkm_cgrp_vctx_get(struct nvkm_cgrp *cgrp, struct nvkm_engn *engn, struct nvkm_chan *chan, + struct nvkm_vctx **pvctx, struct nvkm_client *client) +{ + struct nvkm_ectx *ectx; + struct nvkm_vctx *vctx; + int ret; + + /* Look for an existing sub-context for this engine+VEID in the channel group. */ + vctx = nvkm_list_find(vctx, &cgrp->vctxs, head, + vctx->ectx->engn == engn && vctx->vmm == chan->vmm); + if (vctx) { + refcount_inc(&vctx->refs); + *pvctx = vctx; + return 0; + } + + /* Nope - create a fresh one. But, context first. */ + ret = nvkm_cgrp_ectx_get(cgrp, engn, &ectx, chan, client); + if (ret) { + CGRP_ERROR(cgrp, "ectx %d[%s]: %d", engn->id, engn->engine->subdev.name, ret); + return ret; + } + + /* Now, create the sub-context. */ + CGRP_TRACE(cgrp, "ctor vctx %d[%s]", engn->id, engn->engine->subdev.name); + if (!(vctx = *pvctx = kzalloc(sizeof(*vctx), GFP_KERNEL))) { + nvkm_cgrp_ectx_put(cgrp, &ectx); + return -ENOMEM; + } + + vctx->ectx = ectx; + vctx->vmm = nvkm_vmm_ref(chan->vmm); + refcount_set(&vctx->refs, 1); + list_add_tail(&vctx->head, &cgrp->vctxs); + + /* MMU on some GPUs needs to know engine usage for TLB invalidation. */ + if (vctx->vmm) + atomic_inc(&vctx->vmm->engref[engn->engine->subdev.type]); + + /* Allocate the HW structures. 
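+	 *
+	 * For engines with a bind hook, the group's shared ectx object is
+	 * bound into instance memory owned by this sub-context; engines
+	 * providing a ctor hook can then set up any additional per-VEID
+	 * state they need.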
*/ + if (engn->func->bind) { + ret = nvkm_object_bind(vctx->ectx->object, NULL, 0, &vctx->inst); + if (ret == 0 && engn->func->ctor) + ret = engn->func->ctor(engn, vctx); + } + + if (ret) + nvkm_cgrp_vctx_put(cgrp, pvctx); + + return ret; +} + +static void +nvkm_cgrp_del(struct kref *kref) +{ + struct nvkm_cgrp *cgrp = container_of(kref, typeof(*cgrp), kref); + struct nvkm_runl *runl = cgrp->runl; + + if (runl->cgid) + nvkm_chid_put(runl->cgid, cgrp->id, &cgrp->lock); + + mutex_destroy(&cgrp->mutex); + nvkm_vmm_unref(&cgrp->vmm); + kfree(cgrp); +} + +void +nvkm_cgrp_unref(struct nvkm_cgrp **pcgrp) +{ + struct nvkm_cgrp *cgrp = *pcgrp; + + if (!cgrp) + return; + + kref_put(&cgrp->kref, nvkm_cgrp_del); + *pcgrp = NULL; +} + +struct nvkm_cgrp * +nvkm_cgrp_ref(struct nvkm_cgrp *cgrp) +{ + if (cgrp) + kref_get(&cgrp->kref); + + return cgrp; +} + +void +nvkm_cgrp_put(struct nvkm_cgrp **pcgrp, unsigned long irqflags) +{ + struct nvkm_cgrp *cgrp = *pcgrp; + + if (!cgrp) + return; + + *pcgrp = NULL; + spin_unlock_irqrestore(&cgrp->lock, irqflags); +} + +int +nvkm_cgrp_new(struct nvkm_runl *runl, const char *name, struct nvkm_vmm *vmm, bool hw, + struct nvkm_cgrp **pcgrp) +{ + struct nvkm_cgrp *cgrp; + + if (!(cgrp = *pcgrp = kmalloc(sizeof(*cgrp), GFP_KERNEL))) + return -ENOMEM; + + cgrp->func = runl->fifo->func->cgrp.func; + strscpy(cgrp->name, name, sizeof(cgrp->name)); + cgrp->runl = runl; + cgrp->vmm = nvkm_vmm_ref(vmm); + cgrp->hw = hw; + cgrp->id = -1; + kref_init(&cgrp->kref); + INIT_LIST_HEAD(&cgrp->chans); + cgrp->chan_nr = 0; + spin_lock_init(&cgrp->lock); + INIT_LIST_HEAD(&cgrp->ectxs); + INIT_LIST_HEAD(&cgrp->vctxs); + mutex_init(&cgrp->mutex); + atomic_set(&cgrp->rc, NVKM_CGRP_RC_NONE); + + if (runl->cgid) { + cgrp->id = nvkm_chid_get(runl->cgid, cgrp); + if (cgrp->id < 0) { + RUNL_ERROR(runl, "!cgids"); + nvkm_cgrp_unref(pcgrp); + return -ENOSPC; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h index d0ac60b06720..5f6abd59a6ff 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h @@ -1,11 +1,75 @@ -#ifndef __NVKM_FIFO_CGRP_H__ -#define __NVKM_FIFO_CGRP_H__ -#include "priv.h" +/* SPDX-License-Identifier: MIT */ +#ifndef __NVKM_CGRP_H__ +#define __NVKM_CGRP_H__ +#include <core/os.h> +struct nvkm_chan; +struct nvkm_client; + +struct nvkm_vctx { + struct nvkm_ectx *ectx; + struct nvkm_vmm *vmm; + refcount_t refs; + + struct nvkm_gpuobj *inst; + struct nvkm_vma *vma; -struct nvkm_fifo_cgrp { - int id; struct list_head head; - struct list_head chan; +}; + +struct nvkm_ectx { + struct nvkm_engn *engn; + refcount_t refs; + refcount_t uses; + + struct nvkm_object *object; + + struct list_head head; +}; + +struct nvkm_cgrp { + const struct nvkm_cgrp_func { + void (*preempt)(struct nvkm_cgrp *); + } *func; + char name[64]; + struct nvkm_runl *runl; + struct nvkm_vmm *vmm; + bool hw; + int id; + struct kref kref; + + struct list_head chans; int chan_nr; + + spinlock_t lock; /* protects irq handler channel (group) lookup */ + + struct list_head ectxs; + struct list_head vctxs; + struct mutex mutex; + +#define NVKM_CGRP_RC_NONE 0 +#define NVKM_CGRP_RC_PENDING 1 +#define NVKM_CGRP_RC_RUNNING 2 + atomic_t rc; + + struct list_head head; }; + +int nvkm_cgrp_new(struct nvkm_runl *, const char *name, struct nvkm_vmm *, bool hw, + struct nvkm_cgrp **); +struct nvkm_cgrp *nvkm_cgrp_ref(struct nvkm_cgrp *); +void nvkm_cgrp_unref(struct nvkm_cgrp **); +int 
nvkm_cgrp_vctx_get(struct nvkm_cgrp *, struct nvkm_engn *, struct nvkm_chan *, + struct nvkm_vctx **, struct nvkm_client *); +void nvkm_cgrp_vctx_put(struct nvkm_cgrp *, struct nvkm_vctx **); + +void nvkm_cgrp_put(struct nvkm_cgrp **, unsigned long irqflags); + +#define nvkm_cgrp_foreach_chan(chan,cgrp) list_for_each_entry((chan), &(cgrp)->chans, head) +#define nvkm_cgrp_foreach_chan_safe(chan,ctmp,cgrp) \ + list_for_each_entry_safe((chan), (ctmp), &(cgrp)->chans, head) + +#define CGRP_PRCLI(c,l,p,f,a...) RUNL_PRINT((c)->runl, l, p, "%04x:[%s]"f, (c)->id, (c)->name, ##a) +#define CGRP_PRINT(c,l,p,f,a...) RUNL_PRINT((c)->runl, l, p, "%04x:"f, (c)->id, ##a) +#define CGRP_ERROR(c,f,a...) CGRP_PRCLI((c), ERROR, err, " "f"\n", ##a) +#define CGRP_TRACE(c,f,a...) CGRP_PRINT((c), TRACE, info, " "f"\n", ##a) #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c index 2e7f32cebf2a..b7c9d6115bce 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c @@ -22,285 +22,265 @@ * Authors: Ben Skeggs */ #include "chan.h" +#include "chid.h" +#include "cgrp.h" +#include "chid.h" +#include "runl.h" +#include "priv.h" -#include <core/client.h> -#include <core/gpuobj.h> -#include <core/oproxy.h> +#include <core/ramht.h> #include <subdev/mmu.h> #include <engine/dma.h> -struct nvkm_fifo_chan_object { - struct nvkm_oproxy oproxy; - struct nvkm_fifo_chan *chan; - int hash; +#include <nvif/if0020.h> + +const struct nvkm_event_func +nvkm_chan_event = { }; -static struct nvkm_fifo_engn * -nvkm_fifo_chan_engn(struct nvkm_fifo_chan *chan, struct nvkm_engine *engine) +void +nvkm_chan_cctx_bind(struct nvkm_chan *chan, struct nvkm_engn *engn, struct nvkm_cctx *cctx) { - int engi = chan->fifo->func->engine_id(chan->fifo, engine); - if (engi >= 0) - return &chan->engn[engi]; - return NULL; + struct nvkm_cgrp *cgrp = chan->cgrp; + struct nvkm_runl *runl = cgrp->runl; + struct nvkm_engine *engine = engn->engine; + + if (!engn->func->bind) + return; + + CHAN_TRACE(chan, "%sbind cctx %d[%s]", cctx ? "" : "un", engn->id, engine->subdev.name); + + /* Prevent any channel in channel group from being rescheduled, kick them + * off host and any engine(s) they're loaded on. + */ + if (cgrp->hw) + nvkm_runl_block(runl); + else + nvkm_chan_block(chan); + nvkm_chan_preempt(chan, true); + + /* Update context pointer. */ + engn->func->bind(engn, cctx, chan); + + /* Resume normal operation. 
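+	 *
+	 * This mirrors the blocking above: HW channel groups are released
+	 * at the runlist, API-only groups at the individual channel.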
*/ + if (cgrp->hw) + nvkm_runl_allow(runl); + else + nvkm_chan_allow(chan); } -static int -nvkm_fifo_chan_child_fini(struct nvkm_oproxy *base, bool suspend) +void +nvkm_chan_cctx_put(struct nvkm_chan *chan, struct nvkm_cctx **pcctx) { - struct nvkm_fifo_chan_object *object = - container_of(base, typeof(*object), oproxy); - struct nvkm_engine *engine = object->oproxy.object->engine; - struct nvkm_fifo_chan *chan = object->chan; - struct nvkm_fifo_engn *engn = nvkm_fifo_chan_engn(chan, engine); - const char *name = engine->subdev.name; - int ret = 0; - - if (--engn->usecount) - return 0; + struct nvkm_cctx *cctx = *pcctx; - if (chan->func->engine_fini) { - ret = chan->func->engine_fini(chan, engine, suspend); - if (ret) { - nvif_error(&chan->object, - "detach %s failed, %d\n", name, ret); - return ret; + if (cctx) { + struct nvkm_engn *engn = cctx->vctx->ectx->engn; + + if (refcount_dec_and_mutex_lock(&cctx->refs, &chan->cgrp->mutex)) { + CHAN_TRACE(chan, "dtor cctx %d[%s]", engn->id, engn->engine->subdev.name); + nvkm_cgrp_vctx_put(chan->cgrp, &cctx->vctx); + list_del(&cctx->head); + kfree(cctx); + mutex_unlock(&chan->cgrp->mutex); } - } - if (engn->object) { - ret = nvkm_object_fini(engn->object, suspend); - if (ret && suspend) - return ret; + *pcctx = NULL; } - - nvif_trace(&chan->object, "detached %s\n", name); - return ret; } -static int -nvkm_fifo_chan_child_init(struct nvkm_oproxy *base) +int +nvkm_chan_cctx_get(struct nvkm_chan *chan, struct nvkm_engn *engn, struct nvkm_cctx **pcctx, + struct nvkm_client *client) { - struct nvkm_fifo_chan_object *object = - container_of(base, typeof(*object), oproxy); - struct nvkm_engine *engine = object->oproxy.object->engine; - struct nvkm_fifo_chan *chan = object->chan; - struct nvkm_fifo_engn *engn = nvkm_fifo_chan_engn(chan, engine); - const char *name = engine->subdev.name; + struct nvkm_cgrp *cgrp = chan->cgrp; + struct nvkm_vctx *vctx; + struct nvkm_cctx *cctx; int ret; - if (engn->usecount++) + /* Look for an existing channel context for this engine+VEID. */ + mutex_lock(&cgrp->mutex); + cctx = nvkm_list_find(cctx, &chan->cctxs, head, + cctx->vctx->ectx->engn == engn && cctx->vctx->vmm == chan->vmm); + if (cctx) { + refcount_inc(&cctx->refs); + *pcctx = cctx; + mutex_unlock(&chan->cgrp->mutex); return 0; + } - if (engn->object) { - ret = nvkm_object_init(engn->object); - if (ret) - return ret; + /* Nope - create a fresh one. But, sub-context first. */ + ret = nvkm_cgrp_vctx_get(cgrp, engn, chan, &vctx, client); + if (ret) { + CHAN_ERROR(chan, "vctx %d[%s]: %d", engn->id, engn->engine->subdev.name, ret); + goto done; } - if (chan->func->engine_init) { - ret = chan->func->engine_init(chan, engine); - if (ret) { - nvif_error(&chan->object, - "attach %s failed, %d\n", name, ret); - return ret; - } + /* Now, create the channel context - to track engine binding. 
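+	 *
+	 * A typical caller (e.g. a user channel object) would pair the
+	 * getter with bind/put, roughly:
+	 *
+	 *	ret = nvkm_chan_cctx_get(chan, engn, &cctx, client);
+	 *	if (ret == 0) {
+	 *		nvkm_chan_cctx_bind(chan, engn, cctx);
+	 *		...
+	 *		nvkm_chan_cctx_bind(chan, engn, NULL);   (unbinds)
+	 *		nvkm_chan_cctx_put(chan, &cctx);
+	 *	}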
*/ + CHAN_TRACE(chan, "ctor cctx %d[%s]", engn->id, engn->engine->subdev.name); + if (!(cctx = *pcctx = kzalloc(sizeof(*cctx), GFP_KERNEL))) { + nvkm_cgrp_vctx_put(cgrp, &vctx); + ret = -ENOMEM; + goto done; } - nvif_trace(&chan->object, "attached %s\n", name); - return 0; + cctx->vctx = vctx; + refcount_set(&cctx->refs, 1); + refcount_set(&cctx->uses, 0); + list_add_tail(&cctx->head, &chan->cctxs); +done: + mutex_unlock(&cgrp->mutex); + return ret; } -static void -nvkm_fifo_chan_child_del(struct nvkm_oproxy *base) +int +nvkm_chan_preempt_locked(struct nvkm_chan *chan, bool wait) { - struct nvkm_fifo_chan_object *object = - container_of(base, typeof(*object), oproxy); - struct nvkm_engine *engine = object->oproxy.base.engine; - struct nvkm_fifo_chan *chan = object->chan; - struct nvkm_fifo_engn *engn = nvkm_fifo_chan_engn(chan, engine); - - if (chan->func->object_dtor) - chan->func->object_dtor(chan, object->hash); - - if (!--engn->refcount) { - if (chan->func->engine_dtor) - chan->func->engine_dtor(chan, engine); - nvkm_object_del(&engn->object); - if (chan->vmm) - atomic_dec(&chan->vmm->engref[engine->subdev.type]); - } -} + struct nvkm_runl *runl = chan->cgrp->runl; -static const struct nvkm_oproxy_func -nvkm_fifo_chan_child_func = { - .dtor[0] = nvkm_fifo_chan_child_del, - .init[0] = nvkm_fifo_chan_child_init, - .fini[0] = nvkm_fifo_chan_child_fini, -}; + CHAN_TRACE(chan, "preempt"); + chan->func->preempt(chan); + if (!wait) + return 0; + + return nvkm_runl_preempt_wait(runl); +} -static int -nvkm_fifo_chan_child_new(const struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +int +nvkm_chan_preempt(struct nvkm_chan *chan, bool wait) { - struct nvkm_engine *engine = oclass->engine; - struct nvkm_fifo_chan *chan = nvkm_fifo_chan(oclass->parent); - struct nvkm_fifo_engn *engn = nvkm_fifo_chan_engn(chan, engine); - struct nvkm_fifo_chan_object *object; - int ret = 0; + int ret; - if (!(object = kzalloc(sizeof(*object), GFP_KERNEL))) - return -ENOMEM; - nvkm_oproxy_ctor(&nvkm_fifo_chan_child_func, oclass, &object->oproxy); - object->chan = chan; - *pobject = &object->oproxy.base; - - if (!engn->refcount++) { - struct nvkm_oclass cclass = { - .client = oclass->client, - .engine = oclass->engine, - }; - - if (chan->vmm) - atomic_inc(&chan->vmm->engref[engine->subdev.type]); - - if (engine->func->fifo.cclass) { - ret = engine->func->fifo.cclass(chan, &cclass, - &engn->object); - } else - if (engine->func->cclass) { - ret = nvkm_object_new_(engine->func->cclass, &cclass, - NULL, 0, &engn->object); - } - if (ret) - return ret; + if (!chan->func->preempt) + return 0; - if (chan->func->engine_ctor) { - ret = chan->func->engine_ctor(chan, oclass->engine, - engn->object); - if (ret) - return ret; - } - } + mutex_lock(&chan->cgrp->runl->mutex); + ret = nvkm_chan_preempt_locked(chan, wait); + mutex_unlock(&chan->cgrp->runl->mutex); + return ret; +} - ret = oclass->base.ctor(&(const struct nvkm_oclass) { - .base = oclass->base, - .engn = oclass->engn, - .handle = oclass->handle, - .object = oclass->object, - .client = oclass->client, - .parent = engn->object ? 
- engn->object : - oclass->parent, - .engine = engine, - }, data, size, &object->oproxy.object); - if (ret) - return ret; +void +nvkm_chan_remove_locked(struct nvkm_chan *chan) +{ + struct nvkm_cgrp *cgrp = chan->cgrp; + struct nvkm_runl *runl = cgrp->runl; - if (chan->func->object_ctor) { - object->hash = - chan->func->object_ctor(chan, object->oproxy.object); - if (object->hash < 0) - return object->hash; - } + if (list_empty(&chan->head)) + return; - return 0; + CHAN_TRACE(chan, "remove"); + if (!--cgrp->chan_nr) { + runl->cgrp_nr--; + list_del(&cgrp->head); + } + runl->chan_nr--; + list_del_init(&chan->head); + atomic_set(&runl->changed, 1); } -static int -nvkm_fifo_chan_child_get(struct nvkm_object *object, int index, - struct nvkm_oclass *oclass) +void +nvkm_chan_remove(struct nvkm_chan *chan, bool preempt) { - struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object); - struct nvkm_fifo *fifo = chan->fifo; - struct nvkm_engine *engine; - u32 engm = chan->engm; - int engi, ret, c; - - for (; c = 0, engi = __ffs(engm), engm; engm &= ~(1ULL << engi)) { - if (!(engine = fifo->func->id_engine(fifo, engi))) - continue; - oclass->engine = engine; - oclass->base.oclass = 0; - - if (engine->func->fifo.sclass) { - ret = engine->func->fifo.sclass(oclass, index); - if (oclass->base.oclass) { - if (!oclass->base.ctor) - oclass->base.ctor = nvkm_object_new; - oclass->ctor = nvkm_fifo_chan_child_new; - return 0; - } + struct nvkm_runl *runl = chan->cgrp->runl; + + mutex_lock(&runl->mutex); + if (preempt && chan->func->preempt) + nvkm_chan_preempt_locked(chan, true); + nvkm_chan_remove_locked(chan); + nvkm_runl_update_locked(runl, true); + mutex_unlock(&runl->mutex); +} - index -= ret; - continue; - } +void +nvkm_chan_insert(struct nvkm_chan *chan) +{ + struct nvkm_cgrp *cgrp = chan->cgrp; + struct nvkm_runl *runl = cgrp->runl; - while (engine->func->sclass[c].oclass) { - if (c++ == index) { - oclass->base = engine->func->sclass[index]; - if (!oclass->base.ctor) - oclass->base.ctor = nvkm_object_new; - oclass->ctor = nvkm_fifo_chan_child_new; - return 0; - } - } - index -= c; + mutex_lock(&runl->mutex); + if (WARN_ON(!list_empty(&chan->head))) { + mutex_unlock(&runl->mutex); + return; } - return -EINVAL; + CHAN_TRACE(chan, "insert"); + list_add_tail(&chan->head, &cgrp->chans); + runl->chan_nr++; + if (!cgrp->chan_nr++) { + list_add_tail(&cgrp->head, &cgrp->runl->cgrps); + runl->cgrp_nr++; + } + atomic_set(&runl->changed, 1); + nvkm_runl_update_locked(runl, true); + mutex_unlock(&runl->mutex); } -static int -nvkm_fifo_chan_ntfy(struct nvkm_object *object, u32 type, - struct nvkm_event **pevent) +static void +nvkm_chan_block_locked(struct nvkm_chan *chan) { - struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object); - if (chan->func->ntfy) - return chan->func->ntfy(chan, type, pevent); - return -ENODEV; + CHAN_TRACE(chan, "block %d", atomic_read(&chan->blocked)); + if (atomic_inc_return(&chan->blocked) == 1) + chan->func->stop(chan); } -static int -nvkm_fifo_chan_map(struct nvkm_object *object, void *argv, u32 argc, - enum nvkm_object_map *type, u64 *addr, u64 *size) +void +nvkm_chan_error(struct nvkm_chan *chan, bool preempt) { - struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object); - *type = NVKM_OBJECT_MAP_IO; - *addr = chan->addr; - *size = chan->size; - return 0; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + if (atomic_inc_return(&chan->errored) == 1) { + CHAN_ERROR(chan, "errored - disabling channel"); + nvkm_chan_block_locked(chan); + if (preempt) + chan->func->preempt(chan); + 
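+		/* Wake anyone watching this CHID's event (e.g. a usermode
+		 * channel object waiting on a killed notification).
+		 */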
nvkm_event_ntfy(&chan->cgrp->runl->chid->event, chan->id, NVKM_CHAN_EVENT_ERRORED); + } + spin_unlock_irqrestore(&chan->lock, flags); } -static int -nvkm_fifo_chan_fini(struct nvkm_object *object, bool suspend) +void +nvkm_chan_block(struct nvkm_chan *chan) { - struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object); - chan->func->fini(chan); - return 0; + spin_lock_irq(&chan->lock); + nvkm_chan_block_locked(chan); + spin_unlock_irq(&chan->lock); } -static int -nvkm_fifo_chan_init(struct nvkm_object *object) +void +nvkm_chan_allow(struct nvkm_chan *chan) { - struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object); - chan->func->init(chan); - return 0; + spin_lock_irq(&chan->lock); + CHAN_TRACE(chan, "allow %d", atomic_read(&chan->blocked)); + if (atomic_dec_and_test(&chan->blocked)) + chan->func->start(chan); + spin_unlock_irq(&chan->lock); } -static void * -nvkm_fifo_chan_dtor(struct nvkm_object *object) +void +nvkm_chan_del(struct nvkm_chan **pchan) { - struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object); - struct nvkm_fifo *fifo = chan->fifo; - void *data = chan->func->dtor(chan); - unsigned long flags; + struct nvkm_chan *chan = *pchan; + + if (!chan) + return; + + if (chan->func->ramfc->clear) + chan->func->ramfc->clear(chan); - spin_lock_irqsave(&fifo->lock, flags); - if (!list_empty(&chan->head)) { - __clear_bit(chan->chid, fifo->mask); - list_del(&chan->head); + nvkm_ramht_del(&chan->ramht); + nvkm_gpuobj_del(&chan->pgd); + nvkm_gpuobj_del(&chan->eng); + nvkm_gpuobj_del(&chan->cache); + nvkm_gpuobj_del(&chan->ramfc); + + nvkm_memory_unref(&chan->userd.mem); + + if (chan->cgrp) { + nvkm_chid_put(chan->cgrp->runl->chid, chan->id, &chan->cgrp->lock); + nvkm_cgrp_unref(&chan->cgrp); } - spin_unlock_irqrestore(&fifo->lock, flags); if (chan->vmm) { nvkm_vmm_part(chan->vmm, chan->inst->memory); @@ -309,85 +289,192 @@ nvkm_fifo_chan_dtor(struct nvkm_object *object) nvkm_gpuobj_del(&chan->push); nvkm_gpuobj_del(&chan->inst); - return data; + kfree(chan); } -static const struct nvkm_object_func -nvkm_fifo_chan_func = { - .dtor = nvkm_fifo_chan_dtor, - .init = nvkm_fifo_chan_init, - .fini = nvkm_fifo_chan_fini, - .ntfy = nvkm_fifo_chan_ntfy, - .map = nvkm_fifo_chan_map, - .sclass = nvkm_fifo_chan_child_get, -}; +void +nvkm_chan_put(struct nvkm_chan **pchan, unsigned long irqflags) +{ + struct nvkm_chan *chan = *pchan; + + if (!chan) + return; + + *pchan = NULL; + spin_unlock_irqrestore(&chan->cgrp->lock, irqflags); +} + +struct nvkm_chan * +nvkm_chan_get_inst(struct nvkm_engine *engine, u64 inst, unsigned long *pirqflags) +{ + struct nvkm_fifo *fifo = engine->subdev.device->fifo; + struct nvkm_runl *runl; + struct nvkm_engn *engn; + struct nvkm_chan *chan; + + nvkm_runl_foreach(runl, fifo) { + nvkm_runl_foreach_engn(engn, runl) { + if (engine == &fifo->engine || engn->engine == engine) { + chan = nvkm_runl_chan_get_inst(runl, inst, pirqflags); + if (chan || engn->engine == engine) + return chan; + } + } + } + + return NULL; +} + +struct nvkm_chan * +nvkm_chan_get_chid(struct nvkm_engine *engine, int id, unsigned long *pirqflags) +{ + struct nvkm_fifo *fifo = engine->subdev.device->fifo; + struct nvkm_runl *runl; + struct nvkm_engn *engn; + + nvkm_runl_foreach(runl, fifo) { + nvkm_runl_foreach_engn(engn, runl) { + if (fifo->chid || engn->engine == engine) + return nvkm_runl_chan_get_chid(runl, id, pirqflags); + } + } + + return NULL; +} int -nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func, - struct nvkm_fifo *fifo, u32 size, u32 align, bool zero, - u64 hvmm, u64 push, u32 engm, int bar, 
u32 base, - u32 user, const struct nvkm_oclass *oclass, - struct nvkm_fifo_chan *chan) +nvkm_chan_new_(const struct nvkm_chan_func *func, struct nvkm_runl *runl, int runq, + struct nvkm_cgrp *cgrp, const char *name, bool priv, u32 devm, struct nvkm_vmm *vmm, + struct nvkm_dmaobj *dmaobj, u64 offset, u64 length, + struct nvkm_memory *userd, u64 ouserd, struct nvkm_chan **pchan) { - struct nvkm_client *client = oclass->client; + struct nvkm_fifo *fifo = runl->fifo; struct nvkm_device *device = fifo->engine.subdev.device; - struct nvkm_dmaobj *dmaobj; - unsigned long flags; + struct nvkm_chan *chan; int ret; - nvkm_object_ctor(&nvkm_fifo_chan_func, oclass, &chan->object); + /* Validate arguments against class requirements. */ + if ((runq && runq >= runl->func->runqs) || + (!func->inst->vmm != !vmm) || + ((func->userd->bar < 0) == !userd) || + (!func->ramfc->ctxdma != !dmaobj) || + ((func->ramfc->devm < devm) && devm != BIT(0)) || + (!func->ramfc->priv && priv)) { + RUNL_DEBUG(runl, "args runq:%d:%d vmm:%d:%p userd:%d:%p " + "push:%d:%p devm:%08x:%08x priv:%d:%d", + runl->func->runqs, runq, func->inst->vmm, vmm, + func->userd->bar < 0, userd, func->ramfc->ctxdma, dmaobj, + func->ramfc->devm, devm, func->ramfc->priv, priv); + return -EINVAL; + } + + if (!(chan = *pchan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + chan->func = func; - chan->fifo = fifo; - chan->engm = engm; + strscpy(chan->name, name, sizeof(chan->name)); + chan->runq = runq; + chan->id = -1; + spin_lock_init(&chan->lock); + atomic_set(&chan->blocked, 1); + atomic_set(&chan->errored, 0); + INIT_LIST_HEAD(&chan->cctxs); INIT_LIST_HEAD(&chan->head); - /* instance memory */ - ret = nvkm_gpuobj_new(device, size, align, zero, NULL, &chan->inst); - if (ret) - return ret; + /* Join channel group. + * + * GK110 and newer support channel groups (aka TSGs), where individual channels + * share a timeslice, and, engine context(s). + * + * As such, engine contexts are tracked in nvkm_cgrp and we need them even when + * channels aren't in an API channel group, and on HW that doesn't support TSGs. + */ + if (!cgrp) { + ret = nvkm_cgrp_new(runl, chan->name, vmm, fifo->func->cgrp.force, &chan->cgrp); + if (ret) { + RUNL_DEBUG(runl, "cgrp %d", ret); + return ret; + } - /* allocate push buffer ctxdma instance */ - if (push) { - dmaobj = nvkm_dmaobj_search(client, push); - if (IS_ERR(dmaobj)) - return PTR_ERR(dmaobj); + cgrp = chan->cgrp; + } else { + if (cgrp->runl != runl || cgrp->vmm != vmm) { + RUNL_DEBUG(runl, "cgrp %d %d", cgrp->runl != runl, cgrp->vmm != vmm); + return -EINVAL; + } - ret = nvkm_object_bind(&dmaobj->object, chan->inst, -16, - &chan->push); - if (ret) - return ret; + chan->cgrp = nvkm_cgrp_ref(cgrp); } - /* channel address space */ - if (hvmm) { - struct nvkm_vmm *vmm = nvkm_uvmm_search(client, hvmm); - if (IS_ERR(vmm)) - return PTR_ERR(vmm); + /* Allocate instance block. */ + ret = nvkm_gpuobj_new(device, func->inst->size, 0x1000, func->inst->zero, NULL, + &chan->inst); + if (ret) { + RUNL_DEBUG(runl, "inst %d", ret); + return ret; + } - if (vmm->mmu != device->mmu) + /* Initialise virtual address-space. 
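+	 *
+	 * VMM-capable classes require the channel's address space to come
+	 * from the same MMU as the device; joining it associates the
+	 * channel's instance block with that address space.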
*/ + if (func->inst->vmm) { + if (WARN_ON(vmm->mmu != device->mmu)) return -EINVAL; ret = nvkm_vmm_join(vmm, chan->inst->memory); - if (ret) + if (ret) { + RUNL_DEBUG(runl, "vmm %d", ret); return ret; + } chan->vmm = nvkm_vmm_ref(vmm); } - /* allocate channel id */ - spin_lock_irqsave(&fifo->lock, flags); - chan->chid = find_first_zero_bit(fifo->mask, NVKM_FIFO_CHID_NR); - if (chan->chid >= NVKM_FIFO_CHID_NR) { - spin_unlock_irqrestore(&fifo->lock, flags); + /* Allocate HW ctxdma for push buffer. */ + if (func->ramfc->ctxdma) { + ret = nvkm_object_bind(&dmaobj->object, chan->inst, -16, &chan->push); + if (ret) { + RUNL_DEBUG(runl, "bind %d", ret); + return ret; + } + } + + /* Allocate channel ID. */ + chan->id = nvkm_chid_get(runl->chid, chan); + if (chan->id < 0) { + RUNL_ERROR(runl, "!chids"); return -ENOSPC; } - list_add(&chan->head, &fifo->chan); - __set_bit(chan->chid, fifo->mask); - spin_unlock_irqrestore(&fifo->lock, flags); - - /* determine address of this channel's user registers */ - chan->addr = device->func->resource_addr(device, bar) + - base + user * chan->chid; - chan->size = user; + + if (cgrp->id < 0) + cgrp->id = chan->id; + + /* Initialise USERD. */ + if (func->userd->bar < 0) { + if (ouserd + chan->func->userd->size >= nvkm_memory_size(userd)) { + RUNL_DEBUG(runl, "ouserd %llx", ouserd); + return -EINVAL; + } + + ret = nvkm_memory_kmap(userd, &chan->userd.mem); + if (ret) { + RUNL_DEBUG(runl, "userd %d", ret); + return ret; + } + + chan->userd.base = ouserd; + } else { + chan->userd.mem = nvkm_memory_ref(fifo->userd.mem); + chan->userd.base = chan->id * chan->func->userd->size; + } + + if (chan->func->userd->clear) + chan->func->userd->clear(chan); + + /* Initialise RAMFC. */ + ret = chan->func->ramfc->write(chan, offset, length, devm, priv); + if (ret) { + RUNL_DEBUG(runl, "ramfc %d", ret); + return ret; + } + return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h index e53504354841..85b94f699128 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h @@ -1,35 +1,78 @@ /* SPDX-License-Identifier: MIT */ -#ifndef __NVKM_FIFO_CHAN_H__ -#define __NVKM_FIFO_CHAN_H__ -#define nvkm_fifo_chan(p) container_of((p), struct nvkm_fifo_chan, object) -#include "priv.h" - -struct nvkm_fifo_chan_func { - void *(*dtor)(struct nvkm_fifo_chan *); - void (*init)(struct nvkm_fifo_chan *); - void (*fini)(struct nvkm_fifo_chan *); - int (*ntfy)(struct nvkm_fifo_chan *, u32 type, struct nvkm_event **); - int (*engine_ctor)(struct nvkm_fifo_chan *, struct nvkm_engine *, - struct nvkm_object *); - void (*engine_dtor)(struct nvkm_fifo_chan *, struct nvkm_engine *); - int (*engine_init)(struct nvkm_fifo_chan *, struct nvkm_engine *); - int (*engine_fini)(struct nvkm_fifo_chan *, struct nvkm_engine *, - bool suspend); - int (*object_ctor)(struct nvkm_fifo_chan *, struct nvkm_object *); - void (*object_dtor)(struct nvkm_fifo_chan *, int); - u32 (*submit_token)(struct nvkm_fifo_chan *); +#ifndef __NVKM_CHAN_H__ +#define __NVKM_CHAN_H__ +#include <engine/fifo.h> +struct nvkm_dmaobj; +struct nvkm_engn; +struct nvkm_runl; + +extern const struct nvkm_event_func nvkm_chan_event; + +struct nvkm_cctx { + struct nvkm_vctx *vctx; + refcount_t refs; + refcount_t uses; + + struct list_head head; }; -int nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *, struct nvkm_fifo *, - u32 size, u32 align, bool zero, u64 vm, u64 push, - u32 engm, int bar, u32 base, u32 user, - const struct 
nvkm_oclass *, struct nvkm_fifo_chan *); +struct nvkm_chan_func { + const struct nvkm_chan_func_inst { + u32 size; + bool zero; + bool vmm; + } *inst; -struct nvkm_fifo_chan_oclass { - int (*ctor)(struct nvkm_fifo *, const struct nvkm_oclass *, - void *data, u32 size, struct nvkm_object **); - struct nvkm_sclass base; + const struct nvkm_chan_func_userd { + int bar; + u32 base; + u32 size; + void (*clear)(struct nvkm_chan *); + } *userd; + + const struct nvkm_chan_func_ramfc { + const struct nvkm_ramfc_layout { + unsigned bits:6; + unsigned ctxs:5; + unsigned ctxp:8; + unsigned regs:5; + unsigned regp; + } *layout; + int (*write)(struct nvkm_chan *, u64 offset, u64 length, u32 devm, bool priv); + void (*clear)(struct nvkm_chan *); + bool ctxdma; + u32 devm; + bool priv; + } *ramfc; + + void (*bind)(struct nvkm_chan *); + void (*unbind)(struct nvkm_chan *); + void (*start)(struct nvkm_chan *); + void (*stop)(struct nvkm_chan *); + void (*preempt)(struct nvkm_chan *); + u32 (*doorbell_handle)(struct nvkm_chan *); }; -int gf100_fifo_chan_ntfy(struct nvkm_fifo_chan *, u32, struct nvkm_event **); +int nvkm_chan_new_(const struct nvkm_chan_func *, struct nvkm_runl *, int runq, struct nvkm_cgrp *, + const char *name, bool priv, u32 devm, struct nvkm_vmm *, struct nvkm_dmaobj *, + u64 offset, u64 length, struct nvkm_memory *userd, u64 userd_bar1, + struct nvkm_chan **); +void nvkm_chan_del(struct nvkm_chan **); +void nvkm_chan_allow(struct nvkm_chan *); +void nvkm_chan_block(struct nvkm_chan *); +void nvkm_chan_error(struct nvkm_chan *, bool preempt); +void nvkm_chan_insert(struct nvkm_chan *); +void nvkm_chan_remove(struct nvkm_chan *, bool preempt); +void nvkm_chan_remove_locked(struct nvkm_chan *); +int nvkm_chan_preempt(struct nvkm_chan *, bool wait); +int nvkm_chan_preempt_locked(struct nvkm_chan *, bool wait); +int nvkm_chan_cctx_get(struct nvkm_chan *, struct nvkm_engn *, struct nvkm_cctx **, + struct nvkm_client * /*TODO: remove need for this */); +void nvkm_chan_cctx_put(struct nvkm_chan *, struct nvkm_cctx **); +void nvkm_chan_cctx_bind(struct nvkm_chan *, struct nvkm_engn *, struct nvkm_cctx *); + +#define CHAN_PRCLI(c,l,p,f,a...) CGRP_PRINT((c)->cgrp, l, p, "%04x:[%s]"f, (c)->id, (c)->name, ##a) +#define CHAN_PRINT(c,l,p,f,a...) CGRP_PRINT((c)->cgrp, l, p, "%04x:"f, (c)->id, ##a) +#define CHAN_ERROR(c,f,a...) CHAN_PRCLI((c), ERROR, err, " "f"\n", ##a) +#define CHAN_TRACE(c,f,a...) CHAN_PRINT((c), TRACE, info, " "f"\n", ##a) #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c deleted file mode 100644 index 3492c561f2cf..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs - */ -#include "channv50.h" - -#include <core/client.h> -#include <core/ramht.h> -#include <subdev/mmu.h> -#include <subdev/timer.h> - -#include <nvif/cl826e.h> - -static int -g84_fifo_chan_ntfy(struct nvkm_fifo_chan *chan, u32 type, - struct nvkm_event **pevent) -{ - switch (type) { - case NV826E_V0_NTFY_NON_STALL_INTERRUPT: - *pevent = &chan->fifo->uevent; - return 0; - default: - break; - } - return -EINVAL; -} - -static int -g84_fifo_chan_engine_addr(struct nvkm_engine *engine) -{ - switch (engine->subdev.type) { - case NVKM_ENGINE_DMAOBJ: - case NVKM_ENGINE_SW : return -1; - case NVKM_ENGINE_GR : return 0x0020; - case NVKM_ENGINE_VP : - case NVKM_ENGINE_MSPDEC: return 0x0040; - case NVKM_ENGINE_MPEG : - case NVKM_ENGINE_MSPPP : return 0x0060; - case NVKM_ENGINE_BSP : - case NVKM_ENGINE_MSVLD : return 0x0080; - case NVKM_ENGINE_CIPHER: - case NVKM_ENGINE_SEC : return 0x00a0; - case NVKM_ENGINE_CE : return 0x00c0; - default: - WARN_ON(1); - return -1; - } -} - -static int -g84_fifo_chan_engine_fini(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine, bool suspend) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - struct nv50_fifo *fifo = chan->fifo; - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - u32 engn, save; - int offset; - bool done; - - offset = g84_fifo_chan_engine_addr(engine); - if (offset < 0) - return 0; - - engn = fifo->base.func->engine_id(&fifo->base, engine) - 1; - save = nvkm_mask(device, 0x002520, 0x0000003f, 1 << engn); - nvkm_wr32(device, 0x0032fc, chan->base.inst->addr >> 12); - done = nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x0032fc) != 0xffffffff) - break; - ) >= 0; - nvkm_wr32(device, 0x002520, save); - if (!done) { - nvkm_error(subdev, "channel %d [%s] unload timeout\n", - chan->base.chid, chan->base.object.client->name); - if (suspend) - return -EBUSY; - } - - nvkm_kmap(chan->eng); - nvkm_wo32(chan->eng, offset + 0x00, 0x00000000); - nvkm_wo32(chan->eng, offset + 0x04, 0x00000000); - nvkm_wo32(chan->eng, offset + 0x08, 0x00000000); - nvkm_wo32(chan->eng, offset + 0x0c, 0x00000000); - nvkm_wo32(chan->eng, offset + 0x10, 0x00000000); - nvkm_wo32(chan->eng, offset + 0x14, 0x00000000); - nvkm_done(chan->eng); - return 0; -} - - -static int -g84_fifo_chan_engine_init(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - struct nvkm_gpuobj *engn = *nv50_fifo_chan_engine(chan, engine); - u64 limit, start; - int offset; - - offset = g84_fifo_chan_engine_addr(engine); - if (offset < 0) - return 0; - limit = engn->addr + engn->size - 1; - start = engn->addr; - - nvkm_kmap(chan->eng); - nvkm_wo32(chan->eng, offset + 0x00, 0x00190000); - nvkm_wo32(chan->eng, offset + 0x04, lower_32_bits(limit)); - nvkm_wo32(chan->eng, offset + 0x08, lower_32_bits(start)); - nvkm_wo32(chan->eng, offset + 0x0c, upper_32_bits(limit) << 24 | - upper_32_bits(start)); - nvkm_wo32(chan->eng, offset + 0x10, 0x00000000); - 
nvkm_wo32(chan->eng, offset + 0x14, 0x00000000); - nvkm_done(chan->eng); - return 0; -} - -static int -g84_fifo_chan_engine_ctor(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine, - struct nvkm_object *object) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - - if (g84_fifo_chan_engine_addr(engine) < 0) - return 0; - - return nvkm_object_bind(object, NULL, 0, nv50_fifo_chan_engine(chan, engine)); -} - -static int -g84_fifo_chan_object_ctor(struct nvkm_fifo_chan *base, - struct nvkm_object *object) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - u32 handle = object->handle; - u32 context; - - switch (object->engine->subdev.type) { - case NVKM_ENGINE_DMAOBJ: - case NVKM_ENGINE_SW : context = 0x00000000; break; - case NVKM_ENGINE_GR : context = 0x00100000; break; - case NVKM_ENGINE_MPEG : - case NVKM_ENGINE_MSPPP : context = 0x00200000; break; - case NVKM_ENGINE_ME : - case NVKM_ENGINE_CE : context = 0x00300000; break; - case NVKM_ENGINE_VP : - case NVKM_ENGINE_MSPDEC: context = 0x00400000; break; - case NVKM_ENGINE_CIPHER: - case NVKM_ENGINE_SEC : - case NVKM_ENGINE_VIC : context = 0x00500000; break; - case NVKM_ENGINE_BSP : - case NVKM_ENGINE_MSVLD : context = 0x00600000; break; - default: - WARN_ON(1); - return -EINVAL; - } - - return nvkm_ramht_insert(chan->ramht, object, 0, 4, handle, context); -} - -static void -g84_fifo_chan_init(struct nvkm_fifo_chan *base) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - struct nv50_fifo *fifo = chan->fifo; - struct nvkm_device *device = fifo->base.engine.subdev.device; - u64 addr = chan->ramfc->addr >> 8; - u32 chid = chan->base.chid; - - nvkm_wr32(device, 0x002600 + (chid * 4), 0x80000000 | addr); - nv50_fifo_runlist_update(fifo); -} - -static const struct nvkm_fifo_chan_func -g84_fifo_chan_func = { - .dtor = nv50_fifo_chan_dtor, - .init = g84_fifo_chan_init, - .fini = nv50_fifo_chan_fini, - .ntfy = g84_fifo_chan_ntfy, - .engine_ctor = g84_fifo_chan_engine_ctor, - .engine_dtor = nv50_fifo_chan_engine_dtor, - .engine_init = g84_fifo_chan_engine_init, - .engine_fini = g84_fifo_chan_engine_fini, - .object_ctor = g84_fifo_chan_object_ctor, - .object_dtor = nv50_fifo_chan_object_dtor, -}; - -int -g84_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vmm, u64 push, - const struct nvkm_oclass *oclass, - struct nv50_fifo_chan *chan) -{ - struct nvkm_device *device = fifo->base.engine.subdev.device; - int ret; - - if (!vmm) - return -EINVAL; - - ret = nvkm_fifo_chan_ctor(&g84_fifo_chan_func, &fifo->base, - 0x10000, 0x1000, false, vmm, push, - BIT(G84_FIFO_ENGN_SW) | - BIT(G84_FIFO_ENGN_GR) | - BIT(G84_FIFO_ENGN_MPEG) | - BIT(G84_FIFO_ENGN_MSPPP) | - BIT(G84_FIFO_ENGN_ME) | - BIT(G84_FIFO_ENGN_CE0) | - BIT(G84_FIFO_ENGN_VP) | - BIT(G84_FIFO_ENGN_MSPDEC) | - BIT(G84_FIFO_ENGN_CIPHER) | - BIT(G84_FIFO_ENGN_SEC) | - BIT(G84_FIFO_ENGN_VIC) | - BIT(G84_FIFO_ENGN_BSP) | - BIT(G84_FIFO_ENGN_MSVLD) | - BIT(G84_FIFO_ENGN_DMA), - 0, 0xc00000, 0x2000, oclass, &chan->base); - chan->fifo = fifo; - if (ret) - return ret; - - ret = nvkm_gpuobj_new(device, 0x0200, 0, true, chan->base.inst, - &chan->eng); - if (ret) - return ret; - - ret = nvkm_gpuobj_new(device, 0x4000, 0, false, chan->base.inst, - &chan->pgd); - if (ret) - return ret; - - ret = nvkm_gpuobj_new(device, 0x1000, 0x400, true, chan->base.inst, - &chan->cache); - if (ret) - return ret; - - ret = nvkm_gpuobj_new(device, 0x100, 0x100, true, chan->base.inst, - &chan->ramfc); - if (ret) - return ret; - - return nvkm_ramht_new(device, 0x8000, 16, chan->base.inst, &chan->ramht); -} diff 
--git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h deleted file mode 100644 index f7ac1061fa84..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -#ifndef __GF100_FIFO_CHAN_H__ -#define __GF100_FIFO_CHAN_H__ -#define gf100_fifo_chan(p) container_of((p), struct gf100_fifo_chan, base) -#include "chan.h" -#include "gf100.h" - -struct gf100_fifo_chan { - struct nvkm_fifo_chan base; - struct gf100_fifo *fifo; - - struct list_head head; - bool killed; - -#define GF100_FIFO_ENGN_GR 0 -#define GF100_FIFO_ENGN_MSPDEC 1 -#define GF100_FIFO_ENGN_MSPPP 2 -#define GF100_FIFO_ENGN_MSVLD 3 -#define GF100_FIFO_ENGN_CE0 4 -#define GF100_FIFO_ENGN_CE1 5 -#define GF100_FIFO_ENGN_SW 15 - struct gf100_fifo_engn { - struct nvkm_gpuobj *inst; - struct nvkm_vma *vma; - } engn[NVKM_FIFO_ENGN_NR]; -}; - -extern const struct nvkm_fifo_chan_oclass gf100_fifo_gpfifo_oclass; -#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h deleted file mode 100644 index 9713daee6c76..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -#ifndef __GK104_FIFO_CHAN_H__ -#define __GK104_FIFO_CHAN_H__ -#define gk104_fifo_chan(p) container_of((p), struct gk104_fifo_chan, base) -#include "chan.h" -#include "gk104.h" - -struct gk104_fifo_chan { - struct nvkm_fifo_chan base; - struct gk104_fifo *fifo; - int runl; - - struct nvkm_fifo_cgrp *cgrp; - struct list_head head; - bool killed; - -#define GK104_FIFO_ENGN_SW 15 - struct gk104_fifo_engn { - struct nvkm_gpuobj *inst; - struct nvkm_vma *vma; - } engn[NVKM_FIFO_ENGN_NR]; -}; - -extern const struct nvkm_fifo_chan_func gk104_fifo_gpfifo_func; - -int gk104_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *, - void *data, u32 size, struct nvkm_object **); -void *gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *); -void gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *); -void gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *); -struct gk104_fifo_engn *gk104_fifo_gpfifo_engine(struct gk104_fifo_chan *, struct nvkm_engine *); -int gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *, struct nvkm_engine *, - struct nvkm_object *); -void gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *, - struct nvkm_engine *); -int gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *); -int gk104_fifo_gpfifo_kick_locked(struct gk104_fifo_chan *); - -int gv100_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *, - void *data, u32 size, struct nvkm_object **); -int gv100_fifo_gpfifo_new_(const struct nvkm_fifo_chan_func *, - struct gk104_fifo *, u64 *, u16 *, u64, u64, u64, - u64 *, bool, u32 *, const struct nvkm_oclass *, - struct nvkm_object **); -int gv100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *, - struct nvkm_engine *); -int gv100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *, - struct nvkm_engine *, bool); - -int tu102_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *, - void *data, u32 size, struct nvkm_object **); -#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv04.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv04.h deleted file mode 100644 index 727bc8976b40..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv04.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -#ifndef __NV04_FIFO_CHAN_H__ -#define 
__NV04_FIFO_CHAN_H__ -#define nv04_fifo_chan(p) container_of((p), struct nv04_fifo_chan, base) -#include "chan.h" -#include "nv04.h" - -struct nv04_fifo_chan { - struct nvkm_fifo_chan base; - struct nv04_fifo *fifo; - u32 ramfc; -#define NV04_FIFO_ENGN_SW 0 -#define NV04_FIFO_ENGN_GR 1 -#define NV04_FIFO_ENGN_MPEG 2 -#define NV04_FIFO_ENGN_DMA 3 - struct nvkm_gpuobj *engn[NVKM_FIFO_ENGN_NR]; -}; - -extern const struct nvkm_fifo_chan_func nv04_fifo_dma_func; -void *nv04_fifo_dma_dtor(struct nvkm_fifo_chan *); -void nv04_fifo_dma_init(struct nvkm_fifo_chan *); -void nv04_fifo_dma_fini(struct nvkm_fifo_chan *); -void nv04_fifo_dma_object_dtor(struct nvkm_fifo_chan *, int); - -extern const struct nvkm_fifo_chan_oclass nv04_fifo_dma_oclass; -extern const struct nvkm_fifo_chan_oclass nv10_fifo_dma_oclass; -extern const struct nvkm_fifo_chan_oclass nv17_fifo_dma_oclass; -extern const struct nvkm_fifo_chan_oclass nv40_fifo_dma_oclass; -#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c deleted file mode 100644 index c44d7c81dd52..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: Ben Skeggs - */ -#include "channv50.h" - -#include <core/client.h> -#include <core/ramht.h> -#include <subdev/mmu.h> -#include <subdev/timer.h> - -static int -nv50_fifo_chan_engine_addr(struct nvkm_engine *engine) -{ - switch (engine->subdev.type) { - case NVKM_ENGINE_DMAOBJ: - case NVKM_ENGINE_SW : return -1; - case NVKM_ENGINE_GR : return 0x0000; - case NVKM_ENGINE_MPEG : return 0x0060; - default: - WARN_ON(1); - return -1; - } -} - -struct nvkm_gpuobj ** -nv50_fifo_chan_engine(struct nv50_fifo_chan *chan, struct nvkm_engine *engine) -{ - int engi = chan->base.fifo->func->engine_id(chan->base.fifo, engine); - if (engi >= 0) - return &chan->engn[engi]; - return NULL; -} - -static int -nv50_fifo_chan_engine_fini(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine, bool suspend) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - struct nv50_fifo *fifo = chan->fifo; - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - int offset, ret = 0; - u32 me; - - offset = nv50_fifo_chan_engine_addr(engine); - if (offset < 0) - return 0; - - /* HW bug workaround: - * - * PFIFO will hang forever if the connected engines don't report - * that they've processed the context switch request. - * - * In order for the kickoff to work, we need to ensure all the - * connected engines are in a state where they can answer. - * - * Newer chipsets don't seem to suffer from this issue, and well, - * there's also a "ignore these engines" bitmask reg we can use - * if we hit the issue there.. - */ - me = nvkm_mask(device, 0x00b860, 0x00000001, 0x00000001); - - /* do the kickoff... */ - nvkm_wr32(device, 0x0032fc, chan->base.inst->addr >> 12); - if (nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x0032fc) != 0xffffffff) - break; - ) < 0) { - nvkm_error(subdev, "channel %d [%s] unload timeout\n", - chan->base.chid, chan->base.object.client->name); - if (suspend) - ret = -EBUSY; - } - nvkm_wr32(device, 0x00b860, me); - - if (ret == 0) { - nvkm_kmap(chan->eng); - nvkm_wo32(chan->eng, offset + 0x00, 0x00000000); - nvkm_wo32(chan->eng, offset + 0x04, 0x00000000); - nvkm_wo32(chan->eng, offset + 0x08, 0x00000000); - nvkm_wo32(chan->eng, offset + 0x0c, 0x00000000); - nvkm_wo32(chan->eng, offset + 0x10, 0x00000000); - nvkm_wo32(chan->eng, offset + 0x14, 0x00000000); - nvkm_done(chan->eng); - } - - return ret; -} - -static int -nv50_fifo_chan_engine_init(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - struct nvkm_gpuobj *engn = *nv50_fifo_chan_engine(chan, engine); - u64 limit, start; - int offset; - - offset = nv50_fifo_chan_engine_addr(engine); - if (offset < 0) - return 0; - limit = engn->addr + engn->size - 1; - start = engn->addr; - - nvkm_kmap(chan->eng); - nvkm_wo32(chan->eng, offset + 0x00, 0x00190000); - nvkm_wo32(chan->eng, offset + 0x04, lower_32_bits(limit)); - nvkm_wo32(chan->eng, offset + 0x08, lower_32_bits(start)); - nvkm_wo32(chan->eng, offset + 0x0c, upper_32_bits(limit) << 24 | - upper_32_bits(start)); - nvkm_wo32(chan->eng, offset + 0x10, 0x00000000); - nvkm_wo32(chan->eng, offset + 0x14, 0x00000000); - nvkm_done(chan->eng); - return 0; -} - -void -nv50_fifo_chan_engine_dtor(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - nvkm_gpuobj_del(nv50_fifo_chan_engine(chan, engine)); -} - -static int -nv50_fifo_chan_engine_ctor(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine, - struct 
nvkm_object *object) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - - if (nv50_fifo_chan_engine_addr(engine) < 0) - return 0; - - return nvkm_object_bind(object, NULL, 0, nv50_fifo_chan_engine(chan, engine)); -} - -void -nv50_fifo_chan_object_dtor(struct nvkm_fifo_chan *base, int cookie) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - nvkm_ramht_remove(chan->ramht, cookie); -} - -static int -nv50_fifo_chan_object_ctor(struct nvkm_fifo_chan *base, - struct nvkm_object *object) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - u32 handle = object->handle; - u32 context; - - switch (object->engine->subdev.type) { - case NVKM_ENGINE_DMAOBJ: - case NVKM_ENGINE_SW : context = 0x00000000; break; - case NVKM_ENGINE_GR : context = 0x00100000; break; - case NVKM_ENGINE_MPEG : context = 0x00200000; break; - default: - WARN_ON(1); - return -EINVAL; - } - - return nvkm_ramht_insert(chan->ramht, object, 0, 4, handle, context); -} - -void -nv50_fifo_chan_fini(struct nvkm_fifo_chan *base) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - struct nv50_fifo *fifo = chan->fifo; - struct nvkm_device *device = fifo->base.engine.subdev.device; - u32 chid = chan->base.chid; - - /* remove channel from runlist, fifo will unload context */ - nvkm_mask(device, 0x002600 + (chid * 4), 0x80000000, 0x00000000); - nv50_fifo_runlist_update(fifo); - nvkm_wr32(device, 0x002600 + (chid * 4), 0x00000000); -} - -static void -nv50_fifo_chan_init(struct nvkm_fifo_chan *base) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - struct nv50_fifo *fifo = chan->fifo; - struct nvkm_device *device = fifo->base.engine.subdev.device; - u64 addr = chan->ramfc->addr >> 12; - u32 chid = chan->base.chid; - - nvkm_wr32(device, 0x002600 + (chid * 4), 0x80000000 | addr); - nv50_fifo_runlist_update(fifo); -} - -void * -nv50_fifo_chan_dtor(struct nvkm_fifo_chan *base) -{ - struct nv50_fifo_chan *chan = nv50_fifo_chan(base); - nvkm_ramht_del(&chan->ramht); - nvkm_gpuobj_del(&chan->pgd); - nvkm_gpuobj_del(&chan->eng); - nvkm_gpuobj_del(&chan->cache); - nvkm_gpuobj_del(&chan->ramfc); - return chan; -} - -static const struct nvkm_fifo_chan_func -nv50_fifo_chan_func = { - .dtor = nv50_fifo_chan_dtor, - .init = nv50_fifo_chan_init, - .fini = nv50_fifo_chan_fini, - .engine_ctor = nv50_fifo_chan_engine_ctor, - .engine_dtor = nv50_fifo_chan_engine_dtor, - .engine_init = nv50_fifo_chan_engine_init, - .engine_fini = nv50_fifo_chan_engine_fini, - .object_ctor = nv50_fifo_chan_object_ctor, - .object_dtor = nv50_fifo_chan_object_dtor, -}; - -int -nv50_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vmm, u64 push, - const struct nvkm_oclass *oclass, - struct nv50_fifo_chan *chan) -{ - struct nvkm_device *device = fifo->base.engine.subdev.device; - int ret; - - if (!vmm) - return -EINVAL; - - ret = nvkm_fifo_chan_ctor(&nv50_fifo_chan_func, &fifo->base, - 0x10000, 0x1000, false, vmm, push, - BIT(NV50_FIFO_ENGN_SW) | - BIT(NV50_FIFO_ENGN_GR) | - BIT(NV50_FIFO_ENGN_MPEG) | - BIT(NV50_FIFO_ENGN_DMA), - 0, 0xc00000, 0x2000, oclass, &chan->base); - chan->fifo = fifo; - if (ret) - return ret; - - ret = nvkm_gpuobj_new(device, 0x0200, 0x1000, true, chan->base.inst, - &chan->ramfc); - if (ret) - return ret; - - ret = nvkm_gpuobj_new(device, 0x1200, 0, true, chan->base.inst, - &chan->eng); - if (ret) - return ret; - - ret = nvkm_gpuobj_new(device, 0x4000, 0, false, chan->base.inst, - &chan->pgd); - if (ret) - return ret; - - return nvkm_ramht_new(device, 0x8000, 16, chan->base.inst, &chan->ramht); -} diff --git 
a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h deleted file mode 100644 index 3a95730d7ff5..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -#ifndef __NV50_FIFO_CHAN_H__ -#define __NV50_FIFO_CHAN_H__ -#define nv50_fifo_chan(p) container_of((p), struct nv50_fifo_chan, base) -#include "chan.h" -#include "nv50.h" - -struct nv50_fifo_chan { - struct nv50_fifo *fifo; - struct nvkm_fifo_chan base; - - struct nvkm_gpuobj *ramfc; - struct nvkm_gpuobj *cache; - struct nvkm_gpuobj *eng; - struct nvkm_gpuobj *pgd; - struct nvkm_ramht *ramht; - -#define NV50_FIFO_ENGN_SW 0 -#define NV50_FIFO_ENGN_GR 1 -#define NV50_FIFO_ENGN_MPEG 2 -#define NV50_FIFO_ENGN_DMA 3 - -#define G84_FIFO_ENGN_SW 0 -#define G84_FIFO_ENGN_GR 1 -#define G84_FIFO_ENGN_MPEG 2 -#define G84_FIFO_ENGN_MSPPP 2 -#define G84_FIFO_ENGN_ME 3 -#define G84_FIFO_ENGN_CE0 3 -#define G84_FIFO_ENGN_VP 4 -#define G84_FIFO_ENGN_MSPDEC 4 -#define G84_FIFO_ENGN_CIPHER 5 -#define G84_FIFO_ENGN_SEC 5 -#define G84_FIFO_ENGN_VIC 5 -#define G84_FIFO_ENGN_BSP 6 -#define G84_FIFO_ENGN_MSVLD 6 -#define G84_FIFO_ENGN_DMA 7 - struct nvkm_gpuobj *engn[NVKM_FIFO_ENGN_NR]; -}; - -int nv50_fifo_chan_ctor(struct nv50_fifo *, u64 vmm, u64 push, - const struct nvkm_oclass *, struct nv50_fifo_chan *); -void *nv50_fifo_chan_dtor(struct nvkm_fifo_chan *); -void nv50_fifo_chan_fini(struct nvkm_fifo_chan *); -struct nvkm_gpuobj **nv50_fifo_chan_engine(struct nv50_fifo_chan *, struct nvkm_engine *); -void nv50_fifo_chan_engine_dtor(struct nvkm_fifo_chan *, struct nvkm_engine *); -void nv50_fifo_chan_object_dtor(struct nvkm_fifo_chan *, int); - -int g84_fifo_chan_ctor(struct nv50_fifo *, u64 vmm, u64 push, - const struct nvkm_oclass *, struct nv50_fifo_chan *); - -extern const struct nvkm_fifo_chan_oclass nv50_fifo_gpfifo_oclass; -extern const struct nvkm_fifo_chan_oclass g84_fifo_gpfifo_oclass; -#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.c new file mode 100644 index 000000000000..23944d95efd5 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.c @@ -0,0 +1,111 @@ +/* + * Copyright 2020 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
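[Editor's note: the new chid.c added here gives all fifo implementations one shared channel-ID allocator: a bitmap of used IDs plus a data[] pointer array, both guarded by chid->lock, with IDs outside [first, first + count) pre-reserved by nvkm_chid_new(). A minimal plain-C sketch of the same lowest-free-bit pattern follows; the ex_* names are hypothetical, and the locking, refcounting, and event plumbing of the real code are deliberately elided.]

#define EX_NR 128
#define EX_BPW (8 * (int)sizeof(unsigned long))

static unsigned long ex_used[(EX_NR + EX_BPW - 1) / EX_BPW];

/* Hand out the lowest free ID, like find_first_zero_bit() + set_bit(). */
static int
ex_chid_get(void)
{
	int id;

	for (id = 0; id < EX_NR; id++) {
		unsigned long *w = &ex_used[id / EX_BPW];
		unsigned long b = 1UL << (id % EX_BPW);

		if (!(*w & b)) {
			*w |= b;
			return id;
		}
	}
	return -1; /* every channel ID is in use */
}

/* Release an ID; negative IDs are ignored, as in nvkm_chid_put(). */
static void
ex_chid_put(int id)
{
	if (id >= 0)
		ex_used[id / EX_BPW] &= ~(1UL << (id % EX_BPW));
}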
+ */ +#include "chid.h" + +void +nvkm_chid_put(struct nvkm_chid *chid, int id, spinlock_t *data_lock) +{ + if (id >= 0) { + spin_lock_irq(&chid->lock); + spin_lock(data_lock); + chid->data[id] = NULL; + spin_unlock(data_lock); + clear_bit(id, chid->used); + spin_unlock_irq(&chid->lock); + } +} + +int +nvkm_chid_get(struct nvkm_chid *chid, void *data) +{ + int id = -1, cid; + + spin_lock_irq(&chid->lock); + cid = find_first_zero_bit(chid->used, chid->nr); + if (cid < chid->nr) { + set_bit(cid, chid->used); + chid->data[cid] = data; + id = cid; + } + spin_unlock_irq(&chid->lock); + return id; +} + +static void +nvkm_chid_del(struct kref *kref) +{ + struct nvkm_chid *chid = container_of(kref, typeof(*chid), kref); + + nvkm_event_fini(&chid->event); + + kvfree(chid->data); + kfree(chid); +} + +void +nvkm_chid_unref(struct nvkm_chid **pchid) +{ + struct nvkm_chid *chid = *pchid; + + if (!chid) + return; + + kref_put(&chid->kref, nvkm_chid_del); + *pchid = NULL; +} + +struct nvkm_chid * +nvkm_chid_ref(struct nvkm_chid *chid) +{ + if (chid) + kref_get(&chid->kref); + + return chid; +} + +int +nvkm_chid_new(const struct nvkm_event_func *func, struct nvkm_subdev *subdev, + int nr, int first, int count, struct nvkm_chid **pchid) +{ + struct nvkm_chid *chid; + int id; + + if (!(chid = *pchid = kzalloc(struct_size(chid, used, nr), GFP_KERNEL))) + return -ENOMEM; + + kref_init(&chid->kref); + chid->nr = nr; + chid->mask = chid->nr - 1; + spin_lock_init(&chid->lock); + + if (!(chid->data = kvzalloc(sizeof(*chid->data) * nr, GFP_KERNEL))) { + nvkm_chid_unref(pchid); + return -ENOMEM; + } + + for (id = 0; id < first; id++) + __set_bit(id, chid->used); + for (id = first + count; id < nr; id++) + __set_bit(id, chid->used); + + return nvkm_event_init(func, subdev, 1, nr, &chid->event); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.h new file mode 100644 index 000000000000..2a42efb18401 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef __NVKM_CHID_H__ +#define __NVKM_CHID_H__ +#include <core/event.h> + +struct nvkm_chid { + struct kref kref; + int nr; + u32 mask; + + struct nvkm_event event; + + void **data; + + spinlock_t lock; + unsigned long used[]; +}; + +int nvkm_chid_new(const struct nvkm_event_func *, struct nvkm_subdev *, + int nr, int first, int count, struct nvkm_chid **pchid); +struct nvkm_chid *nvkm_chid_ref(struct nvkm_chid *); +void nvkm_chid_unref(struct nvkm_chid **); +int nvkm_chid_get(struct nvkm_chid *, void *data); +void nvkm_chid_put(struct nvkm_chid *, int id, spinlock_t *data_lock); +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c deleted file mode 100644 index dbcdc5fab990..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs - */ -#include "channv04.h" -#include "regsnv04.h" - -#include <core/client.h> -#include <core/ramht.h> -#include <subdev/instmem.h> - -#include <nvif/class.h> -#include <nvif/cl006b.h> -#include <nvif/unpack.h> - -void -nv04_fifo_dma_object_dtor(struct nvkm_fifo_chan *base, int cookie) -{ - struct nv04_fifo_chan *chan = nv04_fifo_chan(base); - struct nvkm_instmem *imem = chan->fifo->base.engine.subdev.device->imem; - - mutex_lock(&chan->fifo->base.mutex); - nvkm_ramht_remove(imem->ramht, cookie); - mutex_unlock(&chan->fifo->base.mutex); -} - -static int -nv04_fifo_dma_object_ctor(struct nvkm_fifo_chan *base, - struct nvkm_object *object) -{ - struct nv04_fifo_chan *chan = nv04_fifo_chan(base); - struct nvkm_instmem *imem = chan->fifo->base.engine.subdev.device->imem; - u32 context = 0x80000000 | chan->base.chid << 24; - u32 handle = object->handle; - int hash; - - switch (object->engine->subdev.type) { - case NVKM_ENGINE_DMAOBJ: - case NVKM_ENGINE_SW : context |= 0x00000000; break; - case NVKM_ENGINE_GR : context |= 0x00010000; break; - case NVKM_ENGINE_MPEG : context |= 0x00020000; break; - default: - WARN_ON(1); - return -EINVAL; - } - - mutex_lock(&chan->fifo->base.mutex); - hash = nvkm_ramht_insert(imem->ramht, object, chan->base.chid, 4, - handle, context); - mutex_unlock(&chan->fifo->base.mutex); - return hash; -} - -void -nv04_fifo_dma_fini(struct nvkm_fifo_chan *base) -{ - struct nv04_fifo_chan *chan = nv04_fifo_chan(base); - struct nv04_fifo *fifo = chan->fifo; - struct nvkm_device *device = fifo->base.engine.subdev.device; - struct nvkm_memory *fctx = device->imem->ramfc; - const struct nv04_fifo_ramfc *c; - unsigned long flags; - u32 mask = fifo->base.nr - 1; - u32 data = chan->ramfc; - u32 chid; - - /* prevent fifo context switches */ - spin_lock_irqsave(&fifo->base.lock, flags); - nvkm_wr32(device, NV03_PFIFO_CACHES, 0); - - /* if this channel is active, replace it with a null context */ - chid = nvkm_rd32(device, NV03_PFIFO_CACHE1_PUSH1) & mask; - if (chid == chan->base.chid) { - nvkm_mask(device, NV04_PFIFO_CACHE1_DMA_PUSH, 0x00000001, 0); - nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH0, 0); - nvkm_mask(device, NV04_PFIFO_CACHE1_PULL0, 0x00000001, 0); - - c = fifo->ramfc; - nvkm_kmap(fctx); - do { - u32 rm = ((1ULL << c->bits) - 1) << c->regs; - u32 cm = ((1ULL << c->bits) - 1) << c->ctxs; - u32 rv = (nvkm_rd32(device, c->regp) & rm) >> c->regs; - u32 cv = (nvkm_ro32(fctx, 
c->ctxp + data) & ~cm); - nvkm_wo32(fctx, c->ctxp + data, cv | (rv << c->ctxs)); - } while ((++c)->bits); - nvkm_done(fctx); - - c = fifo->ramfc; - do { - nvkm_wr32(device, c->regp, 0x00000000); - } while ((++c)->bits); - - nvkm_wr32(device, NV03_PFIFO_CACHE1_GET, 0); - nvkm_wr32(device, NV03_PFIFO_CACHE1_PUT, 0); - nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, mask); - nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH0, 1); - nvkm_wr32(device, NV04_PFIFO_CACHE1_PULL0, 1); - } - - /* restore normal operation, after disabling dma mode */ - nvkm_mask(device, NV04_PFIFO_MODE, 1 << chan->base.chid, 0); - nvkm_wr32(device, NV03_PFIFO_CACHES, 1); - spin_unlock_irqrestore(&fifo->base.lock, flags); -} - -void -nv04_fifo_dma_init(struct nvkm_fifo_chan *base) -{ - struct nv04_fifo_chan *chan = nv04_fifo_chan(base); - struct nv04_fifo *fifo = chan->fifo; - struct nvkm_device *device = fifo->base.engine.subdev.device; - u32 mask = 1 << chan->base.chid; - unsigned long flags; - spin_lock_irqsave(&fifo->base.lock, flags); - nvkm_mask(device, NV04_PFIFO_MODE, mask, mask); - spin_unlock_irqrestore(&fifo->base.lock, flags); -} - -void * -nv04_fifo_dma_dtor(struct nvkm_fifo_chan *base) -{ - struct nv04_fifo_chan *chan = nv04_fifo_chan(base); - struct nv04_fifo *fifo = chan->fifo; - struct nvkm_instmem *imem = fifo->base.engine.subdev.device->imem; - const struct nv04_fifo_ramfc *c = fifo->ramfc; - - nvkm_kmap(imem->ramfc); - do { - nvkm_wo32(imem->ramfc, chan->ramfc + c->ctxp, 0x00000000); - } while ((++c)->bits); - nvkm_done(imem->ramfc); - return chan; -} - -const struct nvkm_fifo_chan_func -nv04_fifo_dma_func = { - .dtor = nv04_fifo_dma_dtor, - .init = nv04_fifo_dma_init, - .fini = nv04_fifo_dma_fini, - .object_ctor = nv04_fifo_dma_object_ctor, - .object_dtor = nv04_fifo_dma_object_dtor, -}; - -static int -nv04_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - struct nvkm_object *parent = oclass->parent; - union { - struct nv03_channel_dma_v0 v0; - } *args = data; - struct nv04_fifo *fifo = nv04_fifo(base); - struct nv04_fifo_chan *chan = NULL; - struct nvkm_device *device = fifo->base.engine.subdev.device; - struct nvkm_instmem *imem = device->imem; - int ret = -ENOSYS; - - nvif_ioctl(parent, "create channel dma size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel dma vers %d pushbuf %llx " - "offset %08x\n", args->v0.version, - args->v0.pushbuf, args->v0.offset); - if (!args->v0.pushbuf) - return -EINVAL; - } else - return ret; - - if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - *pobject = &chan->base.object; - - ret = nvkm_fifo_chan_ctor(&nv04_fifo_dma_func, &fifo->base, - 0x1000, 0x1000, false, 0, args->v0.pushbuf, - BIT(NV04_FIFO_ENGN_SW) | - BIT(NV04_FIFO_ENGN_GR) | - BIT(NV04_FIFO_ENGN_DMA), - 0, 0x800000, 0x10000, oclass, &chan->base); - chan->fifo = fifo; - if (ret) - return ret; - - args->v0.chid = chan->base.chid; - chan->ramfc = chan->base.chid * 32; - - nvkm_kmap(imem->ramfc); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x00, args->v0.offset); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x04, args->v0.offset); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x08, chan->base.push->addr >> 4); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x10, - NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES | - NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES | -#ifdef __BIG_ENDIAN - NV_PFIFO_CACHE1_BIG_ENDIAN | -#endif - NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8); - nvkm_done(imem->ramfc); 
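[Editor's note: the deleted nv04_fifo_dma_fini() above saves channel state lazily: a per-chipset descriptor table (struct nv04_fifo_ramfc) maps live PFIFO registers onto RAMFC words, so one generic loop copies the active channel's context out of the hardware before it is torn down. A hedged plain-C sketch of that table-driven save; the ex_* names are hypothetical and MMIO access is abstracted behind a callback.]

struct ex_ramfc {
	unsigned int bits; /* field width in bits; 0 terminates the table */
	unsigned int ctxs; /* bit shift inside the RAMFC word */
	unsigned int ctxp; /* byte offset of the RAMFC word */
	unsigned int regs; /* bit shift inside the MMIO register */
	unsigned int regp; /* MMIO register address */
};

static void
ex_ramfc_save(const struct ex_ramfc *c, unsigned int (*rd32)(unsigned int),
	      unsigned int *ramfc /* this channel's RAMFC image, in words */)
{
	do {
		unsigned int rm = ((1ULL << c->bits) - 1) << c->regs;
		unsigned int cm = ((1ULL << c->bits) - 1) << c->ctxs;
		unsigned int rv = (rd32(c->regp) & rm) >> c->regs;

		/* merge the live register field into its RAMFC slot */
		ramfc[c->ctxp / 4] = (ramfc[c->ctxp / 4] & ~cm) | (rv << c->ctxs);
	} while ((++c)->bits);
}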
- return 0; -} - -const struct nvkm_fifo_chan_oclass -nv04_fifo_dma_oclass = { - .base.oclass = NV03_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv04_fifo_dma_new, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv10.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv10.c deleted file mode 100644 index 07d80d54a07c..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv10.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs - */ -#include "channv04.h" -#include "regsnv04.h" - -#include <core/client.h> -#include <core/gpuobj.h> -#include <subdev/instmem.h> - -#include <nvif/class.h> -#include <nvif/cl006b.h> -#include <nvif/unpack.h> - -static int -nv10_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - struct nvkm_object *parent = oclass->parent; - union { - struct nv03_channel_dma_v0 v0; - } *args = data; - struct nv04_fifo *fifo = nv04_fifo(base); - struct nv04_fifo_chan *chan = NULL; - struct nvkm_device *device = fifo->base.engine.subdev.device; - struct nvkm_instmem *imem = device->imem; - int ret = -ENOSYS; - - nvif_ioctl(parent, "create channel dma size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel dma vers %d pushbuf %llx " - "offset %08x\n", args->v0.version, - args->v0.pushbuf, args->v0.offset); - if (!args->v0.pushbuf) - return -EINVAL; - } else - return ret; - - if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - *pobject = &chan->base.object; - - ret = nvkm_fifo_chan_ctor(&nv04_fifo_dma_func, &fifo->base, - 0x1000, 0x1000, false, 0, args->v0.pushbuf, - BIT(NV04_FIFO_ENGN_SW) | - BIT(NV04_FIFO_ENGN_GR) | - BIT(NV04_FIFO_ENGN_DMA), - 0, 0x800000, 0x10000, oclass, &chan->base); - chan->fifo = fifo; - if (ret) - return ret; - - args->v0.chid = chan->base.chid; - chan->ramfc = chan->base.chid * 32; - - nvkm_kmap(imem->ramfc); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x00, args->v0.offset); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x04, args->v0.offset); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x0c, chan->base.push->addr >> 4); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x14, - NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES | - NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES | -#ifdef __BIG_ENDIAN - NV_PFIFO_CACHE1_BIG_ENDIAN | -#endif - 
NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8); - nvkm_done(imem->ramfc); - return 0; -} - -const struct nvkm_fifo_chan_oclass -nv10_fifo_dma_oclass = { - .base.oclass = NV10_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv10_fifo_dma_new, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv17.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv17.c deleted file mode 100644 index edd70a114218..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv17.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs - */ -#include "channv04.h" -#include "regsnv04.h" - -#include <core/client.h> -#include <core/gpuobj.h> -#include <subdev/instmem.h> - -#include <nvif/class.h> -#include <nvif/cl006b.h> -#include <nvif/unpack.h> - -static int -nv17_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - struct nvkm_object *parent = oclass->parent; - union { - struct nv03_channel_dma_v0 v0; - } *args = data; - struct nv04_fifo *fifo = nv04_fifo(base); - struct nv04_fifo_chan *chan = NULL; - struct nvkm_device *device = fifo->base.engine.subdev.device; - struct nvkm_instmem *imem = device->imem; - int ret = -ENOSYS; - - nvif_ioctl(parent, "create channel dma size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel dma vers %d pushbuf %llx " - "offset %08x\n", args->v0.version, - args->v0.pushbuf, args->v0.offset); - if (!args->v0.pushbuf) - return -EINVAL; - } else - return ret; - - if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - *pobject = &chan->base.object; - - ret = nvkm_fifo_chan_ctor(&nv04_fifo_dma_func, &fifo->base, - 0x1000, 0x1000, false, 0, args->v0.pushbuf, - BIT(NV04_FIFO_ENGN_SW) | - BIT(NV04_FIFO_ENGN_GR) | - BIT(NV04_FIFO_ENGN_MPEG) | /* NV31- */ - BIT(NV04_FIFO_ENGN_DMA), - 0, 0x800000, 0x10000, oclass, &chan->base); - chan->fifo = fifo; - if (ret) - return ret; - - args->v0.chid = chan->base.chid; - chan->ramfc = chan->base.chid * 64; - - nvkm_kmap(imem->ramfc); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x00, args->v0.offset); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x04, args->v0.offset); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x0c, chan->base.push->addr >> 4); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x14, - NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES | - 
NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES | -#ifdef __BIG_ENDIAN - NV_PFIFO_CACHE1_BIG_ENDIAN | -#endif - NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8); - nvkm_done(imem->ramfc); - return 0; -} - -const struct nvkm_fifo_chan_oclass -nv17_fifo_dma_oclass = { - .base.oclass = NV17_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv17_fifo_dma_new, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv40.c deleted file mode 100644 index 0411fb908457..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv40.c +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs - */ -#include "channv04.h" -#include "regsnv04.h" - -#include <core/client.h> -#include <core/ramht.h> -#include <subdev/instmem.h> - -#include <nvif/class.h> -#include <nvif/cl006b.h> -#include <nvif/unpack.h> - -static bool -nv40_fifo_dma_engine(struct nvkm_engine *engine, u32 *reg, u32 *ctx) -{ - switch (engine->subdev.type) { - case NVKM_ENGINE_DMAOBJ: - case NVKM_ENGINE_SW: - return false; - case NVKM_ENGINE_GR: - *reg = 0x0032e0; - *ctx = 0x38; - return true; - case NVKM_ENGINE_MPEG: - if (engine->subdev.device->chipset < 0x44) - return false; - *reg = 0x00330c; - *ctx = 0x54; - return true; - default: - WARN_ON(1); - return false; - } -} - -static struct nvkm_gpuobj ** -nv40_fifo_dma_engn(struct nv04_fifo_chan *chan, struct nvkm_engine *engine) -{ - int engi = chan->base.fifo->func->engine_id(chan->base.fifo, engine); - if (engi >= 0) - return &chan->engn[engi]; - return NULL; -} - -static int -nv40_fifo_dma_engine_fini(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine, bool suspend) -{ - struct nv04_fifo_chan *chan = nv04_fifo_chan(base); - struct nv04_fifo *fifo = chan->fifo; - struct nvkm_device *device = fifo->base.engine.subdev.device; - struct nvkm_instmem *imem = device->imem; - unsigned long flags; - u32 reg, ctx; - int chid; - - if (!nv40_fifo_dma_engine(engine, ®, &ctx)) - return 0; - - spin_lock_irqsave(&fifo->base.lock, flags); - nvkm_mask(device, 0x002500, 0x00000001, 0x00000000); - - chid = nvkm_rd32(device, 0x003204) & (fifo->base.nr - 1); - if (chid == chan->base.chid) - nvkm_wr32(device, reg, 0x00000000); - nvkm_kmap(imem->ramfc); - nvkm_wo32(imem->ramfc, chan->ramfc + ctx, 0x00000000); - nvkm_done(imem->ramfc); - - nvkm_mask(device, 0x002500, 0x00000001, 0x00000001); - spin_unlock_irqrestore(&fifo->base.lock, 
flags); - return 0; -} - -static int -nv40_fifo_dma_engine_init(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine) -{ - struct nv04_fifo_chan *chan = nv04_fifo_chan(base); - struct nv04_fifo *fifo = chan->fifo; - struct nvkm_device *device = fifo->base.engine.subdev.device; - struct nvkm_instmem *imem = device->imem; - unsigned long flags; - u32 inst, reg, ctx; - int chid; - - if (!nv40_fifo_dma_engine(engine, ®, &ctx)) - return 0; - inst = (*nv40_fifo_dma_engn(chan, engine))->addr >> 4; - - spin_lock_irqsave(&fifo->base.lock, flags); - nvkm_mask(device, 0x002500, 0x00000001, 0x00000000); - - chid = nvkm_rd32(device, 0x003204) & (fifo->base.nr - 1); - if (chid == chan->base.chid) - nvkm_wr32(device, reg, inst); - nvkm_kmap(imem->ramfc); - nvkm_wo32(imem->ramfc, chan->ramfc + ctx, inst); - nvkm_done(imem->ramfc); - - nvkm_mask(device, 0x002500, 0x00000001, 0x00000001); - spin_unlock_irqrestore(&fifo->base.lock, flags); - return 0; -} - -static void -nv40_fifo_dma_engine_dtor(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine) -{ - struct nv04_fifo_chan *chan = nv04_fifo_chan(base); - nvkm_gpuobj_del(nv40_fifo_dma_engn(chan, engine)); -} - -static int -nv40_fifo_dma_engine_ctor(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine, - struct nvkm_object *object) -{ - struct nv04_fifo_chan *chan = nv04_fifo_chan(base); - u32 reg, ctx; - - if (!nv40_fifo_dma_engine(engine, ®, &ctx)) - return 0; - - return nvkm_object_bind(object, NULL, 0, nv40_fifo_dma_engn(chan, engine)); -} - -static int -nv40_fifo_dma_object_ctor(struct nvkm_fifo_chan *base, - struct nvkm_object *object) -{ - struct nv04_fifo_chan *chan = nv04_fifo_chan(base); - struct nvkm_instmem *imem = chan->fifo->base.engine.subdev.device->imem; - u32 context = chan->base.chid << 23; - u32 handle = object->handle; - int hash; - - switch (object->engine->subdev.type) { - case NVKM_ENGINE_DMAOBJ: - case NVKM_ENGINE_SW : context |= 0x00000000; break; - case NVKM_ENGINE_GR : context |= 0x00100000; break; - case NVKM_ENGINE_MPEG : context |= 0x00200000; break; - default: - WARN_ON(1); - return -EINVAL; - } - - mutex_lock(&chan->fifo->base.mutex); - hash = nvkm_ramht_insert(imem->ramht, object, chan->base.chid, 4, - handle, context); - mutex_unlock(&chan->fifo->base.mutex); - return hash; -} - -static const struct nvkm_fifo_chan_func -nv40_fifo_dma_func = { - .dtor = nv04_fifo_dma_dtor, - .init = nv04_fifo_dma_init, - .fini = nv04_fifo_dma_fini, - .engine_ctor = nv40_fifo_dma_engine_ctor, - .engine_dtor = nv40_fifo_dma_engine_dtor, - .engine_init = nv40_fifo_dma_engine_init, - .engine_fini = nv40_fifo_dma_engine_fini, - .object_ctor = nv40_fifo_dma_object_ctor, - .object_dtor = nv04_fifo_dma_object_dtor, -}; - -static int -nv40_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - struct nvkm_object *parent = oclass->parent; - union { - struct nv03_channel_dma_v0 v0; - } *args = data; - struct nv04_fifo *fifo = nv04_fifo(base); - struct nv04_fifo_chan *chan = NULL; - struct nvkm_device *device = fifo->base.engine.subdev.device; - struct nvkm_instmem *imem = device->imem; - int ret = -ENOSYS; - - nvif_ioctl(parent, "create channel dma size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel dma vers %d pushbuf %llx " - "offset %08x\n", args->v0.version, - args->v0.pushbuf, args->v0.offset); - if (!args->v0.pushbuf) - return -EINVAL; - } else - return ret; - - if (!(chan = 
kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - *pobject = &chan->base.object; - - ret = nvkm_fifo_chan_ctor(&nv40_fifo_dma_func, &fifo->base, - 0x1000, 0x1000, false, 0, args->v0.pushbuf, - BIT(NV04_FIFO_ENGN_SW) | - BIT(NV04_FIFO_ENGN_GR) | - BIT(NV04_FIFO_ENGN_MPEG) | - BIT(NV04_FIFO_ENGN_DMA), - 0, 0xc00000, 0x1000, oclass, &chan->base); - chan->fifo = fifo; - if (ret) - return ret; - - args->v0.chid = chan->base.chid; - chan->ramfc = chan->base.chid * 128; - - nvkm_kmap(imem->ramfc); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x00, args->v0.offset); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x04, args->v0.offset); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x0c, chan->base.push->addr >> 4); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x18, 0x30000000 | - NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES | - NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES | -#ifdef __BIG_ENDIAN - NV_PFIFO_CACHE1_BIG_ENDIAN | -#endif - NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8); - nvkm_wo32(imem->ramfc, chan->ramfc + 0x3c, 0x0001ffff); - nvkm_done(imem->ramfc); - return 0; -} - -const struct nvkm_fifo_chan_oclass -nv40_fifo_dma_oclass = { - .base.oclass = NV40_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv40_fifo_dma_new, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c index 3885c3830b94..6b229a3fbd97 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c @@ -21,112 +21,211 @@ * * Authors: Ben Skeggs */ -#include "nv50.h" -#include "channv50.h" +#include "priv.h" +#include "cgrp.h" +#include "chan.h" +#include "runl.h" + +#include <core/ramht.h> +#include <subdev/timer.h> + +#include <nvif/class.h> static void -g84_fifo_uevent_fini(struct nvkm_fifo *fifo) +g84_chan_bind(struct nvkm_chan *chan) { - struct nvkm_device *device = fifo->engine.subdev.device; - nvkm_mask(device, 0x002140, 0x40000000, 0x00000000); + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; + + nvkm_wr32(device, 0x002600 + (chan->id * 4), chan->ramfc->addr >> 8); } -static void -g84_fifo_uevent_init(struct nvkm_fifo *fifo) +static int +g84_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv) { - struct nvkm_device *device = fifo->engine.subdev.device; - nvkm_mask(device, 0x002140, 0x40000000, 0x40000000); + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; + const u32 limit2 = ilog2(length / 8); + int ret; + + ret = nvkm_gpuobj_new(device, 0x0200, 0, true, chan->inst, &chan->eng); + if (ret) + return ret; + + ret = nvkm_gpuobj_new(device, 0x4000, 0, false, chan->inst, &chan->pgd); + if (ret) + return ret; + + ret = nvkm_gpuobj_new(device, 0x1000, 0x400, true, chan->inst, &chan->cache); + if (ret) + return ret; + + ret = nvkm_gpuobj_new(device, 0x100, 0x100, true, chan->inst, &chan->ramfc); + if (ret) + return ret; + + ret = nvkm_ramht_new(device, 0x8000, 16, chan->inst, &chan->ramht); + if (ret) + return ret; + + nvkm_kmap(chan->ramfc); + nvkm_wo32(chan->ramfc, 0x3c, 0x403f6078); + nvkm_wo32(chan->ramfc, 0x44, 0x01003fff); + nvkm_wo32(chan->ramfc, 0x48, chan->push->node->offset >> 4); + nvkm_wo32(chan->ramfc, 0x50, lower_32_bits(offset)); + nvkm_wo32(chan->ramfc, 0x54, upper_32_bits(offset) | (limit2 << 16)); + nvkm_wo32(chan->ramfc, 0x60, 0x7fffffff); + nvkm_wo32(chan->ramfc, 0x78, 0x00000000); + nvkm_wo32(chan->ramfc, 0x7c, 0x30000000 | devm); + nvkm_wo32(chan->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) | + (4 << 24) /* SEARCH_FULL */ | + 
(chan->ramht->gpuobj->node->offset >> 4)); + nvkm_wo32(chan->ramfc, 0x88, chan->cache->addr >> 10); + nvkm_wo32(chan->ramfc, 0x98, chan->inst->addr >> 12); + nvkm_done(chan->ramfc); + return 0; } -static struct nvkm_engine * -g84_fifo_id_engine(struct nvkm_fifo *fifo, int engi) +static const struct nvkm_chan_func_ramfc +g84_chan_ramfc = { + .write = g84_chan_ramfc_write, + .ctxdma = true, + .devm = 0xfff, +}; + +const struct nvkm_chan_func +g84_chan = { + .inst = &nv50_chan_inst, + .userd = &nv50_chan_userd, + .ramfc = &g84_chan_ramfc, + .bind = g84_chan_bind, + .unbind = nv50_chan_unbind, + .start = nv50_chan_start, + .stop = nv50_chan_stop, +}; + +static void +g84_ectx_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan) { - struct nvkm_device *device = fifo->engine.subdev.device; - struct nvkm_engine *engine; - enum nvkm_subdev_type type; - - switch (engi) { - case G84_FIFO_ENGN_SW : type = NVKM_ENGINE_SW; break; - case G84_FIFO_ENGN_GR : type = NVKM_ENGINE_GR; break; - case G84_FIFO_ENGN_MPEG : - if ((engine = nvkm_device_engine(device, NVKM_ENGINE_MSPPP, 0))) - return engine; - type = NVKM_ENGINE_MPEG; - break; - case G84_FIFO_ENGN_ME : - if ((engine = nvkm_device_engine(device, NVKM_ENGINE_CE, 0))) - return engine; - type = NVKM_ENGINE_ME; - break; - case G84_FIFO_ENGN_VP : - if ((engine = nvkm_device_engine(device, NVKM_ENGINE_MSPDEC, 0))) - return engine; - type = NVKM_ENGINE_VP; - break; - case G84_FIFO_ENGN_CIPHER: - if ((engine = nvkm_device_engine(device, NVKM_ENGINE_VIC, 0))) - return engine; - if ((engine = nvkm_device_engine(device, NVKM_ENGINE_SEC, 0))) - return engine; - type = NVKM_ENGINE_CIPHER; - break; - case G84_FIFO_ENGN_BSP : - if ((engine = nvkm_device_engine(device, NVKM_ENGINE_MSVLD, 0))) - return engine; - type = NVKM_ENGINE_BSP; - break; - case G84_FIFO_ENGN_DMA : type = NVKM_ENGINE_DMAOBJ; break; + struct nvkm_subdev *subdev = &chan->cgrp->runl->fifo->engine.subdev; + struct nvkm_device *device = subdev->device; + u64 start = 0, limit = 0; + u32 flags = 0, ptr0, save; + + switch (engn->engine->subdev.type) { + case NVKM_ENGINE_GR : ptr0 = 0x0020; break; + case NVKM_ENGINE_VP : + case NVKM_ENGINE_MSPDEC: ptr0 = 0x0040; break; + case NVKM_ENGINE_MPEG : + case NVKM_ENGINE_MSPPP : ptr0 = 0x0060; break; + case NVKM_ENGINE_BSP : + case NVKM_ENGINE_MSVLD : ptr0 = 0x0080; break; + case NVKM_ENGINE_CIPHER: + case NVKM_ENGINE_SEC : ptr0 = 0x00a0; break; + case NVKM_ENGINE_CE : ptr0 = 0x00c0; break; default: WARN_ON(1); - return NULL; + return; + } + + if (!cctx) { + save = nvkm_mask(device, 0x002520, 0x0000003f, BIT(engn->id - 1)); + nvkm_wr32(device, 0x0032fc, chan->inst->addr >> 12); + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x0032fc) != 0xffffffff) + break; + ); + nvkm_wr32(device, 0x002520, save); + } else { + flags = 0x00190000; + start = cctx->vctx->inst->addr; + limit = start + cctx->vctx->inst->size - 1; } - return nvkm_device_engine(fifo->engine.subdev.device, type, 0); + nvkm_kmap(chan->eng); + nvkm_wo32(chan->eng, ptr0 + 0x00, flags); + nvkm_wo32(chan->eng, ptr0 + 0x04, lower_32_bits(limit)); + nvkm_wo32(chan->eng, ptr0 + 0x08, lower_32_bits(start)); + nvkm_wo32(chan->eng, ptr0 + 0x0c, upper_32_bits(limit) << 24 | + lower_32_bits(start)); + nvkm_wo32(chan->eng, ptr0 + 0x10, 0x00000000); + nvkm_wo32(chan->eng, ptr0 + 0x14, 0x00000000); + nvkm_done(chan->eng); } +const struct nvkm_engn_func +g84_engn = { + .bind = g84_ectx_bind, + .ramht_add = nv50_eobj_ramht_add, + .ramht_del = nv50_eobj_ramht_del, +}; + +static void 
+g84_fifo_nonstall_block(struct nvkm_event *event, int type, int index) +{ + struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event); + unsigned long flags; + + spin_lock_irqsave(&fifo->lock, flags); + nvkm_mask(fifo->engine.subdev.device, 0x002140, 0x40000000, 0x00000000); + spin_unlock_irqrestore(&fifo->lock, flags); +} + +static void +g84_fifo_nonstall_allow(struct nvkm_event *event, int type, int index) +{ + struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event); + unsigned long flags; + + spin_lock_irqsave(&fifo->lock, flags); + nvkm_mask(fifo->engine.subdev.device, 0x002140, 0x40000000, 0x40000000); + spin_unlock_irqrestore(&fifo->lock, flags); +} + +const struct nvkm_event_func +g84_fifo_nonstall = { + .init = g84_fifo_nonstall_allow, + .fini = g84_fifo_nonstall_block, +}; + static int -g84_fifo_engine_id(struct nvkm_fifo *base, struct nvkm_engine *engine) +g84_fifo_runl_ctor(struct nvkm_fifo *fifo) { - switch (engine->subdev.type) { - case NVKM_ENGINE_SW : return G84_FIFO_ENGN_SW; - case NVKM_ENGINE_GR : return G84_FIFO_ENGN_GR; - case NVKM_ENGINE_MPEG : - case NVKM_ENGINE_MSPPP : return G84_FIFO_ENGN_MPEG; - case NVKM_ENGINE_CE : return G84_FIFO_ENGN_CE0; - case NVKM_ENGINE_VP : - case NVKM_ENGINE_MSPDEC: return G84_FIFO_ENGN_VP; - case NVKM_ENGINE_CIPHER: - case NVKM_ENGINE_SEC : return G84_FIFO_ENGN_CIPHER; - case NVKM_ENGINE_BSP : - case NVKM_ENGINE_MSVLD : return G84_FIFO_ENGN_BSP; - case NVKM_ENGINE_DMAOBJ: return G84_FIFO_ENGN_DMA; - default: - WARN_ON(1); - return -1; - } + struct nvkm_runl *runl; + + runl = nvkm_runl_new(fifo, 0, 0, 0); + if (IS_ERR(runl)) + return PTR_ERR(runl); + + nvkm_runl_add(runl, 0, fifo->func->engn_sw, NVKM_ENGINE_SW, 0); + nvkm_runl_add(runl, 0, fifo->func->engn_sw, NVKM_ENGINE_DMAOBJ, 0); + nvkm_runl_add(runl, 1, fifo->func->engn, NVKM_ENGINE_GR, 0); + nvkm_runl_add(runl, 2, fifo->func->engn, NVKM_ENGINE_MPEG, 0); + nvkm_runl_add(runl, 3, fifo->func->engn, NVKM_ENGINE_ME, 0); + nvkm_runl_add(runl, 4, fifo->func->engn, NVKM_ENGINE_VP, 0); + nvkm_runl_add(runl, 5, fifo->func->engn, NVKM_ENGINE_CIPHER, 0); + nvkm_runl_add(runl, 6, fifo->func->engn, NVKM_ENGINE_BSP, 0); + return 0; } static const struct nvkm_fifo_func g84_fifo = { - .dtor = nv50_fifo_dtor, - .oneinit = nv50_fifo_oneinit, + .chid_nr = nv50_fifo_chid_nr, + .chid_ctor = nv50_fifo_chid_ctor, + .runl_ctor = g84_fifo_runl_ctor, .init = nv50_fifo_init, .intr = nv04_fifo_intr, - .engine_id = g84_fifo_engine_id, - .id_engine = g84_fifo_id_engine, .pause = nv04_fifo_pause, .start = nv04_fifo_start, - .uevent_init = g84_fifo_uevent_init, - .uevent_fini = g84_fifo_uevent_fini, - .chan = { - &g84_fifo_gpfifo_oclass, - NULL - }, + .nonstall = &g84_fifo_nonstall, + .runl = &nv50_runl, + .engn = &g84_engn, + .engn_sw = &nv50_engn_sw, + .cgrp = {{ }, &nv04_cgrp }, + .chan = {{ 0, 0, G82_CHANNEL_GPFIFO }, &g84_chan }, }; int g84_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return nv50_fifo_new_(&g84_fifo, device, type, inst, pfifo); + return nvkm_fifo_new_(&g84_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g98.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g98.c new file mode 100644 index 000000000000..c6ca050c38bf --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g98.c @@ -0,0 +1,70 @@ +/* + * Copyright 2021 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" +#include "chan.h" +#include "runl.h" + +#include <nvif/class.h> + +static int +g98_fifo_runl_ctor(struct nvkm_fifo *fifo) +{ + struct nvkm_runl *runl; + + runl = nvkm_runl_new(fifo, 0, 0, 0); + if (IS_ERR(runl)) + return PTR_ERR(runl); + + nvkm_runl_add(runl, 0, fifo->func->engn_sw, NVKM_ENGINE_SW, 0); + nvkm_runl_add(runl, 0, fifo->func->engn_sw, NVKM_ENGINE_DMAOBJ, 0); + nvkm_runl_add(runl, 1, fifo->func->engn, NVKM_ENGINE_GR, 0); + nvkm_runl_add(runl, 2, fifo->func->engn, NVKM_ENGINE_MSPPP, 0); + nvkm_runl_add(runl, 3, fifo->func->engn, NVKM_ENGINE_CE, 0); + nvkm_runl_add(runl, 4, fifo->func->engn, NVKM_ENGINE_MSPDEC, 0); + nvkm_runl_add(runl, 5, fifo->func->engn, NVKM_ENGINE_SEC, 0); + nvkm_runl_add(runl, 6, fifo->func->engn, NVKM_ENGINE_MSVLD, 0); + return 0; +} + +static const struct nvkm_fifo_func +g98_fifo = { + .chid_nr = nv50_fifo_chid_nr, + .chid_ctor = nv50_fifo_chid_ctor, + .runl_ctor = g98_fifo_runl_ctor, + .init = nv50_fifo_init, + .intr = nv04_fifo_intr, + .pause = nv04_fifo_pause, + .start = nv04_fifo_start, + .nonstall = &g84_fifo_nonstall, + .runl = &nv50_runl, + .engn = &g84_engn, + .engn_sw = &nv50_engn_sw, + .cgrp = {{ }, &nv04_cgrp }, + .chan = {{ 0, 0, G82_CHANNEL_GPFIFO }, &g84_chan }, +}; + +int +g98_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, + struct nvkm_fifo **pfifo) +{ + return nvkm_fifo_new_(&g98_fifo, device, type, inst, pfifo); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c new file mode 100644 index 000000000000..12a5d99d5e77 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c @@ -0,0 +1,550 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" +#include "cgrp.h" +#include "chan.h" +#include "chid.h" +#include "runl.h" +#include "runq.h" + +#include <core/gpuobj.h> +#include <subdev/top.h> +#include <subdev/vfn.h> + +#include <nvif/class.h> + +/*TODO: allocate? */ +#define GA100_FIFO_NONSTALL_VECTOR 0 + +static u32 +ga100_chan_doorbell_handle(struct nvkm_chan *chan) +{ + return (chan->cgrp->runl->doorbell << 16) | chan->id; +} + +static void +ga100_chan_stop(struct nvkm_chan *chan) +{ + struct nvkm_runl *runl = chan->cgrp->runl; + + nvkm_wr32(runl->fifo->engine.subdev.device, runl->chan + (chan->id * 4), 0x00000003); +} + +static void +ga100_chan_start(struct nvkm_chan *chan) +{ + struct nvkm_runl *runl = chan->cgrp->runl; + struct nvkm_device *device = runl->fifo->engine.subdev.device; + const int gfid = 0; + + nvkm_wr32(device, runl->chan + (chan->id * 4), 0x00000002); + nvkm_wr32(device, runl->addr + 0x0090, (gfid << 16) | chan->id); /* INTERNAL_DOORBELL. */ +} + +static void +ga100_chan_unbind(struct nvkm_chan *chan) +{ + struct nvkm_runl *runl = chan->cgrp->runl; + + nvkm_wr32(runl->fifo->engine.subdev.device, runl->chan + (chan->id * 4), 0xffffffff); +} + +static int +ga100_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv) +{ + const u32 limit2 = ilog2(length / 8); + + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x010, 0x0000face); + nvkm_wo32(chan->inst, 0x030, 0x7ffff902); + nvkm_wo32(chan->inst, 0x048, lower_32_bits(offset)); + nvkm_wo32(chan->inst, 0x04c, upper_32_bits(offset) | (limit2 << 16)); + nvkm_wo32(chan->inst, 0x084, 0x20400000); + nvkm_wo32(chan->inst, 0x094, 0x30000000 | devm); + nvkm_wo32(chan->inst, 0x0e4, priv ? 0x00000020 : 0x00000000); + nvkm_wo32(chan->inst, 0x0e8, chan->id); + nvkm_wo32(chan->inst, 0x0f4, 0x00001000 | (priv ? 
0x00000100 : 0x00000000)); + nvkm_wo32(chan->inst, 0x0f8, 0x80000000 | GA100_FIFO_NONSTALL_VECTOR); + nvkm_mo32(chan->inst, 0x218, 0x00000000, 0x00000000); + nvkm_done(chan->inst); + return 0; +} + +static const struct nvkm_chan_func_ramfc +ga100_chan_ramfc = { + .write = ga100_chan_ramfc_write, + .devm = 0xfff, + .priv = true, +}; + +const struct nvkm_chan_func +ga100_chan = { + .inst = &gf100_chan_inst, + .userd = &gv100_chan_userd, + .ramfc = &ga100_chan_ramfc, + .unbind = ga100_chan_unbind, + .start = ga100_chan_start, + .stop = ga100_chan_stop, + .preempt = gk110_chan_preempt, + .doorbell_handle = ga100_chan_doorbell_handle, +}; + +static void +ga100_cgrp_preempt(struct nvkm_cgrp *cgrp) +{ + struct nvkm_runl *runl = cgrp->runl; + + nvkm_wr32(runl->fifo->engine.subdev.device, runl->addr + 0x098, 0x01000000 | cgrp->id); +} + +const struct nvkm_cgrp_func +ga100_cgrp = { + .preempt = ga100_cgrp_preempt, +}; + +static int +ga100_engn_cxid(struct nvkm_engn *engn, bool *cgid) +{ + struct nvkm_runl *runl = engn->runl; + struct nvkm_device *device = runl->fifo->engine.subdev.device; + u32 stat = nvkm_rd32(device, runl->addr + 0x200 + engn->id * 0x40); + + ENGN_DEBUG(engn, "status %08x", stat); + *cgid = true; + + switch ((stat & 0x0000e000) >> 13) { + case 0 /* INVALID */: return -ENODEV; + case 1 /* VALID */: + case 5 /* SAVE */: return (stat & 0x00000fff); + case 6 /* LOAD */: return (stat & 0x0fff0000) >> 16; + case 7 /* SWITCH */: + if (nvkm_engine_chsw_load(engn->engine)) + return (stat & 0x0fff0000) >> 16; + return (stat & 0x00000fff); + default: + WARN_ON(1); + break; + } + + return -ENODEV; +} + +const struct nvkm_engn_func +ga100_engn = { + .cxid = ga100_engn_cxid, + .ctor = gk104_ectx_ctor, + .bind = gv100_ectx_bind, +}; + +const struct nvkm_engn_func +ga100_engn_ce = { + .cxid = ga100_engn_cxid, + .ctor = gv100_ectx_ce_ctor, + .bind = gv100_ectx_ce_bind, +}; + +static bool +ga100_runq_idle(struct nvkm_runq *runq) +{ + struct nvkm_device *device = runq->fifo->engine.subdev.device; + + return !(nvkm_rd32(device, 0x04015c + (runq->id * 0x800)) & 0x0000e000); +} + +static bool +ga100_runq_intr_1(struct nvkm_runq *runq, struct nvkm_runl *runl) +{ + struct nvkm_device *device = runq->fifo->engine.subdev.device; + u32 inte = nvkm_rd32(device, 0x040180 + (runq->id * 0x800)); + u32 intr = nvkm_rd32(device, 0x040148 + (runq->id * 0x800)); + u32 stat = intr & inte; + + if (!stat) { + RUNQ_DEBUG(runq, "inte1 %08x %08x", intr, inte); + return false; + } + + if (stat & 0x80000000) { + u32 chid = nvkm_rd32(device, 0x040120 + (runq->id * 0x0800)) & runl->chid->mask; + struct nvkm_chan *chan; + unsigned long flags; + + RUNQ_ERROR(runq, "CTXNOTVALID chid:%d", chid); + chan = nvkm_runl_chan_get_chid(runl, chid, &flags); + if (chan) { + nvkm_chan_error(chan, true); + nvkm_chan_put(&chan, flags); + } + + nvkm_mask(device, 0x0400ac + (runq->id * 0x800), 0x00030000, 0x00030000); + stat &= ~0x80000000; + } + + if (stat) { + RUNQ_ERROR(runq, "intr1 %08x", stat); + nvkm_wr32(device, 0x0401a0 + (runq->id * 0x800), stat); + } + + nvkm_wr32(device, 0x040148 + (runq->id * 0x800), intr); + return true; +} + +static bool +ga100_runq_intr_0(struct nvkm_runq *runq, struct nvkm_runl *runl) +{ + struct nvkm_device *device = runq->fifo->engine.subdev.device; + u32 inte = nvkm_rd32(device, 0x040170 + (runq->id * 0x800)); + u32 intr = nvkm_rd32(device, 0x040108 + (runq->id * 0x800)); + u32 stat = intr & inte; + + if (!stat) { + RUNQ_DEBUG(runq, "inte0 %08x %08x", intr, inte); + return false; + } + + /*TODO: expand on 
this when fixing up gf100's version. */ + if (stat & 0xc6afe000) { + u32 chid = nvkm_rd32(device, 0x040120 + (runq->id * 0x0800)) & runl->chid->mask; + struct nvkm_chan *chan; + unsigned long flags; + + RUNQ_ERROR(runq, "intr0 %08x", stat); + chan = nvkm_runl_chan_get_chid(runl, chid, &flags); + if (chan) { + nvkm_chan_error(chan, true); + nvkm_chan_put(&chan, flags); + } + + stat &= ~0xc6afe000; + } + + if (stat) { + RUNQ_ERROR(runq, "intr0 %08x", stat); + nvkm_wr32(device, 0x040190 + (runq->id * 0x800), stat); + } + + nvkm_wr32(device, 0x040108 + (runq->id * 0x800), intr); + return true; +} + +static bool +ga100_runq_intr(struct nvkm_runq *runq, struct nvkm_runl *runl) +{ + bool intr0 = ga100_runq_intr_0(runq, runl); + bool intr1 = ga100_runq_intr_1(runq, runl); + + return intr0 || intr1; +} + +static void +ga100_runq_init(struct nvkm_runq *runq) +{ + struct nvkm_device *device = runq->fifo->engine.subdev.device; + + nvkm_wr32(device, 0x040108 + (runq->id * 0x800), 0xffffffff); /* INTR_0 */ + nvkm_wr32(device, 0x040148 + (runq->id * 0x800), 0xffffffff); /* INTR_1 */ + nvkm_wr32(device, 0x040170 + (runq->id * 0x800), 0xffffffff); /* INTR_0_EN_SET_TREE */ + nvkm_wr32(device, 0x040180 + (runq->id * 0x800), 0xffffffff); /* INTR_1_EN_SET_TREE */ +} + +const struct nvkm_runq_func +ga100_runq = { + .init = ga100_runq_init, + .intr = ga100_runq_intr, + .idle = ga100_runq_idle, +}; + +static bool +ga100_runl_preempt_pending(struct nvkm_runl *runl) +{ + return nvkm_rd32(runl->fifo->engine.subdev.device, runl->addr + 0x098) & 0x00100000; +} + +static void +ga100_runl_preempt(struct nvkm_runl *runl) +{ + nvkm_wr32(runl->fifo->engine.subdev.device, runl->addr + 0x098, 0x00000000); +} + +static void +ga100_runl_allow(struct nvkm_runl *runl, u32 engm) +{ + nvkm_mask(runl->fifo->engine.subdev.device, runl->addr + 0x094, 0x00000001, 0x00000000); +} + +static void +ga100_runl_block(struct nvkm_runl *runl, u32 engm) +{ + nvkm_mask(runl->fifo->engine.subdev.device, runl->addr + 0x094, 0x00000001, 0x00000001); +} + +static bool +ga100_runl_pending(struct nvkm_runl *runl) +{ + struct nvkm_device *device = runl->fifo->engine.subdev.device; + + return nvkm_rd32(device, runl->addr + 0x08c) & 0x00008000; +} + +static void +ga100_runl_commit(struct nvkm_runl *runl, struct nvkm_memory *memory, u32 start, int count) +{ + struct nvkm_device *device = runl->fifo->engine.subdev.device; + u64 addr = nvkm_memory_addr(memory) + start; + + nvkm_wr32(device, runl->addr + 0x080, lower_32_bits(addr)); + nvkm_wr32(device, runl->addr + 0x084, upper_32_bits(addr)); + nvkm_wr32(device, runl->addr + 0x088, count); +} + +static irqreturn_t +ga100_runl_intr(struct nvkm_inth *inth) +{ + struct nvkm_runl *runl = container_of(inth, typeof(*runl), inth); + struct nvkm_engn *engn; + struct nvkm_device *device = runl->fifo->engine.subdev.device; + u32 inte = nvkm_rd32(device, runl->addr + 0x120); + u32 intr = nvkm_rd32(device, runl->addr + 0x100); + u32 stat = intr & inte; + u32 info; + + if (!stat) { + RUNL_DEBUG(runl, "inte %08x %08x", intr, inte); + return IRQ_NONE; + } + + if (stat & 0x00000007) { + nvkm_runl_foreach_engn_cond(engn, runl, stat & BIT(engn->id)) { + info = nvkm_rd32(device, runl->addr + 0x224 + (engn->id * 0x40)); + + tu102_fifo_intr_ctxsw_timeout_info(engn, info); + + nvkm_wr32(device, runl->addr + 0x100, BIT(engn->id)); + stat &= ~BIT(engn->id); + } + } + + if (stat & 0x00000300) { + nvkm_wr32(device, runl->addr + 0x100, stat & 0x00000300); + stat &= ~0x00000300; + } + + if (stat & 0x00010000) { + if (runl->runq[0]) { 
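+ /* PBDMA 0 owns bit 16 of this runlist's INTR_0; ga100_runq_intr() above decodes and acks the runq's own INTR_0/INTR_1 leaf registers, and returns true once the interrupt has been claimed. */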
+ if (runl->runq[0]->func->intr(runl->runq[0], runl)) + stat &= ~0x00010000; + } + } + + if (stat & 0x00020000) { + if (runl->runq[1]) { + if (runl->runq[1]->func->intr(runl->runq[1], runl)) + stat &= ~0x00020000; + } + } + + if (stat) { + RUNL_ERROR(runl, "intr %08x", stat); + nvkm_wr32(device, runl->addr + 0x140, stat); + } + + nvkm_wr32(device, runl->addr + 0x180, 0x00000001); + return IRQ_HANDLED; +} + +static void +ga100_runl_fini(struct nvkm_runl *runl) +{ + nvkm_mask(runl->fifo->engine.subdev.device, runl->addr + 0x300, 0x80000000, 0x00000000); + nvkm_inth_block(&runl->inth); +} + +static void +ga100_runl_init(struct nvkm_runl *runl) +{ + struct nvkm_fifo *fifo = runl->fifo; + struct nvkm_runq *runq; + struct nvkm_device *device = fifo->engine.subdev.device; + int i; + + /* Submit NULL runlist and preempt. */ + nvkm_wr32(device, runl->addr + 0x088, 0x00000000); + runl->func->preempt(runl); + + /* Enable doorbell. */ + nvkm_mask(device, runl->addr + 0x300, 0x80000000, 0x80000000); + + nvkm_wr32(device, runl->addr + 0x100, 0xffffffff); /* INTR_0 */ + nvkm_wr32(device, runl->addr + 0x140, 0xffffffff); /* INTR_0_EN_CLEAR_TREE(0) */ + nvkm_wr32(device, runl->addr + 0x120, 0x000f1307); /* INTR_0_EN_SET_TREE(0) */ + nvkm_wr32(device, runl->addr + 0x148, 0xffffffff); /* INTR_0_EN_CLEAR_TREE(1) */ + nvkm_wr32(device, runl->addr + 0x128, 0x00000000); /* INTR_0_EN_SET_TREE(1) */ + + /* Init PBDMA(s). */ + for (i = 0; i < runl->runq_nr; i++) { + runq = runl->runq[i]; + runq->func->init(runq); + } + + nvkm_inth_allow(&runl->inth); +} + +const struct nvkm_runl_func +ga100_runl = { + .init = ga100_runl_init, + .fini = ga100_runl_fini, + .size = 16, + .update = nv50_runl_update, + .insert_cgrp = gv100_runl_insert_cgrp, + .insert_chan = gv100_runl_insert_chan, + .commit = ga100_runl_commit, + .wait = nv50_runl_wait, + .pending = ga100_runl_pending, + .block = ga100_runl_block, + .allow = ga100_runl_allow, + .preempt = ga100_runl_preempt, + .preempt_pending = ga100_runl_preempt_pending, +}; + +static int +ga100_runl_new(struct nvkm_fifo *fifo, int id, u32 addr, struct nvkm_runl **prunl) +{ + struct nvkm_device *device = fifo->engine.subdev.device; + struct nvkm_runl *runl; + u32 chcfg = nvkm_rd32(device, addr + 0x004); + u32 chnum = 1 << (chcfg & 0x0000000f); + u32 chaddr = (chcfg & 0xfffffff0); + u32 dbcfg = nvkm_rd32(device, addr + 0x008); + u32 vector = nvkm_rd32(device, addr + 0x160); + int i, ret; + + runl = *prunl = nvkm_runl_new(fifo, id, addr, chnum); + if (IS_ERR(runl)) + return PTR_ERR(runl); + + for (i = 0; i < 2; i++) { + u32 pbcfg = nvkm_rd32(device, addr + 0x010 + (i * 0x04)); + if (pbcfg & 0x80000000) { + runl->runq[runl->runq_nr] = + nvkm_runq_new(fifo, ((pbcfg & 0x03fffc00) - 0x040000) / 0x800); + if (!runl->runq[runl->runq_nr]) + return -ENOMEM; + + runl->runq_nr++; + } + } + + ret = nvkm_inth_add(&device->vfn->intr, vector & 0x00000fff, NVKM_INTR_PRIO_NORMAL, + &fifo->engine.subdev, ga100_runl_intr, &runl->inth); + if (ret) + return ret; + + runl->chan = chaddr; + runl->doorbell = dbcfg >> 16; + return 0; +} + +static irqreturn_t +ga100_fifo_nonstall_intr(struct nvkm_inth *inth) +{ + struct nvkm_fifo *fifo = container_of(inth, typeof(*fifo), nonstall.intr); + + nvkm_event_ntfy(&fifo->nonstall.event, 0, NVKM_FIFO_NONSTALL_EVENT); + return IRQ_HANDLED; +} + +static void +ga100_fifo_nonstall_block(struct nvkm_event *event, int type, int index) +{ + struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event); + + nvkm_inth_block(&fifo->nonstall.intr); +} + +static void 
+ga100_fifo_nonstall_allow(struct nvkm_event *event, int type, int index) +{ + struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event); + + nvkm_inth_allow(&fifo->nonstall.intr); +} + +const struct nvkm_event_func +ga100_fifo_nonstall = { + .init = ga100_fifo_nonstall_allow, + .fini = ga100_fifo_nonstall_block, +}; + +int +ga100_fifo_nonstall_ctor(struct nvkm_fifo *fifo) +{ + return nvkm_inth_add(&fifo->engine.subdev.device->vfn->intr, GA100_FIFO_NONSTALL_VECTOR, + NVKM_INTR_PRIO_NORMAL, &fifo->engine.subdev, ga100_fifo_nonstall_intr, + &fifo->nonstall.intr); +} + +int +ga100_fifo_runl_ctor(struct nvkm_fifo *fifo) +{ + struct nvkm_device *device = fifo->engine.subdev.device; + struct nvkm_top_device *tdev; + struct nvkm_runl *runl; + int id = 0, ret; + + nvkm_list_foreach(tdev, &device->top->device, head, tdev->runlist >= 0) { + runl = nvkm_runl_get(fifo, -1, tdev->runlist); + if (!runl) { + ret = ga100_runl_new(fifo, id++, tdev->runlist, &runl); + if (ret) + return ret; + } + + if (tdev->engine < 0) + continue; + + nvkm_runl_add(runl, tdev->engine, (tdev->type == NVKM_ENGINE_CE) ? + fifo->func->engn_ce : fifo->func->engn, tdev->type, tdev->inst); + } + + return 0; +} + +static const struct nvkm_fifo_func +ga100_fifo = { + .runl_ctor = ga100_fifo_runl_ctor, + .mmu_fault = &tu102_fifo_mmu_fault, + .nonstall_ctor = ga100_fifo_nonstall_ctor, + .nonstall = &ga100_fifo_nonstall, + .runl = &ga100_runl, + .runq = &ga100_runq, + .engn = &ga100_engn, + .engn_ce = &ga100_engn_ce, + .cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A }, &ga100_cgrp, .force = true }, + .chan = {{ 0, 0, AMPERE_CHANNEL_GPFIFO_A }, &ga100_chan }, +}; + +int +ga100_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, + struct nvkm_fifo **pfifo) +{ + return nvkm_fifo_new_(&ga100_fifo, device, type, inst, pfifo); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c index c630dbd2911a..2cdf5da339b6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c @@ -19,293 +19,27 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
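[Editor's note: a hedged sketch of the GA100 scheduling token assembled by ga100_chan_doorbell_handle() above. Each runlist advertises its doorbell index in its config registers (read as dbcfg >> 16 in ga100_runl_new()); combining that index with a channel ID yields the value the kernel writes to the runlist's INTERNAL_DOORBELL register in ga100_chan_start(), and, as far as this diff shows, the same token userspace rings through the usermode region. The ex_* name is hypothetical.]

#include <stdint.h>

/* Runlist doorbell index in the high half, channel ID in the low half. */
static inline uint32_t
ex_doorbell_token(uint32_t runl_doorbell, uint32_t chid)
{
	return (runl_doorbell << 16) | chid;
}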
*/ -#define ga102_fifo(p) container_of((p), struct ga102_fifo, base.engine) -#define ga102_chan(p) container_of((p), struct ga102_chan, object) -#include <engine/fifo.h> -#include "user.h" +#include "priv.h" -#include <core/memory.h> -#include <subdev/mmu.h> -#include <subdev/timer.h> -#include <subdev/top.h> - -#include <nvif/cl0080.h> -#include <nvif/clc36f.h> #include <nvif/class.h> -struct ga102_fifo { - struct nvkm_fifo base; -}; - -struct ga102_chan { - struct nvkm_object object; - - struct { - u32 runl; - u32 chan; - } ctrl; - - struct nvkm_memory *mthd; - struct nvkm_memory *inst; - struct nvkm_memory *user; - struct nvkm_memory *runl; - - struct nvkm_vmm *vmm; -}; - -static int -ga102_chan_sclass(struct nvkm_object *object, int index, struct nvkm_oclass *oclass) -{ - if (index == 0) { - oclass->ctor = nvkm_object_new; - oclass->base = (struct nvkm_sclass) { -1, -1, AMPERE_DMA_COPY_B }; - return 0; - } - - return -EINVAL; -} - -static int -ga102_chan_map(struct nvkm_object *object, void *argv, u32 argc, - enum nvkm_object_map *type, u64 *addr, u64 *size) -{ - struct ga102_chan *chan = ga102_chan(object); - struct nvkm_device *device = chan->object.engine->subdev.device; - u64 bar2 = nvkm_memory_bar2(chan->user); - - if (bar2 == ~0ULL) - return -EFAULT; - - *type = NVKM_OBJECT_MAP_IO; - *addr = device->func->resource_addr(device, 3) + bar2; - *size = 0x1000; - return 0; -} - -static int -ga102_chan_fini(struct nvkm_object *object, bool suspend) -{ - struct ga102_chan *chan = ga102_chan(object); - struct nvkm_device *device = chan->object.engine->subdev.device; - - nvkm_wr32(device, chan->ctrl.chan, 0x00000003); - - nvkm_wr32(device, chan->ctrl.runl + 0x098, 0x01000000); - nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, chan->ctrl.runl + 0x098) & 0x00100000)) - break; - ); - - nvkm_wr32(device, chan->ctrl.runl + 0x088, 0); - - nvkm_wr32(device, chan->ctrl.chan, 0xffffffff); - return 0; -} - -static int -ga102_chan_init(struct nvkm_object *object) -{ - struct ga102_chan *chan = ga102_chan(object); - struct nvkm_device *device = chan->object.engine->subdev.device; - - nvkm_mask(device, chan->ctrl.runl + 0x300, 0x80000000, 0x80000000); - - nvkm_wr32(device, chan->ctrl.runl + 0x080, lower_32_bits(nvkm_memory_addr(chan->runl))); - nvkm_wr32(device, chan->ctrl.runl + 0x084, upper_32_bits(nvkm_memory_addr(chan->runl))); - nvkm_wr32(device, chan->ctrl.runl + 0x088, 2); - - nvkm_wr32(device, chan->ctrl.chan, 0x00000002); - nvkm_wr32(device, chan->ctrl.runl + 0x0090, 0); - return 0; -} - -static void * -ga102_chan_dtor(struct nvkm_object *object) -{ - struct ga102_chan *chan = ga102_chan(object); - - if (chan->vmm) { - nvkm_vmm_part(chan->vmm, chan->inst); - nvkm_vmm_unref(&chan->vmm); - } - - nvkm_memory_unref(&chan->runl); - nvkm_memory_unref(&chan->user); - nvkm_memory_unref(&chan->inst); - nvkm_memory_unref(&chan->mthd); - return chan; -} - -static const struct nvkm_object_func -ga102_chan = { - .dtor = ga102_chan_dtor, - .init = ga102_chan_init, - .fini = ga102_chan_fini, - .map = ga102_chan_map, - .sclass = ga102_chan_sclass, -}; - -static int -ga102_chan_new(struct nvkm_device *device, - const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject) -{ - struct volta_channel_gpfifo_a_v0 *args = argv; - struct nvkm_top_device *tdev; - struct nvkm_vmm *vmm; - struct ga102_chan *chan; - int ret; - - if (argc != sizeof(*args)) - return -ENOSYS; - - vmm = nvkm_uvmm_search(oclass->client, args->vmm); - if (IS_ERR(vmm)) - return PTR_ERR(vmm); - - if (!(chan = 
kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - - nvkm_object_ctor(&ga102_chan, oclass, &chan->object); - *pobject = &chan->object; - - list_for_each_entry(tdev, &device->top->device, head) { - if (tdev->type == NVKM_ENGINE_CE) { - chan->ctrl.runl = tdev->runlist; - break; - } - } - - if (!chan->ctrl.runl) - return -ENODEV; - - chan->ctrl.chan = nvkm_rd32(device, chan->ctrl.runl + 0x004) & 0xfffffff0; - - args->chid = 0; - args->inst = 0; - args->token = nvkm_rd32(device, chan->ctrl.runl + 0x008) & 0xffff0000; - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->mthd); - if (ret) - return ret; - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->inst); - if (ret) - return ret; - - nvkm_kmap(chan->inst); - nvkm_wo32(chan->inst, 0x010, 0x0000face); - nvkm_wo32(chan->inst, 0x030, 0x7ffff902); - nvkm_wo32(chan->inst, 0x048, lower_32_bits(args->ioffset)); - nvkm_wo32(chan->inst, 0x04c, upper_32_bits(args->ioffset) | - (order_base_2(args->ilength / 8) << 16)); - nvkm_wo32(chan->inst, 0x084, 0x20400000); - nvkm_wo32(chan->inst, 0x094, 0x30000001); - nvkm_wo32(chan->inst, 0x0ac, 0x00020000); - nvkm_wo32(chan->inst, 0x0e4, 0x00000000); - nvkm_wo32(chan->inst, 0x0e8, 0); - nvkm_wo32(chan->inst, 0x0f4, 0x00001000); - nvkm_wo32(chan->inst, 0x0f8, 0x10003080); - nvkm_mo32(chan->inst, 0x218, 0x00000000, 0x00000000); - nvkm_wo32(chan->inst, 0x220, lower_32_bits(nvkm_memory_bar2(chan->mthd))); - nvkm_wo32(chan->inst, 0x224, upper_32_bits(nvkm_memory_bar2(chan->mthd))); - nvkm_done(chan->inst); - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->user); - if (ret) - return ret; - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->runl); - if (ret) - return ret; - - nvkm_kmap(chan->runl); - nvkm_wo32(chan->runl, 0x00, 0x80030001); - nvkm_wo32(chan->runl, 0x04, 1); - nvkm_wo32(chan->runl, 0x08, 0); - nvkm_wo32(chan->runl, 0x0c, 0x00000000); - nvkm_wo32(chan->runl, 0x10, lower_32_bits(nvkm_memory_addr(chan->user))); - nvkm_wo32(chan->runl, 0x14, upper_32_bits(nvkm_memory_addr(chan->user))); - nvkm_wo32(chan->runl, 0x18, lower_32_bits(nvkm_memory_addr(chan->inst))); - nvkm_wo32(chan->runl, 0x1c, upper_32_bits(nvkm_memory_addr(chan->inst))); - nvkm_done(chan->runl); - - ret = nvkm_vmm_join(vmm, chan->inst); - if (ret) - return ret; - - chan->vmm = nvkm_vmm_ref(vmm); - return 0; -} - -static const struct nvkm_device_oclass -ga102_chan_oclass = { - .ctor = ga102_chan_new, -}; - -static int -ga102_user_new(struct nvkm_device *device, - const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject) -{ - return tu102_fifo_user_new(oclass, argv, argc, pobject); -} - -static const struct nvkm_device_oclass -ga102_user_oclass = { - .ctor = ga102_user_new, -}; - -static int -ga102_fifo_sclass(struct nvkm_oclass *oclass, int index, const struct nvkm_device_oclass **class) -{ - if (index == 0) { - oclass->base = (struct nvkm_sclass) { -1, -1, VOLTA_USERMODE_A }; - *class = &ga102_user_oclass; - return 0; - } else - if (index == 1) { - oclass->base = (struct nvkm_sclass) { 0, 0, AMPERE_CHANNEL_GPFIFO_B }; - *class = &ga102_chan_oclass; - return 0; - } - - return 2; -} - -static int -ga102_fifo_info(struct nvkm_engine *engine, u64 mthd, u64 *data) -{ - switch (mthd) { - case NV_DEVICE_HOST_CHANNELS: *data = 1; return 0; - default: - break; - } - - return -ENOSYS; -} - -static void * -ga102_fifo_dtor(struct nvkm_engine *engine) -{ - return ga102_fifo(engine); -} - 
-static const struct nvkm_engine_func +static const struct nvkm_fifo_func ga102_fifo = { - .dtor = ga102_fifo_dtor, - .info = ga102_fifo_info, - .base.sclass = ga102_fifo_sclass, + .runl_ctor = ga100_fifo_runl_ctor, + .mmu_fault = &tu102_fifo_mmu_fault, + .nonstall_ctor = ga100_fifo_nonstall_ctor, + .nonstall = &ga100_fifo_nonstall, + .runl = &ga100_runl, + .runq = &ga100_runq, + .engn = &ga100_engn, + .engn_ce = &ga100_engn_ce, + .cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A }, &ga100_cgrp, .force = true }, + .chan = {{ 0, 0, AMPERE_CHANNEL_GPFIFO_B }, &ga100_chan }, }; int ga102_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - struct ga102_fifo *fifo; - - if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL))) - return -ENOMEM; - - nvkm_engine_ctor(&ga102_fifo, device, type, inst, true, &fifo->base.engine); - *pfifo = &fifo->base; - return 0; + return nvkm_fifo_new_(&ga102_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c index 8b4f36b3e34b..5bb65258c36d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c @@ -21,186 +21,456 @@ * * Authors: Ben Skeggs */ -#include "gf100.h" -#include "changf100.h" +#include "priv.h" +#include "cgrp.h" +#include "chan.h" +#include "chid.h" +#include "runl.h" +#include "runq.h" -#include <core/client.h> -#include <core/enum.h> #include <core/gpuobj.h> #include <subdev/bar.h> #include <subdev/fault.h> +#include <subdev/mc.h> +#include <subdev/mmu.h> #include <engine/sw.h> #include <nvif/class.h> +void +gf100_chan_preempt(struct nvkm_chan *chan) +{ + nvkm_wr32(chan->cgrp->runl->fifo->engine.subdev.device, 0x002634, chan->id); +} + static void -gf100_fifo_uevent_init(struct nvkm_fifo *fifo) +gf100_chan_stop(struct nvkm_chan *chan) { - struct nvkm_device *device = fifo->engine.subdev.device; - nvkm_mask(device, 0x002140, 0x80000000, 0x80000000); + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; + + nvkm_mask(device, 0x003004 + (chan->id * 8), 0x00000001, 0x00000000); } static void -gf100_fifo_uevent_fini(struct nvkm_fifo *fifo) +gf100_chan_start(struct nvkm_chan *chan) { + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; + + nvkm_wr32(device, 0x003004 + (chan->id * 8), 0x001f0001); +} + +static void gf100_fifo_intr_engine(struct nvkm_fifo *); + +static void +gf100_chan_unbind(struct nvkm_chan *chan) +{ + struct nvkm_fifo *fifo = chan->cgrp->runl->fifo; struct nvkm_device *device = fifo->engine.subdev.device; - nvkm_mask(device, 0x002140, 0x80000000, 0x00000000); + + /*TODO: Is this cargo-culted, or necessary? RM does *something* here... Why? 
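+	 *
+	 *      Editor's guess, not something this patch states: the call below
+	 *      drains any pending engine/non-stall interrupt units (the
+	 *      0x0025a4 scan in gf100_fifo_intr_engine()) before the channel's
+	 *      bind register is cleared, so a stale event cannot be signalled
+	 *      for a channel that no longer exists.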
*/ + gf100_fifo_intr_engine(fifo); + + nvkm_wr32(device, 0x003000 + (chan->id * 8), 0x00000000); } +static void +gf100_chan_bind(struct nvkm_chan *chan) +{ + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; + + nvkm_wr32(device, 0x003000 + (chan->id * 8), 0xc0000000 | chan->inst->addr >> 12); +} + +static int +gf100_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv) +{ + const u64 userd = nvkm_memory_addr(chan->userd.mem) + chan->userd.base; + const u32 limit2 = ilog2(length / 8); + + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x08, lower_32_bits(userd)); + nvkm_wo32(chan->inst, 0x0c, upper_32_bits(userd)); + nvkm_wo32(chan->inst, 0x10, 0x0000face); + nvkm_wo32(chan->inst, 0x30, 0xfffff902); + nvkm_wo32(chan->inst, 0x48, lower_32_bits(offset)); + nvkm_wo32(chan->inst, 0x4c, upper_32_bits(offset) | (limit2 << 16)); + nvkm_wo32(chan->inst, 0x54, 0x00000002); + nvkm_wo32(chan->inst, 0x84, 0x20400000); + nvkm_wo32(chan->inst, 0x94, 0x30000000 | devm); + nvkm_wo32(chan->inst, 0x9c, 0x00000100); + nvkm_wo32(chan->inst, 0xa4, 0x1f1f1f1f); + nvkm_wo32(chan->inst, 0xa8, 0x1f1f1f1f); + nvkm_wo32(chan->inst, 0xac, 0x0000001f); + nvkm_wo32(chan->inst, 0xb8, 0xf8000000); + nvkm_wo32(chan->inst, 0xf8, 0x10003080); /* 0x002310 */ + nvkm_wo32(chan->inst, 0xfc, 0x10000010); /* 0x002350 */ + nvkm_done(chan->inst); + return 0; +} + +static const struct nvkm_chan_func_ramfc +gf100_chan_ramfc = { + .write = gf100_chan_ramfc_write, + .devm = 0xfff, +}; + void -gf100_fifo_runlist_commit(struct gf100_fifo *fifo) +gf100_chan_userd_clear(struct nvkm_chan *chan) { - struct gf100_fifo_chan *chan; - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - struct nvkm_memory *cur; - int nr = 0; - int target; + nvkm_kmap(chan->userd.mem); + nvkm_wo32(chan->userd.mem, chan->userd.base + 0x040, 0x00000000); + nvkm_wo32(chan->userd.mem, chan->userd.base + 0x044, 0x00000000); + nvkm_wo32(chan->userd.mem, chan->userd.base + 0x048, 0x00000000); + nvkm_wo32(chan->userd.mem, chan->userd.base + 0x04c, 0x00000000); + nvkm_wo32(chan->userd.mem, chan->userd.base + 0x050, 0x00000000); + nvkm_wo32(chan->userd.mem, chan->userd.base + 0x058, 0x00000000); + nvkm_wo32(chan->userd.mem, chan->userd.base + 0x05c, 0x00000000); + nvkm_wo32(chan->userd.mem, chan->userd.base + 0x060, 0x00000000); + nvkm_wo32(chan->userd.mem, chan->userd.base + 0x088, 0x00000000); + nvkm_wo32(chan->userd.mem, chan->userd.base + 0x08c, 0x00000000); + nvkm_done(chan->userd.mem); +} - mutex_lock(&fifo->base.mutex); - cur = fifo->runlist.mem[fifo->runlist.active]; - fifo->runlist.active = !fifo->runlist.active; +static const struct nvkm_chan_func_userd +gf100_chan_userd = { + .bar = 1, + .size = 0x1000, + .clear = gf100_chan_userd_clear, +}; - nvkm_kmap(cur); - list_for_each_entry(chan, &fifo->chan, head) { - nvkm_wo32(cur, (nr * 8) + 0, chan->base.chid); - nvkm_wo32(cur, (nr * 8) + 4, 0x00000004); - nr++; - } - nvkm_done(cur); +const struct nvkm_chan_func_inst +gf100_chan_inst = { + .size = 0x1000, + .zero = true, + .vmm = true, +}; - switch (nvkm_memory_target(cur)) { - case NVKM_MEM_TARGET_VRAM: target = 0; break; - case NVKM_MEM_TARGET_NCOH: target = 3; break; +static const struct nvkm_chan_func +gf100_chan = { + .inst = &gf100_chan_inst, + .userd = &gf100_chan_userd, + .ramfc = &gf100_chan_ramfc, + .bind = gf100_chan_bind, + .unbind = gf100_chan_unbind, + .start = gf100_chan_start, + .stop = gf100_chan_stop, + .preempt = gf100_chan_preempt, +}; + 
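+/* Editor's aside, not part of the patch: gf100_chan_ramfc_write() above
+ * encodes the GPFIFO ring as a base address plus log2 of the entry count
+ * (entries are 8 bytes).  For a hypothetical 4KiB ring at 0x20000000:
+ *
+ *	u64 offset = 0x20000000, length = 0x1000;
+ *	u32 limit2 = ilog2(length / 8);			      -> ilog2(512) == 9
+ *	nvkm_wo32(inst, 0x48, lower_32_bits(offset));	      -> 0x20000000
+ *	nvkm_wo32(inst, 0x4c, upper_32_bits(offset) |
+ *			      (limit2 << 16));		      -> 0x00090000
+ *
+ * which also implies the ring length must be a power of two.
+ */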
+static void +gf100_ectx_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan) +{ + u64 addr = 0ULL; + u32 ptr0; + + switch (engn->engine->subdev.type) { + case NVKM_ENGINE_SW : return; + case NVKM_ENGINE_GR : ptr0 = 0x0210; break; + case NVKM_ENGINE_CE : ptr0 = 0x0230 + (engn->engine->subdev.inst * 0x10); break; + case NVKM_ENGINE_MSPDEC: ptr0 = 0x0250; break; + case NVKM_ENGINE_MSPPP : ptr0 = 0x0260; break; + case NVKM_ENGINE_MSVLD : ptr0 = 0x0270; break; default: - mutex_unlock(&fifo->base.mutex); WARN_ON(1); return; } - nvkm_wr32(device, 0x002270, (nvkm_memory_addr(cur) >> 12) | - (target << 28)); - nvkm_wr32(device, 0x002274, 0x01f00000 | nr); + if (cctx) { + addr = cctx->vctx->vma->addr; + addr |= 4ULL; + } - if (wait_event_timeout(fifo->runlist.wait, - !(nvkm_rd32(device, 0x00227c) & 0x00100000), - msecs_to_jiffies(2000)) == 0) - nvkm_error(subdev, "runlist update timeout\n"); - mutex_unlock(&fifo->base.mutex); + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, ptr0 + 0, lower_32_bits(addr)); + nvkm_wo32(chan->inst, ptr0 + 4, upper_32_bits(addr)); + nvkm_done(chan->inst); } -void -gf100_fifo_runlist_remove(struct gf100_fifo *fifo, struct gf100_fifo_chan *chan) +static int +gf100_ectx_ctor(struct nvkm_engn *engn, struct nvkm_vctx *vctx) +{ + int ret; + + ret = nvkm_vmm_get(vctx->vmm, 12, vctx->inst->size, &vctx->vma); + if (ret) + return ret; + + return nvkm_memory_map(vctx->inst, 0, vctx->vmm, vctx->vma, NULL, 0); +} + +bool +gf100_engn_mmu_fault_triggered(struct nvkm_engn *engn) { - mutex_lock(&fifo->base.mutex); - list_del_init(&chan->head); - mutex_unlock(&fifo->base.mutex); + struct nvkm_runl *runl = engn->runl; + struct nvkm_fifo *fifo = runl->fifo; + struct nvkm_device *device = fifo->engine.subdev.device; + u32 data = nvkm_rd32(device, 0x002a30 + (engn->id * 4)); + + ENGN_DEBUG(engn, "%08x: mmu fault triggered", data); + if (!(data & 0x00000100)) + return false; + + spin_lock(&fifo->lock); + nvkm_mask(device, 0x002a30 + (engn->id * 4), 0x00000100, 0x00000000); + if (atomic_dec_and_test(&runl->rc_triggered)) + nvkm_mask(device, 0x002140, 0x00000100, 0x00000100); + spin_unlock(&fifo->lock); + return true; } void -gf100_fifo_runlist_insert(struct gf100_fifo *fifo, struct gf100_fifo_chan *chan) +gf100_engn_mmu_fault_trigger(struct nvkm_engn *engn) { - mutex_lock(&fifo->base.mutex); - list_add_tail(&chan->head, &fifo->chan); - mutex_unlock(&fifo->base.mutex); + struct nvkm_runl *runl = engn->runl; + struct nvkm_fifo *fifo = runl->fifo; + struct nvkm_device *device = fifo->engine.subdev.device; + + ENGN_DEBUG(engn, "triggering mmu fault on 0x%02x", engn->fault); + spin_lock(&fifo->lock); + if (atomic_inc_return(&runl->rc_triggered) == 1) + nvkm_mask(device, 0x002140, 0x00000100, 0x00000000); + nvkm_wr32(device, 0x002100, 0x00000100); + nvkm_wr32(device, 0x002a30 + (engn->id * 4), 0x00000100 | engn->fault); + spin_unlock(&fifo->lock); } -static struct nvkm_engine * -gf100_fifo_id_engine(struct nvkm_fifo *fifo, int engi) +/*TODO: clean all this up. 
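+ *
+ *	Reviewer note (an observation on this diff, not a hardware claim):
+ *	the decode below does not match the gf100_fifo_intr_sched_ctxsw()
+ *	code removed later in this patch -- busy moves from bit 31 to bit 28,
+ *	unk0 from bit 18 to bit 14, and gf100_engn_chsw() accepts
+ *	busy && (unk0 || unk1) where the old code required all three bits.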
*/ +struct gf100_engn_status { + bool busy; + bool save; + bool unk0; + bool unk1; + u8 chid; +}; + +static void +gf100_engn_status(struct nvkm_engn *engn, struct gf100_engn_status *status) { - enum nvkm_subdev_type type; - int inst; + u32 stat = nvkm_rd32(engn->engine->subdev.device, 0x002640 + (engn->id * 4)); - switch (engi) { - case GF100_FIFO_ENGN_GR : type = NVKM_ENGINE_GR ; inst = 0; break; - case GF100_FIFO_ENGN_MSPDEC: type = NVKM_ENGINE_MSPDEC; inst = 0; break; - case GF100_FIFO_ENGN_MSPPP : type = NVKM_ENGINE_MSPPP ; inst = 0; break; - case GF100_FIFO_ENGN_MSVLD : type = NVKM_ENGINE_MSVLD ; inst = 0; break; - case GF100_FIFO_ENGN_CE0 : type = NVKM_ENGINE_CE ; inst = 0; break; - case GF100_FIFO_ENGN_CE1 : type = NVKM_ENGINE_CE ; inst = 1; break; - case GF100_FIFO_ENGN_SW : type = NVKM_ENGINE_SW ; inst = 0; break; - default: - WARN_ON(1); - return NULL; - } + status->busy = (stat & 0x10000000); + status->save = (stat & 0x00100000); + status->unk0 = (stat & 0x00004000); + status->unk1 = (stat & 0x00001000); + status->chid = (stat & 0x0000007f); - return nvkm_device_engine(fifo->engine.subdev.device, type, inst); + ENGN_DEBUG(engn, "%08x: busy %d save %d unk0 %d unk1 %d chid %d", + stat, status->busy, status->save, status->unk0, status->unk1, status->chid); } static int -gf100_fifo_engine_id(struct nvkm_fifo *base, struct nvkm_engine *engine) -{ - switch (engine->subdev.type) { - case NVKM_ENGINE_GR : return GF100_FIFO_ENGN_GR; - case NVKM_ENGINE_MSPDEC: return GF100_FIFO_ENGN_MSPDEC; - case NVKM_ENGINE_MSPPP : return GF100_FIFO_ENGN_MSPPP; - case NVKM_ENGINE_MSVLD : return GF100_FIFO_ENGN_MSVLD; - case NVKM_ENGINE_CE : return GF100_FIFO_ENGN_CE0 + engine->subdev.inst; - case NVKM_ENGINE_SW : return GF100_FIFO_ENGN_SW; - default: - WARN_ON(1); - return -1; +gf100_engn_cxid(struct nvkm_engn *engn, bool *cgid) +{ + struct gf100_engn_status status; + + gf100_engn_status(engn, &status); + if (status.busy) { + *cgid = false; + return status.chid; } + + return -ENODEV; } -static void -gf100_fifo_recover_work(struct work_struct *w) +static bool +gf100_engn_chsw(struct nvkm_engn *engn) { - struct gf100_fifo *fifo = container_of(w, typeof(*fifo), recover.work); - struct nvkm_device *device = fifo->base.engine.subdev.device; - struct nvkm_engine *engine; - unsigned long flags; - u32 engm, engn, todo; + struct gf100_engn_status status; + + gf100_engn_status(engn, &status); + if (status.busy && (status.unk0 || status.unk1)) + return true; - spin_lock_irqsave(&fifo->base.lock, flags); - engm = fifo->recover.mask; - fifo->recover.mask = 0ULL; - spin_unlock_irqrestore(&fifo->base.lock, flags); + return false; +} - nvkm_mask(device, 0x002630, engm, engm); +static const struct nvkm_engn_func +gf100_engn = { + .chsw = gf100_engn_chsw, + .cxid = gf100_engn_cxid, + .mmu_fault_trigger = gf100_engn_mmu_fault_trigger, + .mmu_fault_triggered = gf100_engn_mmu_fault_triggered, + .ctor = gf100_ectx_ctor, + .bind = gf100_ectx_bind, +}; - for (todo = engm; engn = __ffs(todo), todo; todo &= ~BIT_ULL(engn)) { - if ((engine = gf100_fifo_id_engine(&fifo->base, engn))) { - nvkm_subdev_fini(&engine->subdev, false); - WARN_ON(nvkm_subdev_init(&engine->subdev)); +const struct nvkm_engn_func +gf100_engn_sw = { +}; + +static const struct nvkm_bitfield +gf100_runq_intr_0_names[] = { +/* { 0x00008000, "" } seen with null ib push */ + { 0x00200000, "ILLEGAL_MTHD" }, + { 0x00800000, "EMPTY_SUBC" }, + {} +}; + +bool +gf100_runq_intr(struct nvkm_runq *runq, struct nvkm_runl *null) +{ + struct nvkm_subdev *subdev = 
&runq->fifo->engine.subdev; + struct nvkm_device *device = subdev->device; + u32 mask = nvkm_rd32(device, 0x04010c + (runq->id * 0x2000)); + u32 stat = nvkm_rd32(device, 0x040108 + (runq->id * 0x2000)) & mask; + u32 addr = nvkm_rd32(device, 0x0400c0 + (runq->id * 0x2000)); + u32 data = nvkm_rd32(device, 0x0400c4 + (runq->id * 0x2000)); + u32 chid = nvkm_rd32(device, 0x040120 + (runq->id * 0x2000)) & runq->fifo->chid->mask; + u32 subc = (addr & 0x00070000) >> 16; + u32 mthd = (addr & 0x00003ffc); + u32 show = stat; + struct nvkm_chan *chan; + unsigned long flags; + char msg[128]; + + if (stat & 0x00800000) { + if (device->sw) { + if (nvkm_sw_mthd(device->sw, chid, subc, mthd, data)) + show &= ~0x00800000; } } - gf100_fifo_runlist_commit(fifo); - nvkm_wr32(device, 0x00262c, engm); - nvkm_mask(device, 0x002630, engm, 0x00000000); + if (show) { + nvkm_snprintbf(msg, sizeof(msg), runq->func->intr_0_names, show); + chan = nvkm_chan_get_chid(&runq->fifo->engine, chid, &flags); + nvkm_error(subdev, "PBDMA%d: %08x [%s] ch %d [%010llx %s] " + "subc %d mthd %04x data %08x\n", + runq->id, show, msg, chid, chan ? chan->inst->addr : 0, + chan ? chan->name : "unknown", subc, mthd, data); + + /*TODO: use proper procedure for clearing each exception / debug output */ + if ((stat & 0xc67fe000) && chan) + nvkm_chan_error(chan, true); + nvkm_chan_put(&chan, flags); + } + + nvkm_wr32(device, 0x0400c0 + (runq->id * 0x2000), 0x80600008); + nvkm_wr32(device, 0x040108 + (runq->id * 0x2000), stat); + return true; +} + +void +gf100_runq_init(struct nvkm_runq *runq) +{ + struct nvkm_device *device = runq->fifo->engine.subdev.device; + + nvkm_mask(device, 0x04013c + (runq->id * 0x2000), 0x10000100, 0x00000000); + nvkm_wr32(device, 0x040108 + (runq->id * 0x2000), 0xffffffff); /* INTR */ + nvkm_wr32(device, 0x04010c + (runq->id * 0x2000), 0xfffffeff); /* INTREN */ +} + +static const struct nvkm_runq_func +gf100_runq = { + .init = gf100_runq_init, + .intr = gf100_runq_intr, + .intr_0_names = gf100_runq_intr_0_names, +}; + +bool +gf100_runl_preempt_pending(struct nvkm_runl *runl) +{ + return nvkm_rd32(runl->fifo->engine.subdev.device, 0x002634) & 0x00100000; } static void -gf100_fifo_recover(struct gf100_fifo *fifo, struct nvkm_engine *engine, - struct gf100_fifo_chan *chan) +gf100_runl_fault_clear(struct nvkm_runl *runl) { - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - u32 chid = chan->base.chid; - int engi = gf100_fifo_engine_id(&fifo->base, engine); + nvkm_mask(runl->fifo->engine.subdev.device, 0x00262c, 0x00000000, 0x00000000); +} + +static void +gf100_runl_allow(struct nvkm_runl *runl, u32 engm) +{ + nvkm_mask(runl->fifo->engine.subdev.device, 0x002630, engm, 0x00000000); +} + +static void +gf100_runl_block(struct nvkm_runl *runl, u32 engm) +{ + nvkm_mask(runl->fifo->engine.subdev.device, 0x002630, engm, engm); +} + +static bool +gf100_runl_pending(struct nvkm_runl *runl) +{ + return nvkm_rd32(runl->fifo->engine.subdev.device, 0x00227c) & 0x00100000; +} + +static void +gf100_runl_commit(struct nvkm_runl *runl, struct nvkm_memory *memory, u32 start, int count) +{ + struct nvkm_device *device = runl->fifo->engine.subdev.device; + u64 addr = nvkm_memory_addr(memory) + start; + int target; + + switch (nvkm_memory_target(memory)) { + case NVKM_MEM_TARGET_VRAM: target = 0; break; + case NVKM_MEM_TARGET_NCOH: target = 3; break; + default: + WARN_ON(1); + return; + } + + nvkm_wr32(device, 0x002270, (target << 28) | (addr >> 12)); + nvkm_wr32(device, 0x002274, 
0x01f00000 | count); +} + +static void +gf100_runl_insert_chan(struct nvkm_chan *chan, struct nvkm_memory *memory, u64 offset) +{ + nvkm_wo32(memory, offset + 0, chan->id); + nvkm_wo32(memory, offset + 4, 0x00000004); +} - nvkm_error(subdev, "%s engine fault on channel %d, recovering...\n", - engine->subdev.name, chid); - assert_spin_locked(&fifo->base.lock); +static const struct nvkm_runl_func +gf100_runl = { + .size = 8, + .update = nv50_runl_update, + .insert_chan = gf100_runl_insert_chan, + .commit = gf100_runl_commit, + .wait = nv50_runl_wait, + .pending = gf100_runl_pending, + .block = gf100_runl_block, + .allow = gf100_runl_allow, + .fault_clear = gf100_runl_fault_clear, + .preempt_pending = gf100_runl_preempt_pending, +}; - nvkm_mask(device, 0x003004 + (chid * 0x08), 0x00000001, 0x00000000); - list_del_init(&chan->head); - chan->killed = true; +static void +gf100_fifo_nonstall_allow(struct nvkm_event *event, int type, int index) +{ + struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event); + unsigned long flags; - if (engi >= 0 && engi != GF100_FIFO_ENGN_SW) - fifo->recover.mask |= BIT(engi); - schedule_work(&fifo->recover.work); - nvkm_fifo_kevent(&fifo->base, chid); + spin_lock_irqsave(&fifo->lock, flags); + nvkm_mask(fifo->engine.subdev.device, 0x002140, 0x80000000, 0x80000000); + spin_unlock_irqrestore(&fifo->lock, flags); } +void +gf100_fifo_nonstall_block(struct nvkm_event *event, int type, int index) +{ + struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event); + unsigned long flags; + + spin_lock_irqsave(&fifo->lock, flags); + nvkm_mask(fifo->engine.subdev.device, 0x002140, 0x80000000, 0x00000000); + spin_unlock_irqrestore(&fifo->lock, flags); +} + +const struct nvkm_event_func +gf100_fifo_nonstall = { + .init = gf100_fifo_nonstall_allow, + .fini = gf100_fifo_nonstall_block, +}; + static const struct nvkm_enum -gf100_fifo_fault_engine[] = { +gf100_fifo_mmu_fault_engine[] = { { 0x00, "PGRAPH", NULL, NVKM_ENGINE_GR }, { 0x03, "PEEPHOLE", NULL, NVKM_ENGINE_IFB }, { 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR }, { 0x05, "BAR3", NULL, NVKM_SUBDEV_INSTMEM }, - { 0x07, "PFIFO", NULL, NVKM_ENGINE_FIFO }, + { 0x07, "PFIFO" }, { 0x10, "PMSVLD", NULL, NVKM_ENGINE_MSVLD }, { 0x11, "PMSPPP", NULL, NVKM_ENGINE_MSPPP }, { 0x13, "PCOUNTER" }, @@ -212,7 +482,7 @@ gf100_fifo_fault_engine[] = { }; static const struct nvkm_enum -gf100_fifo_fault_reason[] = { +gf100_fifo_mmu_fault_reason[] = { { 0x00, "PT_NOT_PRESENT" }, { 0x01, "PT_TOO_SHORT" }, { 0x02, "PAGE_NOT_PRESENT" }, @@ -226,7 +496,7 @@ gf100_fifo_fault_reason[] = { }; static const struct nvkm_enum -gf100_fifo_fault_hubclient[] = { +gf100_fifo_mmu_fault_hubclient[] = { { 0x01, "PCOPY0" }, { 0x02, "PCOPY1" }, { 0x04, "DISPATCH" }, @@ -245,7 +515,7 @@ gf100_fifo_fault_hubclient[] = { }; static const struct nvkm_enum -gf100_fifo_fault_gpcclient[] = { +gf100_fifo_mmu_fault_gpcclient[] = { { 0x01, "TEX" }, { 0x0c, "ESETUP" }, { 0x0e, "CTXCTL" }, @@ -253,29 +523,55 @@ gf100_fifo_fault_gpcclient[] = { {} }; -static void -gf100_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info) +const struct nvkm_enum +gf100_fifo_mmu_fault_access[] = { + { 0x00, "READ" }, + { 0x01, "WRITE" }, + {} +}; + +void +gf100_fifo_mmu_fault_recover(struct nvkm_fifo *fifo, struct nvkm_fault_data *info) { - struct gf100_fifo *fifo = gf100_fifo(base); - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; + struct nvkm_subdev *subdev = &fifo->engine.subdev; struct nvkm_device *device = subdev->device; - const struct 
nvkm_enum *er, *eu, *ec; + const struct nvkm_enum *er, *ee, *ec, *ea; struct nvkm_engine *engine = NULL; - struct nvkm_fifo_chan *chan; + struct nvkm_runl *runl; + struct nvkm_engn *engn; + struct nvkm_chan *chan; unsigned long flags; - char gpcid[8] = ""; + char ct[8] = "HUB/"; + + /* Lookup engine by MMU fault ID. */ + nvkm_runl_foreach(runl, fifo) { + engn = nvkm_runl_find_engn(engn, runl, engn->fault == info->engine); + if (engn) { + /* Fault triggered by CTXSW_TIMEOUT recovery procedure. */ + if (engn->func->mmu_fault_triggered && + engn->func->mmu_fault_triggered(engn)) { + nvkm_runl_rc_engn(runl, engn); + return; + } + + engine = engn->engine; + break; + } + } - er = nvkm_enum_find(gf100_fifo_fault_reason, info->reason); - eu = nvkm_enum_find(gf100_fifo_fault_engine, info->engine); + er = nvkm_enum_find(fifo->func->mmu_fault->reason, info->reason); + ee = nvkm_enum_find(fifo->func->mmu_fault->engine, info->engine); if (info->hub) { - ec = nvkm_enum_find(gf100_fifo_fault_hubclient, info->client); + ec = nvkm_enum_find(fifo->func->mmu_fault->hubclient, info->client); } else { - ec = nvkm_enum_find(gf100_fifo_fault_gpcclient, info->client); - snprintf(gpcid, sizeof(gpcid), "GPC%d/", info->gpc); + ec = nvkm_enum_find(fifo->func->mmu_fault->gpcclient, info->client); + snprintf(ct, sizeof(ct), "GPC%d/", info->gpc); } + ea = nvkm_enum_find(fifo->func->mmu_fault->access, info->access); - if (eu && eu->data2) { - switch (eu->data2) { + /* Handle BAR faults. */ + if (ee && ee->data2) { + switch (ee->data2) { case NVKM_SUBDEV_BAR: nvkm_bar_bar1_reset(device); break; @@ -286,77 +582,104 @@ gf100_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info) nvkm_mask(device, 0x001718, 0x00000000, 0x00000000); break; default: - engine = nvkm_device_engine(device, eu->data2, eu->inst); break; } } - chan = nvkm_fifo_chan_inst(&fifo->base, info->inst, &flags); + chan = nvkm_chan_get_inst(&fifo->engine, info->inst, &flags); nvkm_error(subdev, - "%s fault at %010llx engine %02x [%s] client %02x [%s%s] " - "reason %02x [%s] on channel %d [%010llx %s]\n", - info->access ? "write" : "read", info->addr, - info->engine, eu ? eu->name : "", - info->client, gpcid, ec ? ec->name : "", - info->reason, er ? er->name : "", chan ? chan->chid : -1, - info->inst, chan ? chan->object.client->name : "unknown"); - - if (engine && chan) - gf100_fifo_recover(fifo, engine, (void *)chan); - nvkm_fifo_chan_put(&fifo->base, flags, &chan); + "fault %02x [%s] at %016llx engine %02x [%s] client %02x " + "[%s%s] reason %02x [%s] on channel %d [%010llx %s]\n", + info->access, ea ? ea->name : "", info->addr, + info->engine, ee ? ee->name : engine ? engine->subdev.name : "", + info->client, ct, ec ? ec->name : "", + info->reason, er ? er->name : "", + chan ? chan->id : -1, info->inst, chan ? chan->name : "unknown"); + + /* Handle host/engine faults. 
*/ + if (chan) + nvkm_runl_rc_cgrp(chan->cgrp); + + nvkm_chan_put(&chan, flags); } -static const struct nvkm_enum -gf100_fifo_sched_reason[] = { - { 0x0a, "CTXSW_TIMEOUT" }, - {} +static const struct nvkm_fifo_func_mmu_fault +gf100_fifo_mmu_fault = { + .recover = gf100_fifo_mmu_fault_recover, + .access = gf100_fifo_mmu_fault_access, + .engine = gf100_fifo_mmu_fault_engine, + .reason = gf100_fifo_mmu_fault_reason, + .hubclient = gf100_fifo_mmu_fault_hubclient, + .gpcclient = gf100_fifo_mmu_fault_gpcclient, }; -static void -gf100_fifo_intr_sched_ctxsw(struct gf100_fifo *fifo) +void +gf100_fifo_intr_ctxsw_timeout(struct nvkm_fifo *fifo, u32 engm) { - struct nvkm_device *device = fifo->base.engine.subdev.device; - struct nvkm_engine *engine; - struct gf100_fifo_chan *chan; - unsigned long flags; - u32 engn; - - spin_lock_irqsave(&fifo->base.lock, flags); - for (engn = 0; engn < 6; engn++) { - u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x04)); - u32 busy = (stat & 0x80000000); - u32 save = (stat & 0x00100000); /* maybe? */ - u32 unk0 = (stat & 0x00040000); - u32 unk1 = (stat & 0x00001000); - u32 chid = (stat & 0x0000007f); - (void)save; - - if (busy && unk0 && unk1) { - list_for_each_entry(chan, &fifo->chan, head) { - if (chan->base.chid == chid) { - engine = gf100_fifo_id_engine(&fifo->base, engn); - if (!engine) - break; - gf100_fifo_recover(fifo, engine, chan); - break; + struct nvkm_runl *runl; + struct nvkm_engn *engn, *engn2; + bool cgid, cgid2; + int id, id2; + + nvkm_runl_foreach(runl, fifo) { + /* Stop the runlist, and go through all engines serving it. */ + nvkm_runl_block(runl); + nvkm_runl_foreach_engn_cond(engn, runl, engm & BIT(engn->id)) { + /* Determine what channel (group) the engine is on. */ + id = engn->func->cxid(engn, &cgid); + if (id >= 0) { + /* Trigger MMU fault on any engine(s) on that channel (group). */ + nvkm_runl_foreach_engn_cond(engn2, runl, engn2->func->cxid) { + id2 = engn2->func->cxid(engn2, &cgid2); + if (cgid2 == cgid && id2 == id) + engn2->func->mmu_fault_trigger(engn2); } } } + nvkm_runl_allow(runl); /* HW will keep runlist blocked via ERROR_SCHED_DISABLE. */ } - spin_unlock_irqrestore(&fifo->base.lock, flags); } static void -gf100_fifo_intr_sched(struct gf100_fifo *fifo) +gf100_fifo_intr_sched_ctxsw(struct nvkm_fifo *fifo) { - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; + struct nvkm_runl *runl; + struct nvkm_engn *engn; + u32 engm = 0; + + /* Look for any engines that are busy, and awaiting chsw ack. */ + nvkm_runl_foreach(runl, fifo) { + nvkm_runl_foreach_engn_cond(engn, runl, engn->func->chsw) { + if (WARN_ON(engn->fault < 0) || !engn->func->chsw(engn)) + continue; + + engm |= BIT(engn->id); + } + } + + if (!engm) + return; + + fifo->func->intr_ctxsw_timeout(fifo, engm); +} + +static const struct nvkm_enum +gf100_fifo_intr_sched_names[] = { + { 0x0a, "CTXSW_TIMEOUT" }, + {} +}; + +void +gf100_fifo_intr_sched(struct nvkm_fifo *fifo) +{ + struct nvkm_subdev *subdev = &fifo->engine.subdev; struct nvkm_device *device = subdev->device; u32 intr = nvkm_rd32(device, 0x00254c); u32 code = intr & 0x000000ff; const struct nvkm_enum *en; - en = nvkm_enum_find(gf100_fifo_sched_reason, code); + en = nvkm_enum_find(gf100_fifo_intr_sched_names, code); nvkm_error(subdev, "SCHED_ERROR %02x [%s]\n", code, en ? 
en->name : ""); @@ -370,7 +693,7 @@ gf100_fifo_intr_sched(struct gf100_fifo *fifo) } void -gf100_fifo_intr_fault(struct nvkm_fifo *fifo, int unit) +gf100_fifo_intr_mmu_fault_unit(struct nvkm_fifo *fifo, int unit) { struct nvkm_device *device = fifo->engine.subdev.device; u32 inst = nvkm_rd32(device, 0x002800 + (unit * 0x10)); @@ -393,61 +716,45 @@ gf100_fifo_intr_fault(struct nvkm_fifo *fifo, int unit) nvkm_fifo_fault(fifo, &info); } -static const struct nvkm_bitfield -gf100_fifo_pbdma_intr[] = { -/* { 0x00008000, "" } seen with null ib push */ - { 0x00200000, "ILLEGAL_MTHD" }, - { 0x00800000, "EMPTY_SUBC" }, - {} -}; - -static void -gf100_fifo_intr_pbdma(struct gf100_fifo *fifo, int unit) +void +gf100_fifo_intr_mmu_fault(struct nvkm_fifo *fifo) { - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - u32 stat = nvkm_rd32(device, 0x040108 + (unit * 0x2000)); - u32 addr = nvkm_rd32(device, 0x0400c0 + (unit * 0x2000)); - u32 data = nvkm_rd32(device, 0x0400c4 + (unit * 0x2000)); - u32 chid = nvkm_rd32(device, 0x040120 + (unit * 0x2000)) & 0x7f; - u32 subc = (addr & 0x00070000) >> 16; - u32 mthd = (addr & 0x00003ffc); - struct nvkm_fifo_chan *chan; - unsigned long flags; - u32 show= stat; - char msg[128]; + struct nvkm_device *device = fifo->engine.subdev.device; + unsigned long mask = nvkm_rd32(device, 0x00259c); + int unit; - if (stat & 0x00800000) { - if (device->sw) { - if (nvkm_sw_mthd(device->sw, chid, subc, mthd, data)) - show &= ~0x00800000; - } + for_each_set_bit(unit, &mask, 32) { + fifo->func->intr_mmu_fault_unit(fifo, unit); + nvkm_wr32(device, 0x00259c, BIT(unit)); } +} - if (show) { - nvkm_snprintbf(msg, sizeof(msg), gf100_fifo_pbdma_intr, show); - chan = nvkm_fifo_chan_chid(&fifo->base, chid, &flags); - nvkm_error(subdev, "PBDMA%d: %08x [%s] ch %d [%010llx %s] " - "subc %d mthd %04x data %08x\n", - unit, show, msg, chid, chan ? chan->inst->addr : 0, - chan ? 
chan->object.client->name : "unknown", - subc, mthd, data); - nvkm_fifo_chan_put(&fifo->base, flags, &chan); +bool +gf100_fifo_intr_pbdma(struct nvkm_fifo *fifo) +{ + struct nvkm_device *device = fifo->engine.subdev.device; + struct nvkm_runq *runq; + u32 mask = nvkm_rd32(device, 0x0025a0); + bool handled = false; + + nvkm_runq_foreach_cond(runq, fifo, mask & BIT(runq->id)) { + if (runq->func->intr(runq, NULL)) + handled = true; + + nvkm_wr32(device, 0x0025a0, BIT(runq->id)); } - nvkm_wr32(device, 0x0400c0 + (unit * 0x2000), 0x80600008); - nvkm_wr32(device, 0x040108 + (unit * 0x2000), stat); + return handled; } static void -gf100_fifo_intr_runlist(struct gf100_fifo *fifo) +gf100_fifo_intr_runlist(struct nvkm_fifo *fifo) { - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; + struct nvkm_subdev *subdev = &fifo->engine.subdev; struct nvkm_device *device = subdev->device; u32 intr = nvkm_rd32(device, 0x002a00); if (intr & 0x10000000) { - wake_up(&fifo->runlist.wait); nvkm_wr32(device, 0x002a00, 0x10000000); intr &= ~0x10000000; } @@ -459,9 +766,9 @@ gf100_fifo_intr_runlist(struct gf100_fifo *fifo) } static void -gf100_fifo_intr_engine_unit(struct gf100_fifo *fifo, int engn) +gf100_fifo_intr_engine_unit(struct nvkm_fifo *fifo, int engn) { - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; + struct nvkm_subdev *subdev = &fifo->engine.subdev; struct nvkm_device *device = subdev->device; u32 intr = nvkm_rd32(device, 0x0025a8 + (engn * 0x04)); u32 inte = nvkm_rd32(device, 0x002628); @@ -472,22 +779,22 @@ gf100_fifo_intr_engine_unit(struct gf100_fifo *fifo, int engn) for (unkn = 0; unkn < 8; unkn++) { u32 ints = (intr >> (unkn * 0x04)) & inte; if (ints & 0x1) { - nvkm_fifo_uevent(&fifo->base); + nvkm_event_ntfy(&fifo->nonstall.event, 0, NVKM_FIFO_NONSTALL_EVENT); ints &= ~1; } if (ints) { - nvkm_error(subdev, "ENGINE %d %d %01x", - engn, unkn, ints); + nvkm_error(subdev, "ENGINE %d %d %01x", engn, unkn, ints); nvkm_mask(device, 0x002628, ints, 0); } } } -void -gf100_fifo_intr_engine(struct gf100_fifo *fifo) +static void +gf100_fifo_intr_engine(struct nvkm_fifo *fifo) { - struct nvkm_device *device = fifo->base.engine.subdev.device; + struct nvkm_device *device = fifo->engine.subdev.device; u32 mask = nvkm_rd32(device, 0x0025a4); + while (mask) { u32 unit = __ffs(mask); gf100_fifo_intr_engine_unit(fifo, unit); @@ -495,11 +802,11 @@ gf100_fifo_intr_engine(struct gf100_fifo *fifo) } } -static void -gf100_fifo_intr(struct nvkm_fifo *base) +static irqreturn_t +gf100_fifo_intr(struct nvkm_inth *inth) { - struct gf100_fifo *fifo = gf100_fifo(base); - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; + struct nvkm_fifo *fifo = container_of(inth, typeof(*fifo), engine.subdev.inth); + struct nvkm_subdev *subdev = &fifo->engine.subdev; struct nvkm_device *device = subdev->device; u32 mask = nvkm_rd32(device, 0x002140); u32 stat = nvkm_rd32(device, 0x002100) & mask; @@ -532,25 +839,13 @@ gf100_fifo_intr(struct nvkm_fifo *base) } if (stat & 0x10000000) { - u32 mask = nvkm_rd32(device, 0x00259c); - while (mask) { - u32 unit = __ffs(mask); - gf100_fifo_intr_fault(&fifo->base, unit); - nvkm_wr32(device, 0x00259c, (1 << unit)); - mask &= ~(1 << unit); - } + gf100_fifo_intr_mmu_fault(fifo); stat &= ~0x10000000; } if (stat & 0x20000000) { - u32 mask = nvkm_rd32(device, 0x0025a0); - while (mask) { - u32 unit = __ffs(mask); - gf100_fifo_intr_pbdma(fifo, unit); - nvkm_wr32(device, 0x0025a0, (1 << unit)); - mask &= ~(1 << unit); - } - stat &= ~0x20000000; + if (gf100_fifo_intr_pbdma(fifo)) + stat &= 
~0x20000000; } if (stat & 0x40000000) { @@ -565,71 +860,26 @@ gf100_fifo_intr(struct nvkm_fifo *base) if (stat) { nvkm_error(subdev, "INTR %08x\n", stat); + spin_lock(&fifo->lock); nvkm_mask(device, 0x002140, stat, 0x00000000); + spin_unlock(&fifo->lock); nvkm_wr32(device, 0x002100, stat); } -} - -static int -gf100_fifo_oneinit(struct nvkm_fifo *base) -{ - struct gf100_fifo *fifo = gf100_fifo(base); - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device); - int ret; - - /* Determine number of PBDMAs by checking valid enable bits. */ - nvkm_wr32(device, 0x002204, 0xffffffff); - fifo->pbdma_nr = hweight32(nvkm_rd32(device, 0x002204)); - nvkm_debug(subdev, "%d PBDMA(s)\n", fifo->pbdma_nr); - - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, - false, &fifo->runlist.mem[0]); - if (ret) - return ret; - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, - false, &fifo->runlist.mem[1]); - if (ret) - return ret; - init_waitqueue_head(&fifo->runlist.wait); - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 128 * 0x1000, - 0x1000, false, &fifo->user.mem); - if (ret) - return ret; - - ret = nvkm_vmm_get(bar, 12, nvkm_memory_size(fifo->user.mem), - &fifo->user.bar); - if (ret) - return ret; - - return nvkm_memory_map(fifo->user.mem, 0, bar, fifo->user.bar, NULL, 0); -} - -static void -gf100_fifo_fini(struct nvkm_fifo *base) -{ - struct gf100_fifo *fifo = gf100_fifo(base); - flush_work(&fifo->recover.work); + return IRQ_HANDLED; } static void -gf100_fifo_init(struct nvkm_fifo *base) +gf100_fifo_init_pbdmas(struct nvkm_fifo *fifo, u32 mask) { - struct gf100_fifo *fifo = gf100_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; - int i; + struct nvkm_device *device = fifo->engine.subdev.device; /* Enable PBDMAs. */ - nvkm_wr32(device, 0x000204, (1 << fifo->pbdma_nr) - 1); - nvkm_wr32(device, 0x002204, (1 << fifo->pbdma_nr) - 1); + nvkm_wr32(device, 0x000204, mask); + nvkm_wr32(device, 0x002204, mask); /* Assign engines to PBDMAs. 
*/ - if (fifo->pbdma_nr >= 3) { + if ((mask & 7) == 7) { nvkm_wr32(device, 0x002208, ~(1 << 0)); /* PGRAPH */ nvkm_wr32(device, 0x00220c, ~(1 << 1)); /* PVP */ nvkm_wr32(device, 0x002210, ~(1 << 1)); /* PMSPP */ @@ -638,62 +888,82 @@ gf100_fifo_init(struct nvkm_fifo *base) nvkm_wr32(device, 0x00221c, ~(1 << 1)); /* PCE1 */ } - /* PBDMA[n] */ - for (i = 0; i < fifo->pbdma_nr; i++) { - nvkm_mask(device, 0x04013c + (i * 0x2000), 0x10000100, 0x00000000); - nvkm_wr32(device, 0x040108 + (i * 0x2000), 0xffffffff); /* INTR */ - nvkm_wr32(device, 0x04010c + (i * 0x2000), 0xfffffeff); /* INTREN */ - } + nvkm_mask(device, 0x002a04, 0xbfffffff, 0xbfffffff); +} + +static void +gf100_fifo_init(struct nvkm_fifo *fifo) +{ + struct nvkm_device *device = fifo->engine.subdev.device; nvkm_mask(device, 0x002200, 0x00000001, 0x00000001); - nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar->addr >> 12); + nvkm_wr32(device, 0x002254, 0x10000000 | fifo->userd.bar1->addr >> 12); nvkm_wr32(device, 0x002100, 0xffffffff); nvkm_wr32(device, 0x002140, 0x7fffffff); nvkm_wr32(device, 0x002628, 0x00000001); /* ENGINE_INTR_EN */ } -static void * -gf100_fifo_dtor(struct nvkm_fifo *base) +static int +gf100_fifo_runl_ctor(struct nvkm_fifo *fifo) +{ + struct nvkm_runl *runl; + + runl = nvkm_runl_new(fifo, 0, 0, 0); + if (IS_ERR(runl)) + return PTR_ERR(runl); + + nvkm_runl_add(runl, 0, fifo->func->engn, NVKM_ENGINE_GR, 0); + nvkm_runl_add(runl, 1, fifo->func->engn, NVKM_ENGINE_MSPDEC, 0); + nvkm_runl_add(runl, 2, fifo->func->engn, NVKM_ENGINE_MSPPP, 0); + nvkm_runl_add(runl, 3, fifo->func->engn, NVKM_ENGINE_MSVLD, 0); + nvkm_runl_add(runl, 4, fifo->func->engn, NVKM_ENGINE_CE, 0); + nvkm_runl_add(runl, 5, fifo->func->engn, NVKM_ENGINE_CE, 1); + nvkm_runl_add(runl, 15, &gf100_engn_sw, NVKM_ENGINE_SW, 0); + return 0; +} + +int +gf100_fifo_runq_nr(struct nvkm_fifo *fifo) +{ + struct nvkm_device *device = fifo->engine.subdev.device; + u32 save; + + /* Determine number of PBDMAs by checking valid enable bits. 
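+	 * nvkm_mask() returns the value it read before writing, so the first
+	 * call below saves the original 0x000204 contents while flooding the
+	 * register with ones, and the second restores them while returning
+	 * which bits actually stuck -- e.g. a read-back of 0x00000007 gives
+	 * hweight32() == 3 PBDMAs.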
*/ + save = nvkm_mask(device, 0x000204, 0xffffffff, 0xffffffff); + save = nvkm_mask(device, 0x000204, 0xffffffff, save); + return hweight32(save); +} + +int +gf100_fifo_chid_ctor(struct nvkm_fifo *fifo, int nr) { - struct gf100_fifo *fifo = gf100_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; - nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &fifo->user.bar); - nvkm_memory_unref(&fifo->user.mem); - nvkm_memory_unref(&fifo->runlist.mem[0]); - nvkm_memory_unref(&fifo->runlist.mem[1]); - return fifo; + return nvkm_chid_new(&nvkm_chan_event, &fifo->engine.subdev, nr, 0, nr, &fifo->chid); } static const struct nvkm_fifo_func gf100_fifo = { - .dtor = gf100_fifo_dtor, - .oneinit = gf100_fifo_oneinit, + .chid_nr = nv50_fifo_chid_nr, + .chid_ctor = gf100_fifo_chid_ctor, + .runq_nr = gf100_fifo_runq_nr, + .runl_ctor = gf100_fifo_runl_ctor, .init = gf100_fifo_init, - .fini = gf100_fifo_fini, + .init_pbdmas = gf100_fifo_init_pbdmas, .intr = gf100_fifo_intr, - .fault = gf100_fifo_fault, - .engine_id = gf100_fifo_engine_id, - .id_engine = gf100_fifo_id_engine, - .uevent_init = gf100_fifo_uevent_init, - .uevent_fini = gf100_fifo_uevent_fini, - .chan = { - &gf100_fifo_gpfifo_oclass, - NULL - }, + .intr_mmu_fault_unit = gf100_fifo_intr_mmu_fault_unit, + .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout, + .mmu_fault = &gf100_fifo_mmu_fault, + .nonstall = &gf100_fifo_nonstall, + .runl = &gf100_runl, + .runq = &gf100_runq, + .engn = &gf100_engn, + .cgrp = {{ }, &nv04_cgrp }, + .chan = {{ 0, 0, FERMI_CHANNEL_GPFIFO }, &gf100_chan }, }; int gf100_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - struct gf100_fifo *fifo; - - if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL))) - return -ENOMEM; - INIT_LIST_HEAD(&fifo->chan); - INIT_WORK(&fifo->recover.work, gf100_fifo_recover_work); - *pfifo = &fifo->base; - - return nvkm_fifo_ctor(&gf100_fifo, device, type, inst, 128, &fifo->base); + return nvkm_fifo_new_(&gf100_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h deleted file mode 100644 index b8642490eb2f..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -#ifndef __GF100_FIFO_H__ -#define __GF100_FIFO_H__ -#define gf100_fifo(p) container_of((p), struct gf100_fifo, base) -#include "priv.h" - -#include <subdev/mmu.h> - -struct gf100_fifo_chan; -struct gf100_fifo { - struct nvkm_fifo base; - - struct list_head chan; - - struct { - struct work_struct work; - u64 mask; - } recover; - - int pbdma_nr; - - struct { - struct nvkm_memory *mem[2]; - int active; - wait_queue_head_t wait; - } runlist; - - struct { - struct nvkm_memory *mem; - struct nvkm_vma *bar; - } user; -}; - -void gf100_fifo_intr_engine(struct gf100_fifo *); -void gf100_fifo_runlist_insert(struct gf100_fifo *, struct gf100_fifo_chan *); -void gf100_fifo_runlist_remove(struct gf100_fifo *, struct gf100_fifo_chan *); -void gf100_fifo_runlist_commit(struct gf100_fifo *); -#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c index e771bd519ee2..d8a4d773a58c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c @@ -21,643 +21,318 @@ * * Authors: Ben Skeggs */ -#include "gk104.h" +#include "priv.h" #include "cgrp.h" -#include "changk104.h" +#include "chan.h" +#include 
"chid.h" +#include "runl.h" +#include "runq.h" -#include <core/client.h> #include <core/gpuobj.h> -#include <subdev/bar.h> -#include <subdev/fault.h> -#include <subdev/timer.h> +#include <subdev/mc.h> +#include <subdev/mmu.h> #include <subdev/top.h> -#include <engine/sw.h> #include <nvif/class.h> -#include <nvif/cl0080.h> +#include <nvif/if900d.h> void -gk104_fifo_engine_status(struct gk104_fifo *fifo, int engn, - struct gk104_fifo_engine_status *status) +gk104_chan_stop(struct nvkm_chan *chan) { - struct nvkm_engine *engine = fifo->engine[engn].engine; - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08)); - - status->busy = !!(stat & 0x80000000); - status->faulted = !!(stat & 0x40000000); - status->next.tsg = !!(stat & 0x10000000); - status->next.id = (stat & 0x0fff0000) >> 16; - status->chsw = !!(stat & 0x00008000); - status->save = !!(stat & 0x00004000); - status->load = !!(stat & 0x00002000); - status->prev.tsg = !!(stat & 0x00001000); - status->prev.id = (stat & 0x00000fff); - status->chan = NULL; - - if (status->busy && status->chsw) { - if (status->load && status->save) { - if (engine && nvkm_engine_chsw_load(engine)) - status->chan = &status->next; - else - status->chan = &status->prev; - } else - if (status->load) { - status->chan = &status->next; - } else { - status->chan = &status->prev; - } - } else - if (status->load) { - status->chan = &status->prev; - } + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; - nvkm_debug(subdev, "engine %02d: busy %d faulted %d chsw %d " - "save %d load %d %sid %d%s-> %sid %d%s\n", - engn, status->busy, status->faulted, - status->chsw, status->save, status->load, - status->prev.tsg ? "tsg" : "ch", status->prev.id, - status->chan == &status->prev ? "*" : " ", - status->next.tsg ? "tsg" : "ch", status->next.id, - status->chan == &status->next ? 
"*" : " "); -} - -int -gk104_fifo_class_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, - void *argv, u32 argc, struct nvkm_object **pobject) -{ - struct gk104_fifo *fifo = gk104_fifo(base); - if (oclass->engn == &fifo->func->chan) { - const struct gk104_fifo_chan_user *user = oclass->engn; - return user->ctor(fifo, oclass, argv, argc, pobject); - } else - if (oclass->engn == &fifo->func->user) { - const struct gk104_fifo_user_user *user = oclass->engn; - return user->ctor(oclass, argv, argc, pobject); - } - WARN_ON(1); - return -EINVAL; -} - -int -gk104_fifo_class_get(struct nvkm_fifo *base, int index, - struct nvkm_oclass *oclass) -{ - struct gk104_fifo *fifo = gk104_fifo(base); - int c = 0; - - if (fifo->func->user.ctor && c++ == index) { - oclass->base = fifo->func->user.user; - oclass->engn = &fifo->func->user; - return 0; - } - - if (fifo->func->chan.ctor && c++ == index) { - oclass->base = fifo->func->chan.user; - oclass->engn = &fifo->func->chan; - return 0; - } - - return c; + nvkm_mask(device, 0x800004 + (chan->id * 8), 0x00000800, 0x00000800); } void -gk104_fifo_uevent_fini(struct nvkm_fifo *fifo) +gk104_chan_start(struct nvkm_chan *chan) { - struct nvkm_device *device = fifo->engine.subdev.device; - nvkm_mask(device, 0x002140, 0x80000000, 0x00000000); -} + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; -void -gk104_fifo_uevent_init(struct nvkm_fifo *fifo) -{ - struct nvkm_device *device = fifo->engine.subdev.device; - nvkm_mask(device, 0x002140, 0x80000000, 0x80000000); + nvkm_mask(device, 0x800004 + (chan->id * 8), 0x00000400, 0x00000400); } void -gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl, - struct nvkm_memory *mem, int nr) +gk104_chan_unbind(struct nvkm_chan *chan) { - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - int target; + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; - switch (nvkm_memory_target(mem)) { - case NVKM_MEM_TARGET_VRAM: target = 0; break; - case NVKM_MEM_TARGET_NCOH: target = 3; break; - default: - WARN_ON(1); - return; - } - - nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) | - (target << 28)); - nvkm_wr32(device, 0x002274, (runl << 20) | nr); - - if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x002284 + (runl * 0x08)) & 0x00100000)) - break; - ) < 0) - nvkm_error(subdev, "runlist %d update timeout\n", runl); + nvkm_wr32(device, 0x800000 + (chan->id * 8), 0x00000000); } void -gk104_fifo_runlist_update(struct gk104_fifo *fifo, int runl) +gk104_chan_bind_inst(struct nvkm_chan *chan) { - const struct gk104_fifo_runlist_func *func = fifo->func->runlist; - struct gk104_fifo_chan *chan; - struct nvkm_memory *mem; - struct nvkm_fifo_cgrp *cgrp; - int nr = 0; - - mutex_lock(&fifo->base.mutex); - mem = fifo->runlist[runl].mem[fifo->runlist[runl].next]; - fifo->runlist[runl].next = !fifo->runlist[runl].next; - - nvkm_kmap(mem); - list_for_each_entry(chan, &fifo->runlist[runl].chan, head) { - func->chan(chan, mem, nr++ * func->size); - } - - list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) { - func->cgrp(cgrp, mem, nr++ * func->size); - list_for_each_entry(chan, &cgrp->chan, head) { - func->chan(chan, mem, nr++ * func->size); - } - } - nvkm_done(mem); + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; - func->commit(fifo, runl, mem, nr); - mutex_unlock(&fifo->base.mutex); + nvkm_wr32(device, 0x800000 + (chan->id * 8), 0x80000000 | chan->inst->addr >> 12); } void 
-gk104_fifo_runlist_remove(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan) -{ - struct nvkm_fifo_cgrp *cgrp = chan->cgrp; - mutex_lock(&fifo->base.mutex); - if (!list_empty(&chan->head)) { - list_del_init(&chan->head); - if (cgrp && !--cgrp->chan_nr) - list_del_init(&cgrp->head); - } - mutex_unlock(&fifo->base.mutex); -} - -void -gk104_fifo_runlist_insert(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan) -{ - struct nvkm_fifo_cgrp *cgrp = chan->cgrp; - mutex_lock(&fifo->base.mutex); - if (cgrp) { - if (!cgrp->chan_nr++) - list_add_tail(&cgrp->head, &fifo->runlist[chan->runl].cgrp); - list_add_tail(&chan->head, &cgrp->chan); - } else { - list_add_tail(&chan->head, &fifo->runlist[chan->runl].chan); - } - mutex_unlock(&fifo->base.mutex); -} - -void -gk104_fifo_runlist_chan(struct gk104_fifo_chan *chan, - struct nvkm_memory *memory, u32 offset) -{ - nvkm_wo32(memory, offset + 0, chan->base.chid); - nvkm_wo32(memory, offset + 4, 0x00000000); -} - -const struct gk104_fifo_runlist_func -gk104_fifo_runlist = { - .size = 8, - .chan = gk104_fifo_runlist_chan, - .commit = gk104_fifo_runlist_commit, +gk104_chan_bind(struct nvkm_chan *chan) +{ + struct nvkm_runl *runl = chan->cgrp->runl; + struct nvkm_device *device = runl->fifo->engine.subdev.device; + + nvkm_mask(device, 0x800004 + (chan->id * 8), 0x000f0000, runl->id << 16); + gk104_chan_bind_inst(chan); +} + +static int +gk104_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv) +{ + const u64 userd = nvkm_memory_addr(chan->userd.mem) + chan->userd.base; + const u32 limit2 = ilog2(length / 8); + + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x08, lower_32_bits(userd)); + nvkm_wo32(chan->inst, 0x0c, upper_32_bits(userd)); + nvkm_wo32(chan->inst, 0x10, 0x0000face); + nvkm_wo32(chan->inst, 0x30, 0xfffff902); + nvkm_wo32(chan->inst, 0x48, lower_32_bits(offset)); + nvkm_wo32(chan->inst, 0x4c, upper_32_bits(offset) | (limit2 << 16)); + nvkm_wo32(chan->inst, 0x84, 0x20400000); + nvkm_wo32(chan->inst, 0x94, 0x30000000 | devm); + nvkm_wo32(chan->inst, 0x9c, 0x00000100); + nvkm_wo32(chan->inst, 0xac, 0x0000001f); + nvkm_wo32(chan->inst, 0xe4, priv ? 0x00000020 : 0x00000000); + nvkm_wo32(chan->inst, 0xe8, chan->id); + nvkm_wo32(chan->inst, 0xb8, 0xf8000000); + nvkm_wo32(chan->inst, 0xf8, 0x10003080); /* 0x002310 */ + nvkm_wo32(chan->inst, 0xfc, 0x10000010); /* 0x002350 */ + nvkm_done(chan->inst); + return 0; +} + +const struct nvkm_chan_func_ramfc +gk104_chan_ramfc = { + .write = gk104_chan_ramfc_write, + .devm = 0xfff, + .priv = true, }; -void -gk104_fifo_pbdma_init(struct gk104_fifo *fifo) -{ - struct nvkm_device *device = fifo->base.engine.subdev.device; - nvkm_wr32(device, 0x000204, (1 << fifo->pbdma_nr) - 1); -} - -int -gk104_fifo_pbdma_nr(struct gk104_fifo *fifo) -{ - struct nvkm_device *device = fifo->base.engine.subdev.device; - /* Determine number of PBDMAs by checking valid enable bits. 
*/ - nvkm_wr32(device, 0x000204, 0xffffffff); - return hweight32(nvkm_rd32(device, 0x000204)); -} - -const struct gk104_fifo_pbdma_func -gk104_fifo_pbdma = { - .nr = gk104_fifo_pbdma_nr, - .init = gk104_fifo_pbdma_init, +const struct nvkm_chan_func_userd +gk104_chan_userd = { + .bar = 1, + .size = 0x200, + .clear = gf100_chan_userd_clear, }; -struct nvkm_engine * -gk104_fifo_id_engine(struct nvkm_fifo *base, int engi) -{ - if (engi == GK104_FIFO_ENGN_SW) - return nvkm_device_engine(base->engine.subdev.device, NVKM_ENGINE_SW, 0); - - return gk104_fifo(base)->engine[engi].engine; -} - -int -gk104_fifo_engine_id(struct nvkm_fifo *base, struct nvkm_engine *engine) -{ - struct gk104_fifo *fifo = gk104_fifo(base); - int engn; +static const struct nvkm_chan_func +gk104_chan = { + .inst = &gf100_chan_inst, + .userd = &gk104_chan_userd, + .ramfc = &gk104_chan_ramfc, + .bind = gk104_chan_bind, + .unbind = gk104_chan_unbind, + .start = gk104_chan_start, + .stop = gk104_chan_stop, + .preempt = gf100_chan_preempt, +}; - if (engine->subdev.type == NVKM_ENGINE_SW) - return GK104_FIFO_ENGN_SW; +static void +gk104_ectx_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan) +{ + u32 ptr0, ptr1 = 0; + u64 addr = 0ULL; + + switch (engn->engine->subdev.type) { + case NVKM_ENGINE_SW : return; + case NVKM_ENGINE_GR : ptr0 = 0x0210; break; + case NVKM_ENGINE_SEC : ptr0 = 0x0220; break; + case NVKM_ENGINE_MSPDEC: ptr0 = 0x0250; break; + case NVKM_ENGINE_MSPPP : ptr0 = 0x0260; break; + case NVKM_ENGINE_MSVLD : ptr0 = 0x0270; break; + case NVKM_ENGINE_VIC : ptr0 = 0x0280; break; + case NVKM_ENGINE_MSENC : ptr0 = 0x0290; break; + case NVKM_ENGINE_NVDEC : + ptr1 = 0x0270; + ptr0 = 0x0210; + break; + case NVKM_ENGINE_NVENC : + if (!engn->engine->subdev.inst) + ptr1 = 0x0290; + ptr0 = 0x0210; + break; + default: + WARN_ON(1); + return; + } - for (engn = 0; engn < fifo->engine_nr && engine; engn++) { - if (fifo->engine[engn].engine == engine) - return engn; + if (cctx) { + addr = cctx->vctx->vma->addr; + addr |= 4ULL; } - WARN_ON(1); - return -1; + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, ptr0 + 0, lower_32_bits(addr)); + nvkm_wo32(chan->inst, ptr0 + 4, upper_32_bits(addr)); + if (ptr1) { + nvkm_wo32(chan->inst, ptr1 + 0, lower_32_bits(addr)); + nvkm_wo32(chan->inst, ptr1 + 4, upper_32_bits(addr)); + } + nvkm_done(chan->inst); } -static void -gk104_fifo_recover_work(struct work_struct *w) +int +gk104_ectx_ctor(struct nvkm_engn *engn, struct nvkm_vctx *vctx) { - struct gk104_fifo *fifo = container_of(w, typeof(*fifo), recover.work); - struct nvkm_device *device = fifo->base.engine.subdev.device; - struct nvkm_engine *engine; - unsigned long flags; - u32 engm, runm, todo; - int engn, runl; - - spin_lock_irqsave(&fifo->base.lock, flags); - runm = fifo->recover.runm; - engm = fifo->recover.engm; - fifo->recover.engm = 0; - fifo->recover.runm = 0; - spin_unlock_irqrestore(&fifo->base.lock, flags); - - nvkm_mask(device, 0x002630, runm, runm); - - for (todo = engm; engn = __ffs(todo), todo; todo &= ~BIT(engn)) { - if ((engine = fifo->engine[engn].engine)) { - nvkm_subdev_fini(&engine->subdev, false); - WARN_ON(nvkm_subdev_init(&engine->subdev)); - } - } + struct gf100_vmm_map_v0 args = { .priv = 1 }; + int ret; - for (todo = runm; runl = __ffs(todo), todo; todo &= ~BIT(runl)) - gk104_fifo_runlist_update(fifo, runl); + ret = nvkm_vmm_get(vctx->vmm, 12, vctx->inst->size, &vctx->vma); + if (ret) + return ret; - nvkm_wr32(device, 0x00262c, runm); - nvkm_mask(device, 0x002630, runm, 0x00000000); + return 
nvkm_memory_map(vctx->inst, 0, vctx->vmm, vctx->vma, &args, sizeof(args)); } -static void gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn); +/*TODO: clean this up */ +struct gk104_engn_status { + bool busy; + bool faulted; + bool chsw; + bool save; + bool load; + struct { + bool tsg; + u32 id; + } prev, next, *chan; +}; static void -gk104_fifo_recover_runl(struct gk104_fifo *fifo, int runl) -{ - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - const u32 runm = BIT(runl); - - assert_spin_locked(&fifo->base.lock); - if (fifo->recover.runm & runm) - return; - fifo->recover.runm |= runm; - - /* Block runlist to prevent channel assignment(s) from changing. */ - nvkm_mask(device, 0x002630, runm, runm); - - /* Schedule recovery. */ - nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl); - schedule_work(&fifo->recover.work); -} - -static struct gk104_fifo_chan * -gk104_fifo_recover_chid(struct gk104_fifo *fifo, int runl, int chid) +gk104_engn_status(struct nvkm_engn *engn, struct gk104_engn_status *status) { - struct gk104_fifo_chan *chan; - struct nvkm_fifo_cgrp *cgrp; + u32 stat = nvkm_rd32(engn->runl->fifo->engine.subdev.device, 0x002640 + (engn->id * 0x08)); - list_for_each_entry(chan, &fifo->runlist[runl].chan, head) { - if (chan->base.chid == chid) { - list_del_init(&chan->head); - return chan; - } - } + status->busy = !!(stat & 0x80000000); + status->faulted = !!(stat & 0x40000000); + status->next.tsg = !!(stat & 0x10000000); + status->next.id = (stat & 0x0fff0000) >> 16; + status->chsw = !!(stat & 0x00008000); + status->save = !!(stat & 0x00004000); + status->load = !!(stat & 0x00002000); + status->prev.tsg = !!(stat & 0x00001000); + status->prev.id = (stat & 0x00000fff); + status->chan = NULL; - list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) { - if (cgrp->id == chid) { - chan = list_first_entry(&cgrp->chan, typeof(*chan), head); - list_del_init(&chan->head); - if (!--cgrp->chan_nr) - list_del_init(&cgrp->head); - return chan; + if (status->busy && status->chsw) { + if (status->load && status->save) { + if (nvkm_engine_chsw_load(engn->engine)) + status->chan = &status->next; + else + status->chan = &status->prev; + } else + if (status->load) { + status->chan = &status->next; + } else { + status->chan = &status->prev; } + } else + if (status->load) { + status->chan = &status->prev; } - return NULL; -} - -static void -gk104_fifo_recover_chan(struct nvkm_fifo *base, int chid) -{ - struct gk104_fifo *fifo = gk104_fifo(base); - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - const u32 stat = nvkm_rd32(device, 0x800004 + (chid * 0x08)); - const u32 runl = (stat & 0x000f0000) >> 16; - const bool used = (stat & 0x00000001); - unsigned long engn, engm = fifo->runlist[runl].engm; - struct gk104_fifo_chan *chan; - - assert_spin_locked(&fifo->base.lock); - if (!used) - return; - - /* Lookup SW state for channel, and mark it as dead. */ - chan = gk104_fifo_recover_chid(fifo, runl, chid); - if (chan) { - chan->killed = true; - nvkm_fifo_kevent(&fifo->base, chid); - } - - /* Disable channel. */ - nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800); - nvkm_warn(subdev, "channel %d: killed\n", chid); - - /* Block channel assignments from changing during recovery. */ - gk104_fifo_recover_runl(fifo, runl); - - /* Schedule recovery for any engines the channel is on. 
*/ - for_each_set_bit(engn, &engm, fifo->engine_nr) { - struct gk104_fifo_engine_status status; - gk104_fifo_engine_status(fifo, engn, &status); - if (!status.chan || status.chan->id != chid) - continue; - gk104_fifo_recover_engn(fifo, engn); - } + ENGN_DEBUG(engn, "%08x: busy %d faulted %d chsw %d save %d load %d %sid %d%s-> %sid %d%s", + stat, status->busy, status->faulted, status->chsw, status->save, status->load, + status->prev.tsg ? "tsg" : "ch", status->prev.id, + status->chan == &status->prev ? "*" : " ", + status->next.tsg ? "tsg" : "ch", status->next.id, + status->chan == &status->next ? "*" : " "); } -static void -gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn) +int +gk104_engn_cxid(struct nvkm_engn *engn, bool *cgid) { - struct nvkm_engine *engine = fifo->engine[engn].engine; - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - const u32 runl = fifo->engine[engn].runl; - const u32 engm = BIT(engn); - struct gk104_fifo_engine_status status; - int mmui = -1; - - assert_spin_locked(&fifo->base.lock); - if (fifo->recover.engm & engm) - return; - fifo->recover.engm |= engm; - - /* Block channel assignments from changing during recovery. */ - gk104_fifo_recover_runl(fifo, runl); + struct gk104_engn_status status; - /* Determine which channel (if any) is currently on the engine. */ - gk104_fifo_engine_status(fifo, engn, &status); + gk104_engn_status(engn, &status); if (status.chan) { - /* The channel is no longer viable, kill it. */ - gk104_fifo_recover_chan(&fifo->base, status.chan->id); + *cgid = status.chan->tsg; + return status.chan->id; } - /* Determine MMU fault ID for the engine, if we're not being - * called from the fault handler already. - */ - if (!status.faulted && engine) { - mmui = nvkm_top_fault_id(device, engine->subdev.type, engine->subdev.inst); - if (mmui < 0) { - const struct nvkm_enum *en = fifo->func->fault.engine; - for (; en && en->name; en++) { - if (en->data2 == engine->subdev.type && - en->inst == engine->subdev.inst) { - mmui = en->value; - break; - } - } - } - WARN_ON(mmui < 0); - } - - /* Trigger an MMU fault for the engine. - * - * No good idea why this is needed, but nvgpu does something similar, - * and it makes recovery from CTXSW_TIMEOUT a lot more reliable. - */ - if (mmui >= 0) { - nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000100 | mmui); - - /* Wait for fault to trigger. */ - nvkm_msec(device, 2000, - gk104_fifo_engine_status(fifo, engn, &status); - if (status.faulted) - break; - ); - - /* Release MMU fault trigger, and ACK the fault. */ - nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000000); - nvkm_wr32(device, 0x00259c, BIT(mmui)); - nvkm_wr32(device, 0x002100, 0x10000000); - } - - /* Schedule recovery. 
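
gk104_engn_status() above is the old gk104_fifo_engine_status() rehomed onto the new nvkm_engn objects: it decodes the per-engine status word at 0x002640 + engn->id * 0x08, then resolves which of the prev/next contexts is the effective one from the save/load phase, which gk104_engn_cxid() in turn reports as the resident channel or TSG id. The raw decode as a stand-alone program; the struct and helper names are illustrative, the masks are taken from the hunk above:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct engn_status {
	bool busy, faulted, chsw, save, load;
	struct { bool tsg; uint32_t id; } prev, next;
};

static void decode_engn_status(uint32_t stat, struct engn_status *s)
{
	s->busy     = stat & 0x80000000;	/* engine has work			*/
	s->faulted  = stat & 0x40000000;	/* engine is MMU-faulted		*/
	s->next.tsg = stat & 0x10000000;	/* incoming context is a TSG		*/
	s->next.id  = (stat & 0x0fff0000) >> 16;
	s->chsw     = stat & 0x00008000;	/* context switch in progress		*/
	s->save     = stat & 0x00004000;
	s->load     = stat & 0x00002000;
	s->prev.tsg = stat & 0x00001000;	/* outgoing context is a TSG		*/
	s->prev.id  =  stat & 0x00000fff;
}

int main(void)
{
	struct engn_status s;

	decode_engn_status(0x9000a00b, &s);	/* busy, chsw+load, ch 11 -> tsg 0 */
	printf("busy %d chsw %d load %d prev %s%u next %s%u\n",
	       s.busy, s.chsw, s.load,
	       s.prev.tsg ? "tsg" : "ch", s.prev.id,
	       s.next.tsg ? "tsg" : "ch", s.next.id);
	return 0;
}
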
*/ - nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn); - schedule_work(&fifo->recover.work); + return -ENODEV; } -static void -gk104_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info) +bool +gk104_engn_chsw(struct nvkm_engn *engn) { - struct gk104_fifo *fifo = gk104_fifo(base); - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - const struct nvkm_enum *er, *ee, *ec, *ea; - struct nvkm_engine *engine = NULL; - struct nvkm_fifo_chan *chan; - unsigned long flags; - const char *en = ""; - char ct[8] = "HUB/"; - - er = nvkm_enum_find(fifo->func->fault.reason, info->reason); - ee = nvkm_enum_find(fifo->func->fault.engine, info->engine); - if (info->hub) { - ec = nvkm_enum_find(fifo->func->fault.hubclient, info->client); - } else { - ec = nvkm_enum_find(fifo->func->fault.gpcclient, info->client); - snprintf(ct, sizeof(ct), "GPC%d/", info->gpc); - } - ea = nvkm_enum_find(fifo->func->fault.access, info->access); + struct gk104_engn_status status; - if (ee && ee->data2) { - switch (ee->data2) { - case NVKM_SUBDEV_BAR: - nvkm_bar_bar1_reset(device); - break; - case NVKM_SUBDEV_INSTMEM: - nvkm_bar_bar2_reset(device); - break; - case NVKM_ENGINE_IFB: - nvkm_mask(device, 0x001718, 0x00000000, 0x00000000); - break; - default: - engine = nvkm_device_engine(device, ee->data2, 0); - break; - } - } - - if (ee == NULL) { - struct nvkm_subdev *subdev = nvkm_top_fault(device, info->engine); - if (subdev) { - if (subdev->func == &nvkm_engine) - engine = container_of(subdev, typeof(*engine), subdev); - en = engine->subdev.name; - } - } else { - en = ee->name; - } - - spin_lock_irqsave(&fifo->base.lock, flags); - chan = nvkm_fifo_chan_inst_locked(&fifo->base, info->inst); - - nvkm_error(subdev, - "fault %02x [%s] at %016llx engine %02x [%s] client %02x " - "[%s%s] reason %02x [%s] on channel %d [%010llx %s]\n", - info->access, ea ? ea->name : "", info->addr, - info->engine, ee ? ee->name : en, - info->client, ct, ec ? ec->name : "", - info->reason, er ? er->name : "", chan ? chan->chid : -1, - info->inst, chan ? chan->object.client->name : "unknown"); - - /* Kill the channel that caused the fault. */ - if (chan) - gk104_fifo_recover_chan(&fifo->base, chan->chid); - - /* Channel recovery will probably have already done this for the - * correct engine(s), but just in case we can't find the channel - * information... 
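
The fault decoding the deleted gk104_fifo_fault() performed (mapping each raw field through an nvkm_enum table to a printable name) moves behind the new nvkm_fifo_func_mmu_fault tables that appear further down in this file. The lookup itself is a linear scan to a NULL-name sentinel; a minimal stand-alone version, where enum_ent/enum_find are illustrative stand-ins for nvkm_enum/nvkm_enum_find and the entries are copied from the reason table below:

#include <stdio.h>

struct enum_ent {			/* illustrative nvkm_enum stand-in */
	int value;
	const char *name;
};

static const struct enum_ent fault_reason[] = {
	{ 0x00, "PDE" },
	{ 0x01, "PDE_SIZE" },
	{ 0x02, "PTE" },
	{ 0, NULL }			/* NULL name ends the table, like the {} terminator */
};

static const char *enum_find(const struct enum_ent *en, int value)
{
	for (; en && en->name; en++) {
		if (en->value == value)
			return en->name;
	}
	return "";			/* unknown codes print as empty, as in the log calls */
}

int main(void)
{
	printf("reason %02x [%s]\n", 0x02, enum_find(fault_reason, 0x02));
	printf("reason %02x [%s]\n", 0x7f, enum_find(fault_reason, 0x7f));
	return 0;
}
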
- */ - if (engine) { - int engn = fifo->base.func->engine_id(&fifo->base, engine); - if (engn >= 0 && engn != GK104_FIFO_ENGN_SW) - gk104_fifo_recover_engn(fifo, engn); - } + gk104_engn_status(engn, &status); + if (status.busy && status.chsw) + return true; - spin_unlock_irqrestore(&fifo->base.lock, flags); + return false; } -static const struct nvkm_enum -gk104_fifo_bind_reason[] = { - { 0x01, "BIND_NOT_UNBOUND" }, - { 0x02, "SNOOP_WITHOUT_BAR1" }, - { 0x03, "UNBIND_WHILE_RUNNING" }, - { 0x05, "INVALID_RUNLIST" }, - { 0x06, "INVALID_CTX_TGT" }, - { 0x0b, "UNBIND_WHILE_PARKED" }, - {} +const struct nvkm_engn_func +gk104_engn = { + .chsw = gk104_engn_chsw, + .cxid = gk104_engn_cxid, + .mmu_fault_trigger = gf100_engn_mmu_fault_trigger, + .mmu_fault_triggered = gf100_engn_mmu_fault_triggered, + .ctor = gk104_ectx_ctor, + .bind = gk104_ectx_bind, }; -void -gk104_fifo_intr_bind(struct gk104_fifo *fifo) +const struct nvkm_engn_func +gk104_engn_ce = { + .chsw = gk104_engn_chsw, + .cxid = gk104_engn_cxid, + .mmu_fault_trigger = gf100_engn_mmu_fault_trigger, + .mmu_fault_triggered = gf100_engn_mmu_fault_triggered, +}; + +bool +gk104_runq_idle(struct nvkm_runq *runq) { - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - u32 intr = nvkm_rd32(device, 0x00252c); - u32 code = intr & 0x000000ff; - const struct nvkm_enum *en = - nvkm_enum_find(gk104_fifo_bind_reason, code); + struct nvkm_device *device = runq->fifo->engine.subdev.device; - nvkm_error(subdev, "BIND_ERROR %02x [%s]\n", code, en ? en->name : ""); + return !(nvkm_rd32(device, 0x003080 + (runq->id * 4)) & 0x0000e000); } -static const struct nvkm_enum -gk104_fifo_sched_reason[] = { - { 0x0a, "CTXSW_TIMEOUT" }, +static const struct nvkm_bitfield +gk104_runq_intr_1_names[] = { + { 0x00000001, "HCE_RE_ILLEGAL_OP" }, + { 0x00000002, "HCE_RE_ALIGNB" }, + { 0x00000004, "HCE_PRIV" }, + { 0x00000008, "HCE_ILLEGAL_MTHD" }, + { 0x00000010, "HCE_ILLEGAL_CLASS" }, {} }; -static void -gk104_fifo_intr_sched_ctxsw(struct gk104_fifo *fifo) +static bool +gk104_runq_intr_1(struct nvkm_runq *runq) { - struct nvkm_device *device = fifo->base.engine.subdev.device; - unsigned long flags, engm = 0; - u32 engn; - - /* We need to ACK the SCHED_ERROR here, and prevent it reasserting, - * as MMU_FAULT cannot be triggered while it's pending. - */ - spin_lock_irqsave(&fifo->base.lock, flags); - nvkm_mask(device, 0x002140, 0x00000100, 0x00000000); - nvkm_wr32(device, 0x002100, 0x00000100); - - for (engn = 0; engn < fifo->engine_nr; engn++) { - struct gk104_fifo_engine_status status; - - gk104_fifo_engine_status(fifo, engn, &status); - if (!status.busy || !status.chsw) - continue; - - engm |= BIT(engn); - } - - for_each_set_bit(engn, &engm, fifo->engine_nr) - gk104_fifo_recover_engn(fifo, engn); - - nvkm_mask(device, 0x002140, 0x00000100, 0x00000100); - spin_unlock_irqrestore(&fifo->base.lock, flags); -} - -static void -gk104_fifo_intr_sched(struct gk104_fifo *fifo) -{ - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; + struct nvkm_subdev *subdev = &runq->fifo->engine.subdev; struct nvkm_device *device = subdev->device; - u32 intr = nvkm_rd32(device, 0x00254c); - u32 code = intr & 0x000000ff; - const struct nvkm_enum *en = - nvkm_enum_find(gk104_fifo_sched_reason, code); - - nvkm_error(subdev, "SCHED_ERROR %02x [%s]\n", code, en ? 
en->name : ""); + u32 mask = nvkm_rd32(device, 0x04014c + (runq->id * 0x2000)); + u32 stat = nvkm_rd32(device, 0x040148 + (runq->id * 0x2000)) & mask; + u32 chid = nvkm_rd32(device, 0x040120 + (runq->id * 0x2000)) & 0xfff; + char msg[128]; - switch (code) { - case 0x0a: - gk104_fifo_intr_sched_ctxsw(fifo); - break; - default: - break; + if (stat & 0x80000000) { + if (runq->func->intr_1_ctxnotvalid && + runq->func->intr_1_ctxnotvalid(runq, chid)) + stat &= ~0x80000000; } -} -void -gk104_fifo_intr_chsw(struct gk104_fifo *fifo) -{ - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - u32 stat = nvkm_rd32(device, 0x00256c); - nvkm_error(subdev, "CHSW_ERROR %08x\n", stat); - nvkm_wr32(device, 0x00256c, stat); -} + if (stat) { + nvkm_snprintbf(msg, sizeof(msg), gk104_runq_intr_1_names, stat); + nvkm_error(subdev, "PBDMA%d: %08x [%s] ch %d %08x %08x\n", + runq->id, stat, msg, chid, + nvkm_rd32(device, 0x040150 + (runq->id * 0x2000)), + nvkm_rd32(device, 0x040154 + (runq->id * 0x2000))); + } -void -gk104_fifo_intr_dropped_fault(struct gk104_fifo *fifo) -{ - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - u32 stat = nvkm_rd32(device, 0x00259c); - nvkm_error(subdev, "DROPPED_MMU_FAULT %08x\n", stat); + nvkm_wr32(device, 0x040148 + (runq->id * 0x2000), stat); + return true; } -static const struct nvkm_bitfield gk104_fifo_pbdma_intr_0[] = { +const struct nvkm_bitfield +gk104_runq_intr_0_names[] = { { 0x00000001, "MEMREQ" }, { 0x00000002, "MEMACK_TIMEOUT" }, { 0x00000004, "MEMACK_EXTRA" }, @@ -691,430 +366,111 @@ static const struct nvkm_bitfield gk104_fifo_pbdma_intr_0[] = { {} }; -void -gk104_fifo_intr_pbdma_0(struct gk104_fifo *fifo, int unit) +bool +gk104_runq_intr(struct nvkm_runq *runq, struct nvkm_runl *null) { - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - u32 mask = nvkm_rd32(device, 0x04010c + (unit * 0x2000)); - u32 stat = nvkm_rd32(device, 0x040108 + (unit * 0x2000)) & mask; - u32 addr = nvkm_rd32(device, 0x0400c0 + (unit * 0x2000)); - u32 data = nvkm_rd32(device, 0x0400c4 + (unit * 0x2000)); - u32 chid = nvkm_rd32(device, 0x040120 + (unit * 0x2000)) & 0xfff; - u32 subc = (addr & 0x00070000) >> 16; - u32 mthd = (addr & 0x00003ffc); - u32 show = stat; - struct nvkm_fifo_chan *chan; - unsigned long flags; - char msg[128]; - - if (stat & 0x00800000) { - if (device->sw) { - if (nvkm_sw_mthd(device->sw, chid, subc, mthd, data)) - show &= ~0x00800000; - } - } - - nvkm_wr32(device, 0x0400c0 + (unit * 0x2000), 0x80600008); - - if (show) { - nvkm_snprintbf(msg, sizeof(msg), gk104_fifo_pbdma_intr_0, show); - chan = nvkm_fifo_chan_chid(&fifo->base, chid, &flags); - nvkm_error(subdev, "PBDMA%d: %08x [%s] ch %d [%010llx %s] " - "subc %d mthd %04x data %08x\n", - unit, show, msg, chid, chan ? chan->inst->addr : 0, - chan ? 
chan->object.client->name : "unknown", - subc, mthd, data); - nvkm_fifo_chan_put(&fifo->base, flags, &chan); - } + bool intr0 = gf100_runq_intr(runq, NULL); + bool intr1 = gk104_runq_intr_1(runq); - nvkm_wr32(device, 0x040108 + (unit * 0x2000), stat); + return intr0 || intr1; } -static const struct nvkm_bitfield gk104_fifo_pbdma_intr_1[] = { - { 0x00000001, "HCE_RE_ILLEGAL_OP" }, - { 0x00000002, "HCE_RE_ALIGNB" }, - { 0x00000004, "HCE_PRIV" }, - { 0x00000008, "HCE_ILLEGAL_MTHD" }, - { 0x00000010, "HCE_ILLEGAL_CLASS" }, - {} -}; - void -gk104_fifo_intr_pbdma_1(struct gk104_fifo *fifo, int unit) +gk104_runq_init(struct nvkm_runq *runq) { - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - u32 mask = nvkm_rd32(device, 0x04014c + (unit * 0x2000)); - u32 stat = nvkm_rd32(device, 0x040148 + (unit * 0x2000)) & mask; - u32 chid = nvkm_rd32(device, 0x040120 + (unit * 0x2000)) & 0xfff; - char msg[128]; + struct nvkm_device *device = runq->fifo->engine.subdev.device; - if (stat) { - nvkm_snprintbf(msg, sizeof(msg), gk104_fifo_pbdma_intr_1, stat); - nvkm_error(subdev, "PBDMA%d: %08x [%s] ch %d %08x %08x\n", - unit, stat, msg, chid, - nvkm_rd32(device, 0x040150 + (unit * 0x2000)), - nvkm_rd32(device, 0x040154 + (unit * 0x2000))); - } + gf100_runq_init(runq); - nvkm_wr32(device, 0x040148 + (unit * 0x2000), stat); + nvkm_wr32(device, 0x040148 + (runq->id * 0x2000), 0xffffffff); /* HCE.INTR */ + nvkm_wr32(device, 0x04014c + (runq->id * 0x2000), 0xffffffff); /* HCE.INTREN */ } -void -gk104_fifo_intr_runlist(struct gk104_fifo *fifo) +static u32 +gk104_runq_runm(struct nvkm_runq *runq) { - struct nvkm_device *device = fifo->base.engine.subdev.device; - u32 mask = nvkm_rd32(device, 0x002a00); - while (mask) { - int runl = __ffs(mask); - wake_up(&fifo->runlist[runl].wait); - nvkm_wr32(device, 0x002a00, 1 << runl); - mask &= ~(1 << runl); - } + return nvkm_rd32(runq->fifo->engine.subdev.device, 0x002390 + (runq->id * 0x04)); } +const struct nvkm_runq_func +gk104_runq = { + .init = gk104_runq_init, + .intr = gk104_runq_intr, + .intr_0_names = gk104_runq_intr_0_names, + .idle = gk104_runq_idle, +}; + void -gk104_fifo_intr_engine(struct gk104_fifo *fifo) +gk104_runl_fault_clear(struct nvkm_runl *runl) { - nvkm_fifo_uevent(&fifo->base); -} - -static void -gk104_fifo_intr(struct nvkm_fifo *base) -{ - struct gk104_fifo *fifo = gk104_fifo(base); - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - u32 mask = nvkm_rd32(device, 0x002140); - u32 stat = nvkm_rd32(device, 0x002100) & mask; - - if (stat & 0x00000001) { - gk104_fifo_intr_bind(fifo); - nvkm_wr32(device, 0x002100, 0x00000001); - stat &= ~0x00000001; - } - - if (stat & 0x00000010) { - nvkm_error(subdev, "PIO_ERROR\n"); - nvkm_wr32(device, 0x002100, 0x00000010); - stat &= ~0x00000010; - } - - if (stat & 0x00000100) { - gk104_fifo_intr_sched(fifo); - nvkm_wr32(device, 0x002100, 0x00000100); - stat &= ~0x00000100; - } - - if (stat & 0x00010000) { - gk104_fifo_intr_chsw(fifo); - nvkm_wr32(device, 0x002100, 0x00010000); - stat &= ~0x00010000; - } - - if (stat & 0x00800000) { - nvkm_error(subdev, "FB_FLUSH_TIMEOUT\n"); - nvkm_wr32(device, 0x002100, 0x00800000); - stat &= ~0x00800000; - } - - if (stat & 0x01000000) { - nvkm_error(subdev, "LB_ERROR\n"); - nvkm_wr32(device, 0x002100, 0x01000000); - stat &= ~0x01000000; - } - - if (stat & 0x08000000) { - gk104_fifo_intr_dropped_fault(fifo); - nvkm_wr32(device, 0x002100, 0x08000000); - stat &= ~0x08000000; - } - - 
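
gk104_runq_intr_1() and gk104_runq_init() above show the PBDMA (runq) interrupt discipline: status is ANDed with the enable mask before anything is reported, the handled bits are written back to the status register to acknowledge them, and init acks everything stale before opening the enable mask. Sketched stand-alone against a fake register file; rd32/wr32 stand in for nvkm_rd32/nvkm_wr32, and write-one-to-clear behaviour is assumed from the ack pattern rather than stated in the hunk:

#include <stdint.h>
#include <stdio.h>

static uint32_t regs[0x50000 / 4];	/* stand-in MMIO space, word-indexed */
static uint32_t rd32(uint32_t a) { return regs[a / 4]; }
static void wr32(uint32_t a, uint32_t v) { regs[a / 4] = v; }

static void runq_init(int id)
{
	wr32(0x040148 + id * 0x2000, 0xffffffff);	/* ack stale HCE.INTR	*/
	wr32(0x04014c + id * 0x2000, 0xffffffff);	/* then open HCE.INTREN	*/
}

static void runq_intr_1(int id)
{
	uint32_t mask = rd32(0x04014c + id * 0x2000);
	uint32_t stat = rd32(0x040148 + id * 0x2000) & mask;

	if (stat)
		printf("PBDMA%d: %08x\n", id, stat);

	wr32(0x040148 + id * 0x2000, stat);		/* ack only what we saw	*/
}

int main(void)
{
	runq_init(0);
	regs[0x040148 / 4] = 0x00000004;		/* pretend HCE_PRIV fired */
	runq_intr_1(0);
	return 0;
}
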
if (stat & 0x10000000) { - u32 mask = nvkm_rd32(device, 0x00259c); - while (mask) { - u32 unit = __ffs(mask); - fifo->func->intr.fault(&fifo->base, unit); - nvkm_wr32(device, 0x00259c, (1 << unit)); - mask &= ~(1 << unit); - } - stat &= ~0x10000000; - } - - if (stat & 0x20000000) { - u32 mask = nvkm_rd32(device, 0x0025a0); - while (mask) { - u32 unit = __ffs(mask); - gk104_fifo_intr_pbdma_0(fifo, unit); - gk104_fifo_intr_pbdma_1(fifo, unit); - nvkm_wr32(device, 0x0025a0, (1 << unit)); - mask &= ~(1 << unit); - } - stat &= ~0x20000000; - } - - if (stat & 0x40000000) { - gk104_fifo_intr_runlist(fifo); - stat &= ~0x40000000; - } - - if (stat & 0x80000000) { - nvkm_wr32(device, 0x002100, 0x80000000); - gk104_fifo_intr_engine(fifo); - stat &= ~0x80000000; - } - - if (stat) { - nvkm_error(subdev, "INTR %08x\n", stat); - nvkm_mask(device, 0x002140, stat, 0x00000000); - nvkm_wr32(device, 0x002100, stat); - } + nvkm_wr32(runl->fifo->engine.subdev.device, 0x00262c, BIT(runl->id)); } void -gk104_fifo_fini(struct nvkm_fifo *base) +gk104_runl_allow(struct nvkm_runl *runl, u32 engm) { - struct gk104_fifo *fifo = gk104_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; - flush_work(&fifo->recover.work); - /* allow mmu fault interrupts, even when we're not using fifo */ - nvkm_mask(device, 0x002140, 0x10000000, 0x10000000); + nvkm_mask(runl->fifo->engine.subdev.device, 0x002630, BIT(runl->id), 0x00000000); } -int -gk104_fifo_info(struct nvkm_fifo *base, u64 mthd, u64 *data) +void +gk104_runl_block(struct nvkm_runl *runl, u32 engm) { - struct gk104_fifo *fifo = gk104_fifo(base); - switch (mthd) { - case NV_DEVICE_HOST_RUNLISTS: - *data = (1ULL << fifo->runlist_nr) - 1; - return 0; - case NV_DEVICE_HOST_RUNLIST_ENGINES: { - if (*data < fifo->runlist_nr) { - unsigned long engm = fifo->runlist[*data].engm; - struct nvkm_engine *engine; - int engn; - *data = 0; - for_each_set_bit(engn, &engm, fifo->engine_nr) { - if ((engine = fifo->engine[engn].engine)) { -#define CASE(n) case NVKM_ENGINE_##n: *data |= NV_DEVICE_HOST_RUNLIST_ENGINES_##n; break - switch (engine->subdev.type) { - CASE(SW ); - CASE(GR ); - CASE(MPEG ); - CASE(ME ); - CASE(CIPHER); - CASE(BSP ); - CASE(VP ); - CASE(CE ); - CASE(SEC ); - CASE(MSVLD ); - CASE(MSPDEC); - CASE(MSPPP ); - CASE(MSENC ); - CASE(VIC ); - CASE(SEC2 ); - CASE(NVDEC ); - CASE(NVENC ); - default: - WARN_ON(1); - break; - } - } - } - return 0; - } - } - return -EINVAL; - default: - return -EINVAL; - } + nvkm_mask(runl->fifo->engine.subdev.device, 0x002630, BIT(runl->id), BIT(runl->id)); } -int -gk104_fifo_oneinit(struct nvkm_fifo *base) +bool +gk104_runl_pending(struct nvkm_runl *runl) { - struct gk104_fifo *fifo = gk104_fifo(base); - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device); - struct nvkm_top_device *tdev; - int pbid, ret, i, j; - u32 *map; - - fifo->pbdma_nr = fifo->func->pbdma->nr(fifo); - nvkm_debug(subdev, "%d PBDMA(s)\n", fifo->pbdma_nr); - - /* Read PBDMA->runlist(s) mapping from HW. */ - if (!(map = kcalloc(fifo->pbdma_nr, sizeof(*map), GFP_KERNEL))) - return -ENOMEM; + struct nvkm_device *device = runl->fifo->engine.subdev.device; - for (i = 0; i < fifo->pbdma_nr; i++) - map[i] = nvkm_rd32(device, 0x002390 + (i * 0x04)); - - /* Determine runlist configuration from topology device info. 
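
The monolithic handler being deleted here returns below as gk104_fifo_intr(), now an irqreturn_t-returning nvkm_inth callback, but its shape is unchanged: latch status AND enable-mask once, handle each known bit, ack it in 0x002100 and clear it from the local copy, then report-and-disable whatever is left so an unrecognised source can't keep reasserting. The skeleton reduced to a single known bit, with the same stub accessors as the previous sketch:

#include <stdint.h>
#include <stdio.h>

static uint32_t regs[0x1000];
static uint32_t rd32(uint32_t a) { return regs[a / 4]; }
static void wr32(uint32_t a, uint32_t v) { regs[a / 4] = v; }
static void mask32(uint32_t a, uint32_t m, uint32_t v)
{
	wr32(a, (rd32(a) & ~m) | v);
}

static void fifo_intr(void)
{
	uint32_t mask = rd32(0x002140);		/* INTR_EN			*/
	uint32_t stat = rd32(0x002100) & mask;	/* INTR, enabled bits only	*/

	if (stat & 0x00000001) {
		/* handle BIND_ERROR here ... */
		wr32(0x002100, 0x00000001);	/* ack just this bit		*/
		stat &= ~0x00000001;		/* and drop it from the copy	*/
	}

	/* ... one block per known bit ... */

	if (stat) {
		/* unknown bits: report once, then mask them off */
		printf("INTR %08x\n", stat);
		mask32(0x002140, stat, 0);
		wr32(0x002100, stat);
	}
}

int main(void)
{
	wr32(0x002140, 0xffffffff);		/* everything enabled		*/
	wr32(0x002100, 0x00000011);		/* one known bit, one unknown	*/
	fifo_intr();
	return 0;
}
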
*/ - list_for_each_entry(tdev, &device->top->device, head) { - const int engn = tdev->engine; - char _en[16], *en; - - if (engn < 0) - continue; - - /* Determine which PBDMA handles requests for this engine. */ - for (j = 0, pbid = -1; j < fifo->pbdma_nr; j++) { - if (map[j] & BIT(tdev->runlist)) { - pbid = j; - break; - } - } - - fifo->engine[engn].engine = nvkm_device_engine(device, tdev->type, tdev->inst); - if (!fifo->engine[engn].engine) { - snprintf(_en, sizeof(_en), "%s, %d", - nvkm_subdev_type[tdev->type], tdev->inst); - en = _en; - } else { - en = fifo->engine[engn].engine->subdev.name; - } - - nvkm_debug(subdev, "engine %2d: runlist %2d pbdma %2d (%s)\n", - tdev->engine, tdev->runlist, pbid, en); - - fifo->engine[engn].runl = tdev->runlist; - fifo->engine[engn].pbid = pbid; - fifo->engine_nr = max(fifo->engine_nr, engn + 1); - fifo->runlist[tdev->runlist].engm |= BIT(engn); - fifo->runlist[tdev->runlist].engm_sw |= BIT(engn); - if (tdev->type == NVKM_ENGINE_GR) - fifo->runlist[tdev->runlist].engm_sw |= BIT(GK104_FIFO_ENGN_SW); - fifo->runlist_nr = max(fifo->runlist_nr, tdev->runlist + 1); - } - - kfree(map); - - for (i = 0; i < fifo->runlist_nr; i++) { - for (j = 0; j < ARRAY_SIZE(fifo->runlist[i].mem); j++) { - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - fifo->base.nr * 2/* TSG+chan */ * - fifo->func->runlist->size, - 0x1000, false, - &fifo->runlist[i].mem[j]); - if (ret) - return ret; - } - - init_waitqueue_head(&fifo->runlist[i].wait); - INIT_LIST_HEAD(&fifo->runlist[i].cgrp); - INIT_LIST_HEAD(&fifo->runlist[i].chan); - } - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - fifo->base.nr * 0x200, 0x1000, true, - &fifo->user.mem); - if (ret) - return ret; - - ret = nvkm_vmm_get(bar, 12, nvkm_memory_size(fifo->user.mem), - &fifo->user.bar); - if (ret) - return ret; - - return nvkm_memory_map(fifo->user.mem, 0, bar, fifo->user.bar, NULL, 0); + return nvkm_rd32(device, 0x002284 + (runl->id * 0x08)) & 0x00100000; } void -gk104_fifo_init(struct nvkm_fifo *base) +gk104_runl_commit(struct nvkm_runl *runl, struct nvkm_memory *memory, u32 start, int count) { - struct gk104_fifo *fifo = gk104_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; - int i; - - /* Enable PBDMAs. 
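
The runlist gating the deleted recovery worker open-coded is now behind the block/allow/pending hooks added above: set the runlist's bit in 0x002630 to freeze channel assignment, submit the new list (the commit hook follows just below, via 0x002270/0x002274), poll bit 20 of 0x002284 + id * 0x08 until the hardware has consumed it, then unblock. The intended ordering, roughly, with stub accessors and the driver's nvkm_msec() timeout elided:

#include <stdbool.h>
#include <stdint.h>

static uint32_t regs[0x1000];
static uint32_t rd32(uint32_t a) { return regs[a / 4]; }
static void wr32(uint32_t a, uint32_t v) { regs[a / 4] = v; }
static void mask32(uint32_t a, uint32_t m, uint32_t v)
{
	wr32(a, (rd32(a) & ~m) | v);
}

static void runl_block(int id) { mask32(0x002630, 1u << id, 1u << id); }
static void runl_allow(int id) { mask32(0x002630, 1u << id, 0); }

static bool runl_pending(int id)
{
	return rd32(0x002284 + id * 0x08) & 0x00100000;
}

static void runl_update(int id)
{
	runl_block(id);		/* freeze channel assignment		*/
	/* ... rebuild entries, write base/count to 0x002270/0x002274 ... */
	while (runl_pending(id))
		;		/* real code bounds this with a timeout	*/
	runl_allow(id);
}

int main(void)
{
	runl_update(0);
	return 0;
}
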
*/ - fifo->func->pbdma->init(fifo); - - /* PBDMA[n] */ - for (i = 0; i < fifo->pbdma_nr; i++) { - nvkm_mask(device, 0x04013c + (i * 0x2000), 0x10000100, 0x00000000); - nvkm_wr32(device, 0x040108 + (i * 0x2000), 0xffffffff); /* INTR */ - nvkm_wr32(device, 0x04010c + (i * 0x2000), 0xfffffeff); /* INTREN */ - } - - /* PBDMA[n].HCE */ - for (i = 0; i < fifo->pbdma_nr; i++) { - nvkm_wr32(device, 0x040148 + (i * 0x2000), 0xffffffff); /* INTR */ - nvkm_wr32(device, 0x04014c + (i * 0x2000), 0xffffffff); /* INTREN */ - } - - nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar->addr >> 12); - - if (fifo->func->pbdma->init_timeout) - fifo->func->pbdma->init_timeout(fifo); - - nvkm_wr32(device, 0x002100, 0xffffffff); - nvkm_wr32(device, 0x002140, 0x7fffffff); -} - -void * -gk104_fifo_dtor(struct nvkm_fifo *base) -{ - struct gk104_fifo *fifo = gk104_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; - int i; - - nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &fifo->user.bar); - nvkm_memory_unref(&fifo->user.mem); + struct nvkm_fifo *fifo = runl->fifo; + struct nvkm_device *device = fifo->engine.subdev.device; + u64 addr = nvkm_memory_addr(memory) + start; + int target; - for (i = 0; i < fifo->runlist_nr; i++) { - nvkm_memory_unref(&fifo->runlist[i].mem[1]); - nvkm_memory_unref(&fifo->runlist[i].mem[0]); + switch (nvkm_memory_target(memory)) { + case NVKM_MEM_TARGET_VRAM: target = 0; break; + case NVKM_MEM_TARGET_NCOH: target = 3; break; + default: + WARN_ON(1); + return; } - return fifo; + spin_lock_irq(&fifo->lock); + nvkm_wr32(device, 0x002270, (target << 28) | (addr >> 12)); + nvkm_wr32(device, 0x002274, (runl->id << 20) | count); + spin_unlock_irq(&fifo->lock); } -static const struct nvkm_fifo_func -gk104_fifo_ = { - .dtor = gk104_fifo_dtor, - .oneinit = gk104_fifo_oneinit, - .info = gk104_fifo_info, - .init = gk104_fifo_init, - .fini = gk104_fifo_fini, - .intr = gk104_fifo_intr, - .fault = gk104_fifo_fault, - .engine_id = gk104_fifo_engine_id, - .id_engine = gk104_fifo_id_engine, - .uevent_init = gk104_fifo_uevent_init, - .uevent_fini = gk104_fifo_uevent_fini, - .recover_chan = gk104_fifo_recover_chan, - .class_get = gk104_fifo_class_get, - .class_new = gk104_fifo_class_new, -}; - -int -gk104_fifo_new_(const struct gk104_fifo_func *func, struct nvkm_device *device, - enum nvkm_subdev_type type, int inst, int nr, struct nvkm_fifo **pfifo) +void +gk104_runl_insert_chan(struct nvkm_chan *chan, struct nvkm_memory *memory, u64 offset) { - struct gk104_fifo *fifo; - - if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL))) - return -ENOMEM; - fifo->func = func; - INIT_WORK(&fifo->recover.work, gk104_fifo_recover_work); - *pfifo = &fifo->base; - - return nvkm_fifo_ctor(&gk104_fifo_, device, type, inst, nr, &fifo->base); + nvkm_wo32(memory, offset + 0, chan->id); + nvkm_wo32(memory, offset + 4, 0x00000000); } -const struct nvkm_enum -gk104_fifo_fault_access[] = { - { 0x0, "READ" }, - { 0x1, "WRITE" }, - {} +static const struct nvkm_runl_func +gk104_runl = { + .size = 8, + .update = nv50_runl_update, + .insert_chan = gk104_runl_insert_chan, + .commit = gk104_runl_commit, + .wait = nv50_runl_wait, + .pending = gk104_runl_pending, + .block = gk104_runl_block, + .allow = gk104_runl_allow, + .fault_clear = gk104_runl_fault_clear, + .preempt_pending = gf100_runl_preempt_pending, }; -const struct nvkm_enum -gk104_fifo_fault_engine[] = { +static const struct nvkm_enum +gk104_fifo_mmu_fault_engine[] = { { 0x00, "GR", NULL, NVKM_ENGINE_GR }, { 0x01, "DISPLAY" }, { 0x02, "CAPTURE" }, @@ -1122,14 
+478,14 @@ gk104_fifo_fault_engine[] = { { 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR }, { 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM }, { 0x06, "SCHED" }, - { 0x07, "HOST0", NULL, NVKM_ENGINE_FIFO }, - { 0x08, "HOST1", NULL, NVKM_ENGINE_FIFO }, - { 0x09, "HOST2", NULL, NVKM_ENGINE_FIFO }, - { 0x0a, "HOST3", NULL, NVKM_ENGINE_FIFO }, - { 0x0b, "HOST4", NULL, NVKM_ENGINE_FIFO }, - { 0x0c, "HOST5", NULL, NVKM_ENGINE_FIFO }, - { 0x0d, "HOST6", NULL, NVKM_ENGINE_FIFO }, - { 0x0e, "HOST7", NULL, NVKM_ENGINE_FIFO }, + { 0x07, "HOST0" }, + { 0x08, "HOST1" }, + { 0x09, "HOST2" }, + { 0x0a, "HOST3" }, + { 0x0b, "HOST4" }, + { 0x0c, "HOST5" }, + { 0x0d, "HOST6" }, + { 0x0e, "HOST7" }, { 0x0f, "HOSTSR" }, { 0x10, "MSVLD", NULL, NVKM_ENGINE_MSVLD }, { 0x11, "MSPPP", NULL, NVKM_ENGINE_MSPPP }, @@ -1145,7 +501,7 @@ gk104_fifo_fault_engine[] = { }; const struct nvkm_enum -gk104_fifo_fault_reason[] = { +gk104_fifo_mmu_fault_reason[] = { { 0x00, "PDE" }, { 0x01, "PDE_SIZE" }, { 0x02, "PTE" }, @@ -1166,7 +522,7 @@ gk104_fifo_fault_reason[] = { }; const struct nvkm_enum -gk104_fifo_fault_hubclient[] = { +gk104_fifo_mmu_fault_hubclient[] = { { 0x00, "VIP" }, { 0x01, "CE0" }, { 0x02, "CE1" }, @@ -1203,7 +559,7 @@ gk104_fifo_fault_hubclient[] = { }; const struct nvkm_enum -gk104_fifo_fault_gpcclient[] = { +gk104_fifo_mmu_fault_gpcclient[] = { { 0x00, "L1_0" }, { 0x01, "T1_0" }, { 0x02, "PE_0" }, { 0x03, "L1_1" }, { 0x04, "T1_1" }, { 0x05, "PE_1" }, { 0x06, "L1_2" }, { 0x07, "T1_2" }, { 0x08, "PE_2" }, @@ -1228,22 +584,250 @@ gk104_fifo_fault_gpcclient[] = { {} }; -static const struct gk104_fifo_func +const struct nvkm_fifo_func_mmu_fault +gk104_fifo_mmu_fault = { + .recover = gf100_fifo_mmu_fault_recover, + .access = gf100_fifo_mmu_fault_access, + .engine = gk104_fifo_mmu_fault_engine, + .reason = gk104_fifo_mmu_fault_reason, + .hubclient = gk104_fifo_mmu_fault_hubclient, + .gpcclient = gk104_fifo_mmu_fault_gpcclient, +}; + +static const struct nvkm_enum +gk104_fifo_intr_bind_reason[] = { + { 0x01, "BIND_NOT_UNBOUND" }, + { 0x02, "SNOOP_WITHOUT_BAR1" }, + { 0x03, "UNBIND_WHILE_RUNNING" }, + { 0x05, "INVALID_RUNLIST" }, + { 0x06, "INVALID_CTX_TGT" }, + { 0x0b, "UNBIND_WHILE_PARKED" }, + {} +}; + +void +gk104_fifo_intr_bind(struct nvkm_fifo *fifo) +{ + struct nvkm_subdev *subdev = &fifo->engine.subdev; + u32 intr = nvkm_rd32(subdev->device, 0x00252c); + u32 code = intr & 0x000000ff; + const struct nvkm_enum *en = nvkm_enum_find(gk104_fifo_intr_bind_reason, code); + + nvkm_error(subdev, "BIND_ERROR %02x [%s]\n", code, en ? 
en->name : ""); +} + +void +gk104_fifo_intr_chsw(struct nvkm_fifo *fifo) +{ + struct nvkm_subdev *subdev = &fifo->engine.subdev; + struct nvkm_device *device = subdev->device; + u32 stat = nvkm_rd32(device, 0x00256c); + + nvkm_error(subdev, "CHSW_ERROR %08x\n", stat); + nvkm_wr32(device, 0x00256c, stat); +} + +static void +gk104_fifo_intr_dropped_fault(struct nvkm_fifo *fifo) +{ + struct nvkm_subdev *subdev = &fifo->engine.subdev; + u32 stat = nvkm_rd32(subdev->device, 0x00259c); + + nvkm_error(subdev, "DROPPED_MMU_FAULT %08x\n", stat); +} + +void +gk104_fifo_intr_runlist(struct nvkm_fifo *fifo) +{ + struct nvkm_device *device = fifo->engine.subdev.device; + struct nvkm_runl *runl; + u32 mask = nvkm_rd32(device, 0x002a00); + + nvkm_runl_foreach_cond(runl, fifo, mask & BIT(runl->id)) { + nvkm_wr32(device, 0x002a00, BIT(runl->id)); + } +} + +irqreturn_t +gk104_fifo_intr(struct nvkm_inth *inth) +{ + struct nvkm_fifo *fifo = container_of(inth, typeof(*fifo), engine.subdev.inth); + struct nvkm_subdev *subdev = &fifo->engine.subdev; + struct nvkm_device *device = subdev->device; + u32 mask = nvkm_rd32(device, 0x002140); + u32 stat = nvkm_rd32(device, 0x002100) & mask; + + if (stat & 0x00000001) { + gk104_fifo_intr_bind(fifo); + nvkm_wr32(device, 0x002100, 0x00000001); + stat &= ~0x00000001; + } + + if (stat & 0x00000010) { + nvkm_error(subdev, "PIO_ERROR\n"); + nvkm_wr32(device, 0x002100, 0x00000010); + stat &= ~0x00000010; + } + + if (stat & 0x00000100) { + gf100_fifo_intr_sched(fifo); + nvkm_wr32(device, 0x002100, 0x00000100); + stat &= ~0x00000100; + } + + if (stat & 0x00010000) { + gk104_fifo_intr_chsw(fifo); + nvkm_wr32(device, 0x002100, 0x00010000); + stat &= ~0x00010000; + } + + if (stat & 0x00800000) { + nvkm_error(subdev, "FB_FLUSH_TIMEOUT\n"); + nvkm_wr32(device, 0x002100, 0x00800000); + stat &= ~0x00800000; + } + + if (stat & 0x01000000) { + nvkm_error(subdev, "LB_ERROR\n"); + nvkm_wr32(device, 0x002100, 0x01000000); + stat &= ~0x01000000; + } + + if (stat & 0x08000000) { + gk104_fifo_intr_dropped_fault(fifo); + nvkm_wr32(device, 0x002100, 0x08000000); + stat &= ~0x08000000; + } + + if (stat & 0x10000000) { + gf100_fifo_intr_mmu_fault(fifo); + stat &= ~0x10000000; + } + + if (stat & 0x20000000) { + if (gf100_fifo_intr_pbdma(fifo)) + stat &= ~0x20000000; + } + + if (stat & 0x40000000) { + gk104_fifo_intr_runlist(fifo); + stat &= ~0x40000000; + } + + if (stat & 0x80000000) { + nvkm_wr32(device, 0x002100, 0x80000000); + nvkm_event_ntfy(&fifo->nonstall.event, 0, NVKM_FIFO_NONSTALL_EVENT); + stat &= ~0x80000000; + } + + if (stat) { + nvkm_error(subdev, "INTR %08x\n", stat); + spin_lock(&fifo->lock); + nvkm_mask(device, 0x002140, stat, 0x00000000); + spin_unlock(&fifo->lock); + nvkm_wr32(device, 0x002100, stat); + } + + return IRQ_HANDLED; +} + +void +gk104_fifo_init_pbdmas(struct nvkm_fifo *fifo, u32 mask) +{ + struct nvkm_device *device = fifo->engine.subdev.device; + + nvkm_wr32(device, 0x000204, mask); + nvkm_mask(device, 0x002a04, 0xbfffffff, 0xbfffffff); +} + +void +gk104_fifo_init(struct nvkm_fifo *fifo) +{ + struct nvkm_device *device = fifo->engine.subdev.device; + + if (fifo->func->chan.func->userd->bar == 1) + nvkm_wr32(device, 0x002254, 0x10000000 | fifo->userd.bar1->addr >> 12); + + nvkm_wr32(device, 0x002100, 0xffffffff); + nvkm_wr32(device, 0x002140, 0x7fffffff); +} + +int +gk104_fifo_runl_ctor(struct nvkm_fifo *fifo) +{ + struct nvkm_device *device = fifo->engine.subdev.device; + struct nvkm_top_device *tdev; + struct nvkm_runl *runl; + struct nvkm_runq *runq; + const struct 
nvkm_engn_func *func; + + nvkm_list_foreach(tdev, &device->top->device, head, tdev->runlist >= 0) { + runl = nvkm_runl_get(fifo, tdev->runlist, tdev->runlist); + if (!runl) { + runl = nvkm_runl_new(fifo, tdev->runlist, tdev->runlist, 0); + if (IS_ERR(runl)) + return PTR_ERR(runl); + + nvkm_runq_foreach_cond(runq, fifo, gk104_runq_runm(runq) & BIT(runl->id)) { + if (WARN_ON(runl->runq_nr == ARRAY_SIZE(runl->runq))) + return -ENOMEM; + + runl->runq[runl->runq_nr++] = runq; + } + + } + + if (tdev->engine < 0) + continue; + + switch (tdev->type) { + case NVKM_ENGINE_CE: + func = fifo->func->engn_ce; + break; + case NVKM_ENGINE_GR: + nvkm_runl_add(runl, 15, &gf100_engn_sw, NVKM_ENGINE_SW, 0); + fallthrough; + default: + func = fifo->func->engn; + break; + } + + nvkm_runl_add(runl, tdev->engine, func, tdev->type, tdev->inst); + } + + return 0; +} + +int +gk104_fifo_chid_nr(struct nvkm_fifo *fifo) +{ + return 4096; +} + +static const struct nvkm_fifo_func gk104_fifo = { - .intr.fault = gf100_fifo_intr_fault, - .pbdma = &gk104_fifo_pbdma, - .fault.access = gk104_fifo_fault_access, - .fault.engine = gk104_fifo_fault_engine, - .fault.reason = gk104_fifo_fault_reason, - .fault.hubclient = gk104_fifo_fault_hubclient, - .fault.gpcclient = gk104_fifo_fault_gpcclient, - .runlist = &gk104_fifo_runlist, - .chan = {{0,0,KEPLER_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, + .chid_nr = gk104_fifo_chid_nr, + .chid_ctor = gf100_fifo_chid_ctor, + .runq_nr = gf100_fifo_runq_nr, + .runl_ctor = gk104_fifo_runl_ctor, + .init = gk104_fifo_init, + .init_pbdmas = gk104_fifo_init_pbdmas, + .intr = gk104_fifo_intr, + .intr_mmu_fault_unit = gf100_fifo_intr_mmu_fault_unit, + .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout, + .mmu_fault = &gk104_fifo_mmu_fault, + .nonstall = &gf100_fifo_nonstall, + .runl = &gk104_runl, + .runq = &gk104_runq, + .engn = &gk104_engn, + .engn_ce = &gk104_engn_ce, + .cgrp = {{ }, &nv04_cgrp }, + .chan = {{ 0, 0, KEPLER_CHANNEL_GPFIFO_A }, &gk104_chan }, }; int gk104_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return gk104_fifo_new_(&gk104_fifo, device, type, inst, 4096, pfifo); + return nvkm_fifo_new_(&gk104_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h deleted file mode 100644 index f2d12ae73944..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h +++ /dev/null @@ -1,168 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -#ifndef __GK104_FIFO_H__ -#define __GK104_FIFO_H__ -#define gk104_fifo(p) container_of((p), struct gk104_fifo, base) -#include "priv.h" -struct nvkm_fifo_cgrp; - -#include <core/enum.h> -#include <subdev/mmu.h> - -struct gk104_fifo_chan; -struct gk104_fifo { - const struct gk104_fifo_func *func; - struct nvkm_fifo base; - - struct { - struct work_struct work; - u32 engm; - u32 runm; - } recover; - - int pbdma_nr; - - struct { - struct nvkm_engine *engine; - int runl; - int pbid; - } engine[16]; - int engine_nr; - - struct { - struct nvkm_memory *mem[2]; - int next; - wait_queue_head_t wait; - struct list_head cgrp; - struct list_head chan; - u32 engm; - u32 engm_sw; - } runlist[16]; - int runlist_nr; - - struct { - struct nvkm_memory *mem; - struct nvkm_vma *bar; - } user; -}; - -struct gk104_fifo_func { - struct { - void (*fault)(struct nvkm_fifo *, int unit); - } intr; - - const struct gk104_fifo_pbdma_func { - int (*nr)(struct gk104_fifo *); - void (*init)(struct gk104_fifo *); - void 
(*init_timeout)(struct gk104_fifo *); - } *pbdma; - - struct { - const struct nvkm_enum *access; - const struct nvkm_enum *engine; - const struct nvkm_enum *reason; - const struct nvkm_enum *hubclient; - const struct nvkm_enum *gpcclient; - } fault; - - const struct gk104_fifo_runlist_func { - u8 size; - void (*cgrp)(struct nvkm_fifo_cgrp *, - struct nvkm_memory *, u32 offset); - void (*chan)(struct gk104_fifo_chan *, - struct nvkm_memory *, u32 offset); - void (*commit)(struct gk104_fifo *, int runl, - struct nvkm_memory *, int entries); - } *runlist; - - struct gk104_fifo_user_user { - struct nvkm_sclass user; - int (*ctor)(const struct nvkm_oclass *, void *, u32, - struct nvkm_object **); - } user; - - struct gk104_fifo_chan_user { - struct nvkm_sclass user; - int (*ctor)(struct gk104_fifo *, const struct nvkm_oclass *, - void *, u32, struct nvkm_object **); - } chan; - bool cgrp_force; -}; - -struct gk104_fifo_engine_status { - bool busy; - bool faulted; - bool chsw; - bool save; - bool load; - struct { - bool tsg; - u32 id; - } prev, next, *chan; -}; - -int gk104_fifo_new_(const struct gk104_fifo_func *, struct nvkm_device *, enum nvkm_subdev_type, - int index, int nr, struct nvkm_fifo **); -void gk104_fifo_runlist_insert(struct gk104_fifo *, struct gk104_fifo_chan *); -void gk104_fifo_runlist_remove(struct gk104_fifo *, struct gk104_fifo_chan *); -void gk104_fifo_runlist_update(struct gk104_fifo *, int runl); -void gk104_fifo_engine_status(struct gk104_fifo *fifo, int engn, - struct gk104_fifo_engine_status *status); -void gk104_fifo_intr_bind(struct gk104_fifo *fifo); -void gk104_fifo_intr_chsw(struct gk104_fifo *fifo); -void gk104_fifo_intr_dropped_fault(struct gk104_fifo *fifo); -void gk104_fifo_intr_pbdma_0(struct gk104_fifo *fifo, int unit); -void gk104_fifo_intr_pbdma_1(struct gk104_fifo *fifo, int unit); -void gk104_fifo_intr_runlist(struct gk104_fifo *fifo); -void gk104_fifo_intr_engine(struct gk104_fifo *fifo); -void *gk104_fifo_dtor(struct nvkm_fifo *base); -int gk104_fifo_oneinit(struct nvkm_fifo *base); -int gk104_fifo_info(struct nvkm_fifo *base, u64 mthd, u64 *data); -void gk104_fifo_init(struct nvkm_fifo *base); -void gk104_fifo_fini(struct nvkm_fifo *base); -int gk104_fifo_class_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, - void *argv, u32 argc, struct nvkm_object **pobject); -int gk104_fifo_class_get(struct nvkm_fifo *base, int index, - struct nvkm_oclass *oclass); -void gk104_fifo_uevent_fini(struct nvkm_fifo *fifo); -void gk104_fifo_uevent_init(struct nvkm_fifo *fifo); - -extern const struct gk104_fifo_pbdma_func gk104_fifo_pbdma; -int gk104_fifo_pbdma_nr(struct gk104_fifo *); -void gk104_fifo_pbdma_init(struct gk104_fifo *); -extern const struct nvkm_enum gk104_fifo_fault_access[]; -extern const struct nvkm_enum gk104_fifo_fault_engine[]; -extern const struct nvkm_enum gk104_fifo_fault_reason[]; -extern const struct nvkm_enum gk104_fifo_fault_hubclient[]; -extern const struct nvkm_enum gk104_fifo_fault_gpcclient[]; -extern const struct gk104_fifo_runlist_func gk104_fifo_runlist; -void gk104_fifo_runlist_chan(struct gk104_fifo_chan *, - struct nvkm_memory *, u32); -void gk104_fifo_runlist_commit(struct gk104_fifo *, int runl, - struct nvkm_memory *, int); - -extern const struct gk104_fifo_runlist_func gk110_fifo_runlist; -void gk110_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *, - struct nvkm_memory *, u32); - -extern const struct gk104_fifo_pbdma_func gk208_fifo_pbdma; -void gk208_fifo_pbdma_init_timeout(struct gk104_fifo *); - -void 
gm107_fifo_intr_fault(struct nvkm_fifo *, int); -extern const struct nvkm_enum gm107_fifo_fault_engine[]; -extern const struct gk104_fifo_runlist_func gm107_fifo_runlist; - -extern const struct gk104_fifo_pbdma_func gm200_fifo_pbdma; -int gm200_fifo_pbdma_nr(struct gk104_fifo *); - -void gp100_fifo_intr_fault(struct nvkm_fifo *, int); -extern const struct nvkm_enum gp100_fifo_fault_engine[]; - -extern const struct nvkm_enum gv100_fifo_fault_access[]; -extern const struct nvkm_enum gv100_fifo_fault_reason[]; -extern const struct nvkm_enum gv100_fifo_fault_hubclient[]; -extern const struct nvkm_enum gv100_fifo_fault_gpcclient[]; -void gv100_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *, - struct nvkm_memory *, u32); -void gv100_fifo_runlist_chan(struct gk104_fifo_chan *, - struct nvkm_memory *, u32); -#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c index 915278c7e012..a8ff21cf7712 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c @@ -21,47 +21,112 @@ * * Authors: Ben Skeggs */ -#include "gk104.h" +#include "priv.h" #include "cgrp.h" -#include "changk104.h" +#include "chan.h" +#include "chid.h" +#include "runl.h" #include <core/memory.h> +#include <subdev/timer.h> #include <nvif/class.h> void -gk110_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *cgrp, - struct nvkm_memory *memory, u32 offset) +gk110_chan_preempt(struct nvkm_chan *chan) +{ + struct nvkm_cgrp *cgrp = chan->cgrp; + + if (cgrp->hw) { + cgrp->func->preempt(cgrp); + return; + } + + gf100_chan_preempt(chan); +} + +const struct nvkm_chan_func +gk110_chan = { + .inst = &gf100_chan_inst, + .userd = &gk104_chan_userd, + .ramfc = &gk104_chan_ramfc, + .bind = gk104_chan_bind, + .unbind = gk104_chan_unbind, + .start = gk104_chan_start, + .stop = gk104_chan_stop, + .preempt = gk110_chan_preempt, +}; + +static void +gk110_cgrp_preempt(struct nvkm_cgrp *cgrp) +{ + nvkm_wr32(cgrp->runl->fifo->engine.subdev.device, 0x002634, 0x01000000 | cgrp->id); +} + +const struct nvkm_cgrp_func +gk110_cgrp = { + .preempt = gk110_cgrp_preempt, +}; + +void +gk110_runl_insert_cgrp(struct nvkm_cgrp *cgrp, struct nvkm_memory *memory, u64 offset) { nvkm_wo32(memory, offset + 0, (cgrp->chan_nr << 26) | (128 << 18) | (3 << 14) | 0x00002000 | cgrp->id); nvkm_wo32(memory, offset + 4, 0x00000000); } -const struct gk104_fifo_runlist_func -gk110_fifo_runlist = { +const struct nvkm_runl_func +gk110_runl = { .size = 8, - .cgrp = gk110_fifo_runlist_cgrp, - .chan = gk104_fifo_runlist_chan, - .commit = gk104_fifo_runlist_commit, + .update = nv50_runl_update, + .insert_cgrp = gk110_runl_insert_cgrp, + .insert_chan = gk104_runl_insert_chan, + .commit = gk104_runl_commit, + .wait = nv50_runl_wait, + .pending = gk104_runl_pending, + .block = gk104_runl_block, + .allow = gk104_runl_allow, + .fault_clear = gk104_runl_fault_clear, + .preempt_pending = gf100_runl_preempt_pending, }; -static const struct gk104_fifo_func +int +gk110_fifo_chid_ctor(struct nvkm_fifo *fifo, int nr) +{ + int ret; + + ret = nvkm_chid_new(&nvkm_chan_event, &fifo->engine.subdev, nr, 0, nr, &fifo->cgid); + if (ret) + return ret; + + return gf100_fifo_chid_ctor(fifo, nr); +} + +static const struct nvkm_fifo_func gk110_fifo = { - .intr.fault = gf100_fifo_intr_fault, - .pbdma = &gk104_fifo_pbdma, - .fault.access = gk104_fifo_fault_access, - .fault.engine = gk104_fifo_fault_engine, - .fault.reason = gk104_fifo_fault_reason, - .fault.hubclient = gk104_fifo_fault_hubclient, - 
.fault.gpcclient = gk104_fifo_fault_gpcclient, - .runlist = &gk110_fifo_runlist, - .chan = {{0,0,KEPLER_CHANNEL_GPFIFO_B}, gk104_fifo_gpfifo_new }, + .chid_nr = gk104_fifo_chid_nr, + .chid_ctor = gk110_fifo_chid_ctor, + .runq_nr = gf100_fifo_runq_nr, + .runl_ctor = gk104_fifo_runl_ctor, + .init = gk104_fifo_init, + .init_pbdmas = gk104_fifo_init_pbdmas, + .intr = gk104_fifo_intr, + .intr_mmu_fault_unit = gf100_fifo_intr_mmu_fault_unit, + .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout, + .mmu_fault = &gk104_fifo_mmu_fault, + .nonstall = &gf100_fifo_nonstall, + .runl = &gk110_runl, + .runq = &gk104_runq, + .engn = &gk104_engn, + .engn_ce = &gk104_engn_ce, + .cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A }, &gk110_cgrp }, + .chan = {{ 0, 0, KEPLER_CHANNEL_GPFIFO_B }, &gk110_chan }, }; int gk110_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return gk104_fifo_new_(&gk110_fifo, device, type, inst, 4096, pfifo); + return nvkm_fifo_new_(&gk110_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c index cb703693de52..8fa2b0be141a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c @@ -21,44 +21,57 @@ * * Authors: Ben Skeggs */ -#include "gk104.h" -#include "changk104.h" +#include "priv.h" +#include "runq.h" #include <nvif/class.h> void -gk208_fifo_pbdma_init_timeout(struct gk104_fifo *fifo) +gk208_runq_init(struct nvkm_runq *runq) { - struct nvkm_device *device = fifo->base.engine.subdev.device; - int i; + gk104_runq_init(runq); - for (i = 0; i < fifo->pbdma_nr; i++) - nvkm_wr32(device, 0x04012c + (i * 0x2000), 0x0000ffff); + nvkm_wr32(runq->fifo->engine.subdev.device, 0x04012c + (runq->id * 0x2000), 0x000f4240); } -const struct gk104_fifo_pbdma_func -gk208_fifo_pbdma = { - .nr = gk104_fifo_pbdma_nr, - .init = gk104_fifo_pbdma_init, - .init_timeout = gk208_fifo_pbdma_init_timeout, +const struct nvkm_runq_func +gk208_runq = { + .init = gk208_runq_init, + .intr = gk104_runq_intr, + .intr_0_names = gk104_runq_intr_0_names, + .idle = gk104_runq_idle, }; -static const struct gk104_fifo_func +static int +gk208_fifo_chid_nr(struct nvkm_fifo *fifo) +{ + return 1024; +} + +static const struct nvkm_fifo_func gk208_fifo = { - .intr.fault = gf100_fifo_intr_fault, - .pbdma = &gk208_fifo_pbdma, - .fault.access = gk104_fifo_fault_access, - .fault.engine = gk104_fifo_fault_engine, - .fault.reason = gk104_fifo_fault_reason, - .fault.hubclient = gk104_fifo_fault_hubclient, - .fault.gpcclient = gk104_fifo_fault_gpcclient, - .runlist = &gk110_fifo_runlist, - .chan = {{0,0,KEPLER_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, + .chid_nr = gk208_fifo_chid_nr, + .chid_ctor = gk110_fifo_chid_ctor, + .runq_nr = gf100_fifo_runq_nr, + .runl_ctor = gk104_fifo_runl_ctor, + .init = gk104_fifo_init, + .init_pbdmas = gk104_fifo_init_pbdmas, + .intr = gk104_fifo_intr, + .intr_mmu_fault_unit = gf100_fifo_intr_mmu_fault_unit, + .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout, + .mmu_fault = &gk104_fifo_mmu_fault, + .nonstall = &gf100_fifo_nonstall, + .runl = &gk110_runl, + .runq = &gk208_runq, + .engn = &gk104_engn, + .engn_ce = &gk104_engn_ce, + .cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A }, &gk110_cgrp }, + .chan = {{ 0, 0, KEPLER_CHANNEL_GPFIFO_A }, &gk110_chan }, }; int gk208_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return gk104_fifo_new_(&gk208_fifo, 
device, type, inst, 1024, pfifo); + return nvkm_fifo_new_(&gk208_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c index 6e35cf44c640..b63ca836130f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c @@ -19,27 +19,34 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ -#include "gk104.h" -#include "changk104.h" +#include "priv.h" #include <nvif/class.h> -static const struct gk104_fifo_func +static const struct nvkm_fifo_func gk20a_fifo = { - .intr.fault = gf100_fifo_intr_fault, - .pbdma = &gk208_fifo_pbdma, - .fault.access = gk104_fifo_fault_access, - .fault.engine = gk104_fifo_fault_engine, - .fault.reason = gk104_fifo_fault_reason, - .fault.hubclient = gk104_fifo_fault_hubclient, - .fault.gpcclient = gk104_fifo_fault_gpcclient, - .runlist = &gk110_fifo_runlist, - .chan = {{0,0,KEPLER_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, + .chid_nr = nv50_fifo_chid_nr, + .chid_ctor = gk110_fifo_chid_ctor, + .runq_nr = gf100_fifo_runq_nr, + .runl_ctor = gk104_fifo_runl_ctor, + .init = gk104_fifo_init, + .init_pbdmas = gk104_fifo_init_pbdmas, + .intr = gk104_fifo_intr, + .intr_mmu_fault_unit = gf100_fifo_intr_mmu_fault_unit, + .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout, + .mmu_fault = &gk104_fifo_mmu_fault, + .nonstall = &gf100_fifo_nonstall, + .runl = &gk110_runl, + .runq = &gk208_runq, + .engn = &gk104_engn, + .engn_ce = &gk104_engn_ce, + .cgrp = {{ }, &gk110_cgrp }, + .chan = {{ 0, 0, KEPLER_CHANNEL_GPFIFO_A }, &gk110_chan }, }; int gk20a_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return gk104_fifo_new_(&gk20a_fifo, device, type, inst, 128, pfifo); + return nvkm_fifo_new_(&gk20a_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c index 7af6e687d474..5ba60021b510 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c @@ -21,46 +21,65 @@ * * Authors: Ben Skeggs */ -#include "gk104.h" -#include "changk104.h" +#include "priv.h" +#include "chan.h" +#include "runl.h" #include <core/gpuobj.h> #include <subdev/fault.h> #include <nvif/class.h> +const struct nvkm_chan_func +gm107_chan = { + .inst = &gf100_chan_inst, + .userd = &gk104_chan_userd, + .ramfc = &gk104_chan_ramfc, + .bind = gk104_chan_bind_inst, + .unbind = gk104_chan_unbind, + .start = gk104_chan_start, + .stop = gk104_chan_stop, + .preempt = gk110_chan_preempt, +}; + static void -gm107_fifo_runlist_chan(struct gk104_fifo_chan *chan, - struct nvkm_memory *memory, u32 offset) +gm107_runl_insert_chan(struct nvkm_chan *chan, struct nvkm_memory *memory, u64 offset) { - nvkm_wo32(memory, offset + 0, chan->base.chid); - nvkm_wo32(memory, offset + 4, chan->base.inst->addr >> 12); + nvkm_wo32(memory, offset + 0, chan->id); + nvkm_wo32(memory, offset + 4, chan->inst->addr >> 12); } -const struct gk104_fifo_runlist_func -gm107_fifo_runlist = { +const struct nvkm_runl_func +gm107_runl = { .size = 8, - .cgrp = gk110_fifo_runlist_cgrp, - .chan = gm107_fifo_runlist_chan, - .commit = gk104_fifo_runlist_commit, + .update = nv50_runl_update, + .insert_cgrp = gk110_runl_insert_cgrp, + .insert_chan = gm107_runl_insert_chan, + .commit = gk104_runl_commit, + .wait = nv50_runl_wait, + .pending = gk104_runl_pending, + .block = 
gk104_runl_block, + .allow = gk104_runl_allow, + .fault_clear = gk104_runl_fault_clear, + .preempt_pending = gf100_runl_preempt_pending, }; -const struct nvkm_enum -gm107_fifo_fault_engine[] = { +static const struct nvkm_enum +gm107_fifo_mmu_fault_engine[] = { { 0x01, "DISPLAY" }, { 0x02, "CAPTURE" }, { 0x03, "IFB", NULL, NVKM_ENGINE_IFB }, { 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR }, { 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM }, { 0x06, "SCHED" }, - { 0x07, "HOST0", NULL, NVKM_ENGINE_FIFO }, - { 0x08, "HOST1", NULL, NVKM_ENGINE_FIFO }, - { 0x09, "HOST2", NULL, NVKM_ENGINE_FIFO }, - { 0x0a, "HOST3", NULL, NVKM_ENGINE_FIFO }, - { 0x0b, "HOST4", NULL, NVKM_ENGINE_FIFO }, - { 0x0c, "HOST5", NULL, NVKM_ENGINE_FIFO }, - { 0x0d, "HOST6", NULL, NVKM_ENGINE_FIFO }, - { 0x0e, "HOST7", NULL, NVKM_ENGINE_FIFO }, + { 0x07, "HOST0" }, + { 0x08, "HOST1" }, + { 0x09, "HOST2" }, + { 0x0a, "HOST3" }, + { 0x0b, "HOST4" }, + { 0x0c, "HOST5" }, + { 0x0d, "HOST6" }, + { 0x0e, "HOST7" }, { 0x0f, "HOSTSR" }, { 0x13, "PERF" }, { 0x17, "PMU" }, @@ -68,8 +87,18 @@ gm107_fifo_fault_engine[] = { {} }; +const struct nvkm_fifo_func_mmu_fault +gm107_fifo_mmu_fault = { + .recover = gf100_fifo_mmu_fault_recover, + .access = gf100_fifo_mmu_fault_access, + .engine = gm107_fifo_mmu_fault_engine, + .reason = gk104_fifo_mmu_fault_reason, + .hubclient = gk104_fifo_mmu_fault_hubclient, + .gpcclient = gk104_fifo_mmu_fault_gpcclient, +}; + void -gm107_fifo_intr_fault(struct nvkm_fifo *fifo, int unit) +gm107_fifo_intr_mmu_fault_unit(struct nvkm_fifo *fifo, int unit) { struct nvkm_device *device = fifo->engine.subdev.device; u32 inst = nvkm_rd32(device, 0x002800 + (unit * 0x10)); @@ -92,22 +121,36 @@ gm107_fifo_intr_fault(struct nvkm_fifo *fifo, int unit) nvkm_fifo_fault(fifo, &info); } -static const struct gk104_fifo_func +static int +gm107_fifo_chid_nr(struct nvkm_fifo *fifo) +{ + return 2048; +} + +static const struct nvkm_fifo_func gm107_fifo = { - .intr.fault = gm107_fifo_intr_fault, - .pbdma = &gk208_fifo_pbdma, - .fault.access = gk104_fifo_fault_access, - .fault.engine = gm107_fifo_fault_engine, - .fault.reason = gk104_fifo_fault_reason, - .fault.hubclient = gk104_fifo_fault_hubclient, - .fault.gpcclient = gk104_fifo_fault_gpcclient, - .runlist = &gm107_fifo_runlist, - .chan = {{0,0,KEPLER_CHANNEL_GPFIFO_B}, gk104_fifo_gpfifo_new }, + .chid_nr = gm107_fifo_chid_nr, + .chid_ctor = gk110_fifo_chid_ctor, + .runq_nr = gf100_fifo_runq_nr, + .runl_ctor = gk104_fifo_runl_ctor, + .init = gk104_fifo_init, + .init_pbdmas = gk104_fifo_init_pbdmas, + .intr = gk104_fifo_intr, + .intr_mmu_fault_unit = gm107_fifo_intr_mmu_fault_unit, + .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout, + .mmu_fault = &gm107_fifo_mmu_fault, + .nonstall = &gf100_fifo_nonstall, + .runl = &gm107_runl, + .runq = &gk208_runq, + .engn = &gk104_engn, + .engn_ce = &gk104_engn_ce, + .cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A }, &gk110_cgrp }, + .chan = {{ 0, 0, KEPLER_CHANNEL_GPFIFO_B }, &gm107_chan }, }; int gm107_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return gk104_fifo_new_(&gm107_fifo, device, type, inst, 2048, pfifo); + return nvkm_fifo_new_(&gm107_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c index 573658cb6c73..d92d1ac39191 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c @@ -21,41 +21,46 @@ * * Authors: Ben Skeggs */ -#include 
"gk104.h" -#include "changk104.h" +#include "priv.h" #include <nvif/class.h> int -gm200_fifo_pbdma_nr(struct gk104_fifo *fifo) +gm200_fifo_runq_nr(struct nvkm_fifo *fifo) { - struct nvkm_device *device = fifo->base.engine.subdev.device; - return nvkm_rd32(device, 0x002004) & 0x000000ff; + return nvkm_rd32(fifo->engine.subdev.device, 0x002004) & 0x000000ff; } -const struct gk104_fifo_pbdma_func -gm200_fifo_pbdma = { - .nr = gm200_fifo_pbdma_nr, - .init = gk104_fifo_pbdma_init, - .init_timeout = gk208_fifo_pbdma_init_timeout, -}; +int +gm200_fifo_chid_nr(struct nvkm_fifo *fifo) +{ + return nvkm_rd32(fifo->engine.subdev.device, 0x002008); +} -static const struct gk104_fifo_func +static const struct nvkm_fifo_func gm200_fifo = { - .intr.fault = gm107_fifo_intr_fault, - .pbdma = &gm200_fifo_pbdma, - .fault.access = gk104_fifo_fault_access, - .fault.engine = gm107_fifo_fault_engine, - .fault.reason = gk104_fifo_fault_reason, - .fault.hubclient = gk104_fifo_fault_hubclient, - .fault.gpcclient = gk104_fifo_fault_gpcclient, - .runlist = &gm107_fifo_runlist, - .chan = {{0,0,MAXWELL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, + .chid_nr = gm200_fifo_chid_nr, + .chid_ctor = gk110_fifo_chid_ctor, + .runq_nr = gm200_fifo_runq_nr, + .runl_ctor = gk104_fifo_runl_ctor, + .init = gk104_fifo_init, + .init_pbdmas = gk104_fifo_init_pbdmas, + .intr = gk104_fifo_intr, + .intr_mmu_fault_unit = gm107_fifo_intr_mmu_fault_unit, + .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout, + .mmu_fault = &gm107_fifo_mmu_fault, + .nonstall = &gf100_fifo_nonstall, + .runl = &gm107_runl, + .runq = &gk208_runq, + .engn = &gk104_engn, + .engn_ce = &gk104_engn_ce, + .cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A }, &gk110_cgrp }, + .chan = {{ 0, 0, MAXWELL_CHANNEL_GPFIFO_A }, &gm107_chan }, }; int gm200_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return gk104_fifo_new_(&gm200_fifo, device, type, inst, 4096, pfifo); + return nvkm_fifo_new_(&gm200_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c deleted file mode 100644 index 556c97e54f14..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ -#include "gk104.h" -#include "changk104.h" - -#include <nvif/class.h> - -static const struct gk104_fifo_func -gm20b_fifo = { - .intr.fault = gm107_fifo_intr_fault, - .pbdma = &gm200_fifo_pbdma, - .fault.access = gk104_fifo_fault_access, - .fault.engine = gm107_fifo_fault_engine, - .fault.reason = gk104_fifo_fault_reason, - .fault.hubclient = gk104_fifo_fault_hubclient, - .fault.gpcclient = gk104_fifo_fault_gpcclient, - .runlist = &gm107_fifo_runlist, - .chan = {{0,0,MAXWELL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, -}; - -int -gm20b_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, - struct nvkm_fifo **pfifo) -{ - return gk104_fifo_new_(&gm20b_fifo, device, type, inst, 512, pfifo); -} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c index 6b46b6b65b87..65bdb6a7d517 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c @@ -21,30 +21,54 @@ * * Authors: Ben Skeggs */ -#include "gk104.h" -#include "changk104.h" +#include "priv.h" +#include "runl.h" +#include <core/gpuobj.h> #include <subdev/fault.h> #include <nvif/class.h> -const struct nvkm_enum -gp100_fifo_fault_engine[] = { +static void +gp100_runl_insert_chan(struct nvkm_chan *chan, struct nvkm_memory *memory, u64 offset) +{ + nvkm_wo32(memory, offset + 0, chan->id | chan->runq << 14); + nvkm_wo32(memory, offset + 4, chan->inst->addr >> 12); +} + +static const struct nvkm_runl_func +gp100_runl = { + .runqs = 2, + .size = 8, + .update = nv50_runl_update, + .insert_cgrp = gk110_runl_insert_cgrp, + .insert_chan = gp100_runl_insert_chan, + .commit = gk104_runl_commit, + .wait = nv50_runl_wait, + .pending = gk104_runl_pending, + .block = gk104_runl_block, + .allow = gk104_runl_allow, + .fault_clear = gk104_runl_fault_clear, + .preempt_pending = gf100_runl_preempt_pending, +}; + +static const struct nvkm_enum +gp100_fifo_mmu_fault_engine[] = { { 0x01, "DISPLAY" }, { 0x03, "IFB", NULL, NVKM_ENGINE_IFB }, { 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR }, { 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM }, - { 0x06, "HOST0", NULL, NVKM_ENGINE_FIFO }, - { 0x07, "HOST1", NULL, NVKM_ENGINE_FIFO }, - { 0x08, "HOST2", NULL, NVKM_ENGINE_FIFO }, - { 0x09, "HOST3", NULL, NVKM_ENGINE_FIFO }, - { 0x0a, "HOST4", NULL, NVKM_ENGINE_FIFO }, - { 0x0b, "HOST5", NULL, NVKM_ENGINE_FIFO }, - { 0x0c, "HOST6", NULL, NVKM_ENGINE_FIFO }, - { 0x0d, "HOST7", NULL, NVKM_ENGINE_FIFO }, - { 0x0e, "HOST8", NULL, NVKM_ENGINE_FIFO }, - { 0x0f, "HOST9", NULL, NVKM_ENGINE_FIFO }, - { 0x10, "HOST10", NULL, NVKM_ENGINE_FIFO }, + { 0x06, "HOST0" }, + { 0x07, "HOST1" }, + { 0x08, "HOST2" }, + { 0x09, "HOST3" }, + { 0x0a, "HOST4" }, + { 0x0b, "HOST5" }, + { 0x0c, "HOST6" }, + { 0x0d, "HOST7" }, + { 0x0e, "HOST8" }, + { 0x0f, "HOST9" }, + { 0x10, "HOST10" }, { 0x13, "PERF" }, { 0x17, "PMU" }, { 0x18, "PTP" }, @@ -52,8 +76,18 @@ gp100_fifo_fault_engine[] = { {} }; -void -gp100_fifo_intr_fault(struct nvkm_fifo *fifo, int unit) +static const struct nvkm_fifo_func_mmu_fault +gp100_fifo_mmu_fault = { + .recover = gf100_fifo_mmu_fault_recover, + .access = gf100_fifo_mmu_fault_access, + .engine = gp100_fifo_mmu_fault_engine, + .reason = gk104_fifo_mmu_fault_reason, + .hubclient = gk104_fifo_mmu_fault_hubclient, + .gpcclient = gk104_fifo_mmu_fault_gpcclient, +}; + +static void +gp100_fifo_intr_mmu_fault_unit(struct nvkm_fifo *fifo, int unit) { struct nvkm_device *device = fifo->engine.subdev.device; u32 inst = nvkm_rd32(device, 0x002800 + (unit * 
0x10)); @@ -76,23 +110,30 @@ gp100_fifo_intr_fault(struct nvkm_fifo *fifo, int unit) nvkm_fifo_fault(fifo, &info); } -static const struct gk104_fifo_func +static const struct nvkm_fifo_func gp100_fifo = { - .intr.fault = gp100_fifo_intr_fault, - .pbdma = &gm200_fifo_pbdma, - .fault.access = gk104_fifo_fault_access, - .fault.engine = gp100_fifo_fault_engine, - .fault.reason = gk104_fifo_fault_reason, - .fault.hubclient = gk104_fifo_fault_hubclient, - .fault.gpcclient = gk104_fifo_fault_gpcclient, - .runlist = &gm107_fifo_runlist, - .chan = {{0,0,PASCAL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, - .cgrp_force = true, + .chid_nr = gm200_fifo_chid_nr, + .chid_ctor = gk110_fifo_chid_ctor, + .runq_nr = gm200_fifo_runq_nr, + .runl_ctor = gk104_fifo_runl_ctor, + .init = gk104_fifo_init, + .init_pbdmas = gk104_fifo_init_pbdmas, + .intr = gk104_fifo_intr, + .intr_mmu_fault_unit = gp100_fifo_intr_mmu_fault_unit, + .intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout, + .mmu_fault = &gp100_fifo_mmu_fault, + .nonstall = &gf100_fifo_nonstall, + .runl = &gp100_runl, + .runq = &gk208_runq, + .engn = &gk104_engn, + .engn_ce = &gk104_engn_ce, + .cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A }, &gk110_cgrp, .force = true }, + .chan = {{ 0, 0, PASCAL_CHANNEL_GPFIFO_A }, &gm107_chan }, }; int gp100_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return gk104_fifo_new_(&gp100_fifo, device, type, inst, 4096, pfifo); + return nvkm_fifo_new_(&gp100_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c deleted file mode 100644 index 7a5929cb4d29..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
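
gp100_runl_insert_chan() above documents the Pascal runlist entry layout: the first word packs the channel ID with a runqueue select at bit 14 (the new gp100_runl sets .runqs = 2), and the second word carries the instance-block address in 4 KiB units. A small sketch of that packing follows; the struct and helper names are illustrative, not kernel API.

	/* 8-byte GP100 runlist entry as written by gp100_runl_insert_chan(). */
	struct gp100_runlist_entry {
		u32 lo;	/* channel ID, runqueue select at bit 14 */
		u32 hi;	/* instance-block address >> 12 */
	};

	static struct gp100_runlist_entry
	gp100_pack_chan_entry(u32 chid, u32 runq, u64 inst_addr)
	{
		return (struct gp100_runlist_entry){
			.lo = chid | (runq << 14),
			.hi = (u32)(inst_addr >> 12),
		};
	}
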
- */ -#include "gk104.h" -#include "changk104.h" - -#include <nvif/class.h> - -static const struct gk104_fifo_func -gp10b_fifo = { - .intr.fault = gp100_fifo_intr_fault, - .pbdma = &gm200_fifo_pbdma, - .fault.access = gk104_fifo_fault_access, - .fault.engine = gp100_fifo_fault_engine, - .fault.reason = gk104_fifo_fault_reason, - .fault.hubclient = gk104_fifo_fault_hubclient, - .fault.gpcclient = gk104_fifo_fault_gpcclient, - .runlist = &gm107_fifo_runlist, - .chan = {{0,0,PASCAL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, - .cgrp_force = true, -}; - -int -gp10b_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, - struct nvkm_fifo **pfifo) -{ - return gk104_fifo_new_(&gp10b_fifo, device, type, inst, 512, pfifo); -} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c deleted file mode 100644 index 2121f517b1dd..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: Ben Skeggs - */ -#include "channv50.h" - -#include <core/client.h> -#include <core/ramht.h> - -#include <nvif/class.h> -#include <nvif/cl826f.h> -#include <nvif/unpack.h> - -static int -g84_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - struct nvkm_object *parent = oclass->parent; - union { - struct g82_channel_gpfifo_v0 v0; - } *args = data; - struct nv50_fifo *fifo = nv50_fifo(base); - struct nv50_fifo_chan *chan; - u64 ioffset, ilength; - int ret = -ENOSYS; - - nvif_ioctl(parent, "create channel gpfifo size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " - "pushbuf %llx ioffset %016llx " - "ilength %08x\n", - args->v0.version, args->v0.vmm, args->v0.pushbuf, - args->v0.ioffset, args->v0.ilength); - if (!args->v0.pushbuf) - return -EINVAL; - } else - return ret; - - if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - *pobject = &chan->base.object; - - ret = g84_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf, - oclass, chan); - if (ret) - return ret; - - args->v0.chid = chan->base.chid; - ioffset = args->v0.ioffset; - ilength = order_base_2(args->v0.ilength / 8); - - nvkm_kmap(chan->ramfc); - nvkm_wo32(chan->ramfc, 0x3c, 0x403f6078); - nvkm_wo32(chan->ramfc, 0x44, 0x01003fff); - nvkm_wo32(chan->ramfc, 0x48, chan->base.push->node->offset >> 4); - nvkm_wo32(chan->ramfc, 0x50, lower_32_bits(ioffset)); - nvkm_wo32(chan->ramfc, 0x54, upper_32_bits(ioffset) | (ilength << 16)); - nvkm_wo32(chan->ramfc, 0x60, 0x7fffffff); - nvkm_wo32(chan->ramfc, 0x78, 0x00000000); - nvkm_wo32(chan->ramfc, 0x7c, 0x30000001); - nvkm_wo32(chan->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) | - (4 << 24) /* SEARCH_FULL */ | - (chan->ramht->gpuobj->node->offset >> 4)); - nvkm_wo32(chan->ramfc, 0x88, chan->cache->addr >> 10); - nvkm_wo32(chan->ramfc, 0x98, chan->base.inst->addr >> 12); - nvkm_done(chan->ramfc); - return 0; -} - -const struct nvkm_fifo_chan_oclass -g84_fifo_gpfifo_oclass = { - .base.oclass = G82_CHANNEL_GPFIFO, - .base.minver = 0, - .base.maxver = 0, - .ctor = g84_fifo_gpfifo_new, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c deleted file mode 100644 index 4e78bbe3b94b..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c +++ /dev/null @@ -1,308 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs - */ -#include "changf100.h" - -#include <core/client.h> -#include <core/gpuobj.h> -#include <subdev/fb.h> -#include <subdev/timer.h> - -#include <nvif/class.h> -#include <nvif/cl906f.h> -#include <nvif/unpack.h> - -int -gf100_fifo_chan_ntfy(struct nvkm_fifo_chan *chan, u32 type, - struct nvkm_event **pevent) -{ - switch (type) { - case NV906F_V0_NTFY_NON_STALL_INTERRUPT: - *pevent = &chan->fifo->uevent; - return 0; - case NV906F_V0_NTFY_KILLED: - *pevent = &chan->fifo->kevent; - return 0; - default: - break; - } - return -EINVAL; -} - -static u32 -gf100_fifo_gpfifo_engine_addr(struct nvkm_engine *engine) -{ - switch (engine->subdev.type) { - case NVKM_ENGINE_SW : return 0; - case NVKM_ENGINE_GR : return 0x0210; - case NVKM_ENGINE_CE : return 0x0230 + (engine->subdev.inst * 0x10); - case NVKM_ENGINE_MSPDEC: return 0x0250; - case NVKM_ENGINE_MSPPP : return 0x0260; - case NVKM_ENGINE_MSVLD : return 0x0270; - default: - WARN_ON(1); - return 0; - } -} - -static struct gf100_fifo_engn * -gf100_fifo_gpfifo_engine(struct gf100_fifo_chan *chan, struct nvkm_engine *engine) -{ - int engi = chan->base.fifo->func->engine_id(chan->base.fifo, engine); - if (engi >= 0) - return &chan->engn[engi]; - return NULL; -} - -static int -gf100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine, bool suspend) -{ - const u32 offset = gf100_fifo_gpfifo_engine_addr(engine); - struct gf100_fifo_chan *chan = gf100_fifo_chan(base); - struct nvkm_subdev *subdev = &chan->fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - struct nvkm_gpuobj *inst = chan->base.inst; - int ret = 0; - - mutex_lock(&chan->fifo->base.mutex); - nvkm_wr32(device, 0x002634, chan->base.chid); - if (nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x002634) == chan->base.chid) - break; - ) < 0) { - nvkm_error(subdev, "channel %d [%s] kick timeout\n", - chan->base.chid, chan->base.object.client->name); - ret = -ETIMEDOUT; - } - mutex_unlock(&chan->fifo->base.mutex); - - if (ret && suspend) - return ret; - - if (offset) { - nvkm_kmap(inst); - nvkm_wo32(inst, offset + 0x00, 0x00000000); - nvkm_wo32(inst, offset + 0x04, 0x00000000); - nvkm_done(inst); - } - - return ret; -} - -static int -gf100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine) -{ - const u32 offset = gf100_fifo_gpfifo_engine_addr(engine); - struct gf100_fifo_chan *chan = gf100_fifo_chan(base); - struct gf100_fifo_engn *engn = gf100_fifo_gpfifo_engine(chan, engine); - struct nvkm_gpuobj *inst = chan->base.inst; - - if (offset) { - nvkm_kmap(inst); - nvkm_wo32(inst, offset + 0x00, lower_32_bits(engn->vma->addr) | 4); - nvkm_wo32(inst, offset + 0x04, upper_32_bits(engn->vma->addr)); - nvkm_done(inst); - } - - return 0; -} - -static void -gf100_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine) -{ - struct gf100_fifo_chan *chan = gf100_fifo_chan(base); - struct gf100_fifo_engn *engn = gf100_fifo_gpfifo_engine(chan, engine); - nvkm_vmm_put(chan->base.vmm, &engn->vma); - nvkm_gpuobj_del(&engn->inst); -} - -static int -gf100_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine, - struct nvkm_object *object) -{ - struct gf100_fifo_chan *chan = 
gf100_fifo_chan(base); - struct gf100_fifo_engn *engn = gf100_fifo_gpfifo_engine(chan, engine); - int ret; - - if (!gf100_fifo_gpfifo_engine_addr(engine)) - return 0; - - ret = nvkm_object_bind(object, NULL, 0, &engn->inst); - if (ret) - return ret; - - ret = nvkm_vmm_get(chan->base.vmm, 12, engn->inst->size, &engn->vma); - if (ret) - return ret; - - return nvkm_memory_map(engn->inst, 0, chan->base.vmm, engn->vma, NULL, 0); -} - -static void -gf100_fifo_gpfifo_fini(struct nvkm_fifo_chan *base) -{ - struct gf100_fifo_chan *chan = gf100_fifo_chan(base); - struct gf100_fifo *fifo = chan->fifo; - struct nvkm_device *device = fifo->base.engine.subdev.device; - u32 coff = chan->base.chid * 8; - - if (!list_empty(&chan->head) && !chan->killed) { - gf100_fifo_runlist_remove(fifo, chan); - nvkm_mask(device, 0x003004 + coff, 0x00000001, 0x00000000); - gf100_fifo_runlist_commit(fifo); - } - - gf100_fifo_intr_engine(fifo); - - nvkm_wr32(device, 0x003000 + coff, 0x00000000); -} - -static void -gf100_fifo_gpfifo_init(struct nvkm_fifo_chan *base) -{ - struct gf100_fifo_chan *chan = gf100_fifo_chan(base); - struct gf100_fifo *fifo = chan->fifo; - struct nvkm_device *device = fifo->base.engine.subdev.device; - u32 addr = chan->base.inst->addr >> 12; - u32 coff = chan->base.chid * 8; - - nvkm_wr32(device, 0x003000 + coff, 0xc0000000 | addr); - - if (list_empty(&chan->head) && !chan->killed) { - gf100_fifo_runlist_insert(fifo, chan); - nvkm_wr32(device, 0x003004 + coff, 0x001f0001); - gf100_fifo_runlist_commit(fifo); - } -} - -static void * -gf100_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base) -{ - return gf100_fifo_chan(base); -} - -static const struct nvkm_fifo_chan_func -gf100_fifo_gpfifo_func = { - .dtor = gf100_fifo_gpfifo_dtor, - .init = gf100_fifo_gpfifo_init, - .fini = gf100_fifo_gpfifo_fini, - .ntfy = gf100_fifo_chan_ntfy, - .engine_ctor = gf100_fifo_gpfifo_engine_ctor, - .engine_dtor = gf100_fifo_gpfifo_engine_dtor, - .engine_init = gf100_fifo_gpfifo_engine_init, - .engine_fini = gf100_fifo_gpfifo_engine_fini, -}; - -static int -gf100_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - union { - struct fermi_channel_gpfifo_v0 v0; - } *args = data; - struct gf100_fifo *fifo = gf100_fifo(base); - struct nvkm_object *parent = oclass->parent; - struct gf100_fifo_chan *chan; - u64 usermem, ioffset, ilength; - int ret = -ENOSYS, i; - - nvif_ioctl(parent, "create channel gpfifo size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " - "ioffset %016llx ilength %08x\n", - args->v0.version, args->v0.vmm, args->v0.ioffset, - args->v0.ilength); - if (!args->v0.vmm) - return -EINVAL; - } else - return ret; - - /* allocate channel */ - if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - *pobject = &chan->base.object; - chan->fifo = fifo; - INIT_LIST_HEAD(&chan->head); - - ret = nvkm_fifo_chan_ctor(&gf100_fifo_gpfifo_func, &fifo->base, - 0x1000, 0x1000, true, args->v0.vmm, 0, - BIT(GF100_FIFO_ENGN_GR) | - BIT(GF100_FIFO_ENGN_MSPDEC) | - BIT(GF100_FIFO_ENGN_MSPPP) | - BIT(GF100_FIFO_ENGN_MSVLD) | - BIT(GF100_FIFO_ENGN_CE0) | - BIT(GF100_FIFO_ENGN_CE1) | - BIT(GF100_FIFO_ENGN_SW), - 1, fifo->user.bar->addr, 0x1000, - oclass, &chan->base); - if (ret) - return ret; - - args->v0.chid = chan->base.chid; - - /* clear channel control registers */ - - usermem = chan->base.chid * 0x1000; - ioffset = args->v0.ioffset; - ilength = 
order_base_2(args->v0.ilength / 8); - - nvkm_kmap(fifo->user.mem); - for (i = 0; i < 0x1000; i += 4) - nvkm_wo32(fifo->user.mem, usermem + i, 0x00000000); - nvkm_done(fifo->user.mem); - usermem = nvkm_memory_addr(fifo->user.mem) + usermem; - - /* RAMFC */ - nvkm_kmap(chan->base.inst); - nvkm_wo32(chan->base.inst, 0x08, lower_32_bits(usermem)); - nvkm_wo32(chan->base.inst, 0x0c, upper_32_bits(usermem)); - nvkm_wo32(chan->base.inst, 0x10, 0x0000face); - nvkm_wo32(chan->base.inst, 0x30, 0xfffff902); - nvkm_wo32(chan->base.inst, 0x48, lower_32_bits(ioffset)); - nvkm_wo32(chan->base.inst, 0x4c, upper_32_bits(ioffset) | - (ilength << 16)); - nvkm_wo32(chan->base.inst, 0x54, 0x00000002); - nvkm_wo32(chan->base.inst, 0x84, 0x20400000); - nvkm_wo32(chan->base.inst, 0x94, 0x30000001); - nvkm_wo32(chan->base.inst, 0x9c, 0x00000100); - nvkm_wo32(chan->base.inst, 0xa4, 0x1f1f1f1f); - nvkm_wo32(chan->base.inst, 0xa8, 0x1f1f1f1f); - nvkm_wo32(chan->base.inst, 0xac, 0x0000001f); - nvkm_wo32(chan->base.inst, 0xb8, 0xf8000000); - nvkm_wo32(chan->base.inst, 0xf8, 0x10003080); /* 0x002310 */ - nvkm_wo32(chan->base.inst, 0xfc, 0x10000010); /* 0x002350 */ - nvkm_done(chan->base.inst); - return 0; -} - -const struct nvkm_fifo_chan_oclass -gf100_fifo_gpfifo_oclass = { - .base.oclass = FERMI_CHANNEL_GPFIFO, - .base.minver = 0, - .base.maxver = 0, - .ctor = gf100_fifo_gpfifo_new, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c deleted file mode 100644 index 80456ec70e8a..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
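
The RAMFC writes in the deleted gf100 constructor above encode the push-buffer ring size as order_base_2(ilength / 8), i.e. log2 of the number of 8-byte GP entries, packed into bits 31:16 of the word that also holds the upper offset bits; the reworked gv100_chan_ramfc_write() later in this patch computes the same value as ilog2(length / 8). A sketch of the encoding, with an illustrative helper name:

	#include <linux/log2.h>

	/* Pack the upper push-buffer address bits with the ring-size code:
	 * log2 of the GP-entry count, each entry being 8 bytes. */
	static u32
	gpfifo_base_limit_word(u64 ioffset, u64 ilength_bytes)
	{
		u32 limit2 = order_base_2(ilength_bytes / 8);

		return upper_32_bits(ioffset) | (limit2 << 16);
	}

For a 4 KiB ring this gives 512 entries and a size code of 9.
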
- * - * Authors: Ben Skeggs - */ -#include "changk104.h" -#include "cgrp.h" - -#include <core/client.h> -#include <core/gpuobj.h> -#include <subdev/fb.h> -#include <subdev/mmu.h> -#include <subdev/timer.h> - -#include <nvif/class.h> -#include <nvif/cla06f.h> -#include <nvif/unpack.h> - -int -gk104_fifo_gpfifo_kick_locked(struct gk104_fifo_chan *chan) -{ - struct gk104_fifo *fifo = chan->fifo; - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - struct nvkm_client *client = chan->base.object.client; - struct nvkm_fifo_cgrp *cgrp = chan->cgrp; - int ret = 0; - - if (cgrp) - nvkm_wr32(device, 0x002634, cgrp->id | 0x01000000); - else - nvkm_wr32(device, 0x002634, chan->base.chid); - if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x002634) & 0x00100000)) - break; - ) < 0) { - nvkm_error(subdev, "%s %d [%s] kick timeout\n", - cgrp ? "tsg" : "channel", - cgrp ? cgrp->id : chan->base.chid, client->name); - nvkm_fifo_recover_chan(&fifo->base, chan->base.chid); - ret = -ETIMEDOUT; - } - return ret; -} - -int -gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan) -{ - int ret; - mutex_lock(&chan->base.fifo->mutex); - ret = gk104_fifo_gpfifo_kick_locked(chan); - mutex_unlock(&chan->base.fifo->mutex); - return ret; -} - -static u32 -gk104_fifo_gpfifo_engine_addr(struct nvkm_engine *engine) -{ - switch (engine->subdev.type) { - case NVKM_ENGINE_SW : - case NVKM_ENGINE_CE : return 0; - case NVKM_ENGINE_GR : return 0x0210; - case NVKM_ENGINE_SEC : return 0x0220; - case NVKM_ENGINE_MSPDEC: return 0x0250; - case NVKM_ENGINE_MSPPP : return 0x0260; - case NVKM_ENGINE_MSVLD : return 0x0270; - case NVKM_ENGINE_VIC : return 0x0280; - case NVKM_ENGINE_MSENC : return 0x0290; - case NVKM_ENGINE_NVDEC : return 0x02100270; - case NVKM_ENGINE_NVENC : - if (engine->subdev.inst) - return 0x0210; - return 0x02100290; - default: - WARN_ON(1); - return 0; - } -} - -struct gk104_fifo_engn * -gk104_fifo_gpfifo_engine(struct gk104_fifo_chan *chan, struct nvkm_engine *engine) -{ - int engi = chan->base.fifo->func->engine_id(chan->base.fifo, engine); - if (engi >= 0) - return &chan->engn[engi]; - return NULL; -} - -static int -gk104_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine, bool suspend) -{ - struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - struct nvkm_gpuobj *inst = chan->base.inst; - u32 offset = gk104_fifo_gpfifo_engine_addr(engine); - int ret; - - ret = gk104_fifo_gpfifo_kick(chan); - if (ret && suspend) - return ret; - - if (offset) { - nvkm_kmap(inst); - nvkm_wo32(inst, (offset & 0xffff) + 0x00, 0x00000000); - nvkm_wo32(inst, (offset & 0xffff) + 0x04, 0x00000000); - if ((offset >>= 16)) { - nvkm_wo32(inst, offset + 0x00, 0x00000000); - nvkm_wo32(inst, offset + 0x04, 0x00000000); - } - nvkm_done(inst); - } - - return ret; -} - -static int -gk104_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine) -{ - struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - struct gk104_fifo_engn *engn = gk104_fifo_gpfifo_engine(chan, engine); - struct nvkm_gpuobj *inst = chan->base.inst; - u32 offset = gk104_fifo_gpfifo_engine_addr(engine); - - if (offset) { - u32 datalo = lower_32_bits(engn->vma->addr) | 0x00000004; - u32 datahi = upper_32_bits(engn->vma->addr); - nvkm_kmap(inst); - nvkm_wo32(inst, (offset & 0xffff) + 0x00, datalo); - nvkm_wo32(inst, (offset & 0xffff) + 0x04, datahi); - if ((offset >>= 16)) { - nvkm_wo32(inst, offset + 0x00, datalo); - nvkm_wo32(inst, offset + 0x04, datahi); - } - 
nvkm_done(inst); - } - - return 0; -} - -void -gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine) -{ - struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - struct gk104_fifo_engn *engn = gk104_fifo_gpfifo_engine(chan, engine); - nvkm_vmm_put(chan->base.vmm, &engn->vma); - nvkm_gpuobj_del(&engn->inst); -} - -int -gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine, - struct nvkm_object *object) -{ - struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - struct gk104_fifo_engn *engn = gk104_fifo_gpfifo_engine(chan, engine); - int ret; - - if (!gk104_fifo_gpfifo_engine_addr(engine)) { - if (engine->subdev.type != NVKM_ENGINE_CE || - engine->subdev.device->card_type < GV100) - return 0; - } - - ret = nvkm_object_bind(object, NULL, 0, &engn->inst); - if (ret) - return ret; - - if (!gk104_fifo_gpfifo_engine_addr(engine)) - return 0; - - ret = nvkm_vmm_get(chan->base.vmm, 12, engn->inst->size, &engn->vma); - if (ret) - return ret; - - return nvkm_memory_map(engn->inst, 0, chan->base.vmm, engn->vma, NULL, 0); -} - -void -gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *base) -{ - struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - struct gk104_fifo *fifo = chan->fifo; - struct nvkm_device *device = fifo->base.engine.subdev.device; - u32 coff = chan->base.chid * 8; - - if (!list_empty(&chan->head)) { - gk104_fifo_runlist_remove(fifo, chan); - nvkm_mask(device, 0x800004 + coff, 0x00000800, 0x00000800); - gk104_fifo_gpfifo_kick(chan); - gk104_fifo_runlist_update(fifo, chan->runl); - } - - nvkm_wr32(device, 0x800000 + coff, 0x00000000); -} - -void -gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base) -{ - struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - struct gk104_fifo *fifo = chan->fifo; - struct nvkm_device *device = fifo->base.engine.subdev.device; - u32 addr = chan->base.inst->addr >> 12; - u32 coff = chan->base.chid * 8; - - nvkm_mask(device, 0x800004 + coff, 0x000f0000, chan->runl << 16); - nvkm_wr32(device, 0x800000 + coff, 0x80000000 | addr); - - if (list_empty(&chan->head) && !chan->killed) { - gk104_fifo_runlist_insert(fifo, chan); - nvkm_mask(device, 0x800004 + coff, 0x00000400, 0x00000400); - gk104_fifo_runlist_update(fifo, chan->runl); - nvkm_mask(device, 0x800004 + coff, 0x00000400, 0x00000400); - } -} - -void * -gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base) -{ - struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - kfree(chan->cgrp); - return chan; -} - -const struct nvkm_fifo_chan_func -gk104_fifo_gpfifo_func = { - .dtor = gk104_fifo_gpfifo_dtor, - .init = gk104_fifo_gpfifo_init, - .fini = gk104_fifo_gpfifo_fini, - .ntfy = gf100_fifo_chan_ntfy, - .engine_ctor = gk104_fifo_gpfifo_engine_ctor, - .engine_dtor = gk104_fifo_gpfifo_engine_dtor, - .engine_init = gk104_fifo_gpfifo_engine_init, - .engine_fini = gk104_fifo_gpfifo_engine_fini, -}; - -static int -gk104_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid, - u64 vmm, u64 ioffset, u64 ilength, u64 *inst, bool priv, - const struct nvkm_oclass *oclass, - struct nvkm_object **pobject) -{ - struct gk104_fifo_chan *chan; - int runlist = ffs(*runlists) -1, ret, i; - u64 usermem; - - if (!vmm || runlist < 0 || runlist >= fifo->runlist_nr) - return -EINVAL; - *runlists = BIT_ULL(runlist); - - /* Allocate the channel. 
*/ - if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - *pobject = &chan->base.object; - chan->fifo = fifo; - chan->runl = runlist; - INIT_LIST_HEAD(&chan->head); - - ret = nvkm_fifo_chan_ctor(&gk104_fifo_gpfifo_func, &fifo->base, - 0x1000, 0x1000, true, vmm, 0, fifo->runlist[runlist].engm_sw, - 1, fifo->user.bar->addr, 0x200, - oclass, &chan->base); - if (ret) - return ret; - - *chid = chan->base.chid; - *inst = chan->base.inst->addr; - - /* Hack to support GPUs where even individual channels should be - * part of a channel group. - */ - if (fifo->func->cgrp_force) { - if (!(chan->cgrp = kmalloc(sizeof(*chan->cgrp), GFP_KERNEL))) - return -ENOMEM; - chan->cgrp->id = chan->base.chid; - INIT_LIST_HEAD(&chan->cgrp->head); - INIT_LIST_HEAD(&chan->cgrp->chan); - chan->cgrp->chan_nr = 0; - } - - /* Clear channel control registers. */ - usermem = chan->base.chid * 0x200; - ilength = order_base_2(ilength / 8); - - nvkm_kmap(fifo->user.mem); - for (i = 0; i < 0x200; i += 4) - nvkm_wo32(fifo->user.mem, usermem + i, 0x00000000); - nvkm_done(fifo->user.mem); - usermem = nvkm_memory_addr(fifo->user.mem) + usermem; - - /* RAMFC */ - nvkm_kmap(chan->base.inst); - nvkm_wo32(chan->base.inst, 0x08, lower_32_bits(usermem)); - nvkm_wo32(chan->base.inst, 0x0c, upper_32_bits(usermem)); - nvkm_wo32(chan->base.inst, 0x10, 0x0000face); - nvkm_wo32(chan->base.inst, 0x30, 0xfffff902); - nvkm_wo32(chan->base.inst, 0x48, lower_32_bits(ioffset)); - nvkm_wo32(chan->base.inst, 0x4c, upper_32_bits(ioffset) | - (ilength << 16)); - nvkm_wo32(chan->base.inst, 0x84, 0x20400000); - nvkm_wo32(chan->base.inst, 0x94, 0x30000001); - nvkm_wo32(chan->base.inst, 0x9c, 0x00000100); - nvkm_wo32(chan->base.inst, 0xac, 0x0000001f); - nvkm_wo32(chan->base.inst, 0xe4, priv ? 0x00000020 : 0x00000000); - nvkm_wo32(chan->base.inst, 0xe8, chan->base.chid); - nvkm_wo32(chan->base.inst, 0xb8, 0xf8000000); - nvkm_wo32(chan->base.inst, 0xf8, 0x10003080); /* 0x002310 */ - nvkm_wo32(chan->base.inst, 0xfc, 0x10000010); /* 0x002350 */ - nvkm_done(chan->base.inst); - return 0; -} - -int -gk104_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - struct nvkm_object *parent = oclass->parent; - union { - struct kepler_channel_gpfifo_a_v0 v0; - } *args = data; - int ret = -ENOSYS; - - nvif_ioctl(parent, "create channel gpfifo size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " - "ioffset %016llx ilength %08x " - "runlist %016llx priv %d\n", - args->v0.version, args->v0.vmm, args->v0.ioffset, - args->v0.ilength, args->v0.runlist, args->v0.priv); - return gk104_fifo_gpfifo_new_(fifo, - &args->v0.runlist, - &args->v0.chid, - args->v0.vmm, - args->v0.ioffset, - args->v0.ilength, - &args->v0.inst, - args->v0.priv, - oclass, pobject); - } - - return ret; -} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c deleted file mode 100644 index 428f9b41165c..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright 2018 Red Hat Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#include "changk104.h" -#include "cgrp.h" - -#include <core/client.h> -#include <core/gpuobj.h> - -#include <nvif/clc36f.h> -#include <nvif/unpack.h> - -static u32 -gv100_fifo_gpfifo_submit_token(struct nvkm_fifo_chan *chan) -{ - return chan->chid; -} - -static int -gv100_fifo_gpfifo_engine_valid(struct gk104_fifo_chan *chan, bool ce, bool valid) -{ - struct nvkm_subdev *subdev = &chan->base.fifo->engine.subdev; - struct nvkm_device *device = subdev->device; - const u32 mask = ce ? 0x00020000 : 0x00010000; - const u32 data = valid ? mask : 0x00000000; - int ret; - - /* Block runlist to prevent the channel from being rescheduled. */ - mutex_lock(&chan->fifo->base.mutex); - nvkm_mask(device, 0x002630, BIT(chan->runl), BIT(chan->runl)); - - /* Preempt the channel. */ - ret = gk104_fifo_gpfifo_kick_locked(chan); - if (ret == 0) { - /* Update engine context validity. */ - nvkm_kmap(chan->base.inst); - nvkm_mo32(chan->base.inst, 0x0ac, mask, data); - nvkm_done(chan->base.inst); - } - - /* Resume runlist. 
*/ - nvkm_mask(device, 0x002630, BIT(chan->runl), 0); - mutex_unlock(&chan->fifo->base.mutex); - return ret; -} - -int -gv100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine, bool suspend) -{ - struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - struct nvkm_gpuobj *inst = chan->base.inst; - int ret; - - if (engine->subdev.type == NVKM_ENGINE_CE) { - ret = gv100_fifo_gpfifo_engine_valid(chan, true, false); - if (ret && suspend) - return ret; - - nvkm_kmap(inst); - nvkm_wo32(chan->base.inst, 0x220, 0x00000000); - nvkm_wo32(chan->base.inst, 0x224, 0x00000000); - nvkm_done(inst); - return ret; - } - - ret = gv100_fifo_gpfifo_engine_valid(chan, false, false); - if (ret && suspend) - return ret; - - nvkm_kmap(inst); - nvkm_wo32(inst, 0x0210, 0x00000000); - nvkm_wo32(inst, 0x0214, 0x00000000); - nvkm_done(inst); - return ret; -} - -int -gv100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base, - struct nvkm_engine *engine) -{ - struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - struct gk104_fifo_engn *engn = gk104_fifo_gpfifo_engine(chan, engine); - struct nvkm_gpuobj *inst = chan->base.inst; - - if (engine->subdev.type == NVKM_ENGINE_CE) { - const u64 bar2 = nvkm_memory_bar2(engn->inst->memory); - - nvkm_kmap(inst); - nvkm_wo32(chan->base.inst, 0x220, lower_32_bits(bar2)); - nvkm_wo32(chan->base.inst, 0x224, upper_32_bits(bar2)); - nvkm_done(inst); - - return gv100_fifo_gpfifo_engine_valid(chan, true, true); - } - - nvkm_kmap(inst); - nvkm_wo32(inst, 0x210, lower_32_bits(engn->vma->addr) | 0x00000004); - nvkm_wo32(inst, 0x214, upper_32_bits(engn->vma->addr)); - nvkm_done(inst); - - return gv100_fifo_gpfifo_engine_valid(chan, false, true); -} - -static const struct nvkm_fifo_chan_func -gv100_fifo_gpfifo = { - .dtor = gk104_fifo_gpfifo_dtor, - .init = gk104_fifo_gpfifo_init, - .fini = gk104_fifo_gpfifo_fini, - .ntfy = gf100_fifo_chan_ntfy, - .engine_ctor = gk104_fifo_gpfifo_engine_ctor, - .engine_dtor = gk104_fifo_gpfifo_engine_dtor, - .engine_init = gv100_fifo_gpfifo_engine_init, - .engine_fini = gv100_fifo_gpfifo_engine_fini, - .submit_token = gv100_fifo_gpfifo_submit_token, -}; - -int -gv100_fifo_gpfifo_new_(const struct nvkm_fifo_chan_func *func, - struct gk104_fifo *fifo, u64 *runlists, u16 *chid, - u64 vmm, u64 ioffset, u64 ilength, u64 *inst, bool priv, - u32 *token, const struct nvkm_oclass *oclass, - struct nvkm_object **pobject) -{ - struct gk104_fifo_chan *chan; - int runlist = ffs(*runlists) -1, ret, i; - u64 usermem; - - if (!vmm || runlist < 0 || runlist >= fifo->runlist_nr) - return -EINVAL; - *runlists = BIT_ULL(runlist); - - /* Allocate the channel. */ - if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - *pobject = &chan->base.object; - chan->fifo = fifo; - chan->runl = runlist; - INIT_LIST_HEAD(&chan->head); - - ret = nvkm_fifo_chan_ctor(func, &fifo->base, 0x1000, 0x1000, true, vmm, - 0, fifo->runlist[runlist].engm, 1, fifo->user.bar->addr, 0x200, - oclass, &chan->base); - if (ret) - return ret; - - *chid = chan->base.chid; - *inst = chan->base.inst->addr; - *token = chan->base.func->submit_token(&chan->base); - - /* Hack to support GPUs where even individual channels should be - * part of a channel group. - */ - if (fifo->func->cgrp_force) { - if (!(chan->cgrp = kmalloc(sizeof(*chan->cgrp), GFP_KERNEL))) - return -ENOMEM; - chan->cgrp->id = chan->base.chid; - INIT_LIST_HEAD(&chan->cgrp->head); - INIT_LIST_HEAD(&chan->cgrp->chan); - chan->cgrp->chan_nr = 0; - } - - /* Clear channel control registers. 
*/ - usermem = chan->base.chid * 0x200; - ilength = order_base_2(ilength / 8); - - nvkm_kmap(fifo->user.mem); - for (i = 0; i < 0x200; i += 4) - nvkm_wo32(fifo->user.mem, usermem + i, 0x00000000); - nvkm_done(fifo->user.mem); - usermem = nvkm_memory_addr(fifo->user.mem) + usermem; - - /* RAMFC */ - nvkm_kmap(chan->base.inst); - nvkm_wo32(chan->base.inst, 0x008, lower_32_bits(usermem)); - nvkm_wo32(chan->base.inst, 0x00c, upper_32_bits(usermem)); - nvkm_wo32(chan->base.inst, 0x010, 0x0000face); - nvkm_wo32(chan->base.inst, 0x030, 0x7ffff902); - nvkm_wo32(chan->base.inst, 0x048, lower_32_bits(ioffset)); - nvkm_wo32(chan->base.inst, 0x04c, upper_32_bits(ioffset) | - (ilength << 16)); - nvkm_wo32(chan->base.inst, 0x084, 0x20400000); - nvkm_wo32(chan->base.inst, 0x094, 0x30000001); - nvkm_wo32(chan->base.inst, 0x0e4, priv ? 0x00000020 : 0x00000000); - nvkm_wo32(chan->base.inst, 0x0e8, chan->base.chid); - nvkm_wo32(chan->base.inst, 0x0f4, 0x00001000); - nvkm_wo32(chan->base.inst, 0x0f8, 0x10003080); - nvkm_mo32(chan->base.inst, 0x218, 0x00000000, 0x00000000); - nvkm_done(chan->base.inst); - return 0; -} - -int -gv100_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - struct nvkm_object *parent = oclass->parent; - union { - struct volta_channel_gpfifo_a_v0 v0; - } *args = data; - int ret = -ENOSYS; - - nvif_ioctl(parent, "create channel gpfifo size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " - "ioffset %016llx ilength %08x " - "runlist %016llx priv %d\n", - args->v0.version, args->v0.vmm, args->v0.ioffset, - args->v0.ilength, args->v0.runlist, args->v0.priv); - return gv100_fifo_gpfifo_new_(&gv100_fifo_gpfifo, fifo, - &args->v0.runlist, - &args->v0.chid, - args->v0.vmm, - args->v0.ioffset, - args->v0.ilength, - &args->v0.inst, - args->v0.priv, - &args->v0.token, - oclass, pobject); - } - - return ret; -} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c deleted file mode 100644 index d8f28ec1e4a8..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
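
The deleted gv100 constructor above still carves USERD out of a global slab, fifo->user.mem, at chid * 0x200; the replacement gv100_chan_ramfc_write() in the gv100.c hunk below reads it from per-channel state instead. Side by side, using the names from the surrounding diff (the two helpers are illustrative wrappers, not kernel API):

	/* Old scheme: USERD is a 0x200-byte slice of a fifo-wide buffer. */
	static u64
	userd_addr_old(struct nvkm_memory *user_mem, int chid)
	{
		return nvkm_memory_addr(user_mem) + chid * 0x200;
	}

	/* New scheme: each nvkm_chan carries its own USERD allocation. */
	static u64
	userd_addr_new(struct nvkm_chan *chan)
	{
		return nvkm_memory_addr(chan->userd.mem) + chan->userd.base;
	}
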
- * - * Authors: Ben Skeggs - */ -#include "channv50.h" - -#include <core/client.h> -#include <core/ramht.h> - -#include <nvif/class.h> -#include <nvif/cl506f.h> -#include <nvif/unpack.h> - -static int -nv50_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - struct nvkm_object *parent = oclass->parent; - union { - struct nv50_channel_gpfifo_v0 v0; - } *args = data; - struct nv50_fifo *fifo = nv50_fifo(base); - struct nv50_fifo_chan *chan; - u64 ioffset, ilength; - int ret = -ENOSYS; - - nvif_ioctl(parent, "create channel gpfifo size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " - "pushbuf %llx ioffset %016llx " - "ilength %08x\n", - args->v0.version, args->v0.vmm, args->v0.pushbuf, - args->v0.ioffset, args->v0.ilength); - if (!args->v0.pushbuf) - return -EINVAL; - } else - return ret; - - if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - *pobject = &chan->base.object; - - ret = nv50_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf, - oclass, chan); - if (ret) - return ret; - - args->v0.chid = chan->base.chid; - ioffset = args->v0.ioffset; - ilength = order_base_2(args->v0.ilength / 8); - - nvkm_kmap(chan->ramfc); - nvkm_wo32(chan->ramfc, 0x3c, 0x403f6078); - nvkm_wo32(chan->ramfc, 0x44, 0x01003fff); - nvkm_wo32(chan->ramfc, 0x48, chan->base.push->node->offset >> 4); - nvkm_wo32(chan->ramfc, 0x50, lower_32_bits(ioffset)); - nvkm_wo32(chan->ramfc, 0x54, upper_32_bits(ioffset) | (ilength << 16)); - nvkm_wo32(chan->ramfc, 0x60, 0x7fffffff); - nvkm_wo32(chan->ramfc, 0x78, 0x00000000); - nvkm_wo32(chan->ramfc, 0x7c, 0x30000001); - nvkm_wo32(chan->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) | - (4 << 24) /* SEARCH_FULL */ | - (chan->ramht->gpuobj->node->offset >> 4)); - nvkm_done(chan->ramfc); - return 0; -} - -const struct nvkm_fifo_chan_oclass -nv50_fifo_gpfifo_oclass = { - .base.oclass = NV50_CHANNEL_GPFIFO, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_fifo_gpfifo_new, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c deleted file mode 100644 index 99aafa103a31..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright 2018 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
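
The deleted nv50/g84 constructors above program the RAMHT descriptor into RAMFC offset 0x80: the table size appears to be encoded relative to a minimum 2^9-entry table, mode 4 is SEARCH_FULL per the inline comment, and the base address is stored in 16-byte units. A sketch of that word, with an illustrative helper name:

	static u32
	nv50_ramfc_ramht_word(u32 ramht_bits, u32 ramht_offset)
	{
		return ((ramht_bits - 9) << 27) |	/* log2(entries), biased by 9 */
		       (4 << 24) |			/* SEARCH_FULL */
		       (ramht_offset >> 4);		/* base in 16-byte units */
	}
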
- */ -#include "changk104.h" -#include "cgrp.h" - -#include <core/client.h> -#include <core/gpuobj.h> - -#include <nvif/clc36f.h> -#include <nvif/unpack.h> - -static u32 -tu102_fifo_gpfifo_submit_token(struct nvkm_fifo_chan *base) -{ - struct gk104_fifo_chan *chan = gk104_fifo_chan(base); - return (chan->runl << 16) | chan->base.chid; -} - -static const struct nvkm_fifo_chan_func -tu102_fifo_gpfifo = { - .dtor = gk104_fifo_gpfifo_dtor, - .init = gk104_fifo_gpfifo_init, - .fini = gk104_fifo_gpfifo_fini, - .ntfy = gf100_fifo_chan_ntfy, - .engine_ctor = gk104_fifo_gpfifo_engine_ctor, - .engine_dtor = gk104_fifo_gpfifo_engine_dtor, - .engine_init = gv100_fifo_gpfifo_engine_init, - .engine_fini = gv100_fifo_gpfifo_engine_fini, - .submit_token = tu102_fifo_gpfifo_submit_token, -}; - -int -tu102_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - struct nvkm_object *parent = oclass->parent; - union { - struct volta_channel_gpfifo_a_v0 v0; - } *args = data; - int ret = -ENOSYS; - - nvif_ioctl(parent, "create channel gpfifo size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " - "ioffset %016llx ilength %08x " - "runlist %016llx priv %d\n", - args->v0.version, args->v0.vmm, args->v0.ioffset, - args->v0.ilength, args->v0.runlist, args->v0.priv); - return gv100_fifo_gpfifo_new_(&tu102_fifo_gpfifo, fifo, - &args->v0.runlist, - &args->v0.chid, - args->v0.vmm, - args->v0.ioffset, - args->v0.ilength, - &args->v0.inst, - args->v0.priv, - &args->v0.token, - oclass, pobject); - } - - return ret; -} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c index faf0fe9f704c..33066c8cdc64 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c @@ -19,32 +19,180 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -#include "gk104.h" +#include "priv.h" +#include "chan.h" +#include "chid.h" #include "cgrp.h" -#include "changk104.h" -#include "user.h" +#include "runl.h" +#include "runq.h" #include <core/gpuobj.h> +#include <subdev/mmu.h> #include <nvif/class.h> +static u32 +gv100_chan_doorbell_handle(struct nvkm_chan *chan) +{ + return chan->id; +} + +static int +gv100_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv) +{ + const u64 userd = nvkm_memory_addr(chan->userd.mem) + chan->userd.base; + const u32 limit2 = ilog2(length / 8); + + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x008, lower_32_bits(userd)); + nvkm_wo32(chan->inst, 0x00c, upper_32_bits(userd)); + nvkm_wo32(chan->inst, 0x010, 0x0000face); + nvkm_wo32(chan->inst, 0x030, 0x7ffff902); + nvkm_wo32(chan->inst, 0x048, lower_32_bits(offset)); + nvkm_wo32(chan->inst, 0x04c, upper_32_bits(offset) | (limit2 << 16)); + nvkm_wo32(chan->inst, 0x084, 0x20400000); + nvkm_wo32(chan->inst, 0x094, 0x30000000 | devm); + nvkm_wo32(chan->inst, 0x0e4, priv ? 0x00000020 : 0x00000000); + nvkm_wo32(chan->inst, 0x0e8, chan->id); + nvkm_wo32(chan->inst, 0x0f4, 0x00001000 | (priv ? 
0x00000100 : 0x00000000)); + nvkm_wo32(chan->inst, 0x0f8, 0x10003080); + nvkm_mo32(chan->inst, 0x218, 0x00000000, 0x00000000); + nvkm_done(chan->inst); + return 0; +} + +const struct nvkm_chan_func_ramfc +gv100_chan_ramfc = { + .write = gv100_chan_ramfc_write, + .devm = 0xfff, + .priv = true, +}; + +const struct nvkm_chan_func_userd +gv100_chan_userd = { + .bar = -1, + .size = 0x200, + .clear = gf100_chan_userd_clear, +}; + +static const struct nvkm_chan_func +gv100_chan = { + .inst = &gf100_chan_inst, + .userd = &gv100_chan_userd, + .ramfc = &gv100_chan_ramfc, + .bind = gk104_chan_bind_inst, + .unbind = gk104_chan_unbind, + .start = gk104_chan_start, + .stop = gk104_chan_stop, + .preempt = gk110_chan_preempt, + .doorbell_handle = gv100_chan_doorbell_handle, +}; + +void +gv100_ectx_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan) +{ + u64 addr = 0ULL; + + if (cctx) { + addr = cctx->vctx->vma->addr; + addr |= 4ULL; + } + + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x210, lower_32_bits(addr)); + nvkm_wo32(chan->inst, 0x214, upper_32_bits(addr)); + nvkm_mo32(chan->inst, 0x0ac, 0x00010000, cctx ? 0x00010000 : 0x00000000); + nvkm_done(chan->inst); +} + +const struct nvkm_engn_func +gv100_engn = { + .chsw = gk104_engn_chsw, + .cxid = gk104_engn_cxid, + .ctor = gk104_ectx_ctor, + .bind = gv100_ectx_bind, +}; + +void +gv100_ectx_ce_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan) +{ + const u64 bar2 = cctx ? nvkm_memory_bar2(cctx->vctx->inst->memory) : 0ULL; + + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x220, lower_32_bits(bar2)); + nvkm_wo32(chan->inst, 0x224, upper_32_bits(bar2)); + nvkm_mo32(chan->inst, 0x0ac, 0x00020000, cctx ? 0x00020000 : 0x00000000); + nvkm_done(chan->inst); +} + +int +gv100_ectx_ce_ctor(struct nvkm_engn *engn, struct nvkm_vctx *vctx) +{ + if (nvkm_memory_bar2(vctx->inst->memory) == ~0ULL) + return -EFAULT; + + return 0; +} + +const struct nvkm_engn_func +gv100_engn_ce = { + .chsw = gk104_engn_chsw, + .cxid = gk104_engn_cxid, + .ctor = gv100_ectx_ce_ctor, + .bind = gv100_ectx_ce_bind, +}; + +static bool +gv100_runq_intr_1_ctxnotvalid(struct nvkm_runq *runq, int chid) +{ + struct nvkm_fifo *fifo = runq->fifo; + struct nvkm_device *device = fifo->engine.subdev.device; + struct nvkm_chan *chan; + unsigned long flags; + + RUNQ_ERROR(runq, "CTXNOTVALID chid:%d", chid); + + chan = nvkm_chan_get_chid(&fifo->engine, chid, &flags); + if (WARN_ON_ONCE(!chan)) + return false; + + nvkm_chan_error(chan, true); + nvkm_chan_put(&chan, flags); + + nvkm_mask(device, 0x0400ac + (runq->id * 0x2000), 0x00030000, 0x00030000); + nvkm_wr32(device, 0x040148 + (runq->id * 0x2000), 0x80000000); + return true; +} + +const struct nvkm_runq_func +gv100_runq = { + .init = gk208_runq_init, + .intr = gk104_runq_intr, + .intr_0_names = gk104_runq_intr_0_names, + .intr_1_ctxnotvalid = gv100_runq_intr_1_ctxnotvalid, + .idle = gk104_runq_idle, +}; + +void +gv100_runl_preempt(struct nvkm_runl *runl) +{ + nvkm_wr32(runl->fifo->engine.subdev.device, 0x002638, BIT(runl->id)); +} + void -gv100_fifo_runlist_chan(struct gk104_fifo_chan *chan, - struct nvkm_memory *memory, u32 offset) +gv100_runl_insert_chan(struct nvkm_chan *chan, struct nvkm_memory *memory, u64 offset) { - struct nvkm_memory *usermem = chan->fifo->user.mem; - const u64 user = nvkm_memory_addr(usermem) + (chan->base.chid * 0x200); - const u64 inst = chan->base.inst->addr; + const u64 user = nvkm_memory_addr(chan->userd.mem) + chan->userd.base; + const u64 inst = chan->inst->addr; - 
nvkm_wo32(memory, offset + 0x0, lower_32_bits(user)); + nvkm_wo32(memory, offset + 0x0, lower_32_bits(user) | chan->runq << 1); nvkm_wo32(memory, offset + 0x4, upper_32_bits(user)); - nvkm_wo32(memory, offset + 0x8, lower_32_bits(inst) | chan->base.chid); + nvkm_wo32(memory, offset + 0x8, lower_32_bits(inst) | chan->id); nvkm_wo32(memory, offset + 0xc, upper_32_bits(inst)); } void -gv100_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *cgrp, - struct nvkm_memory *memory, u32 offset) +gv100_runl_insert_cgrp(struct nvkm_cgrp *cgrp, struct nvkm_memory *memory, u64 offset) { nvkm_wo32(memory, offset + 0x0, (128 << 24) | (3 << 16) | 0x00000001); nvkm_wo32(memory, offset + 0x4, cgrp->chan_nr); @@ -52,16 +200,24 @@ gv100_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *cgrp, nvkm_wo32(memory, offset + 0xc, 0x00000000); } -static const struct gk104_fifo_runlist_func -gv100_fifo_runlist = { +static const struct nvkm_runl_func +gv100_runl = { + .runqs = 2, .size = 16, - .cgrp = gv100_fifo_runlist_cgrp, - .chan = gv100_fifo_runlist_chan, - .commit = gk104_fifo_runlist_commit, + .update = nv50_runl_update, + .insert_cgrp = gv100_runl_insert_cgrp, + .insert_chan = gv100_runl_insert_chan, + .commit = gk104_runl_commit, + .wait = nv50_runl_wait, + .pending = gk104_runl_pending, + .block = gk104_runl_block, + .allow = gk104_runl_allow, + .preempt = gv100_runl_preempt, + .preempt_pending = gf100_runl_preempt_pending, }; const struct nvkm_enum -gv100_fifo_fault_gpcclient[] = { +gv100_fifo_mmu_fault_gpcclient[] = { { 0x00, "T1_0" }, { 0x01, "T1_1" }, { 0x02, "T1_2" }, @@ -163,7 +319,7 @@ gv100_fifo_fault_gpcclient[] = { }; const struct nvkm_enum -gv100_fifo_fault_hubclient[] = { +gv100_fifo_mmu_fault_hubclient[] = { { 0x00, "VIP" }, { 0x01, "CE0" }, { 0x02, "CE1" }, @@ -225,7 +381,7 @@ gv100_fifo_fault_hubclient[] = { }; const struct nvkm_enum -gv100_fifo_fault_reason[] = { +gv100_fifo_mmu_fault_reason[] = { { 0x00, "PDE" }, { 0x01, "PDE_SIZE" }, { 0x02, "PTE" }, @@ -246,7 +402,7 @@ gv100_fifo_fault_reason[] = { }; static const struct nvkm_enum -gv100_fifo_fault_engine[] = { +gv100_fifo_mmu_fault_engine[] = { { 0x01, "DISPLAY" }, { 0x03, "PTP" }, { 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR }, @@ -273,7 +429,7 @@ gv100_fifo_fault_engine[] = { }; const struct nvkm_enum -gv100_fifo_fault_access[] = { +gv100_fifo_mmu_fault_access[] = { { 0x0, "VIRT_READ" }, { 0x1, "VIRT_WRITE" }, { 0x2, "VIRT_ATOMIC" }, @@ -286,23 +442,51 @@ gv100_fifo_fault_access[] = { {} }; -static const struct gk104_fifo_func +static const struct nvkm_fifo_func_mmu_fault +gv100_fifo_mmu_fault = { + .recover = gf100_fifo_mmu_fault_recover, + .access = gv100_fifo_mmu_fault_access, + .engine = gv100_fifo_mmu_fault_engine, + .reason = gv100_fifo_mmu_fault_reason, + .hubclient = gv100_fifo_mmu_fault_hubclient, + .gpcclient = gv100_fifo_mmu_fault_gpcclient, +}; + +static void +gv100_fifo_intr_ctxsw_timeout(struct nvkm_fifo *fifo, u32 engm) +{ + struct nvkm_runl *runl; + struct nvkm_engn *engn; + + nvkm_runl_foreach(runl, fifo) { + nvkm_runl_foreach_engn_cond(engn, runl, engm & BIT(engn->id)) + nvkm_runl_rc_engn(runl, engn); + } +} + +static const struct nvkm_fifo_func gv100_fifo = { - .pbdma = &gm200_fifo_pbdma, - .fault.access = gv100_fifo_fault_access, - .fault.engine = gv100_fifo_fault_engine, - .fault.reason = gv100_fifo_fault_reason, - .fault.hubclient = gv100_fifo_fault_hubclient, - .fault.gpcclient = gv100_fifo_fault_gpcclient, - .runlist = &gv100_fifo_runlist, - .user = {{-1,-1,VOLTA_USERMODE_A }, gv100_fifo_user_new }, - .chan = {{ 0, 
0,VOLTA_CHANNEL_GPFIFO_A}, gv100_fifo_gpfifo_new }, - .cgrp_force = true, + .chid_nr = gm200_fifo_chid_nr, + .chid_ctor = gk110_fifo_chid_ctor, + .runq_nr = gm200_fifo_runq_nr, + .runl_ctor = gk104_fifo_runl_ctor, + .init = gk104_fifo_init, + .init_pbdmas = gk104_fifo_init_pbdmas, + .intr = gk104_fifo_intr, + .intr_ctxsw_timeout = gv100_fifo_intr_ctxsw_timeout, + .mmu_fault = &gv100_fifo_mmu_fault, + .nonstall = &gf100_fifo_nonstall, + .runl = &gv100_runl, + .runq = &gv100_runq, + .engn = &gv100_engn, + .engn_ce = &gv100_engn_ce, + .cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A }, &gk110_cgrp, .force = true }, + .chan = {{ 0, 0, VOLTA_CHANNEL_GPFIFO_A }, &gv100_chan }, }; int gv100_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return gk104_fifo_new_(&gv100_fifo, device, type, inst, 4096, pfifo); + return nvkm_fifo_new_(&gv100_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c index c6730c124769..674faf002b20 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c @@ -21,38 +21,201 @@ * * Authors: Ben Skeggs */ -#include "nv04.h" -#include "channv04.h" +#include "priv.h" +#include "cgrp.h" +#include "chan.h" +#include "chid.h" +#include "runl.h" + #include "regsnv04.h" -#include <core/client.h> #include <core/ramht.h> #include <subdev/instmem.h> +#include <subdev/mc.h> #include <subdev/timer.h> #include <engine/sw.h> -static const struct nv04_fifo_ramfc -nv04_fifo_ramfc[] = { - { 32, 0, 0x00, 0, NV04_PFIFO_CACHE1_DMA_PUT }, - { 32, 0, 0x04, 0, NV04_PFIFO_CACHE1_DMA_GET }, - { 16, 0, 0x08, 0, NV04_PFIFO_CACHE1_DMA_INSTANCE }, - { 16, 16, 0x08, 0, NV04_PFIFO_CACHE1_DMA_DCOUNT }, - { 32, 0, 0x0c, 0, NV04_PFIFO_CACHE1_DMA_STATE }, - { 32, 0, 0x10, 0, NV04_PFIFO_CACHE1_DMA_FETCH }, - { 32, 0, 0x14, 0, NV04_PFIFO_CACHE1_ENGINE }, - { 32, 0, 0x18, 0, NV04_PFIFO_CACHE1_PULL1 }, - {} +#include <nvif/class.h> + +void +nv04_chan_stop(struct nvkm_chan *chan) +{ + struct nvkm_fifo *fifo = chan->cgrp->runl->fifo; + struct nvkm_device *device = fifo->engine.subdev.device; + struct nvkm_memory *fctx = device->imem->ramfc; + const struct nvkm_ramfc_layout *c; + unsigned long flags; + u32 data = chan->ramfc_offset; + u32 chid; + + /* prevent fifo context switches */ + spin_lock_irqsave(&fifo->lock, flags); + nvkm_wr32(device, NV03_PFIFO_CACHES, 0); + + /* if this channel is active, replace it with a null context */ + chid = nvkm_rd32(device, NV03_PFIFO_CACHE1_PUSH1) & fifo->chid->mask; + if (chid == chan->id) { + nvkm_mask(device, NV04_PFIFO_CACHE1_DMA_PUSH, 0x00000001, 0); + nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH0, 0); + nvkm_mask(device, NV04_PFIFO_CACHE1_PULL0, 0x00000001, 0); + + c = chan->func->ramfc->layout; + nvkm_kmap(fctx); + do { + u32 rm = ((1ULL << c->bits) - 1) << c->regs; + u32 cm = ((1ULL << c->bits) - 1) << c->ctxs; + u32 rv = (nvkm_rd32(device, c->regp) & rm) >> c->regs; + u32 cv = (nvkm_ro32(fctx, c->ctxp + data) & ~cm); + nvkm_wo32(fctx, c->ctxp + data, cv | (rv << c->ctxs)); + } while ((++c)->bits); + nvkm_done(fctx); + + c = chan->func->ramfc->layout; + do { + nvkm_wr32(device, c->regp, 0x00000000); + } while ((++c)->bits); + + nvkm_wr32(device, NV03_PFIFO_CACHE1_GET, 0); + nvkm_wr32(device, NV03_PFIFO_CACHE1_PUT, 0); + nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->chid->mask); + nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH0, 1); + nvkm_wr32(device, NV04_PFIFO_CACHE1_PULL0, 1); + } + 
+ /* restore normal operation, after disabling dma mode */ + nvkm_mask(device, NV04_PFIFO_MODE, BIT(chan->id), 0); + nvkm_wr32(device, NV03_PFIFO_CACHES, 1); + spin_unlock_irqrestore(&fifo->lock, flags); +} + +void +nv04_chan_start(struct nvkm_chan *chan) +{ + struct nvkm_fifo *fifo = chan->cgrp->runl->fifo; + unsigned long flags; + + spin_lock_irqsave(&fifo->lock, flags); + nvkm_mask(fifo->engine.subdev.device, NV04_PFIFO_MODE, BIT(chan->id), BIT(chan->id)); + spin_unlock_irqrestore(&fifo->lock, flags); +} + +void +nv04_chan_ramfc_clear(struct nvkm_chan *chan) +{ + struct nvkm_memory *ramfc = chan->cgrp->runl->fifo->engine.subdev.device->imem->ramfc; + const struct nvkm_ramfc_layout *c = chan->func->ramfc->layout; + + nvkm_kmap(ramfc); + do { + nvkm_wo32(ramfc, chan->ramfc_offset + c->ctxp, 0x00000000); + } while ((++c)->bits); + nvkm_done(ramfc); +} + +static int +nv04_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv) +{ + struct nvkm_memory *ramfc = chan->cgrp->runl->fifo->engine.subdev.device->imem->ramfc; + const u32 base = chan->id * 32; + + chan->ramfc_offset = base; + + nvkm_kmap(ramfc); + nvkm_wo32(ramfc, base + 0x00, offset); + nvkm_wo32(ramfc, base + 0x04, offset); + nvkm_wo32(ramfc, base + 0x08, chan->push->addr >> 4); + nvkm_wo32(ramfc, base + 0x10, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES | + NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES | +#ifdef __BIG_ENDIAN + NV_PFIFO_CACHE1_BIG_ENDIAN | +#endif + NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8); + nvkm_done(ramfc); + return 0; +} + +static const struct nvkm_chan_func_ramfc +nv04_chan_ramfc = { + .layout = (const struct nvkm_ramfc_layout[]) { + { 32, 0, 0x00, 0, NV04_PFIFO_CACHE1_DMA_PUT }, + { 32, 0, 0x04, 0, NV04_PFIFO_CACHE1_DMA_GET }, + { 16, 0, 0x08, 0, NV04_PFIFO_CACHE1_DMA_INSTANCE }, + { 16, 16, 0x08, 0, NV04_PFIFO_CACHE1_DMA_DCOUNT }, + { 32, 0, 0x0c, 0, NV04_PFIFO_CACHE1_DMA_STATE }, + { 32, 0, 0x10, 0, NV04_PFIFO_CACHE1_DMA_FETCH }, + { 32, 0, 0x14, 0, NV04_PFIFO_CACHE1_ENGINE }, + { 32, 0, 0x18, 0, NV04_PFIFO_CACHE1_PULL1 }, + {} + }, + .write = nv04_chan_ramfc_write, + .clear = nv04_chan_ramfc_clear, + .ctxdma = true, +}; + +const struct nvkm_chan_func_userd +nv04_chan_userd = { + .bar = 0, + .base = 0x800000, + .size = 0x010000, +}; + +const struct nvkm_chan_func_inst +nv04_chan_inst = { + .size = 0x1000, +}; + +static const struct nvkm_chan_func +nv04_chan = { + .inst = &nv04_chan_inst, + .userd = &nv04_chan_userd, + .ramfc = &nv04_chan_ramfc, + .start = nv04_chan_start, + .stop = nv04_chan_stop, +}; + +const struct nvkm_cgrp_func +nv04_cgrp = { +}; + +void +nv04_eobj_ramht_del(struct nvkm_chan *chan, int hash) +{ + struct nvkm_fifo *fifo = chan->cgrp->runl->fifo; + struct nvkm_instmem *imem = fifo->engine.subdev.device->imem; + + mutex_lock(&fifo->mutex); + nvkm_ramht_remove(imem->ramht, hash); + mutex_unlock(&fifo->mutex); +} + +static int +nv04_eobj_ramht_add(struct nvkm_engn *engn, struct nvkm_object *eobj, struct nvkm_chan *chan) +{ + struct nvkm_fifo *fifo = chan->cgrp->runl->fifo; + struct nvkm_instmem *imem = fifo->engine.subdev.device->imem; + u32 context = 0x80000000 | chan->id << 24 | engn->id << 16; + int hash; + + mutex_lock(&fifo->mutex); + hash = nvkm_ramht_insert(imem->ramht, eobj, chan->id, 4, eobj->handle, context); + mutex_unlock(&fifo->mutex); + return hash; +} + +const struct nvkm_engn_func +nv04_engn = { + .ramht_add = nv04_eobj_ramht_add, + .ramht_del = nv04_eobj_ramht_del, }; void -nv04_fifo_pause(struct nvkm_fifo *base, unsigned long *pflags) 
-__acquires(fifo->base.lock) +nv04_fifo_pause(struct nvkm_fifo *fifo, unsigned long *pflags) +__acquires(fifo->lock) { - struct nv04_fifo *fifo = nv04_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; + struct nvkm_device *device = fifo->engine.subdev.device; unsigned long flags; - spin_lock_irqsave(&fifo->base.lock, flags); + spin_lock_irqsave(&fifo->lock, flags); *pflags = flags; nvkm_wr32(device, NV03_PFIFO_CACHES, 0x00000000); @@ -81,50 +244,21 @@ __acquires(fifo->base.lock) } void -nv04_fifo_start(struct nvkm_fifo *base, unsigned long *pflags) -__releases(fifo->base.lock) +nv04_fifo_start(struct nvkm_fifo *fifo, unsigned long *pflags) +__releases(fifo->lock) { - struct nv04_fifo *fifo = nv04_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; + struct nvkm_device *device = fifo->engine.subdev.device; unsigned long flags = *pflags; nvkm_mask(device, NV04_PFIFO_CACHE1_PULL0, 0x00000001, 0x00000001); nvkm_wr32(device, NV03_PFIFO_CACHES, 0x00000001); - spin_unlock_irqrestore(&fifo->base.lock, flags); + spin_unlock_irqrestore(&fifo->lock, flags); } -struct nvkm_engine * -nv04_fifo_id_engine(struct nvkm_fifo *fifo, int engi) -{ - enum nvkm_subdev_type type; - - switch (engi) { - case NV04_FIFO_ENGN_SW : type = NVKM_ENGINE_SW; break; - case NV04_FIFO_ENGN_GR : type = NVKM_ENGINE_GR; break; - case NV04_FIFO_ENGN_MPEG: type = NVKM_ENGINE_MPEG; break; - case NV04_FIFO_ENGN_DMA : type = NVKM_ENGINE_DMAOBJ; break; - default: - WARN_ON(1); - return NULL; - } - - return nvkm_device_engine(fifo->engine.subdev.device, type, 0); -} - -int -nv04_fifo_engine_id(struct nvkm_fifo *base, struct nvkm_engine *engine) -{ - switch (engine->subdev.type) { - case NVKM_ENGINE_SW : return NV04_FIFO_ENGN_SW; - case NVKM_ENGINE_GR : return NV04_FIFO_ENGN_GR; - case NVKM_ENGINE_MPEG : return NV04_FIFO_ENGN_MPEG; - case NVKM_ENGINE_DMAOBJ: return NV04_FIFO_ENGN_DMA; - default: - WARN_ON(1); - return 0; - } -} +const struct nvkm_runl_func +nv04_runl = { +}; static const char * nv_dma_state_err(u32 state) @@ -166,11 +300,11 @@ nv04_fifo_swmthd(struct nvkm_device *device, u32 chid, u32 addr, u32 data) } static void -nv04_fifo_cache_error(struct nv04_fifo *fifo, u32 chid, u32 get) +nv04_fifo_intr_cache_error(struct nvkm_fifo *fifo, u32 chid, u32 get) { - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; + struct nvkm_subdev *subdev = &fifo->engine.subdev; struct nvkm_device *device = subdev->device; - struct nvkm_fifo_chan *chan; + struct nvkm_chan *chan; unsigned long flags; u32 pull0 = nvkm_rd32(device, 0x003250); u32 mthd, data; @@ -193,12 +327,12 @@ nv04_fifo_cache_error(struct nv04_fifo *fifo, u32 chid, u32 get) if (!(pull0 & 0x00000100) || !nv04_fifo_swmthd(device, chid, mthd, data)) { - chan = nvkm_fifo_chan_chid(&fifo->base, chid, &flags); + chan = nvkm_chan_get_chid(&fifo->engine, chid, &flags); nvkm_error(subdev, "CACHE_ERROR - " "ch %d [%s] subc %d mthd %04x data %08x\n", - chid, chan ? chan->object.client->name : "unknown", + chid, chan ? 
chan->name : "unknown", (mthd >> 13) & 7, mthd & 0x1ffc, data); - nvkm_fifo_chan_put(&fifo->base, flags, &chan); + nvkm_chan_put(&chan, flags); } nvkm_wr32(device, NV04_PFIFO_CACHE1_DMA_PUSH, 0); @@ -217,20 +351,20 @@ nv04_fifo_cache_error(struct nv04_fifo *fifo, u32 chid, u32 get) } static void -nv04_fifo_dma_pusher(struct nv04_fifo *fifo, u32 chid) +nv04_fifo_intr_dma_pusher(struct nvkm_fifo *fifo, u32 chid) { - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; + struct nvkm_subdev *subdev = &fifo->engine.subdev; struct nvkm_device *device = subdev->device; u32 dma_get = nvkm_rd32(device, 0x003244); u32 dma_put = nvkm_rd32(device, 0x003240); u32 push = nvkm_rd32(device, 0x003220); u32 state = nvkm_rd32(device, 0x003228); - struct nvkm_fifo_chan *chan; + struct nvkm_chan *chan; unsigned long flags; const char *name; - chan = nvkm_fifo_chan_chid(&fifo->base, chid, &flags); - name = chan ? chan->object.client->name : "unknown"; + chan = nvkm_chan_get_chid(&fifo->engine, chid, &flags); + name = chan ? chan->name : "unknown"; if (device->card_type == NV_50) { u32 ho_get = nvkm_rd32(device, 0x003328); u32 ho_put = nvkm_rd32(device, 0x003320); @@ -261,18 +395,18 @@ nv04_fifo_dma_pusher(struct nv04_fifo *fifo, u32 chid) if (dma_get != dma_put) nvkm_wr32(device, 0x003244, dma_put); } - nvkm_fifo_chan_put(&fifo->base, flags, &chan); + nvkm_chan_put(&chan, flags); nvkm_wr32(device, 0x003228, 0x00000000); nvkm_wr32(device, 0x003220, 0x00000001); nvkm_wr32(device, 0x002100, NV_PFIFO_INTR_DMA_PUSHER); } -void -nv04_fifo_intr(struct nvkm_fifo *base) +irqreturn_t +nv04_fifo_intr(struct nvkm_inth *inth) { - struct nv04_fifo *fifo = nv04_fifo(base); - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; + struct nvkm_fifo *fifo = container_of(inth, typeof(*fifo), engine.subdev.inth); + struct nvkm_subdev *subdev = &fifo->engine.subdev; struct nvkm_device *device = subdev->device; u32 mask = nvkm_rd32(device, NV03_PFIFO_INTR_EN_0); u32 stat = nvkm_rd32(device, NV03_PFIFO_INTR_0) & mask; @@ -281,16 +415,16 @@ nv04_fifo_intr(struct nvkm_fifo *base) reassign = nvkm_rd32(device, NV03_PFIFO_CACHES) & 1; nvkm_wr32(device, NV03_PFIFO_CACHES, 0); - chid = nvkm_rd32(device, NV03_PFIFO_CACHE1_PUSH1) & (fifo->base.nr - 1); + chid = nvkm_rd32(device, NV03_PFIFO_CACHE1_PUSH1) & fifo->chid->mask; get = nvkm_rd32(device, NV03_PFIFO_CACHE1_GET); if (stat & NV_PFIFO_INTR_CACHE_ERROR) { - nv04_fifo_cache_error(fifo, chid, get); + nv04_fifo_intr_cache_error(fifo, chid, get); stat &= ~NV_PFIFO_INTR_CACHE_ERROR; } if (stat & NV_PFIFO_INTR_DMA_PUSHER) { - nv04_fifo_dma_pusher(fifo, chid); + nv04_fifo_intr_dma_pusher(fifo, chid); stat &= ~NV_PFIFO_INTR_DMA_PUSHER; } @@ -313,7 +447,7 @@ nv04_fifo_intr(struct nvkm_fifo *base) if (stat & 0x40000000) { nvkm_wr32(device, 0x002100, 0x40000000); - nvkm_fifo_uevent(&fifo->base); + nvkm_event_ntfy(&fifo->nonstall.event, 0, NVKM_FIFO_NONSTALL_EVENT); stat &= ~0x40000000; } } @@ -325,13 +459,13 @@ nv04_fifo_intr(struct nvkm_fifo *base) } nvkm_wr32(device, NV03_PFIFO_CACHES, reassign); + return IRQ_HANDLED; } void -nv04_fifo_init(struct nvkm_fifo *base) +nv04_fifo_init(struct nvkm_fifo *fifo) { - struct nv04_fifo *fifo = nv04_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; + struct nvkm_device *device = fifo->engine.subdev.device; struct nvkm_instmem *imem = device->imem; struct nvkm_ramht *ramht = imem->ramht; struct nvkm_memory *ramro = imem->ramro; @@ -346,7 +480,7 @@ nv04_fifo_init(struct nvkm_fifo *base) nvkm_wr32(device, NV03_PFIFO_RAMRO, 
nvkm_memory_addr(ramro) >> 8); nvkm_wr32(device, NV03_PFIFO_RAMFC, nvkm_memory_addr(ramfc) >> 8); - nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->base.nr - 1); + nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->chid->mask); nvkm_wr32(device, NV03_PFIFO_INTR_0, 0xffffffff); nvkm_wr32(device, NV03_PFIFO_INTR_EN_0, 0xffffffff); @@ -357,43 +491,53 @@ nv04_fifo_init(struct nvkm_fifo *base) } int -nv04_fifo_new_(const struct nvkm_fifo_func *func, struct nvkm_device *device, - enum nvkm_subdev_type type, int inst, int nr, const struct nv04_fifo_ramfc *ramfc, - struct nvkm_fifo **pfifo) +nv04_fifo_runl_ctor(struct nvkm_fifo *fifo) { - struct nv04_fifo *fifo; - int ret; - - if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL))) - return -ENOMEM; - fifo->ramfc = ramfc; - *pfifo = &fifo->base; + struct nvkm_runl *runl; - ret = nvkm_fifo_ctor(func, device, type, inst, nr, &fifo->base); - if (ret) - return ret; + runl = nvkm_runl_new(fifo, 0, 0, 0); + if (IS_ERR(runl)) + return PTR_ERR(runl); - set_bit(nr - 1, fifo->base.mask); /* inactive channel */ + nvkm_runl_add(runl, 0, fifo->func->engn_sw, NVKM_ENGINE_SW, 0); + nvkm_runl_add(runl, 0, fifo->func->engn_sw, NVKM_ENGINE_DMAOBJ, 0); + nvkm_runl_add(runl, 1, fifo->func->engn , NVKM_ENGINE_GR, 0); + nvkm_runl_add(runl, 2, fifo->func->engn , NVKM_ENGINE_MPEG, 0); /* NV31- */ return 0; } +int +nv04_fifo_chid_ctor(struct nvkm_fifo *fifo, int nr) +{ + /* The last CHID is reserved by HW as a "channel invalid" marker. */ + return nvkm_chid_new(&nvkm_chan_event, &fifo->engine.subdev, nr, 0, nr - 1, &fifo->chid); +} + +static int +nv04_fifo_chid_nr(struct nvkm_fifo *fifo) +{ + return 16; +} + static const struct nvkm_fifo_func nv04_fifo = { + .chid_nr = nv04_fifo_chid_nr, + .chid_ctor = nv04_fifo_chid_ctor, + .runl_ctor = nv04_fifo_runl_ctor, .init = nv04_fifo_init, .intr = nv04_fifo_intr, - .engine_id = nv04_fifo_engine_id, - .id_engine = nv04_fifo_id_engine, .pause = nv04_fifo_pause, .start = nv04_fifo_start, - .chan = { - &nv04_fifo_dma_oclass, - NULL - }, + .runl = &nv04_runl, + .engn = &nv04_engn, + .engn_sw = &nv04_engn, + .cgrp = {{ }, &nv04_cgrp }, + .chan = {{ 0, 0, NV03_CHANNEL_DMA }, &nv04_chan }, }; int nv04_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return nv04_fifo_new_(&nv04_fifo, device, type, inst, 16, nv04_fifo_ramfc, pfifo); + return nvkm_fifo_new_(&nv04_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.h deleted file mode 100644 index 3f23bcde4a54..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -#ifndef __NV04_FIFO_H__ -#define __NV04_FIFO_H__ -#define nv04_fifo(p) container_of((p), struct nv04_fifo, base) -#include "priv.h" - -struct nv04_fifo_ramfc { - unsigned bits:6; - unsigned ctxs:5; - unsigned ctxp:8; - unsigned regs:5; - unsigned regp; -}; - -struct nv04_fifo { - struct nvkm_fifo base; - const struct nv04_fifo_ramfc *ramfc; -}; - -int nv04_fifo_new_(const struct nvkm_fifo_func *, struct nvkm_device *, enum nvkm_subdev_type, int, - int nr, const struct nv04_fifo_ramfc *, struct nvkm_fifo **); -void nv04_fifo_init(struct nvkm_fifo *); -#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv10.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv10.c index f8887f0f2f82..a4bcf6b0a7e2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv10.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv10.c 
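A brief aside on the chid_ctor contract introduced above: nv04_fifo_chid_ctor() hands nr to nvkm_chid_new() but exposes only nr - 1 allocatable IDs, because the hardware uses the top CHID as its "channel invalid" marker (the value CACHE1_PUSH1 is parked on when nothing is active). A minimal, self-contained sketch of the arithmetic, using the nv04 value of nr = 16 (illustrative only, not driver code):

#include <stdio.h>

int main(void)
{
	const int nr = 16;		/* nv04_fifo_chid_nr() */
	const int first = 0;		/* first ID handed to nvkm_chid_new() */
	const int count = nr - 1;	/* allocatable IDs */

	printf("usable CHIDs %d..%d, invalid marker %d\n",
	       first, first + count - 1, nr - 1);	/* 0..14, 15 */
	return 0;
}

The nv50 variant later in this patch tightens the same idea from both ends, reserving CHID 0 (the PIO channel quirk) as well as 127 (the invalid marker) by passing first = 1, count = nr - 2.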
@@ -21,41 +21,93 @@ * * Authors: Ben Skeggs */ -#include "nv04.h" -#include "channv04.h" +#include "priv.h" +#include "cgrp.h" +#include "chan.h" +#include "runl.h" + +#include <core/gpuobj.h> +#include <subdev/instmem.h> + #include "regsnv04.h" -static const struct nv04_fifo_ramfc -nv10_fifo_ramfc[] = { - { 32, 0, 0x00, 0, NV04_PFIFO_CACHE1_DMA_PUT }, - { 32, 0, 0x04, 0, NV04_PFIFO_CACHE1_DMA_GET }, - { 32, 0, 0x08, 0, NV10_PFIFO_CACHE1_REF_CNT }, - { 16, 0, 0x0c, 0, NV04_PFIFO_CACHE1_DMA_INSTANCE }, - { 16, 16, 0x0c, 0, NV04_PFIFO_CACHE1_DMA_DCOUNT }, - { 32, 0, 0x10, 0, NV04_PFIFO_CACHE1_DMA_STATE }, - { 32, 0, 0x14, 0, NV04_PFIFO_CACHE1_DMA_FETCH }, - { 32, 0, 0x18, 0, NV04_PFIFO_CACHE1_ENGINE }, - { 32, 0, 0x1c, 0, NV04_PFIFO_CACHE1_PULL1 }, - {} +#include <nvif/class.h> + +static int +nv10_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv) +{ + struct nvkm_memory *ramfc = chan->cgrp->runl->fifo->engine.subdev.device->imem->ramfc; + const u32 base = chan->id * 32; + + chan->ramfc_offset = base; + + nvkm_kmap(ramfc); + nvkm_wo32(ramfc, base + 0x00, offset); + nvkm_wo32(ramfc, base + 0x04, offset); + nvkm_wo32(ramfc, base + 0x0c, chan->push->addr >> 4); + nvkm_wo32(ramfc, base + 0x14, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES | + NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES | +#ifdef __BIG_ENDIAN + NV_PFIFO_CACHE1_BIG_ENDIAN | +#endif + NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8); + nvkm_done(ramfc); + return 0; +} + +static const struct nvkm_chan_func_ramfc +nv10_chan_ramfc = { + .layout = (const struct nvkm_ramfc_layout[]) { + { 32, 0, 0x00, 0, NV04_PFIFO_CACHE1_DMA_PUT }, + { 32, 0, 0x04, 0, NV04_PFIFO_CACHE1_DMA_GET }, + { 32, 0, 0x08, 0, NV10_PFIFO_CACHE1_REF_CNT }, + { 16, 0, 0x0c, 0, NV04_PFIFO_CACHE1_DMA_INSTANCE }, + { 16, 16, 0x0c, 0, NV04_PFIFO_CACHE1_DMA_DCOUNT }, + { 32, 0, 0x10, 0, NV04_PFIFO_CACHE1_DMA_STATE }, + { 32, 0, 0x14, 0, NV04_PFIFO_CACHE1_DMA_FETCH }, + { 32, 0, 0x18, 0, NV04_PFIFO_CACHE1_ENGINE }, + { 32, 0, 0x1c, 0, NV04_PFIFO_CACHE1_PULL1 }, + {} + }, + .write = nv10_chan_ramfc_write, + .clear = nv04_chan_ramfc_clear, + .ctxdma = true, +}; + +static const struct nvkm_chan_func +nv10_chan = { + .inst = &nv04_chan_inst, + .userd = &nv04_chan_userd, + .ramfc = &nv10_chan_ramfc, + .start = nv04_chan_start, + .stop = nv04_chan_stop, }; +int +nv10_fifo_chid_nr(struct nvkm_fifo *fifo) +{ + return 32; +} + static const struct nvkm_fifo_func nv10_fifo = { + .chid_nr = nv10_fifo_chid_nr, + .chid_ctor = nv04_fifo_chid_ctor, + .runl_ctor = nv04_fifo_runl_ctor, .init = nv04_fifo_init, .intr = nv04_fifo_intr, - .engine_id = nv04_fifo_engine_id, - .id_engine = nv04_fifo_id_engine, .pause = nv04_fifo_pause, .start = nv04_fifo_start, - .chan = { - &nv10_fifo_dma_oclass, - NULL - }, + .runl = &nv04_runl, + .engn = &nv04_engn, + .engn_sw = &nv04_engn, + .cgrp = {{ }, &nv04_cgrp }, + .chan = {{ 0, 0, NV10_CHANNEL_DMA }, &nv10_chan }, }; int nv10_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return nv04_fifo_new_(&nv10_fifo, device, type, inst, 32, nv10_fifo_ramfc, pfifo); + return nvkm_fifo_new_(&nv10_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv17.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv17.c index 3f94c7b5b054..c70f44fd4f3b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv17.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv17.c @@ -21,37 +21,78 @@ * * Authors: Ben Skeggs */ -#include "nv04.h" -#include "channv04.h" +#include "priv.h" 
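+/*
+ * Note that nv17 doubles the per-channel RAMFC record to 64 bytes (nv04 and
+ * nv10 used 32) to fit the acquire/semaphore/subroutine state added to the
+ * layout table below.
+ */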
+#include "cgrp.h" +#include "chan.h" +#include "chid.h" +#include "runl.h" + #include "regsnv04.h" #include <core/ramht.h> #include <subdev/instmem.h> -static const struct nv04_fifo_ramfc -nv17_fifo_ramfc[] = { - { 32, 0, 0x00, 0, NV04_PFIFO_CACHE1_DMA_PUT }, - { 32, 0, 0x04, 0, NV04_PFIFO_CACHE1_DMA_GET }, - { 32, 0, 0x08, 0, NV10_PFIFO_CACHE1_REF_CNT }, - { 16, 0, 0x0c, 0, NV04_PFIFO_CACHE1_DMA_INSTANCE }, - { 16, 16, 0x0c, 0, NV04_PFIFO_CACHE1_DMA_DCOUNT }, - { 32, 0, 0x10, 0, NV04_PFIFO_CACHE1_DMA_STATE }, - { 32, 0, 0x14, 0, NV04_PFIFO_CACHE1_DMA_FETCH }, - { 32, 0, 0x18, 0, NV04_PFIFO_CACHE1_ENGINE }, - { 32, 0, 0x1c, 0, NV04_PFIFO_CACHE1_PULL1 }, - { 32, 0, 0x20, 0, NV10_PFIFO_CACHE1_ACQUIRE_VALUE }, - { 32, 0, 0x24, 0, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP }, - { 32, 0, 0x28, 0, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT }, - { 32, 0, 0x2c, 0, NV10_PFIFO_CACHE1_SEMAPHORE }, - { 32, 0, 0x30, 0, NV10_PFIFO_CACHE1_DMA_SUBROUTINE }, - {} +#include <nvif/class.h> + +static int +nv17_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv) +{ + struct nvkm_memory *ramfc = chan->cgrp->runl->fifo->engine.subdev.device->imem->ramfc; + const u32 base = chan->id * 64; + + chan->ramfc_offset = base; + + nvkm_kmap(ramfc); + nvkm_wo32(ramfc, base + 0x00, offset); + nvkm_wo32(ramfc, base + 0x04, offset); + nvkm_wo32(ramfc, base + 0x0c, chan->push->addr >> 4); + nvkm_wo32(ramfc, base + 0x14, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES | + NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES | +#ifdef __BIG_ENDIAN + NV_PFIFO_CACHE1_BIG_ENDIAN | +#endif + NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8); + nvkm_done(ramfc); + return 0; +} + +static const struct nvkm_chan_func_ramfc +nv17_chan_ramfc = { + .layout = (const struct nvkm_ramfc_layout[]) { + { 32, 0, 0x00, 0, NV04_PFIFO_CACHE1_DMA_PUT }, + { 32, 0, 0x04, 0, NV04_PFIFO_CACHE1_DMA_GET }, + { 32, 0, 0x08, 0, NV10_PFIFO_CACHE1_REF_CNT }, + { 16, 0, 0x0c, 0, NV04_PFIFO_CACHE1_DMA_INSTANCE }, + { 16, 16, 0x0c, 0, NV04_PFIFO_CACHE1_DMA_DCOUNT }, + { 32, 0, 0x10, 0, NV04_PFIFO_CACHE1_DMA_STATE }, + { 32, 0, 0x14, 0, NV04_PFIFO_CACHE1_DMA_FETCH }, + { 32, 0, 0x18, 0, NV04_PFIFO_CACHE1_ENGINE }, + { 32, 0, 0x1c, 0, NV04_PFIFO_CACHE1_PULL1 }, + { 32, 0, 0x20, 0, NV10_PFIFO_CACHE1_ACQUIRE_VALUE }, + { 32, 0, 0x24, 0, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP }, + { 32, 0, 0x28, 0, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT }, + { 32, 0, 0x2c, 0, NV10_PFIFO_CACHE1_SEMAPHORE }, + { 32, 0, 0x30, 0, NV10_PFIFO_CACHE1_DMA_SUBROUTINE }, + {} + }, + .write = nv17_chan_ramfc_write, + .clear = nv04_chan_ramfc_clear, + .ctxdma = true, +}; + +static const struct nvkm_chan_func +nv17_chan = { + .inst = &nv04_chan_inst, + .userd = &nv04_chan_userd, + .ramfc = &nv17_chan_ramfc, + .start = nv04_chan_start, + .stop = nv04_chan_stop, }; static void -nv17_fifo_init(struct nvkm_fifo *base) +nv17_fifo_init(struct nvkm_fifo *fifo) { - struct nv04_fifo *fifo = nv04_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; + struct nvkm_device *device = fifo->engine.subdev.device; struct nvkm_instmem *imem = device->imem; struct nvkm_ramht *ramht = imem->ramht; struct nvkm_memory *ramro = imem->ramro; @@ -67,7 +108,7 @@ nv17_fifo_init(struct nvkm_fifo *base) nvkm_wr32(device, NV03_PFIFO_RAMFC, nvkm_memory_addr(ramfc) >> 8 | 0x00010000); - nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->base.nr - 1); + nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->chid->mask); nvkm_wr32(device, NV03_PFIFO_INTR_0, 0xffffffff); nvkm_wr32(device, NV03_PFIFO_INTR_EN_0, 0xffffffff); @@ -79,21 
+120,23 @@ nv17_fifo_init(struct nvkm_fifo *base) static const struct nvkm_fifo_func nv17_fifo = { + .chid_nr = nv10_fifo_chid_nr, + .chid_ctor = nv04_fifo_chid_ctor, + .runl_ctor = nv04_fifo_runl_ctor, .init = nv17_fifo_init, .intr = nv04_fifo_intr, - .engine_id = nv04_fifo_engine_id, - .id_engine = nv04_fifo_id_engine, .pause = nv04_fifo_pause, .start = nv04_fifo_start, - .chan = { - &nv17_fifo_dma_oclass, - NULL - }, + .runl = &nv04_runl, + .engn = &nv04_engn, + .engn_sw = &nv04_engn, + .cgrp = {{ }, &nv04_cgrp }, + .chan = {{ 0, 0, NV17_CHANNEL_DMA }, &nv17_chan }, }; int nv17_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return nv04_fifo_new_(&nv17_fifo, device, type, inst, 32, nv17_fifo_ramfc, pfifo); + return nvkm_fifo_new_(&nv17_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv40.c index f9ea46809bc0..e50a94b6d7f8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv40.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv40.c @@ -21,46 +21,166 @@ * * Authors: Ben Skeggs */ -#include "nv04.h" -#include "channv04.h" +#include "priv.h" +#include "cgrp.h" +#include "chan.h" +#include "chid.h" +#include "runl.h" + #include "regsnv04.h" #include <core/ramht.h> #include <subdev/fb.h> #include <subdev/instmem.h> -static const struct nv04_fifo_ramfc -nv40_fifo_ramfc[] = { - { 32, 0, 0x00, 0, NV04_PFIFO_CACHE1_DMA_PUT }, - { 32, 0, 0x04, 0, NV04_PFIFO_CACHE1_DMA_GET }, - { 32, 0, 0x08, 0, NV10_PFIFO_CACHE1_REF_CNT }, - { 32, 0, 0x0c, 0, NV04_PFIFO_CACHE1_DMA_INSTANCE }, - { 32, 0, 0x10, 0, NV04_PFIFO_CACHE1_DMA_DCOUNT }, - { 32, 0, 0x14, 0, NV04_PFIFO_CACHE1_DMA_STATE }, - { 28, 0, 0x18, 0, NV04_PFIFO_CACHE1_DMA_FETCH }, - { 2, 28, 0x18, 28, 0x002058 }, - { 32, 0, 0x1c, 0, NV04_PFIFO_CACHE1_ENGINE }, - { 32, 0, 0x20, 0, NV04_PFIFO_CACHE1_PULL1 }, - { 32, 0, 0x24, 0, NV10_PFIFO_CACHE1_ACQUIRE_VALUE }, - { 32, 0, 0x28, 0, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP }, - { 32, 0, 0x2c, 0, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT }, - { 32, 0, 0x30, 0, NV10_PFIFO_CACHE1_SEMAPHORE }, - { 32, 0, 0x34, 0, NV10_PFIFO_CACHE1_DMA_SUBROUTINE }, - { 32, 0, 0x38, 0, NV40_PFIFO_GRCTX_INSTANCE }, - { 17, 0, 0x3c, 0, NV04_PFIFO_DMA_TIMESLICE }, - { 32, 0, 0x40, 0, 0x0032e4 }, - { 32, 0, 0x44, 0, 0x0032e8 }, - { 32, 0, 0x4c, 0, 0x002088 }, - { 32, 0, 0x50, 0, 0x003300 }, - { 32, 0, 0x54, 0, 0x00330c }, - {} +#include <nvif/class.h> + +static int +nv40_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv) +{ + struct nvkm_memory *ramfc = chan->cgrp->runl->fifo->engine.subdev.device->imem->ramfc; + const u32 base = chan->id * 128; + + chan->ramfc_offset = base; + + nvkm_kmap(ramfc); + nvkm_wo32(ramfc, base + 0x00, offset); + nvkm_wo32(ramfc, base + 0x04, offset); + nvkm_wo32(ramfc, base + 0x0c, chan->push->addr >> 4); + nvkm_wo32(ramfc, base + 0x18, 0x30000000 | + NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES | + NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES | +#ifdef __BIG_ENDIAN + NV_PFIFO_CACHE1_BIG_ENDIAN | +#endif + NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8); + nvkm_wo32(ramfc, base + 0x3c, 0x0001ffff); + nvkm_done(ramfc); + return 0; +} + +static const struct nvkm_chan_func_ramfc +nv40_chan_ramfc = { + .layout = (const struct nvkm_ramfc_layout[]) { + { 32, 0, 0x00, 0, NV04_PFIFO_CACHE1_DMA_PUT }, + { 32, 0, 0x04, 0, NV04_PFIFO_CACHE1_DMA_GET }, + { 32, 0, 0x08, 0, NV10_PFIFO_CACHE1_REF_CNT }, + { 32, 0, 0x0c, 0, NV04_PFIFO_CACHE1_DMA_INSTANCE }, + { 
32, 0, 0x10, 0, NV04_PFIFO_CACHE1_DMA_DCOUNT }, + { 32, 0, 0x14, 0, NV04_PFIFO_CACHE1_DMA_STATE }, + { 28, 0, 0x18, 0, NV04_PFIFO_CACHE1_DMA_FETCH }, + { 2, 28, 0x18, 28, 0x002058 }, + { 32, 0, 0x1c, 0, NV04_PFIFO_CACHE1_ENGINE }, + { 32, 0, 0x20, 0, NV04_PFIFO_CACHE1_PULL1 }, + { 32, 0, 0x24, 0, NV10_PFIFO_CACHE1_ACQUIRE_VALUE }, + { 32, 0, 0x28, 0, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP }, + { 32, 0, 0x2c, 0, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT }, + { 32, 0, 0x30, 0, NV10_PFIFO_CACHE1_SEMAPHORE }, + { 32, 0, 0x34, 0, NV10_PFIFO_CACHE1_DMA_SUBROUTINE }, + { 32, 0, 0x38, 0, NV40_PFIFO_GRCTX_INSTANCE }, + { 17, 0, 0x3c, 0, NV04_PFIFO_DMA_TIMESLICE }, + { 32, 0, 0x40, 0, 0x0032e4 }, + { 32, 0, 0x44, 0, 0x0032e8 }, + { 32, 0, 0x4c, 0, 0x002088 }, + { 32, 0, 0x50, 0, 0x003300 }, + { 32, 0, 0x54, 0, 0x00330c }, + {} + }, + .write = nv40_chan_ramfc_write, + .clear = nv04_chan_ramfc_clear, + .ctxdma = true, +}; + +static const struct nvkm_chan_func_userd +nv40_chan_userd = { + .bar = 0, + .base = 0xc00000, + .size = 0x001000, +}; + +static const struct nvkm_chan_func +nv40_chan = { + .inst = &nv04_chan_inst, + .userd = &nv40_chan_userd, + .ramfc = &nv40_chan_ramfc, + .start = nv04_chan_start, + .stop = nv04_chan_stop, }; +static int +nv40_eobj_ramht_add(struct nvkm_engn *engn, struct nvkm_object *eobj, struct nvkm_chan *chan) +{ + struct nvkm_fifo *fifo = chan->cgrp->runl->fifo; + struct nvkm_instmem *imem = fifo->engine.subdev.device->imem; + u32 context = chan->id << 23 | engn->id << 20; + int hash; + + mutex_lock(&fifo->mutex); + hash = nvkm_ramht_insert(imem->ramht, eobj, chan->id, 4, eobj->handle, context); + mutex_unlock(&fifo->mutex); + return hash; +} + static void -nv40_fifo_init(struct nvkm_fifo *base) +nv40_ectx_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan) { - struct nv04_fifo *fifo = nv04_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; + struct nvkm_fifo *fifo = chan->cgrp->runl->fifo; + struct nvkm_device *device = fifo->engine.subdev.device; + struct nvkm_memory *ramfc = device->imem->ramfc; + u32 inst = 0x00000000, reg, ctx; + int chid; + + switch (engn->engine->subdev.type) { + case NVKM_ENGINE_GR: + reg = 0x0032e0; + ctx = 0x38; + break; + case NVKM_ENGINE_MPEG: + if (WARN_ON(device->chipset < 0x44)) + return; + reg = 0x00330c; + ctx = 0x54; + break; + default: + WARN_ON(1); + return; + } + + if (cctx) + inst = cctx->vctx->inst->addr >> 4; + + spin_lock_irq(&fifo->lock); + nvkm_mask(device, 0x002500, 0x00000001, 0x00000000); + + chid = nvkm_rd32(device, 0x003204) & (fifo->chid->nr - 1); + if (chid == chan->id) + nvkm_wr32(device, reg, inst); + + nvkm_kmap(ramfc); + nvkm_wo32(ramfc, chan->ramfc_offset + ctx, inst); + nvkm_done(ramfc); + + nvkm_mask(device, 0x002500, 0x00000001, 0x00000001); + spin_unlock_irq(&fifo->lock); +} + +static const struct nvkm_engn_func +nv40_engn = { + .bind = nv40_ectx_bind, + .ramht_add = nv40_eobj_ramht_add, + .ramht_del = nv04_eobj_ramht_del, +}; + +static const struct nvkm_engn_func +nv40_engn_sw = { + .ramht_add = nv40_eobj_ramht_add, + .ramht_del = nv04_eobj_ramht_del, +}; + +static void +nv40_fifo_init(struct nvkm_fifo *fifo) +{ + struct nvkm_device *device = fifo->engine.subdev.device; struct nvkm_fb *fb = device->fb; struct nvkm_instmem *imem = device->imem; struct nvkm_ramht *ramht = imem->ramht; @@ -98,7 +218,7 @@ nv40_fifo_init(struct nvkm_fifo *base) break; } - nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->base.nr - 1); + nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->chid->mask); 
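+	/* As on earlier chips, PUSH1 is parked on the reserved top CHID
+	 * (fifo->chid->mask, i.e. "no channel active") before the interrupt
+	 * state is cleared and re-enabled below.
+	 */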
nvkm_wr32(device, NV03_PFIFO_INTR_0, 0xffffffff); nvkm_wr32(device, NV03_PFIFO_INTR_EN_0, 0xffffffff); @@ -110,21 +230,23 @@ nv40_fifo_init(struct nvkm_fifo *base) static const struct nvkm_fifo_func nv40_fifo = { + .chid_nr = nv10_fifo_chid_nr, + .chid_ctor = nv04_fifo_chid_ctor, + .runl_ctor = nv04_fifo_runl_ctor, .init = nv40_fifo_init, .intr = nv04_fifo_intr, - .engine_id = nv04_fifo_engine_id, - .id_engine = nv04_fifo_id_engine, .pause = nv04_fifo_pause, .start = nv04_fifo_start, - .chan = { - &nv40_fifo_dma_oclass, - NULL - }, + .runl = &nv04_runl, + .engn = &nv40_engn, + .engn_sw = &nv40_engn_sw, + .cgrp = {{ }, &nv04_cgrp }, + .chan = {{ 0, 0, NV40_CHANNEL_DMA }, &nv40_chan }, }; int nv40_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return nv04_fifo_new_(&nv40_fifo, device, type, inst, 32, nv40_fifo_ramfc, pfifo); + return nvkm_fifo_new_(&nv40_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c index a08742cf425a..954b5f3a7d57 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c @@ -21,62 +21,325 @@ * * Authors: Ben Skeggs */ -#include "nv50.h" -#include "channv50.h" +#include "priv.h" +#include "cgrp.h" +#include "chan.h" +#include "chid.h" +#include "runl.h" -#include <core/gpuobj.h> +#include <core/ramht.h> +#include <subdev/timer.h> -static void -nv50_fifo_runlist_update_locked(struct nv50_fifo *fifo) +#include <nvif/class.h> + +void +nv50_eobj_ramht_del(struct nvkm_chan *chan, int hash) { - struct nvkm_device *device = fifo->base.engine.subdev.device; - struct nvkm_memory *cur; - int i, p; + nvkm_ramht_remove(chan->ramht, hash); +} - cur = fifo->runlist[fifo->cur_runlist]; - fifo->cur_runlist = !fifo->cur_runlist; +int +nv50_eobj_ramht_add(struct nvkm_engn *engn, struct nvkm_object *eobj, struct nvkm_chan *chan) +{ + return nvkm_ramht_insert(chan->ramht, eobj, 0, 4, eobj->handle, engn->id << 20); +} - nvkm_kmap(cur); - for (i = 0, p = 0; i < fifo->base.nr; i++) { - if (nvkm_rd32(device, 0x002600 + (i * 4)) & 0x80000000) - nvkm_wo32(cur, p++ * 4, i); - } - nvkm_done(cur); +void +nv50_chan_stop(struct nvkm_chan *chan) +{ + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; - nvkm_wr32(device, 0x0032f4, nvkm_memory_addr(cur) >> 12); - nvkm_wr32(device, 0x0032ec, p); - nvkm_wr32(device, 0x002500, 0x00000101); + nvkm_mask(device, 0x002600 + (chan->id * 4), 0x80000000, 0x00000000); } void -nv50_fifo_runlist_update(struct nv50_fifo *fifo) +nv50_chan_start(struct nvkm_chan *chan) { - mutex_lock(&fifo->base.mutex); - nv50_fifo_runlist_update_locked(fifo); - mutex_unlock(&fifo->base.mutex); + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; + + nvkm_mask(device, 0x002600 + (chan->id * 4), 0x80000000, 0x80000000); } -int -nv50_fifo_oneinit(struct nvkm_fifo *base) +void +nv50_chan_unbind(struct nvkm_chan *chan) { - struct nv50_fifo *fifo = nv50_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; + + nvkm_wr32(device, 0x002600 + (chan->id * 4), 0x00000000); +} + +static void +nv50_chan_bind(struct nvkm_chan *chan) +{ + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; + + nvkm_wr32(device, 0x002600 + (chan->id * 4), chan->ramfc->addr >> 12); +} + +static int +nv50_chan_ramfc_write(struct nvkm_chan *chan, 
u64 offset, u64 length, u32 devm, bool priv) +{ + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; + const u32 limit2 = ilog2(length / 8); int ret; - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 128 * 4, 0x1000, - false, &fifo->runlist[0]); + ret = nvkm_gpuobj_new(device, 0x0200, 0x1000, true, chan->inst, &chan->ramfc); + if (ret) + return ret; + + ret = nvkm_gpuobj_new(device, 0x1200, 0, true, chan->inst, &chan->eng); if (ret) return ret; - return nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 128 * 4, 0x1000, - false, &fifo->runlist[1]); + ret = nvkm_gpuobj_new(device, 0x4000, 0, false, chan->inst, &chan->pgd); + if (ret) + return ret; + + ret = nvkm_ramht_new(device, 0x8000, 16, chan->inst, &chan->ramht); + if (ret) + return ret; + + nvkm_kmap(chan->ramfc); + nvkm_wo32(chan->ramfc, 0x3c, 0x403f6078); + nvkm_wo32(chan->ramfc, 0x44, 0x01003fff); + nvkm_wo32(chan->ramfc, 0x48, chan->push->node->offset >> 4); + nvkm_wo32(chan->ramfc, 0x50, lower_32_bits(offset)); + nvkm_wo32(chan->ramfc, 0x54, upper_32_bits(offset) | (limit2 << 16)); + nvkm_wo32(chan->ramfc, 0x60, 0x7fffffff); + nvkm_wo32(chan->ramfc, 0x78, 0x00000000); + nvkm_wo32(chan->ramfc, 0x7c, 0x30000000 | devm); + nvkm_wo32(chan->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) | + (4 << 24) /* SEARCH_FULL */ | + (chan->ramht->gpuobj->node->offset >> 4)); + nvkm_done(chan->ramfc); + return 0; +} + +static const struct nvkm_chan_func_ramfc +nv50_chan_ramfc = { + .write = nv50_chan_ramfc_write, + .ctxdma = true, + .devm = 0xfff, +}; + +const struct nvkm_chan_func_userd +nv50_chan_userd = { + .bar = 0, + .base = 0xc00000, + .size = 0x002000, +}; + +const struct nvkm_chan_func_inst +nv50_chan_inst = { + .size = 0x10000, + .vmm = true, +}; + +static const struct nvkm_chan_func +nv50_chan = { + .inst = &nv50_chan_inst, + .userd = &nv50_chan_userd, + .ramfc = &nv50_chan_ramfc, + .bind = nv50_chan_bind, + .unbind = nv50_chan_unbind, + .start = nv50_chan_start, + .stop = nv50_chan_stop, +}; + +static void +nv50_ectx_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan) +{ + struct nvkm_subdev *subdev = &chan->cgrp->runl->fifo->engine.subdev; + struct nvkm_device *device = subdev->device; + u64 start = 0, limit = 0; + u32 flags = 0, ptr0, save; + + switch (engn->engine->subdev.type) { + case NVKM_ENGINE_GR : ptr0 = 0x0000; break; + case NVKM_ENGINE_MPEG : ptr0 = 0x0060; break; + default: + WARN_ON(1); + return; + } + + if (!cctx) { + /* HW bug workaround: + * + * PFIFO will hang forever if the connected engines don't report + * that they've processed the context switch request. + * + * In order for the kickoff to work, we need to ensure all the + * connected engines are in a state where they can answer. + * + * Newer chipsets don't seem to suffer from this issue, and there's + * also an "ignore these engines" bitmask reg we can use if we hit + * the issue there. + */ + save = nvkm_mask(device, 0x00b860, 0x00000001, 0x00000001); + + /* Tell engines to save out contexts. 
*/ + nvkm_wr32(device, 0x0032fc, chan->inst->addr >> 12); + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x0032fc) != 0xffffffff) + break; + ); + nvkm_wr32(device, 0x00b860, save); + } else { + flags = 0x00190000; + start = cctx->vctx->inst->addr; + limit = start + cctx->vctx->inst->size - 1; + } + + nvkm_kmap(chan->eng); + nvkm_wo32(chan->eng, ptr0 + 0x00, flags); + nvkm_wo32(chan->eng, ptr0 + 0x04, lower_32_bits(limit)); + nvkm_wo32(chan->eng, ptr0 + 0x08, lower_32_bits(start)); + nvkm_wo32(chan->eng, ptr0 + 0x0c, upper_32_bits(limit) << 24 | + lower_32_bits(start)); + nvkm_wo32(chan->eng, ptr0 + 0x10, 0x00000000); + nvkm_wo32(chan->eng, ptr0 + 0x14, 0x00000000); + nvkm_done(chan->eng); } +static const struct nvkm_engn_func +nv50_engn = { + .bind = nv50_ectx_bind, + .ramht_add = nv50_eobj_ramht_add, + .ramht_del = nv50_eobj_ramht_del, +}; + +const struct nvkm_engn_func +nv50_engn_sw = { + .ramht_add = nv50_eobj_ramht_add, + .ramht_del = nv50_eobj_ramht_del, +}; + +static bool +nv50_runl_pending(struct nvkm_runl *runl) +{ + return nvkm_rd32(runl->fifo->engine.subdev.device, 0x0032ec) & 0x00000100; +} + +int +nv50_runl_wait(struct nvkm_runl *runl) +{ + struct nvkm_fifo *fifo = runl->fifo; + + nvkm_msec(fifo->engine.subdev.device, fifo->timeout.chan_msec, + if (!nvkm_runl_update_pending(runl)) + return 0; + usleep_range(1, 2); + ); + + return -ETIMEDOUT; +} + +static void +nv50_runl_commit(struct nvkm_runl *runl, struct nvkm_memory *memory, u32 start, int count) +{ + struct nvkm_device *device = runl->fifo->engine.subdev.device; + u64 addr = nvkm_memory_addr(memory) + start; + + nvkm_wr32(device, 0x0032f4, addr >> 12); + nvkm_wr32(device, 0x0032ec, count); +} + +static void +nv50_runl_insert_chan(struct nvkm_chan *chan, struct nvkm_memory *memory, u64 offset) +{ + nvkm_wo32(memory, offset, chan->id); +} + +static struct nvkm_memory * +nv50_runl_alloc(struct nvkm_runl *runl, u32 *offset) +{ + const u32 segment = ALIGN((runl->cgrp_nr + runl->chan_nr) * runl->func->size, 0x1000); + const u32 maxsize = (runl->cgid ? runl->cgid->nr : 0) + runl->chid->nr; + int ret; + + if (unlikely(!runl->mem)) { + ret = nvkm_memory_new(runl->fifo->engine.subdev.device, NVKM_MEM_TARGET_INST, + maxsize * 2 * runl->func->size, 0, false, &runl->mem); + if (ret) { + RUNL_ERROR(runl, "alloc %d\n", ret); + return ERR_PTR(ret); + } + } else { + if (runl->offset + segment >= nvkm_memory_size(runl->mem)) { + ret = runl->func->wait(runl); + if (ret) { + RUNL_DEBUG(runl, "rewind timeout"); + return ERR_PTR(ret); + } + + runl->offset = 0; + } + } + + *offset = runl->offset; + runl->offset += segment; + return runl->mem; +} + +int +nv50_runl_update(struct nvkm_runl *runl) +{ + struct nvkm_memory *memory; + struct nvkm_cgrp *cgrp; + struct nvkm_chan *chan; + u32 start, offset, count; + + /*TODO: prio, interleaving. 
*/ + + RUNL_TRACE(runl, "RAMRL: update cgrps:%d chans:%d", runl->cgrp_nr, runl->chan_nr); + memory = nv50_runl_alloc(runl, &start); + if (IS_ERR(memory)) + return PTR_ERR(memory); + + RUNL_TRACE(runl, "RAMRL: update start:%08x", start); + offset = start; + + nvkm_kmap(memory); + nvkm_runl_foreach_cgrp(cgrp, runl) { + if (cgrp->hw) { + CGRP_TRACE(cgrp, " RAMRL+%08x: chans:%d", offset, cgrp->chan_nr); + runl->func->insert_cgrp(cgrp, memory, offset); + offset += runl->func->size; + } + + nvkm_cgrp_foreach_chan(chan, cgrp) { + CHAN_TRACE(chan, "RAMRL+%08x: [%s]", offset, chan->name); + runl->func->insert_chan(chan, memory, offset); + offset += runl->func->size; + } + } + nvkm_done(memory); + + /*TODO: look into using features on newer HW to guarantee forward progress. */ + list_rotate_left(&runl->cgrps); + + count = (offset - start) / runl->func->size; + RUNL_TRACE(runl, "RAMRL: commit start:%08x count:%d", start, count); + + runl->func->commit(runl, memory, start, count); + return 0; +} + +const struct nvkm_runl_func +nv50_runl = { + .size = 4, + .update = nv50_runl_update, + .insert_chan = nv50_runl_insert_chan, + .commit = nv50_runl_commit, + .wait = nv50_runl_wait, + .pending = nv50_runl_pending, +}; + void -nv50_fifo_init(struct nvkm_fifo *base) +nv50_fifo_init(struct nvkm_fifo *fifo) { - struct nv50_fifo *fifo = nv50_fifo(base); - struct nvkm_device *device = fifo->base.engine.subdev.device; + struct nvkm_runl *runl = nvkm_runl_first(fifo); + struct nvkm_device *device = fifo->engine.subdev.device; int i; nvkm_mask(device, 0x000200, 0x00000100, 0x00000000); @@ -89,61 +352,47 @@ nv50_fifo_init(struct nvkm_fifo *base) for (i = 0; i < 128; i++) nvkm_wr32(device, 0x002600 + (i * 4), 0x00000000); - nv50_fifo_runlist_update_locked(fifo); + + atomic_set(&runl->changed, 1); + runl->func->update(runl); nvkm_wr32(device, 0x003200, 0x00000001); nvkm_wr32(device, 0x003250, 0x00000001); nvkm_wr32(device, 0x002500, 0x00000001); } -void * -nv50_fifo_dtor(struct nvkm_fifo *base) +int +nv50_fifo_chid_ctor(struct nvkm_fifo *fifo, int nr) { - struct nv50_fifo *fifo = nv50_fifo(base); - nvkm_memory_unref(&fifo->runlist[1]); - nvkm_memory_unref(&fifo->runlist[0]); - return fifo; + /* CHID 0 is unusable (some kind of PIO channel?), 127 is "channel invalid". 
*/ + return nvkm_chid_new(&nvkm_chan_event, &fifo->engine.subdev, nr, 1, nr - 2, &fifo->chid); } int -nv50_fifo_new_(const struct nvkm_fifo_func *func, struct nvkm_device *device, - enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) +nv50_fifo_chid_nr(struct nvkm_fifo *fifo) { - struct nv50_fifo *fifo; - int ret; - - if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL))) - return -ENOMEM; - *pfifo = &fifo->base; - - ret = nvkm_fifo_ctor(func, device, type, inst, 128, &fifo->base); - if (ret) - return ret; - - set_bit(0, fifo->base.mask); /* PIO channel */ - set_bit(127, fifo->base.mask); /* inactive channel */ - return 0; + return 128; } static const struct nvkm_fifo_func nv50_fifo = { - .dtor = nv50_fifo_dtor, - .oneinit = nv50_fifo_oneinit, + .chid_nr = nv50_fifo_chid_nr, + .chid_ctor = nv50_fifo_chid_ctor, + .runl_ctor = nv04_fifo_runl_ctor, .init = nv50_fifo_init, .intr = nv04_fifo_intr, - .engine_id = nv04_fifo_engine_id, - .id_engine = nv04_fifo_id_engine, .pause = nv04_fifo_pause, .start = nv04_fifo_start, - .chan = { - &nv50_fifo_gpfifo_oclass, - NULL - }, + .runl = &nv50_runl, + .engn = &nv50_engn, + .engn_sw = &nv50_engn_sw, + .cgrp = {{ }, &nv04_cgrp }, + .chan = {{ 0, 0, NV50_CHANNEL_GPFIFO }, &nv50_chan }, }; int nv50_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - return nv50_fifo_new_(&nv50_fifo, device, type, inst, pfifo); + return nvkm_fifo_new_(&nv50_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.h deleted file mode 100644 index 0111e7e5a4e3..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -#ifndef __NV50_FIFO_H__ -#define __NV50_FIFO_H__ -#define nv50_fifo(p) container_of((p), struct nv50_fifo, base) -#include "priv.h" - -struct nv50_fifo { - struct nvkm_fifo base; - struct nvkm_memory *runlist[2]; - int cur_runlist; -}; - -int nv50_fifo_new_(const struct nvkm_fifo_func *, struct nvkm_device *, enum nvkm_subdev_type, int, - struct nvkm_fifo **); - -void *nv50_fifo_dtor(struct nvkm_fifo *); -int nv50_fifo_oneinit(struct nvkm_fifo *); -void nv50_fifo_init(struct nvkm_fifo *); -void nv50_fifo_runlist_update(struct nv50_fifo *); -#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h index 79cec57647f0..4d448be19224 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h @@ -3,46 +3,207 @@ #define __NVKM_FIFO_PRIV_H__ #define nvkm_fifo(p) container_of((p), struct nvkm_fifo, engine) #include <engine/fifo.h> +#include <core/enum.h> +struct nvkm_cctx; +struct nvkm_cgrp; +struct nvkm_engn; +struct nvkm_memory; +struct nvkm_runl; +struct nvkm_runq; +struct nvkm_vctx; -int nvkm_fifo_ctor(const struct nvkm_fifo_func *, struct nvkm_device *, enum nvkm_subdev_type, int, - int nr, struct nvkm_fifo *); -void nvkm_fifo_uevent(struct nvkm_fifo *); -void nvkm_fifo_kevent(struct nvkm_fifo *, int chid); -void nvkm_fifo_recover_chan(struct nvkm_fifo *, int chid); - -struct nvkm_fifo_chan * -nvkm_fifo_chan_inst_locked(struct nvkm_fifo *, u64 inst); - -struct nvkm_fifo_chan_oclass; struct nvkm_fifo_func { - void *(*dtor)(struct nvkm_fifo *); - int (*oneinit)(struct nvkm_fifo *); - int (*info)(struct nvkm_fifo *, u64 mthd, u64 *data); + int (*chid_nr)(struct nvkm_fifo *); + int (*chid_ctor)(struct nvkm_fifo *, int nr); + int 
(*runq_nr)(struct nvkm_fifo *); + int (*runl_ctor)(struct nvkm_fifo *); + void (*init)(struct nvkm_fifo *); - void (*fini)(struct nvkm_fifo *); - void (*intr)(struct nvkm_fifo *); - void (*fault)(struct nvkm_fifo *, struct nvkm_fault_data *); - int (*engine_id)(struct nvkm_fifo *, struct nvkm_engine *); - struct nvkm_engine *(*id_engine)(struct nvkm_fifo *, int engi); + void (*init_pbdmas)(struct nvkm_fifo *, u32 mask); + + irqreturn_t (*intr)(struct nvkm_inth *); + void (*intr_mmu_fault_unit)(struct nvkm_fifo *, int unit); + void (*intr_ctxsw_timeout)(struct nvkm_fifo *, u32 engm); + + const struct nvkm_fifo_func_mmu_fault { + void (*recover)(struct nvkm_fifo *, struct nvkm_fault_data *); + const struct nvkm_enum *access; + const struct nvkm_enum *engine; + const struct nvkm_enum *reason; + const struct nvkm_enum *hubclient; + const struct nvkm_enum *gpcclient; + } *mmu_fault; + void (*pause)(struct nvkm_fifo *, unsigned long *); void (*start)(struct nvkm_fifo *, unsigned long *); - void (*uevent_init)(struct nvkm_fifo *); - void (*uevent_fini)(struct nvkm_fifo *); - void (*recover_chan)(struct nvkm_fifo *, int chid); - int (*class_get)(struct nvkm_fifo *, int index, struct nvkm_oclass *); - int (*class_new)(struct nvkm_fifo *, const struct nvkm_oclass *, - void *, u32, struct nvkm_object **); - const struct nvkm_fifo_chan_oclass *chan[]; + + int (*nonstall_ctor)(struct nvkm_fifo *); + const struct nvkm_event_func *nonstall; + + const struct nvkm_runl_func *runl; + const struct nvkm_runq_func *runq; + const struct nvkm_engn_func *engn; + const struct nvkm_engn_func *engn_sw; + const struct nvkm_engn_func *engn_ce; + + struct nvkm_fifo_func_cgrp { + struct nvkm_sclass user; + const struct nvkm_cgrp_func *func; + bool force; + } cgrp; + + struct nvkm_fifo_func_chan { + struct nvkm_sclass user; + const struct nvkm_chan_func *func; + } chan; }; -void nv04_fifo_intr(struct nvkm_fifo *); -int nv04_fifo_engine_id(struct nvkm_fifo *, struct nvkm_engine *); -struct nvkm_engine *nv04_fifo_id_engine(struct nvkm_fifo *, int); +int nvkm_fifo_new_(const struct nvkm_fifo_func *, struct nvkm_device *, enum nvkm_subdev_type, int, + struct nvkm_fifo **); + +int nv04_fifo_chid_ctor(struct nvkm_fifo *, int); +int nv04_fifo_runl_ctor(struct nvkm_fifo *); +void nv04_fifo_init(struct nvkm_fifo *); +irqreturn_t nv04_fifo_intr(struct nvkm_inth *); void nv04_fifo_pause(struct nvkm_fifo *, unsigned long *); void nv04_fifo_start(struct nvkm_fifo *, unsigned long *); +extern const struct nvkm_runl_func nv04_runl; +extern const struct nvkm_engn_func nv04_engn; +extern const struct nvkm_cgrp_func nv04_cgrp; +extern const struct nvkm_chan_func_inst nv04_chan_inst; +extern const struct nvkm_chan_func_userd nv04_chan_userd; +void nv04_chan_ramfc_clear(struct nvkm_chan *); +void nv04_chan_start(struct nvkm_chan *); +void nv04_chan_stop(struct nvkm_chan *); +void nv04_eobj_ramht_del(struct nvkm_chan *, int); + +int nv10_fifo_chid_nr(struct nvkm_fifo *); + +int nv50_fifo_chid_nr(struct nvkm_fifo *); +int nv50_fifo_chid_ctor(struct nvkm_fifo *, int); +void nv50_fifo_init(struct nvkm_fifo *); +extern const struct nvkm_runl_func nv50_runl; +int nv50_runl_update(struct nvkm_runl *); +int nv50_runl_wait(struct nvkm_runl *); +extern const struct nvkm_engn_func nv50_engn_sw; +extern const struct nvkm_chan_func_inst nv50_chan_inst; +extern const struct nvkm_chan_func_userd nv50_chan_userd; +void nv50_chan_unbind(struct nvkm_chan *); +void nv50_chan_start(struct nvkm_chan *); +void nv50_chan_stop(struct nvkm_chan *); +void 
nv50_chan_preempt(struct nvkm_chan *); +int nv50_eobj_ramht_add(struct nvkm_engn *, struct nvkm_object *, struct nvkm_chan *); +void nv50_eobj_ramht_del(struct nvkm_chan *, int); + +extern const struct nvkm_event_func g84_fifo_nonstall; +extern const struct nvkm_engn_func g84_engn; +extern const struct nvkm_chan_func g84_chan; + +int gf100_fifo_chid_ctor(struct nvkm_fifo *, int); +int gf100_fifo_runq_nr(struct nvkm_fifo *); +bool gf100_fifo_intr_pbdma(struct nvkm_fifo *); +void gf100_fifo_intr_mmu_fault(struct nvkm_fifo *); +void gf100_fifo_intr_mmu_fault_unit(struct nvkm_fifo *, int); +void gf100_fifo_intr_sched(struct nvkm_fifo *); +void gf100_fifo_intr_ctxsw_timeout(struct nvkm_fifo *, u32); +void gf100_fifo_mmu_fault_recover(struct nvkm_fifo *, struct nvkm_fault_data *); +extern const struct nvkm_enum gf100_fifo_mmu_fault_access[]; +extern const struct nvkm_event_func gf100_fifo_nonstall; +bool gf100_runl_preempt_pending(struct nvkm_runl *); +void gf100_runq_init(struct nvkm_runq *); +bool gf100_runq_intr(struct nvkm_runq *, struct nvkm_runl *); +void gf100_engn_mmu_fault_trigger(struct nvkm_engn *); +bool gf100_engn_mmu_fault_triggered(struct nvkm_engn *); +extern const struct nvkm_engn_func gf100_engn_sw; +extern const struct nvkm_chan_func_inst gf100_chan_inst; +void gf100_chan_userd_clear(struct nvkm_chan *); +void gf100_chan_preempt(struct nvkm_chan *); + +int gk104_fifo_chid_nr(struct nvkm_fifo *); +int gk104_fifo_runl_ctor(struct nvkm_fifo *); +void gk104_fifo_init(struct nvkm_fifo *); +void gk104_fifo_init_pbdmas(struct nvkm_fifo *, u32); +irqreturn_t gk104_fifo_intr(struct nvkm_inth *); +void gk104_fifo_intr_runlist(struct nvkm_fifo *); +void gk104_fifo_intr_chsw(struct nvkm_fifo *); +void gk104_fifo_intr_bind(struct nvkm_fifo *); +extern const struct nvkm_fifo_func_mmu_fault gk104_fifo_mmu_fault; +extern const struct nvkm_enum gk104_fifo_mmu_fault_reason[]; +extern const struct nvkm_enum gk104_fifo_mmu_fault_hubclient[]; +extern const struct nvkm_enum gk104_fifo_mmu_fault_gpcclient[]; +void gk104_runl_insert_chan(struct nvkm_chan *, struct nvkm_memory *, u64); +void gk104_runl_commit(struct nvkm_runl *, struct nvkm_memory *, u32, int); +bool gk104_runl_pending(struct nvkm_runl *); +void gk104_runl_block(struct nvkm_runl *, u32); +void gk104_runl_allow(struct nvkm_runl *, u32); +void gk104_runl_fault_clear(struct nvkm_runl *); +extern const struct nvkm_runq_func gk104_runq; +void gk104_runq_init(struct nvkm_runq *); +bool gk104_runq_intr(struct nvkm_runq *, struct nvkm_runl *); +extern const struct nvkm_bitfield gk104_runq_intr_0_names[]; +bool gk104_runq_idle(struct nvkm_runq *); +extern const struct nvkm_engn_func gk104_engn; +bool gk104_engn_chsw(struct nvkm_engn *); +int gk104_engn_cxid(struct nvkm_engn *, bool *cgid); +int gk104_ectx_ctor(struct nvkm_engn *, struct nvkm_vctx *); +extern const struct nvkm_engn_func gk104_engn_ce; +extern const struct nvkm_chan_func_userd gk104_chan_userd; +extern const struct nvkm_chan_func_ramfc gk104_chan_ramfc; +void gk104_chan_bind(struct nvkm_chan *); +void gk104_chan_bind_inst(struct nvkm_chan *); +void gk104_chan_unbind(struct nvkm_chan *); +void gk104_chan_start(struct nvkm_chan *); +void gk104_chan_stop(struct nvkm_chan *); + +int gk110_fifo_chid_ctor(struct nvkm_fifo *, int); +extern const struct nvkm_runl_func gk110_runl; +extern const struct nvkm_cgrp_func gk110_cgrp; +void gk110_runl_insert_cgrp(struct nvkm_cgrp *, struct nvkm_memory *, u64); +extern const struct nvkm_chan_func gk110_chan; +void gk110_chan_preempt(struct 
nvkm_chan *); + +extern const struct nvkm_runq_func gk208_runq; +void gk208_runq_init(struct nvkm_runq *); + +void gm107_fifo_intr_mmu_fault_unit(struct nvkm_fifo *, int); +extern const struct nvkm_fifo_func_mmu_fault gm107_fifo_mmu_fault; +extern const struct nvkm_runl_func gm107_runl; +extern const struct nvkm_chan_func gm107_chan; + +int gm200_fifo_chid_nr(struct nvkm_fifo *); +int gm200_fifo_runq_nr(struct nvkm_fifo *); + +extern const struct nvkm_enum gv100_fifo_mmu_fault_access[]; +extern const struct nvkm_enum gv100_fifo_mmu_fault_reason[]; +extern const struct nvkm_enum gv100_fifo_mmu_fault_hubclient[]; +extern const struct nvkm_enum gv100_fifo_mmu_fault_gpcclient[]; +void gv100_runl_insert_cgrp(struct nvkm_cgrp *, struct nvkm_memory *, u64); +void gv100_runl_insert_chan(struct nvkm_chan *, struct nvkm_memory *, u64); +void gv100_runl_preempt(struct nvkm_runl *); +extern const struct nvkm_runq_func gv100_runq; +extern const struct nvkm_engn_func gv100_engn; +void gv100_ectx_bind(struct nvkm_engn *, struct nvkm_cctx *, struct nvkm_chan *); +extern const struct nvkm_engn_func gv100_engn_ce; +int gv100_ectx_ce_ctor(struct nvkm_engn *, struct nvkm_vctx *); +void gv100_ectx_ce_bind(struct nvkm_engn *, struct nvkm_cctx *, struct nvkm_chan *); +extern const struct nvkm_chan_func_userd gv100_chan_userd; +extern const struct nvkm_chan_func_ramfc gv100_chan_ramfc; + +void tu102_fifo_intr_ctxsw_timeout_info(struct nvkm_engn *, u32 info); +extern const struct nvkm_fifo_func_mmu_fault tu102_fifo_mmu_fault; -void gf100_fifo_intr_fault(struct nvkm_fifo *, int); +int ga100_fifo_runl_ctor(struct nvkm_fifo *); +int ga100_fifo_nonstall_ctor(struct nvkm_fifo *); +extern const struct nvkm_event_func ga100_fifo_nonstall; +extern const struct nvkm_runl_func ga100_runl; +extern const struct nvkm_runq_func ga100_runq; +extern const struct nvkm_engn_func ga100_engn; +extern const struct nvkm_engn_func ga100_engn_ce; +extern const struct nvkm_cgrp_func ga100_cgrp; +extern const struct nvkm_chan_func ga100_chan; -int gk104_fifo_engine_id(struct nvkm_fifo *, struct nvkm_engine *); -struct nvkm_engine *gk104_fifo_id_engine(struct nvkm_fifo *, int); +int nvkm_uchan_new(struct nvkm_fifo *, struct nvkm_cgrp *, const struct nvkm_oclass *, + void *argv, u32 argc, struct nvkm_object **); +int nvkm_ucgrp_new(struct nvkm_fifo *, const struct nvkm_oclass *, void *argv, u32 argc, + struct nvkm_object **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.c new file mode 100644 index 000000000000..b5836cbc29aa --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.c @@ -0,0 +1,430 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "runl.h" +#include "cgrp.h" +#include "chan.h" +#include "chid.h" +#include "priv.h" +#include "runq.h" + +#include <core/gpuobj.h> +#include <subdev/timer.h> +#include <subdev/top.h> + +struct nvkm_cgrp * +nvkm_engn_cgrp_get(struct nvkm_engn *engn, unsigned long *pirqflags) +{ + struct nvkm_cgrp *cgrp = NULL; + struct nvkm_chan *chan; + bool cgid; + int id; + + id = engn->func->cxid(engn, &cgid); + if (id < 0) + return NULL; + + if (!cgid) { + chan = nvkm_runl_chan_get_chid(engn->runl, id, pirqflags); + if (chan) + cgrp = chan->cgrp; + } else { + cgrp = nvkm_runl_cgrp_get_cgid(engn->runl, id, pirqflags); + } + + WARN_ON(!cgrp); + return cgrp; +} + +static void +nvkm_runl_rc(struct nvkm_runl *runl) +{ + struct nvkm_fifo *fifo = runl->fifo; + struct nvkm_cgrp *cgrp, *gtmp; + struct nvkm_chan *chan, *ctmp; + struct nvkm_engn *engn; + unsigned long flags; + int rc, state, i; + bool reset; + + /* Runlist is blocked before scheduling recovery - fetch count. */ + BUG_ON(!mutex_is_locked(&runl->mutex)); + rc = atomic_xchg(&runl->rc_pending, 0); + if (!rc) + return; + + /* Look for channel groups flagged for RC. */ + nvkm_runl_foreach_cgrp_safe(cgrp, gtmp, runl) { + state = atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_PENDING, NVKM_CGRP_RC_RUNNING); + if (state == NVKM_CGRP_RC_PENDING) { + /* Disable all channels in them, and remove from runlist. */ + nvkm_cgrp_foreach_chan_safe(chan, ctmp, cgrp) { + nvkm_chan_error(chan, false); + nvkm_chan_remove_locked(chan); + } + } + } + + /* On GPUs with runlist preempt, wait for PBDMA(s) servicing runlist to go idle. */ + if (runl->func->preempt) { + for (i = 0; i < runl->runq_nr; i++) { + struct nvkm_runq *runq = runl->runq[i]; + + if (runq) { + nvkm_msec(fifo->engine.subdev.device, 2000, + if (runq->func->idle(runq)) + break; + ); + } + } + } + + /* Look for engines that are still on flagged channel groups - reset them. */ + nvkm_runl_foreach_engn_cond(engn, runl, engn->func->cxid) { + cgrp = nvkm_engn_cgrp_get(engn, &flags); + if (!cgrp) { + ENGN_DEBUG(engn, "cxid not valid"); + continue; + } + + reset = atomic_read(&cgrp->rc) == NVKM_CGRP_RC_RUNNING; + nvkm_cgrp_put(&cgrp, flags); + if (!reset) { + ENGN_DEBUG(engn, "cxid not in recovery"); + continue; + } + + ENGN_DEBUG(engn, "resetting..."); + /*TODO: can we do something less of a potential catastrophe on failure? */ + WARN_ON(nvkm_engine_reset(engn->engine)); + } + + /* Submit runlist update, and clear any remaining exception state. */ + runl->func->update(runl); + if (runl->func->fault_clear) + runl->func->fault_clear(runl); + + /* Unblock runlist processing. 
*/ + while (rc--) + nvkm_runl_allow(runl); + runl->func->wait(runl); +} + +static void +nvkm_runl_rc_runl(struct nvkm_runl *runl) +{ + RUNL_ERROR(runl, "rc scheduled"); + + nvkm_runl_block(runl); + if (runl->func->preempt) + runl->func->preempt(runl); + + atomic_inc(&runl->rc_pending); + schedule_work(&runl->work); +} + +void +nvkm_runl_rc_cgrp(struct nvkm_cgrp *cgrp) +{ + if (atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_NONE, NVKM_CGRP_RC_PENDING) != NVKM_CGRP_RC_NONE) + return; + + CGRP_ERROR(cgrp, "rc scheduled"); + nvkm_runl_rc_runl(cgrp->runl); +} + +void +nvkm_runl_rc_engn(struct nvkm_runl *runl, struct nvkm_engn *engn) +{ + struct nvkm_cgrp *cgrp; + unsigned long flags; + + /* Lookup channel group currently on engine. */ + cgrp = nvkm_engn_cgrp_get(engn, &flags); + if (!cgrp) { + ENGN_DEBUG(engn, "rc skipped, not on channel"); + return; + } + + nvkm_runl_rc_cgrp(cgrp); + nvkm_cgrp_put(&cgrp, flags); +} + +static void +nvkm_runl_work(struct work_struct *work) +{ + struct nvkm_runl *runl = container_of(work, typeof(*runl), work); + + mutex_lock(&runl->mutex); + nvkm_runl_rc(runl); + mutex_unlock(&runl->mutex); + +} + +struct nvkm_chan * +nvkm_runl_chan_get_inst(struct nvkm_runl *runl, u64 inst, unsigned long *pirqflags) +{ + struct nvkm_chid *chid = runl->chid; + struct nvkm_chan *chan; + unsigned long flags; + int id; + + spin_lock_irqsave(&chid->lock, flags); + for_each_set_bit(id, chid->used, chid->nr) { + chan = chid->data[id]; + if (likely(chan)) { + if (chan->inst->addr == inst) { + spin_lock(&chan->cgrp->lock); + *pirqflags = flags; + spin_unlock(&chid->lock); + return chan; + } + } + } + spin_unlock_irqrestore(&chid->lock, flags); + return NULL; +} + +struct nvkm_chan * +nvkm_runl_chan_get_chid(struct nvkm_runl *runl, int id, unsigned long *pirqflags) +{ + struct nvkm_chid *chid = runl->chid; + struct nvkm_chan *chan; + unsigned long flags; + + spin_lock_irqsave(&chid->lock, flags); + if (!WARN_ON(id >= chid->nr)) { + chan = chid->data[id]; + if (likely(chan)) { + spin_lock(&chan->cgrp->lock); + *pirqflags = flags; + spin_unlock(&chid->lock); + return chan; + } + } + spin_unlock_irqrestore(&chid->lock, flags); + return NULL; +} + +struct nvkm_cgrp * +nvkm_runl_cgrp_get_cgid(struct nvkm_runl *runl, int id, unsigned long *pirqflags) +{ + struct nvkm_chid *cgid = runl->cgid; + struct nvkm_cgrp *cgrp; + unsigned long flags; + + spin_lock_irqsave(&cgid->lock, flags); + if (!WARN_ON(id >= cgid->nr)) { + cgrp = cgid->data[id]; + if (likely(cgrp)) { + spin_lock(&cgrp->lock); + *pirqflags = flags; + spin_unlock(&cgid->lock); + return cgrp; + } + } + spin_unlock_irqrestore(&cgid->lock, flags); + return NULL; +} + +int +nvkm_runl_preempt_wait(struct nvkm_runl *runl) +{ + return nvkm_msec(runl->fifo->engine.subdev.device, runl->fifo->timeout.chan_msec, + if (!runl->func->preempt_pending(runl)) + break; + + nvkm_runl_rc(runl); + usleep_range(1, 2); + ) < 0 ? 
-ETIMEDOUT : 0; +} + +bool +nvkm_runl_update_pending(struct nvkm_runl *runl) +{ + if (!runl->func->pending(runl)) + return false; + + nvkm_runl_rc(runl); + return true; +} + +void +nvkm_runl_update_locked(struct nvkm_runl *runl, bool wait) +{ + if (atomic_xchg(&runl->changed, 0) && runl->func->update) { + runl->func->update(runl); + if (wait) + runl->func->wait(runl); + } +} + +void +nvkm_runl_allow(struct nvkm_runl *runl) +{ + struct nvkm_fifo *fifo = runl->fifo; + unsigned long flags; + + spin_lock_irqsave(&fifo->lock, flags); + if (!--runl->blocked) { + RUNL_TRACE(runl, "running"); + runl->func->allow(runl, ~0); + } + spin_unlock_irqrestore(&fifo->lock, flags); +} + +void +nvkm_runl_block(struct nvkm_runl *runl) +{ + struct nvkm_fifo *fifo = runl->fifo; + unsigned long flags; + + spin_lock_irqsave(&fifo->lock, flags); + if (!runl->blocked++) { + RUNL_TRACE(runl, "stopped"); + runl->func->block(runl, ~0); + } + spin_unlock_irqrestore(&fifo->lock, flags); +} + +void +nvkm_runl_fini(struct nvkm_runl *runl) +{ + if (runl->func->fini) + runl->func->fini(runl); + + flush_work(&runl->work); +} + +void +nvkm_runl_del(struct nvkm_runl *runl) +{ + struct nvkm_engn *engn, *engt; + + nvkm_memory_unref(&runl->mem); + + list_for_each_entry_safe(engn, engt, &runl->engns, head) { + list_del(&engn->head); + kfree(engn); + } + + nvkm_chid_unref(&runl->chid); + nvkm_chid_unref(&runl->cgid); + + list_del(&runl->head); + mutex_destroy(&runl->mutex); + kfree(runl); +} + +struct nvkm_engn * +nvkm_runl_add(struct nvkm_runl *runl, int engi, const struct nvkm_engn_func *func, + enum nvkm_subdev_type type, int inst) +{ + struct nvkm_fifo *fifo = runl->fifo; + struct nvkm_device *device = fifo->engine.subdev.device; + struct nvkm_engine *engine; + struct nvkm_engn *engn; + + engine = nvkm_device_engine(device, type, inst); + if (!engine) { + RUNL_DEBUG(runl, "engn %d.%d[%s] not found", engi, inst, nvkm_subdev_type[type]); + return NULL; + } + + if (!(engn = kzalloc(sizeof(*engn), GFP_KERNEL))) + return NULL; + + engn->func = func; + engn->runl = runl; + engn->id = engi; + engn->engine = engine; + engn->fault = -1; + list_add_tail(&engn->head, &runl->engns); + + /* Lookup MMU engine ID for fault handling. 
*/ + if (device->top) + engn->fault = nvkm_top_fault_id(device, engine->subdev.type, engine->subdev.inst); + + if (engn->fault < 0 && fifo->func->mmu_fault) { + const struct nvkm_enum *map = fifo->func->mmu_fault->engine; + + while (map->name) { + if (map->data2 == engine->subdev.type && map->inst == engine->subdev.inst) { + engn->fault = map->value; + break; + } + map++; + } + } + + return engn; +} + +struct nvkm_runl * +nvkm_runl_get(struct nvkm_fifo *fifo, int runi, u32 addr) +{ + struct nvkm_runl *runl; + + nvkm_runl_foreach(runl, fifo) { + if ((runi >= 0 && runl->id == runi) || (runi < 0 && runl->addr == addr)) + return runl; + } + + return NULL; +} + +struct nvkm_runl * +nvkm_runl_new(struct nvkm_fifo *fifo, int runi, u32 addr, int id_nr) +{ + struct nvkm_subdev *subdev = &fifo->engine.subdev; + struct nvkm_runl *runl; + int ret; + + if (!(runl = kzalloc(sizeof(*runl), GFP_KERNEL))) + return NULL; + + runl->func = fifo->func->runl; + runl->fifo = fifo; + runl->id = runi; + runl->addr = addr; + INIT_LIST_HEAD(&runl->engns); + INIT_LIST_HEAD(&runl->cgrps); + atomic_set(&runl->changed, 0); + mutex_init(&runl->mutex); + INIT_WORK(&runl->work, nvkm_runl_work); + atomic_set(&runl->rc_triggered, 0); + atomic_set(&runl->rc_pending, 0); + list_add_tail(&runl->head, &fifo->runls); + + if (!fifo->chid) { + if ((ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->cgid)) || + (ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->chid))) { + RUNL_ERROR(runl, "cgid/chid: %d", ret); + nvkm_runl_del(runl); + return NULL; + } + } else { + runl->cgid = nvkm_chid_ref(fifo->cgid); + runl->chid = nvkm_chid_ref(fifo->chid); + } + + return runl; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.h new file mode 100644 index 000000000000..c93d21bb7bd5 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.h @@ -0,0 +1,125 @@ +#ifndef __NVKM_RUNL_H__ +#define __NVKM_RUNL_H__ +#include <core/intr.h> +struct nvkm_cctx; +struct nvkm_cgrp; +struct nvkm_chan; +struct nvkm_memory; +struct nvkm_object; +struct nvkm_vctx; +enum nvkm_subdev_type; + +struct nvkm_engn { + const struct nvkm_engn_func { + bool (*chsw)(struct nvkm_engn *); + int (*cxid)(struct nvkm_engn *, bool *cgid); + void (*mmu_fault_trigger)(struct nvkm_engn *); + bool (*mmu_fault_triggered)(struct nvkm_engn *); + int (*ctor)(struct nvkm_engn *, struct nvkm_vctx *); + void (*bind)(struct nvkm_engn *, struct nvkm_cctx *, struct nvkm_chan *); + int (*ramht_add)(struct nvkm_engn *, struct nvkm_object *, struct nvkm_chan *); + void (*ramht_del)(struct nvkm_chan *, int hash); + } *func; + struct nvkm_runl *runl; + int id; + + struct nvkm_engine *engine; + + int fault; + + struct list_head head; +}; + +#define ENGN_PRINT(e,l,p,f,a...) \ + RUNL_PRINT((e)->runl, l, p, "%02d[%8s]:"f, (e)->id, (e)->engine->subdev.name, ##a) +#define ENGN_DEBUG(e,f,a...) 
ENGN_PRINT((e), DEBUG, info, " "f"\n", ##a) + +struct nvkm_runl { + const struct nvkm_runl_func { + void (*init)(struct nvkm_runl *); + void (*fini)(struct nvkm_runl *); + int runqs; + u8 size; + int (*update)(struct nvkm_runl *); + void (*insert_cgrp)(struct nvkm_cgrp *, struct nvkm_memory *, u64 offset); + void (*insert_chan)(struct nvkm_chan *, struct nvkm_memory *, u64 offset); + void (*commit)(struct nvkm_runl *, struct nvkm_memory *, u32 start, int count); + int (*wait)(struct nvkm_runl *); + bool (*pending)(struct nvkm_runl *); + void (*block)(struct nvkm_runl *, u32 engm); + void (*allow)(struct nvkm_runl *, u32 engm); + void (*fault_clear)(struct nvkm_runl *); + void (*preempt)(struct nvkm_runl *); + bool (*preempt_pending)(struct nvkm_runl *); + } *func; + struct nvkm_fifo *fifo; + int id; + u32 addr; + u32 chan; + u16 doorbell; + + struct nvkm_chid *cgid; +#define NVKM_CHAN_EVENT_ERRORED BIT(0) + struct nvkm_chid *chid; + + struct list_head engns; + + struct nvkm_runq *runq[2]; + int runq_nr; + + struct nvkm_inth inth; + + struct list_head cgrps; + int cgrp_nr; + int chan_nr; + atomic_t changed; + struct nvkm_memory *mem; + u32 offset; + struct mutex mutex; + + int blocked; + + struct work_struct work; + atomic_t rc_triggered; + atomic_t rc_pending; + + struct list_head head; +}; + +struct nvkm_runl *nvkm_runl_new(struct nvkm_fifo *, int runi, u32 addr, int id_nr); +struct nvkm_runl *nvkm_runl_get(struct nvkm_fifo *, int runi, u32 addr); +struct nvkm_engn *nvkm_runl_add(struct nvkm_runl *, int engi, const struct nvkm_engn_func *, + enum nvkm_subdev_type, int inst); +void nvkm_runl_del(struct nvkm_runl *); +void nvkm_runl_fini(struct nvkm_runl *); +void nvkm_runl_block(struct nvkm_runl *); +void nvkm_runl_allow(struct nvkm_runl *); +void nvkm_runl_update_locked(struct nvkm_runl *, bool wait); +bool nvkm_runl_update_pending(struct nvkm_runl *); +int nvkm_runl_preempt_wait(struct nvkm_runl *); + +void nvkm_runl_rc_engn(struct nvkm_runl *, struct nvkm_engn *); +void nvkm_runl_rc_cgrp(struct nvkm_cgrp *); + +struct nvkm_cgrp *nvkm_runl_cgrp_get_cgid(struct nvkm_runl *, int cgid, unsigned long *irqflags); +struct nvkm_chan *nvkm_runl_chan_get_chid(struct nvkm_runl *, int chid, unsigned long *irqflags); +struct nvkm_chan *nvkm_runl_chan_get_inst(struct nvkm_runl *, u64 inst, unsigned long *irqflags); + +#define nvkm_runl_find_engn(engn,runl,cond) nvkm_list_find(engn, &(runl)->engns, head, (cond)) + +#define nvkm_runl_first(fifo) list_first_entry(&(fifo)->runls, struct nvkm_runl, head) +#define nvkm_runl_foreach(runl,fifo) list_for_each_entry((runl), &(fifo)->runls, head) +#define nvkm_runl_foreach_cond(runl,fifo,cond) nvkm_list_foreach(runl, &(fifo)->runls, head, (cond)) +#define nvkm_runl_foreach_engn(engn,runl) list_for_each_entry((engn), &(runl)->engns, head) +#define nvkm_runl_foreach_engn_cond(engn,runl,cond) \ + nvkm_list_foreach(engn, &(runl)->engns, head, (cond)) +#define nvkm_runl_foreach_cgrp(cgrp,runl) list_for_each_entry((cgrp), &(runl)->cgrps, head) +#define nvkm_runl_foreach_cgrp_safe(cgrp,gtmp,runl) \ + list_for_each_entry_safe((cgrp), (gtmp), &(runl)->cgrps, head) + +#define RUNL_PRINT(r,l,p,f,a...) \ + nvkm_printk__(&(r)->fifo->engine.subdev, NV_DBG_##l, p, "%06x:"f, (r)->addr, ##a) +#define RUNL_ERROR(r,f,a...) RUNL_PRINT((r), ERROR, err, " "f"\n", ##a) +#define RUNL_DEBUG(r,f,a...) RUNL_PRINT((r), DEBUG, info, " "f"\n", ##a) +#define RUNL_TRACE(r,f,a...) 
RUNL_PRINT((r), TRACE, info, " "f"\n", ##a) +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.c index 217268f8ccad..33bcf5fb3ef0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.c @@ -1,5 +1,5 @@ /* - * Copyright 2018 Red Hat Inc. + * Copyright 2021 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,27 +19,27 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -#include "user.h" +#include "runq.h" +#include "priv.h" -static int -tu102_fifo_user_map(struct nvkm_object *object, void *argv, u32 argc, - enum nvkm_object_map *type, u64 *addr, u64 *size) +void +nvkm_runq_del(struct nvkm_runq *runq) { - struct nvkm_device *device = object->engine->subdev.device; - *addr = 0xbb0000 + device->func->resource_addr(device, 0); - *size = 0x010000; - *type = NVKM_OBJECT_MAP_IO; - return 0; + list_del(&runq->head); + kfree(runq); } -static const struct nvkm_object_func -tu102_fifo_user = { - .map = tu102_fifo_user_map, -}; - -int -tu102_fifo_user_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, - struct nvkm_object **pobject) +struct nvkm_runq * +nvkm_runq_new(struct nvkm_fifo *fifo, int pbid) { - return nvkm_object_new_(&tu102_fifo_user, oclass, argv, argc, pobject); + struct nvkm_runq *runq; + + if (!(runq = kzalloc(sizeof(*runq), GFP_KERNEL))) + return NULL; + + runq->func = fifo->func->runq; + runq->fifo = fifo; + runq->id = pbid; + list_add_tail(&runq->head, &fifo->runqs); + return runq; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.h new file mode 100644 index 000000000000..2cb4836e8b31 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef __NVKM_RUNQ_H__ +#define __NVKM_RUNQ_H__ +#include <core/os.h> +struct nvkm_runl; + +struct nvkm_runq { + const struct nvkm_runq_func { + void (*init)(struct nvkm_runq *); + bool (*intr)(struct nvkm_runq *, struct nvkm_runl *); + const struct nvkm_bitfield *intr_0_names; + bool (*intr_1_ctxnotvalid)(struct nvkm_runq *, int chid); + bool (*idle)(struct nvkm_runq *); + } *func; + struct nvkm_fifo *fifo; + int id; + + struct list_head head; +}; + +struct nvkm_runq *nvkm_runq_new(struct nvkm_fifo *, int pbid); +void nvkm_runq_del(struct nvkm_runq *); + +#define nvkm_runq_foreach(runq,fifo) list_for_each_entry((runq), &(fifo)->runqs, head) +#define nvkm_runq_foreach_cond(runq,fifo,cond) nvkm_list_foreach(runq, &(fifo)->runqs, head, (cond)) + +#define RUNQ_PRINT(r,l,p,f,a...) \ + nvkm_printk__(&(r)->fifo->engine.subdev, NV_DBG_##l, p, "PBDMA%d:"f, (r)->id, ##a) +#define RUNQ_ERROR(r,f,a...) RUNQ_PRINT((r), ERROR, err, " "f"\n", ##a) +#define RUNQ_DEBUG(r,f,a...) RUNQ_PRINT((r), DEBUG, info, " "f"\n", ##a) +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c index 260b197f81bc..ea9e151dbb48 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c @@ -19,46 +19,83 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
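The RUNQ_PRINT/RUNQ_ERROR/RUNQ_DEBUG macros above follow the same layering as their RUNL_ counterparts: one base variadic macro adds the identifying prefix, and thin wrappers pin the log level. A standalone model of that layering, with printf standing in for nvkm_printk__ (uses the GNU ##__VA_ARGS__ extension, as the kernel does; names here are invented):

#include <stdio.h>

#define LOG(prefix, fmt, ...) printf("%s:" fmt, prefix, ##__VA_ARGS__)
#define RUNQ_LOG(id, fmt, ...) LOG("PBDMA", "%d: " fmt, (id), ##__VA_ARGS__)

int main(void)
{
        RUNQ_LOG(0, "intr %08x\n", 0x20000000u);
        return 0;
}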
*/ -#include "gk104.h" +#include "priv.h" #include "cgrp.h" -#include "changk104.h" -#include "user.h" +#include "chan.h" +#include "runl.h" -#include <core/client.h> -#include <core/gpuobj.h> -#include <subdev/bar.h> -#include <subdev/fault.h> -#include <subdev/top.h> -#include <subdev/timer.h> -#include <engine/sw.h> +#include <core/memory.h> +#include <subdev/mc.h> +#include <subdev/vfn.h> #include <nvif/class.h> +static u32 +tu102_chan_doorbell_handle(struct nvkm_chan *chan) +{ + return (chan->cgrp->runl->id << 16) | chan->id; +} + static void -tu102_fifo_runlist_commit(struct gk104_fifo *fifo, int runl, - struct nvkm_memory *mem, int nr) +tu102_chan_start(struct nvkm_chan *chan) { - struct nvkm_device *device = fifo->base.engine.subdev.device; - u64 addr = nvkm_memory_addr(mem); - /*XXX: target? */ + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; + + gk104_chan_start(chan); + nvkm_wr32(device, device->vfn->addr.user + 0x0090, chan->func->doorbell_handle(chan)); +} + +static const struct nvkm_chan_func +tu102_chan = { + .inst = &gf100_chan_inst, + .userd = &gv100_chan_userd, + .ramfc = &gv100_chan_ramfc, + .bind = gk104_chan_bind_inst, + .unbind = gk104_chan_unbind, + .start = tu102_chan_start, + .stop = gk104_chan_stop, + .preempt = gk110_chan_preempt, + .doorbell_handle = tu102_chan_doorbell_handle, +}; + +static bool +tu102_runl_pending(struct nvkm_runl *runl) +{ + struct nvkm_device *device = runl->fifo->engine.subdev.device; + + return nvkm_rd32(device, 0x002b0c + (runl->id * 0x10)) & 0x00008000; +} - nvkm_wr32(device, 0x002b00 + (runl * 0x10), lower_32_bits(addr)); - nvkm_wr32(device, 0x002b04 + (runl * 0x10), upper_32_bits(addr)); - nvkm_wr32(device, 0x002b08 + (runl * 0x10), nr); +static void +tu102_runl_commit(struct nvkm_runl *runl, struct nvkm_memory *memory, u32 start, int count) +{ + struct nvkm_device *device = runl->fifo->engine.subdev.device; + u64 addr = nvkm_memory_addr(memory) + start; + /*XXX: target? */ - /*XXX: how to wait? can you even wait? */ + nvkm_wr32(device, 0x002b00 + (runl->id * 0x10), lower_32_bits(addr)); + nvkm_wr32(device, 0x002b04 + (runl->id * 0x10), upper_32_bits(addr)); + nvkm_wr32(device, 0x002b08 + (runl->id * 0x10), count); } -static const struct gk104_fifo_runlist_func -tu102_fifo_runlist = { +static const struct nvkm_runl_func +tu102_runl = { + .runqs = 2, .size = 16, - .cgrp = gv100_fifo_runlist_cgrp, - .chan = gv100_fifo_runlist_chan, - .commit = tu102_fifo_runlist_commit, + .update = nv50_runl_update, + .insert_cgrp = gv100_runl_insert_cgrp, + .insert_chan = gv100_runl_insert_chan, + .commit = tu102_runl_commit, + .wait = nv50_runl_wait, + .pending = tu102_runl_pending, + .block = gk104_runl_block, + .allow = gk104_runl_allow, + .preempt = gv100_runl_preempt, + .preempt_pending = gf100_runl_preempt_pending, }; static const struct nvkm_enum -tu102_fifo_fault_engine[] = { +tu102_fifo_mmu_fault_engine[] = { { 0x01, "DISPLAY" }, { 0x03, "PTP" }, { 0x06, "PWR_PMU" }, @@ -85,305 +122,82 @@ tu102_fifo_fault_engine[] = { {} }; -static void -tu102_fifo_pbdma_init(struct gk104_fifo *fifo) -{ - struct nvkm_device *device = fifo->base.engine.subdev.device; - const u32 mask = (1 << fifo->pbdma_nr) - 1; - /*XXX: this is a bit of a guess at this point in time. 
*/ - nvkm_mask(device, 0xb65000, 0x80000fff, 0x80000000 | mask); -} - -static const struct gk104_fifo_pbdma_func -tu102_fifo_pbdma = { - .nr = gm200_fifo_pbdma_nr, - .init = tu102_fifo_pbdma_init, - .init_timeout = gk208_fifo_pbdma_init_timeout, -}; - -static const struct gk104_fifo_func -tu102_fifo = { - .pbdma = &tu102_fifo_pbdma, - .fault.access = gv100_fifo_fault_access, - .fault.engine = tu102_fifo_fault_engine, - .fault.reason = gv100_fifo_fault_reason, - .fault.hubclient = gv100_fifo_fault_hubclient, - .fault.gpcclient = gv100_fifo_fault_gpcclient, - .runlist = &tu102_fifo_runlist, - .user = {{-1,-1,VOLTA_USERMODE_A }, tu102_fifo_user_new }, - .chan = {{ 0, 0,TURING_CHANNEL_GPFIFO_A}, tu102_fifo_gpfifo_new }, - .cgrp_force = true, +const struct nvkm_fifo_func_mmu_fault +tu102_fifo_mmu_fault = { + .recover = gf100_fifo_mmu_fault_recover, + .access = gv100_fifo_mmu_fault_access, + .engine = tu102_fifo_mmu_fault_engine, + .reason = gv100_fifo_mmu_fault_reason, + .hubclient = gv100_fifo_mmu_fault_hubclient, + .gpcclient = gv100_fifo_mmu_fault_gpcclient, }; -static void -tu102_fifo_recover_work(struct work_struct *w) +void +tu102_fifo_intr_ctxsw_timeout_info(struct nvkm_engn *engn, u32 info) { - struct gk104_fifo *fifo = container_of(w, typeof(*fifo), recover.work); - struct nvkm_device *device = fifo->base.engine.subdev.device; - struct nvkm_engine *engine; + struct nvkm_runl *runl = engn->runl; + struct nvkm_cgrp *cgrp; unsigned long flags; - u32 engm, runm, todo; - int engn, runl; - - spin_lock_irqsave(&fifo->base.lock, flags); - runm = fifo->recover.runm; - engm = fifo->recover.engm; - fifo->recover.engm = 0; - fifo->recover.runm = 0; - spin_unlock_irqrestore(&fifo->base.lock, flags); - - nvkm_mask(device, 0x002630, runm, runm); - - for (todo = engm; engn = __ffs(todo), todo; todo &= ~BIT(engn)) { - if ((engine = fifo->engine[engn].engine)) { - nvkm_subdev_fini(&engine->subdev, false); - WARN_ON(nvkm_subdev_init(&engine->subdev)); - } - } - - for (todo = runm; runl = __ffs(todo), todo; todo &= ~BIT(runl)) - gk104_fifo_runlist_update(fifo, runl); - - nvkm_mask(device, 0x002630, runm, 0x00000000); -} - -static void tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn); - -static void -tu102_fifo_recover_runl(struct gk104_fifo *fifo, int runl) -{ - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - const u32 runm = BIT(runl); - - assert_spin_locked(&fifo->base.lock); - if (fifo->recover.runm & runm) - return; - fifo->recover.runm |= runm; - - /* Block runlist to prevent channel assignment(s) from changing. */ - nvkm_mask(device, 0x002630, runm, runm); - - /* Schedule recovery. 
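The recovery worker being removed above walks its engine/runlist bitmasks with the "for (todo = engm; engn = __ffs(todo), todo; todo &= ~BIT(engn))" idiom. A user-space equivalent using __builtin_ctzl, which, like __ffs, is undefined for zero, hence the mask is tested before every step exactly as the kernel loop does:

#include <stdio.h>

int main(void)
{
        unsigned long todo = 0x29; /* engines 0, 3, 5 */

        while (todo) {
                int engn = __builtin_ctzl(todo);

                printf("recover engine %d\n", engn);
                todo &= ~(1ul << engn);
        }
        return 0;
}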
*/ - nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl); - schedule_work(&fifo->recover.work); -} - -static struct gk104_fifo_chan * -tu102_fifo_recover_chid(struct gk104_fifo *fifo, int runl, int chid) -{ - struct gk104_fifo_chan *chan; - struct nvkm_fifo_cgrp *cgrp; - - list_for_each_entry(chan, &fifo->runlist[runl].chan, head) { - if (chan->base.chid == chid) { - list_del_init(&chan->head); - return chan; - } - } - - list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) { - if (cgrp->id == chid) { - chan = list_first_entry(&cgrp->chan, typeof(*chan), head); - list_del_init(&chan->head); - if (!--cgrp->chan_nr) - list_del_init(&cgrp->head); - return chan; - } - } - - return NULL; -} -static void -tu102_fifo_recover_chan(struct nvkm_fifo *base, int chid) -{ - struct gk104_fifo *fifo = gk104_fifo(base); - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - const u32 stat = nvkm_rd32(device, 0x800004 + (chid * 0x08)); - const u32 runl = (stat & 0x000f0000) >> 16; - const bool used = (stat & 0x00000001); - unsigned long engn, engm = fifo->runlist[runl].engm; - struct gk104_fifo_chan *chan; - - assert_spin_locked(&fifo->base.lock); - if (!used) + /* Check that engine hasn't become unstuck since timeout raised. */ + ENGN_DEBUG(engn, "CTXSW_TIMEOUT %08x", info); + if (info & 0xc0000000) return; - /* Lookup SW state for channel, and mark it as dead. */ - chan = tu102_fifo_recover_chid(fifo, runl, chid); - if (chan) { - chan->killed = true; - nvkm_fifo_kevent(&fifo->base, chid); - } - - /* Disable channel. */ - nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800); - nvkm_warn(subdev, "channel %d: killed\n", chid); - - /* Block channel assignments from changing during recovery. */ - tu102_fifo_recover_runl(fifo, runl); - - /* Schedule recovery for any engines the channel is on. */ - for_each_set_bit(engn, &engm, fifo->engine_nr) { - struct gk104_fifo_engine_status status; - - gk104_fifo_engine_status(fifo, engn, &status); - if (!status.chan || status.chan->id != chid) - continue; - tu102_fifo_recover_engn(fifo, engn); + /* Determine channel group the engine is stuck on, and schedule recovery. */ + switch (info & 0x0000c000) { + case 0x00004000: /* LOAD */ + cgrp = nvkm_runl_cgrp_get_cgid(runl, info & 0x3fff0000, &flags); + break; + case 0x00008000: /* SAVE */ + case 0x0000c000: /* SWITCH */ + cgrp = nvkm_runl_cgrp_get_cgid(runl, info & 0x00003fff, &flags); + break; + default: + cgrp = NULL; + break; } -} - -static void -tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn) -{ - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - const u32 runl = fifo->engine[engn].runl; - const u32 engm = BIT(engn); - struct gk104_fifo_engine_status status; - - assert_spin_locked(&fifo->base.lock); - if (fifo->recover.engm & engm) - return; - fifo->recover.engm |= engm; - /* Block channel assignments from changing during recovery. */ - tu102_fifo_recover_runl(fifo, runl); - - /* Determine which channel (if any) is currently on the engine. */ - gk104_fifo_engine_status(fifo, engn, &status); - if (status.chan) { - /* The channel is not longer viable, kill it. */ - tu102_fifo_recover_chan(&fifo->base, status.chan->id); + if (!WARN_ON(!cgrp)) { + nvkm_runl_rc_cgrp(cgrp); + nvkm_cgrp_put(&cgrp, flags); } - - /* Preempt the runlist */ - nvkm_wr32(device, 0x2638, BIT(runl)); - - /* Schedule recovery. 
*/ - nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn); - schedule_work(&fifo->recover.work); } static void -tu102_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info) +tu102_fifo_intr_ctxsw_timeout(struct nvkm_fifo *fifo) { - struct gk104_fifo *fifo = gk104_fifo(base); - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - const struct nvkm_enum *er, *ee, *ec, *ea; - struct nvkm_engine *engine = NULL; - struct nvkm_fifo_chan *chan; - unsigned long flags; - const char *en = ""; - char ct[8] = "HUB/"; - int engn; - - er = nvkm_enum_find(fifo->func->fault.reason, info->reason); - ee = nvkm_enum_find(fifo->func->fault.engine, info->engine); - if (info->hub) { - ec = nvkm_enum_find(fifo->func->fault.hubclient, info->client); - } else { - ec = nvkm_enum_find(fifo->func->fault.gpcclient, info->client); - snprintf(ct, sizeof(ct), "GPC%d/", info->gpc); - } - ea = nvkm_enum_find(fifo->func->fault.access, info->access); - - if (ee && ee->data2) { - switch (ee->data2) { - case NVKM_SUBDEV_BAR: - nvkm_bar_bar1_reset(device); - break; - case NVKM_SUBDEV_INSTMEM: - nvkm_bar_bar2_reset(device); - break; - case NVKM_ENGINE_IFB: - nvkm_mask(device, 0x001718, 0x00000000, 0x00000000); - break; - default: - engine = nvkm_device_engine(device, ee->data2, 0); - break; - } - } - - if (ee == NULL) { - struct nvkm_subdev *subdev = nvkm_top_fault(device, info->engine); - if (subdev) { - if (subdev->func == &nvkm_engine) - engine = container_of(subdev, typeof(*engine), subdev); - en = engine->subdev.name; + struct nvkm_device *device = fifo->engine.subdev.device; + struct nvkm_runl *runl; + struct nvkm_engn *engn; + u32 engm = nvkm_rd32(device, 0x002a30); + u32 info; + + nvkm_runl_foreach(runl, fifo) { + nvkm_runl_foreach_engn_cond(engn, runl, engm & BIT(engn->id)) { + info = nvkm_rd32(device, 0x003200 + (engn->id * 4)); + tu102_fifo_intr_ctxsw_timeout_info(engn, info); } - } else { - en = ee->name; } - spin_lock_irqsave(&fifo->base.lock, flags); - chan = nvkm_fifo_chan_inst_locked(&fifo->base, info->inst); - - nvkm_error(subdev, - "fault %02x [%s] at %016llx engine %02x [%s] client %02x " - "[%s%s] reason %02x [%s] on channel %d [%010llx %s]\n", - info->access, ea ? ea->name : "", info->addr, - info->engine, ee ? ee->name : en, - info->client, ct, ec ? ec->name : "", - info->reason, er ? er->name : "", chan ? chan->chid : -1, - info->inst, chan ? chan->object.client->name : "unknown"); - - /* Kill the channel that caused the fault. */ - if (chan) - tu102_fifo_recover_chan(&fifo->base, chan->chid); - - /* Channel recovery will probably have already done this for the - * correct engine(s), but just in case we can't find the channel - * information... 
- */ - for (engn = 0; engn < fifo->engine_nr && engine; engn++) { - if (fifo->engine[engn].engine == engine) { - tu102_fifo_recover_engn(fifo, engn); - break; - } - } - - spin_unlock_irqrestore(&fifo->base.lock, flags); -} - -static void -tu102_fifo_intr_ctxsw_timeout(struct gk104_fifo *fifo) -{ - struct nvkm_device *device = fifo->base.engine.subdev.device; - unsigned long flags, engm; - u32 engn; - - spin_lock_irqsave(&fifo->base.lock, flags); - - engm = nvkm_rd32(device, 0x2a30); - nvkm_wr32(device, 0x2a30, engm); - - for_each_set_bit(engn, &engm, 32) - tu102_fifo_recover_engn(fifo, engn); - - spin_unlock_irqrestore(&fifo->base.lock, flags); + nvkm_wr32(device, 0x002a30, engm); } static void -tu102_fifo_intr_sched(struct gk104_fifo *fifo) +tu102_fifo_intr_sched(struct nvkm_fifo *fifo) { - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; - struct nvkm_device *device = subdev->device; - u32 intr = nvkm_rd32(device, 0x00254c); + struct nvkm_subdev *subdev = &fifo->engine.subdev; + u32 intr = nvkm_rd32(subdev->device, 0x00254c); u32 code = intr & 0x000000ff; nvkm_error(subdev, "SCHED_ERROR %02x\n", code); } -static void -tu102_fifo_intr(struct nvkm_fifo *base) +static irqreturn_t +tu102_fifo_intr(struct nvkm_inth *inth) { - struct gk104_fifo *fifo = gk104_fifo(base); - struct nvkm_subdev *subdev = &fifo->base.engine.subdev; + struct nvkm_fifo *fifo = container_of(inth, typeof(*fifo), engine.subdev.inth); + struct nvkm_subdev *subdev = &fifo->engine.subdev; struct nvkm_device *device = subdev->device; u32 mask = nvkm_rd32(device, 0x002140); u32 stat = nvkm_rd32(device, 0x002100) & mask; @@ -412,17 +226,8 @@ tu102_fifo_intr(struct nvkm_fifo *base) } if (stat & 0x20000000) { - u32 mask = nvkm_rd32(device, 0x0025a0); - - while (mask) { - u32 unit = __ffs(mask); - - gk104_fifo_intr_pbdma_0(fifo, unit); - gk104_fifo_intr_pbdma_1(fifo, unit); - nvkm_wr32(device, 0x0025a0, (1 << unit)); - mask &= ~(1 << unit); - } - stat &= ~0x20000000; + if (gf100_fifo_intr_pbdma(fifo)) + stat &= ~0x20000000; } if (stat & 0x40000000) { @@ -432,46 +237,50 @@ tu102_fifo_intr(struct nvkm_fifo *base) if (stat & 0x80000000) { nvkm_wr32(device, 0x002100, 0x80000000); - gk104_fifo_intr_engine(fifo); + nvkm_event_ntfy(&fifo->nonstall.event, 0, NVKM_FIFO_NONSTALL_EVENT); stat &= ~0x80000000; } if (stat) { nvkm_error(subdev, "INTR %08x\n", stat); + spin_lock(&fifo->lock); nvkm_mask(device, 0x002140, stat, 0x00000000); + spin_unlock(&fifo->lock); nvkm_wr32(device, 0x002100, stat); } + + return IRQ_HANDLED; +} + +static void +tu102_fifo_init_pbdmas(struct nvkm_fifo *fifo, u32 mask) +{ + /* Not directly related to PBDMAs, but, enables doorbell to function. 
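The reworked tu102_fifo_intr() above follows the usual nvkm shape: read pending & enabled, clear each understood bit from stat as it is handled, then report and mask off whatever remains so an unknown source cannot storm the IRQ line. A toy model of just that control flow (register accesses replaced by a plain variable; bit assignments copied from the handler above):

#include <stdint.h>
#include <stdio.h>

static uint32_t intr_enable = 0xffffffff; /* stands in for reg 0x002140 */

static void fifo_intr(uint32_t pending)
{
        uint32_t stat = pending & intr_enable;

        if (stat & 0x20000000) { printf("pbdma\n"); stat &= ~0x20000000; }
        if (stat & 0x80000000) { printf("nonstall\n"); stat &= ~0x80000000; }

        if (stat) {
                printf("unknown INTR %08x, masking\n", stat);
                intr_enable &= ~stat;
        }
}

int main(void)
{
        fifo_intr(0xa0000400);
        return 0;
}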
*/ + nvkm_mask(fifo->engine.subdev.device, 0xb65000, 0x80000000, 0x80000000); } static const struct nvkm_fifo_func -tu102_fifo_ = { - .dtor = gk104_fifo_dtor, - .oneinit = gk104_fifo_oneinit, - .info = gk104_fifo_info, +tu102_fifo = { + .chid_nr = gm200_fifo_chid_nr, + .chid_ctor = gk110_fifo_chid_ctor, + .runq_nr = gm200_fifo_runq_nr, + .runl_ctor = gk104_fifo_runl_ctor, .init = gk104_fifo_init, - .fini = gk104_fifo_fini, + .init_pbdmas = tu102_fifo_init_pbdmas, .intr = tu102_fifo_intr, - .fault = tu102_fifo_fault, - .engine_id = gk104_fifo_engine_id, - .id_engine = gk104_fifo_id_engine, - .uevent_init = gk104_fifo_uevent_init, - .uevent_fini = gk104_fifo_uevent_fini, - .recover_chan = tu102_fifo_recover_chan, - .class_get = gk104_fifo_class_get, - .class_new = gk104_fifo_class_new, + .mmu_fault = &tu102_fifo_mmu_fault, + .nonstall = &gf100_fifo_nonstall, + .runl = &tu102_runl, + .runq = &gv100_runq, + .engn = &gv100_engn, + .engn_ce = &gv100_engn_ce, + .cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A }, &gk110_cgrp, .force = true }, + .chan = {{ 0, 0, TURING_CHANNEL_GPFIFO_A }, &tu102_chan }, }; int tu102_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo) { - struct gk104_fifo *fifo; - - if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL))) - return -ENOMEM; - fifo->func = &tu102_fifo; - INIT_WORK(&fifo->recover.work, tu102_fifo_recover_work); - *pfifo = &fifo->base; - - return nvkm_fifo_ctor(&tu102_fifo_, device, type, inst, 4096, &fifo->base); + return nvkm_fifo_new_(&tu102_fifo, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ucgrp.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ucgrp.c new file mode 100644 index 000000000000..52c594dfb1b8 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ucgrp.c @@ -0,0 +1,125 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
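The nvkm_ucgrp_sclass() implementation below enumerates child classes by a running index: each candidate either matches the requested index or bumps the counter, and falling off the end yields -EINVAL, which is how callers know to stop probing. A minimal standalone model of that convention (string table in place of class descriptors):

#include <stdio.h>

static const char *classes[] = { "CHANNEL_GPFIFO", NULL };

static int sclass(int index, const char **out)
{
        int c = 0;

        for (const char **cls = classes; *cls; cls++) {
                if (c++ == index) {
                        *out = *cls;
                        return 0;
                }
        }
        return -22; /* -EINVAL: past the end */
}

int main(void)
{
        const char *name;
        int i = 0;

        while (sclass(i, &name) == 0)
                printf("class %d: %s\n", i++, name);
        return 0;
}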
+ */ +#define nvkm_ucgrp(p) container_of((p), struct nvkm_ucgrp, object) +#include "priv.h" +#include "cgrp.h" +#include "runl.h" + +#include <subdev/mmu.h> + +#include <nvif/if0021.h> + +struct nvkm_ucgrp { + struct nvkm_object object; + struct nvkm_cgrp *cgrp; +}; + +static int +nvkm_ucgrp_chan_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) +{ + struct nvkm_cgrp *cgrp = nvkm_ucgrp(oclass->parent)->cgrp; + + return nvkm_uchan_new(cgrp->runl->fifo, cgrp, oclass, argv, argc, pobject); +} + +static int +nvkm_ucgrp_sclass(struct nvkm_object *object, int index, struct nvkm_oclass *oclass) +{ + struct nvkm_cgrp *cgrp = nvkm_ucgrp(object)->cgrp; + struct nvkm_fifo *fifo = cgrp->runl->fifo; + const struct nvkm_fifo_func_chan *chan = &fifo->func->chan; + int c = 0; + + /* *_CHANNEL_GPFIFO_* */ + if (chan->user.oclass) { + if (c++ == index) { + oclass->base = chan->user; + oclass->ctor = nvkm_ucgrp_chan_new; + return 0; + } + } + + return -EINVAL; +} + +static void * +nvkm_ucgrp_dtor(struct nvkm_object *object) +{ + struct nvkm_ucgrp *ucgrp = nvkm_ucgrp(object); + + nvkm_cgrp_unref(&ucgrp->cgrp); + return ucgrp; +} + +static const struct nvkm_object_func +nvkm_ucgrp = { + .dtor = nvkm_ucgrp_dtor, + .sclass = nvkm_ucgrp_sclass, +}; + +int +nvkm_ucgrp_new(struct nvkm_fifo *fifo, const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) +{ + union nvif_cgrp_args *args = argv; + struct nvkm_runl *runl; + struct nvkm_vmm *vmm; + struct nvkm_ucgrp *ucgrp; + int ret; + + if (argc < sizeof(args->v0) || args->v0.version != 0) + return -ENOSYS; + argc -= sizeof(args->v0); + + if (args->v0.namelen != argc) + return -EINVAL; + + /* Lookup objects referenced in args. */ + runl = nvkm_runl_get(fifo, args->v0.runlist, 0); + if (!runl) + return -EINVAL; + + vmm = nvkm_uvmm_search(oclass->client, args->v0.vmm); + if (IS_ERR(vmm)) + return PTR_ERR(vmm); + + /* Allocate channel group. */ + if (!(ucgrp = kzalloc(sizeof(*ucgrp), GFP_KERNEL))) { + ret = -ENOMEM; + goto done; + } + + nvkm_object_ctor(&nvkm_ucgrp, oclass, &ucgrp->object); + *pobject = &ucgrp->object; + + ret = nvkm_cgrp_new(runl, args->v0.name, vmm, true, &ucgrp->cgrp); + if (ret) + goto done; + + /* Return channel group info to caller. */ + args->v0.cgid = ucgrp->cgrp->id; + +done: + nvkm_vmm_unref(&vmm); + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/uchan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/uchan.c new file mode 100644 index 000000000000..1dac95ae7b43 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/uchan.c @@ -0,0 +1,409 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#define nvkm_uchan(p) container_of((p), struct nvkm_uchan, object) +#include "priv.h" +#include "cgrp.h" +#include "chan.h" +#include "chid.h" +#include "runl.h" + +#include <core/gpuobj.h> +#include <core/oproxy.h> +#include <subdev/mmu.h> +#include <engine/dma.h> + +#include <nvif/if0020.h> + +struct nvkm_uchan { + struct nvkm_object object; + struct nvkm_chan *chan; +}; + +static int +nvkm_uchan_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_uevent *uevent) +{ + struct nvkm_chan *chan = nvkm_uchan(object)->chan; + struct nvkm_runl *runl = chan->cgrp->runl; + union nvif_chan_event_args *args = argv; + + if (!uevent) + return 0; + if (argc != sizeof(args->v0) || args->v0.version != 0) + return -ENOSYS; + + switch (args->v0.type) { + case NVIF_CHAN_EVENT_V0_NON_STALL_INTR: + return nvkm_uevent_add(uevent, &runl->fifo->nonstall.event, 0, + NVKM_FIFO_NONSTALL_EVENT, NULL); + case NVIF_CHAN_EVENT_V0_KILLED: + return nvkm_uevent_add(uevent, &runl->chid->event, chan->id, + NVKM_CHAN_EVENT_ERRORED, NULL); + default: + break; + } + + return -ENOSYS; +} + +struct nvkm_uobj { + struct nvkm_oproxy oproxy; + struct nvkm_chan *chan; + struct nvkm_cctx *cctx; + int hash; +}; + +static int +nvkm_uchan_object_fini_1(struct nvkm_oproxy *oproxy, bool suspend) +{ + struct nvkm_uobj *uobj = container_of(oproxy, typeof(*uobj), oproxy); + struct nvkm_chan *chan = uobj->chan; + struct nvkm_cctx *cctx = uobj->cctx; + struct nvkm_ectx *ectx = cctx->vctx->ectx; + + if (!ectx->object) + return 0; + + /* Unbind engine context from channel, if no longer required. */ + if (refcount_dec_and_mutex_lock(&cctx->uses, &chan->cgrp->mutex)) { + nvkm_chan_cctx_bind(chan, ectx->engn, NULL); + + if (refcount_dec_and_test(&ectx->uses)) + nvkm_object_fini(ectx->object, false); + mutex_unlock(&chan->cgrp->mutex); + } + + return 0; +} + +static int +nvkm_uchan_object_init_0(struct nvkm_oproxy *oproxy) +{ + struct nvkm_uobj *uobj = container_of(oproxy, typeof(*uobj), oproxy); + struct nvkm_chan *chan = uobj->chan; + struct nvkm_cctx *cctx = uobj->cctx; + struct nvkm_ectx *ectx = cctx->vctx->ectx; + int ret = 0; + + if (!ectx->object) + return 0; + + /* Bind engine context to channel, if it hasn't been already. 
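The object init/fini hooks around this point use the standard refcount fast path: refcount_inc_not_zero() tried outside the lock and retried under it, with refcount_dec_and_mutex_lock() on teardown so only the last user unbinds. A user-space model with C11 atomics and a pthread mutex; simplified in that the kernel primitive couples the final decrement with acquiring the lock, while this sketch just always takes the lock first (all names invented):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int uses;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* refcount_inc_not_zero() equivalent: never revives a zero count. */
static int inc_not_zero(atomic_int *r)
{
        int v = atomic_load(r);

        while (v > 0) {
                if (atomic_compare_exchange_weak(r, &v, v + 1))
                        return 1;
        }
        return 0;
}

static void get(void)
{
        if (!inc_not_zero(&uses)) {          /* fast path missed */
                pthread_mutex_lock(&lock);
                if (!inc_not_zero(&uses)) {  /* recheck under the lock */
                        printf("bind\n");    /* first user does the work */
                        atomic_store(&uses, 1);
                }
                pthread_mutex_unlock(&lock);
        }
}

static void put(void)
{
        pthread_mutex_lock(&lock);
        if (atomic_fetch_sub(&uses, 1) == 1) /* last user unbinds */
                printf("unbind\n");
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        get(); get(); put(); put();
        return 0;
}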
*/ + if (!refcount_inc_not_zero(&cctx->uses)) { + mutex_lock(&chan->cgrp->mutex); + if (!refcount_inc_not_zero(&cctx->uses)) { + if (!refcount_inc_not_zero(&ectx->uses)) { + ret = nvkm_object_init(ectx->object); + if (ret == 0) + refcount_set(&ectx->uses, 1); + } + + if (ret == 0) { + nvkm_chan_cctx_bind(chan, ectx->engn, cctx); + refcount_set(&cctx->uses, 1); + } + } + mutex_unlock(&chan->cgrp->mutex); + } + + return ret; +} + +static void +nvkm_uchan_object_dtor(struct nvkm_oproxy *oproxy) +{ + struct nvkm_uobj *uobj = container_of(oproxy, typeof(*uobj), oproxy); + struct nvkm_engn *engn; + + if (!uobj->cctx) + return; + + engn = uobj->cctx->vctx->ectx->engn; + if (engn->func->ramht_del) + engn->func->ramht_del(uobj->chan, uobj->hash); + + nvkm_chan_cctx_put(uobj->chan, &uobj->cctx); +} + +static const struct nvkm_oproxy_func +nvkm_uchan_object = { + .dtor[1] = nvkm_uchan_object_dtor, + .init[0] = nvkm_uchan_object_init_0, + .fini[1] = nvkm_uchan_object_fini_1, +}; + +static int +nvkm_uchan_object_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) +{ + struct nvkm_chan *chan = nvkm_uchan(oclass->parent)->chan; + struct nvkm_cgrp *cgrp = chan->cgrp; + struct nvkm_engn *engn; + struct nvkm_uobj *uobj; + int ret; + + /* Lookup host engine state for target engine. */ + engn = nvkm_runl_find_engn(engn, cgrp->runl, engn->engine == oclass->engine); + if (WARN_ON(!engn)) + return -EINVAL; + + /* Allocate SW object. */ + if (!(uobj = kzalloc(sizeof(*uobj), GFP_KERNEL))) + return -ENOMEM; + + nvkm_oproxy_ctor(&nvkm_uchan_object, oclass, &uobj->oproxy); + uobj->chan = chan; + *pobject = &uobj->oproxy.base; + + /* Ref. channel context for target engine.*/ + ret = nvkm_chan_cctx_get(chan, engn, &uobj->cctx, oclass->client); + if (ret) + return ret; + + /* Allocate HW object. */ + ret = oclass->base.ctor(&(const struct nvkm_oclass) { + .base = oclass->base, + .engn = oclass->engn, + .handle = oclass->handle, + .object = oclass->object, + .client = oclass->client, + .parent = uobj->cctx->vctx->ectx->object ?: oclass->parent, + .engine = engn->engine, + }, argv, argc, &uobj->oproxy.object); + if (ret) + return ret; + + if (engn->func->ramht_add) { + uobj->hash = engn->func->ramht_add(engn, uobj->oproxy.object, uobj->chan); + if (uobj->hash < 0) + return uobj->hash; + } + + return 0; +} + +static int +nvkm_uchan_sclass(struct nvkm_object *object, int index, struct nvkm_oclass *oclass) +{ + struct nvkm_chan *chan = nvkm_uchan(object)->chan; + struct nvkm_engn *engn; + int ret, runq = 0; + + nvkm_runl_foreach_engn(engn, chan->cgrp->runl) { + struct nvkm_engine *engine = engn->engine; + int c = 0; + + /* Each runqueue, on runlists with multiple, has its own LCE. 
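nvkm_uchan_map(), a little further below, computes the user-visible USERD mapping as BAR base + chip-specific USERD window + this channel's slot, or refuses when USERD is not BAR-backed. A standalone sketch of that arithmetic (the struct and all numbers are invented for illustration, not the driver's layout):

#include <stdint.h>
#include <stdio.h>

struct userd_layout {
        int bar;       /* <0: USERD lives in memory, not a BAR */
        uint64_t base; /* window offset inside the BAR */
        uint32_t size; /* per-channel USERD size */
};

static int userd_map(const struct userd_layout *ud, uint64_t bar_addr,
                     uint64_t chan_base, uint64_t *addr, uint64_t *size)
{
        if (ud->bar < 0)
                return -38; /* -ENOSYS, as in nvkm_uchan_map() */

        *addr = bar_addr + ud->base + chan_base;
        *size = ud->size;
        return 0;
}

int main(void)
{
        const struct userd_layout ud = { 0, 0x010000, 0x200 };
        uint64_t addr, size;

        if (!userd_map(&ud, 0xf0000000, 3 * 0x200, &addr, &size))
                printf("map %llx+%llx\n",
                       (unsigned long long)addr, (unsigned long long)size);
        return 0;
}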
*/ + if (engn->runl->func->runqs) { + if (engine->subdev.type == NVKM_ENGINE_CE) { + if (chan->runq != runq++) + continue; + } + } + + oclass->engine = engine; + oclass->base.oclass = 0; + + if (engine->func->fifo.sclass) { + ret = engine->func->fifo.sclass(oclass, index); + if (oclass->base.oclass) { + if (!oclass->base.ctor) + oclass->base.ctor = nvkm_object_new; + oclass->ctor = nvkm_uchan_object_new; + return 0; + } + + index -= ret; + continue; + } + + while (engine->func->sclass[c].oclass) { + if (c++ == index) { + oclass->base = engine->func->sclass[index]; + if (!oclass->base.ctor) + oclass->base.ctor = nvkm_object_new; + oclass->ctor = nvkm_uchan_object_new; + return 0; + } + } + + index -= c; + } + + return -EINVAL; +} + +static int +nvkm_uchan_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) +{ + struct nvkm_chan *chan = nvkm_uchan(object)->chan; + struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device; + + if (chan->func->userd->bar < 0) + return -ENOSYS; + + *type = NVKM_OBJECT_MAP_IO; + *addr = device->func->resource_addr(device, chan->func->userd->bar) + + chan->func->userd->base + chan->userd.base; + *size = chan->func->userd->size; + return 0; +} + +static int +nvkm_uchan_fini(struct nvkm_object *object, bool suspend) +{ + struct nvkm_chan *chan = nvkm_uchan(object)->chan; + + nvkm_chan_block(chan); + nvkm_chan_remove(chan, true); + + if (chan->func->unbind) + chan->func->unbind(chan); + + return 0; +} + +static int +nvkm_uchan_init(struct nvkm_object *object) +{ + struct nvkm_chan *chan = nvkm_uchan(object)->chan; + + if (atomic_read(&chan->errored)) + return 0; + + if (chan->func->bind) + chan->func->bind(chan); + + nvkm_chan_allow(chan); + nvkm_chan_insert(chan); + return 0; +} + +static void * +nvkm_uchan_dtor(struct nvkm_object *object) +{ + struct nvkm_uchan *uchan = nvkm_uchan(object); + + nvkm_chan_del(&uchan->chan); + return uchan; +} + +static const struct nvkm_object_func +nvkm_uchan = { + .dtor = nvkm_uchan_dtor, + .init = nvkm_uchan_init, + .fini = nvkm_uchan_fini, + .map = nvkm_uchan_map, + .sclass = nvkm_uchan_sclass, + .uevent = nvkm_uchan_uevent, +}; + +int +nvkm_uchan_new(struct nvkm_fifo *fifo, struct nvkm_cgrp *cgrp, const struct nvkm_oclass *oclass, + void *argv, u32 argc, struct nvkm_object **pobject) +{ + union nvif_chan_args *args = argv; + struct nvkm_runl *runl; + struct nvkm_vmm *vmm = NULL; + struct nvkm_dmaobj *ctxdma = NULL; + struct nvkm_memory *userd = NULL; + struct nvkm_uchan *uchan; + struct nvkm_chan *chan; + int ret; + + if (argc < sizeof(args->v0) || args->v0.version != 0) + return -ENOSYS; + argc -= sizeof(args->v0); + + if (args->v0.namelen != argc) + return -EINVAL; + + /* Lookup objects referenced in args. */ + runl = nvkm_runl_get(fifo, args->v0.runlist, 0); + if (!runl) + return -EINVAL; + + if (args->v0.vmm) { + vmm = nvkm_uvmm_search(oclass->client, args->v0.vmm); + if (IS_ERR(vmm)) + return PTR_ERR(vmm); + } + + if (args->v0.ctxdma) { + ctxdma = nvkm_dmaobj_search(oclass->client, args->v0.ctxdma); + if (IS_ERR(ctxdma)) { + ret = PTR_ERR(ctxdma); + goto done; + } + } + + if (args->v0.huserd) { + userd = nvkm_umem_search(oclass->client, args->v0.huserd); + if (IS_ERR(userd)) { + ret = PTR_ERR(userd); + userd = NULL; + goto done; + } + } + + /* Allocate channel. 
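Both nvkm_ucgrp_new() earlier and nvkm_uchan_new() here begin with the same nvif argument check: the fixed v0 header is peeled off argc, and whatever remains must be exactly the name string the header promised. A compilable model of that validation (header layout simplified to two bytes; the real unions carry more fields):

#include <stdint.h>
#include <stdio.h>

struct args_v0 {
        uint8_t version;
        uint8_t namelen;
        /* ... followed by namelen bytes of name ... */
};

static int args_check(const void *argv, uint32_t argc)
{
        const struct args_v0 *args = argv;

        if (argc < sizeof(*args) || args->version != 0)
                return -38;        /* -ENOSYS: unknown layout */
        argc -= sizeof(*args);

        if (args->namelen != argc)
                return -22;        /* -EINVAL: size mismatch */
        return 0;
}

int main(void)
{
        struct { struct args_v0 hdr; char name[4]; } a = { { 0, 4 }, "gr0" };

        printf("check: %d\n", args_check(&a, sizeof(a)));
        return 0;
}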
*/ + if (!(uchan = kzalloc(sizeof(*uchan), GFP_KERNEL))) { + ret = -ENOMEM; + goto done; + } + + nvkm_object_ctor(&nvkm_uchan, oclass, &uchan->object); + *pobject = &uchan->object; + + ret = nvkm_chan_new_(fifo->func->chan.func, runl, args->v0.runq, cgrp, args->v0.name, + args->v0.priv != 0, args->v0.devm, vmm, ctxdma, args->v0.offset, + args->v0.length, userd, args->v0.ouserd, &uchan->chan); + if (ret) + goto done; + + chan = uchan->chan; + + /* Return channel info to caller. */ + if (chan->func->doorbell_handle) + args->v0.token = chan->func->doorbell_handle(chan); + else + args->v0.token = ~0; + + args->v0.chid = chan->id; + + switch (nvkm_memory_target(chan->inst->memory)) { + case NVKM_MEM_TARGET_INST: args->v0.aper = NVIF_CHAN_V0_INST_APER_INST; break; + case NVKM_MEM_TARGET_VRAM: args->v0.aper = NVIF_CHAN_V0_INST_APER_VRAM; break; + case NVKM_MEM_TARGET_HOST: args->v0.aper = NVIF_CHAN_V0_INST_APER_HOST; break; + case NVKM_MEM_TARGET_NCOH: args->v0.aper = NVIF_CHAN_V0_INST_APER_NCOH; break; + default: + WARN_ON(1); + ret = -EFAULT; + break; + } + + args->v0.inst = nvkm_memory_addr(chan->inst->memory); +done: + nvkm_memory_unref(&userd); + nvkm_vmm_unref(&vmm); + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h deleted file mode 100644 index 54a3a3092cc0..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __NVKM_FIFO_USER_H__ -#define __NVKM_FIFO_USER_H__ -#include "priv.h" -int gv100_fifo_user_new(const struct nvkm_oclass *, void *, u32, - struct nvkm_object **); -int tu102_fifo_user_new(const struct nvkm_oclass *, void *, u32, - struct nvkm_object **); -#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild index 558c86fd8e82..b5418f05ccd8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild @@ -40,6 +40,7 @@ nvkm-y += nvkm/engine/gr/gp108.o nvkm-y += nvkm/engine/gr/gp10b.o nvkm-y += nvkm/engine/gr/gv100.o nvkm-y += nvkm/engine/gr/tu102.o +nvkm-y += nvkm/engine/gr/ga102.o nvkm-y += nvkm/engine/gr/ctxnv40.o nvkm-y += nvkm/engine/gr/ctxnv50.o @@ -63,3 +64,4 @@ nvkm-y += nvkm/engine/gr/ctxgp104.o nvkm-y += nvkm/engine/gr/ctxgp107.o nvkm-y += nvkm/engine/gr/ctxgv100.o nvkm-y += nvkm/engine/gr/ctxtu102.o +nvkm-y += nvkm/engine/gr/ctxga102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c index 61759f54406e..71b824e6da9d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c @@ -136,6 +136,17 @@ nvkm_gr_oneinit(struct nvkm_engine *engine) } static int +nvkm_gr_reset(struct nvkm_engine *engine) +{ + struct nvkm_gr *gr = nvkm_gr(engine); + + if (gr->func->reset) + return gr->func->reset(gr); + + return -ENOSYS; +} + +static int nvkm_gr_init(struct nvkm_engine *engine) { struct nvkm_gr *gr = nvkm_gr(engine); @@ -166,6 +177,7 @@ nvkm_gr = { .oneinit = nvkm_gr_oneinit, .init = nvkm_gr_init, .fini = nvkm_gr_fini, + .reset = nvkm_gr_reset, .intr = nvkm_gr_intr, .tile = nvkm_gr_tile, .chsw_load = nvkm_gr_chsw_load, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxga102.c new file mode 100644 index 000000000000..11461adf5036 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxga102.c @@ -0,0 +1,77 @@ +/* + * Copyright 2019 Red Hat Inc. 
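ga102_grctx, defined just below, is typical of how per-chipset context generation is expressed: a const table of optional hooks plus tuning constants, with the shared generator calling whichever hooks are non-NULL. A toy dispatch showing the shape (every name and value here is invented except bundle_size, which matches the table below):

#include <stdio.h>

struct grctx_func {
        void (*unkn)(void);
        void (*r419ea8)(void);
        unsigned bundle_size;
};

static void ga10x_unkn(void) { printf("unkn fixups\n"); }

static const struct grctx_func toy_grctx = {
        .unkn = ga10x_unkn,
        /* .r419ea8 left NULL: the hook is optional */
        .bundle_size = 0x3000,
};

static void generate(const struct grctx_func *f)
{
        printf("bundle size %#x\n", f->bundle_size);
        if (f->unkn)
                f->unkn();
        if (f->r419ea8)
                f->r419ea8();
}

int main(void)
{
        generate(&toy_grctx);
        return 0;
}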
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ctxgf100.h" + +static void +ga102_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + tpc = gv100_gr_nonpes_aware_tpc(gr, gpc, tpc); + + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x608), sm); +} + +static void +ga102_grctx_generate_unkn(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_mask(device, 0x41980c, 0x00000010, 0x00000010); + nvkm_mask(device, 0x41be08, 0x00000004, 0x00000004); +} + +static void +ga102_grctx_generate_r419ea8(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x419ea8, nvkm_rd32(device, 0x504728) | 0x08000000); +} + +const struct gf100_grctx_func +ga102_grctx = { + .main = gf100_grctx_generate_main, + .unkn = ga102_grctx_generate_unkn, + .bundle = gm107_grctx_generate_bundle, + .bundle_size = 0x3000, + .bundle_min_gpm_fifo_depth = 0x180, + .bundle_token_limit = 0x1140, + .pagepool = gp100_grctx_generate_pagepool, + .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gv100_grctx_generate_attrib_cb, + .attrib = gv100_grctx_generate_attrib, + .attrib_nr_max = 0x800, + .attrib_nr = 0x4a1, + .alpha_nr_max = 0xc00, + .alpha_nr = 0x800, + .unknown_size = 0x80000, + .unknown = tu102_grctx_generate_unknown, + .gfxp_nr = 0xd28, + .sm_id = ga102_grctx_generate_sm_id, + .skip_pd_num_tpc_per_gpc = true, + .rop_mapping = gv100_grctx_generate_rop_mapping, + .r406500 = gm200_grctx_generate_r406500, + .r400088 = gv100_grctx_generate_r400088, + .r419ea8 = ga102_grctx_generate_r419ea8, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index 297915719bf2..cb390e0134a2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -26,6 +26,7 @@ #include <subdev/fb.h> #include <subdev/mc.h> #include <subdev/timer.h> +#include <engine/fifo.h> /******************************************************************************* * PGRAPH context register lists @@ -990,43 +991,16 @@ gf100_grctx_pack_tpc[] = { * PGRAPH context implementation ******************************************************************************/ -int -gf100_grctx_mmio_data(struct gf100_grctx *info, u32 size, u32 align, bool priv) -{ - if (info->data) { - 
info->buffer[info->buffer_nr] = round_up(info->addr, align); - info->addr = info->buffer[info->buffer_nr] + size; - info->data->size = size; - info->data->align = align; - info->data->priv = priv; - info->data++; - return info->buffer_nr++; - } - return -1; -} - void -gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data, - int shift, int buffer) +gf100_grctx_patch_wr32(struct gf100_gr_chan *chan, u32 addr, u32 data) { - struct nvkm_device *device = info->gr->base.engine.subdev.device; - if (info->data) { - if (shift >= 0) { - info->mmio->addr = addr; - info->mmio->data = data; - info->mmio->shift = shift; - info->mmio->buffer = buffer; - if (buffer >= 0) - data |= info->buffer[buffer] >> shift; - info->mmio++; - } else - return; - } else { - if (buffer >= 0) - return; + if (unlikely(!chan->mmio)) { + nvkm_wr32(chan->gr->base.engine.subdev.device, addr, data); + return; } - nvkm_wr32(device, addr, data); + nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr); + nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data); } void @@ -1037,56 +1011,60 @@ gf100_grctx_generate_r419cb8(struct gf100_gr *gr) } void -gf100_grctx_generate_bundle(struct gf100_grctx *info) +gf100_grctx_generate_bundle(struct gf100_gr_chan *chan, u64 addr, u32 size) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const int s = 8; - const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true); - mmio_refn(info, 0x408004, 0x00000000, s, b); - mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); - mmio_refn(info, 0x418808, 0x00000000, s, b); - mmio_wr32(info, 0x41880c, 0x80000000 | (grctx->bundle_size >> s)); + gf100_grctx_patch_wr32(chan, 0x408004, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408008, 0x80000000 | (size >> 8)); + gf100_grctx_patch_wr32(chan, 0x418808, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x41880c, 0x80000000 | (size >> 8)); } void -gf100_grctx_generate_pagepool(struct gf100_grctx *info) +gf100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); - mmio_refn(info, 0x40800c, 0x00000000, s, b); - mmio_wr32(info, 0x408010, 0x80000000); - mmio_refn(info, 0x419004, 0x00000000, s, b); - mmio_wr32(info, 0x419008, 0x00000000); + gf100_grctx_patch_wr32(chan, 0x40800c, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408010, 0x80000000); + gf100_grctx_patch_wr32(chan, 0x419004, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x419008, 0x00000000); } void -gf100_grctx_generate_attrib(struct gf100_grctx *info) +gf100_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 attrib = grctx->attrib_nr; - const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); int gpc, tpc; u32 bo = 0; - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_wr32(info, 0x405830, (attrib << 16)); + gf100_grctx_patch_wr32(chan, 0x405830, (attrib << 16)); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { const u32 o = TPC_UNIT(gpc, tpc, 0x0520); - mmio_skip(info, o, (attrib << 16) | ++bo); - mmio_wr32(info, o, (attrib << 16) | --bo); + + gf100_grctx_patch_wr32(chan, o, (attrib << 16) | bo); bo += grctx->attrib_nr_max; } } } void 
+gf100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size) +{ + gf100_grctx_patch_wr32(chan, 0x418810, 0x80000000 | addr >> 12); + gf100_grctx_patch_wr32(chan, 0x419848, 0x10000000 | addr >> 12); +} + +u32 +gf100_grctx_generate_attrib_cb_size(struct gf100_gr *gr) +{ + const struct gf100_grctx_func *grctx = gr->func->grctx; + + return 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max) * gr->tpc_total; +} + +void gf100_grctx_generate_unkn(struct gf100_gr *gr) { } @@ -1361,8 +1339,9 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr) } void -gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +gf100_grctx_generate_main(struct gf100_gr_chan *chan) { + struct gf100_gr *gr = chan->gr; struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; u32 idle_timeout; @@ -1380,15 +1359,23 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_gr_mmio(gr, gr->sw_ctx); } + if (gr->func->init_419bd8) + gr->func->init_419bd8(gr); + if (grctx->r419ea8) + grctx->r419ea8(gr); + gf100_gr_wait_idle(gr); idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - grctx->pagepool(info); - grctx->bundle(info); - grctx->attrib(info); + grctx->pagepool(chan, chan->pagepool->addr); + grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size); + grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr)); + grctx->attrib(chan); if (grctx->patch_ltc) - grctx->patch_ltc(info); + grctx->patch_ltc(chan); + if (grctx->unknown_size) + grctx->unknown(chan, chan->unknown->addr, grctx->unknown_size); grctx->unkn(gr); gf100_grctx_generate_floorsweep(gr); @@ -1396,12 +1383,23 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_gr_wait_idle(gr); if (grctx->r400088) grctx->r400088(gr, false); + if (gr->bundle) gf100_gr_icmd(gr, gr->bundle); else gf100_gr_icmd(gr, grctx->icmd); - if (grctx->sw_veid_bundle_init) + + if (gr->bundle_veid) + gf100_gr_icmd(gr, gr->bundle_veid); + else gf100_gr_icmd(gr, grctx->sw_veid_bundle_init); + + if (gr->bundle64) + gf100_gr_icmd(gr, gr->bundle64); + else + if (grctx->sw_bundle64_init) + gf100_gr_icmd(gr, grctx->sw_bundle64_init); + if (grctx->r400088) grctx->r400088(gr, true); nvkm_wr32(device, 0x404154, idle_timeout); @@ -1428,21 +1426,20 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) grctx->r408840(gr); if (grctx->r419c0c) grctx->r419c0c(gr); + + gf100_gr_wait_idle(gr); } #define CB_RESERVED 0x80000 int -gf100_grctx_generate(struct gf100_gr *gr) +gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvkm_gpuobj *inst) { const struct gf100_grctx_func *grctx = gr->func->grctx; struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; - struct nvkm_memory *inst = NULL; struct nvkm_memory *data = NULL; - struct nvkm_vmm *vmm = NULL; struct nvkm_vma *ctx = NULL; - struct gf100_grctx info; int ret, i; u64 addr; @@ -1457,72 +1454,47 @@ gf100_grctx_generate(struct gf100_gr *gr) grctx->unkn88c(gr, true); /* Reset FECS. */ - nvkm_wr32(device, 0x409614, 0x00000070); - nvkm_usec(device, 10, NVKM_DELAY); - nvkm_mask(device, 0x409614, 0x00000700, 0x00000700); - nvkm_usec(device, 10, NVKM_DELAY); - nvkm_rd32(device, 0x409614); + gr->func->fecs.reset(gr); if (grctx->unkn88c) grctx->unkn88c(gr, false); /* NV_PGRAPH_FE_PWR_MODE_AUTO. 
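The hunk just below adds an nvkm_msec() poll after setting FE_PWR_MODE_AUTO instead of assuming the write lands instantly; as the other call sites in this patch show, nvkm_msec() is a timed loop around its body that returns negative on timeout. A rough user-space equivalent (the register read is stubbed to succeed on the third poll; POSIX clock_gettime stands in for the kernel's timekeeping):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static int polls;

static uint32_t rd32(uint32_t addr)
{
        (void)addr;
        return ++polls >= 3 ? 0 : 0x10;
}

static int wait_msec(long ms)
{
        struct timespec start, now;

        clock_gettime(CLOCK_MONOTONIC, &start);
        for (;;) {
                if (!(rd32(0x404170) & 0x10)) /* loop body: exit condition */
                        return 0;
                clock_gettime(CLOCK_MONOTONIC, &now);
                if ((now.tv_sec - start.tv_sec) * 1000 +
                    (now.tv_nsec - start.tv_nsec) / 1000000 > ms)
                        return -1;            /* timeout, like nvkm_msec < 0 */
        }
}

int main(void)
{
        printf("wait: %d (%d polls)\n", wait_msec(2000), polls);
        return 0;
}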
*/ nvkm_wr32(device, 0x404170, 0x00000010); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x404170) & 0x00000010)) + break; + ); /* Init SCC RAM. */ nvkm_wr32(device, 0x40802c, 0x00000001); - /* Allocate memory to for a "channel", which we'll use to generate - * the default context values. - */ - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - 0x1000, 0x1000, true, &inst); - if (ret) - goto done; - - ret = nvkm_vmm_new(device, 0, 0, NULL, 0, NULL, "grctx", &vmm); - if (ret) - goto done; - - vmm->debug = subdev->debug; - - ret = nvkm_vmm_join(vmm, inst); - if (ret) - goto done; - + /* Allocate memory to store context, and dummy global context buffers. */ ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, CB_RESERVED + gr->size, 0, true, &data); if (ret) goto done; - ret = nvkm_vmm_get(vmm, 0, nvkm_memory_size(data), &ctx); + ret = nvkm_vmm_get(chan->vmm, 0, nvkm_memory_size(data), &ctx); if (ret) goto done; - ret = nvkm_memory_map(data, 0, vmm, ctx, NULL, 0); + ret = nvkm_memory_map(data, 0, chan->vmm, ctx, NULL, 0); if (ret) goto done; - /* Setup context pointer. */ nvkm_kmap(inst); nvkm_wo32(inst, 0x0210, lower_32_bits(ctx->addr + CB_RESERVED) | 4); nvkm_wo32(inst, 0x0214, upper_32_bits(ctx->addr + CB_RESERVED)); nvkm_done(inst); - /* Setup default state for mmio list construction. */ - info.gr = gr; - info.data = gr->mmio_data; - info.mmio = gr->mmio_list; - info.addr = ctx->addr; - info.buffer_nr = 0; - /* Make channel current. */ - addr = nvkm_memory_addr(inst) >> 12; + addr = inst->addr >> 12; if (gr->firmware) { ret = gf100_gr_fecs_bind_pointer(gr, 0x80000000 | addr); if (ret) - goto done; + goto done_inst; nvkm_kmap(data); nvkm_wo32(data, 0x1c, 1); @@ -1540,19 +1512,27 @@ gf100_grctx_generate(struct gf100_gr *gr) ); } - grctx->main(gr, &info); + grctx->main(chan); - /* Trigger a context unload by unsetting the "next channel valid" bit - * and faking a context switch interrupt. - */ - nvkm_mask(device, 0x409b04, 0x80000000, 0x00000000); - nvkm_wr32(device, 0x409000, 0x00000100); - if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x409b00) & 0x80000000)) - break; - ) < 0) { - ret = -EBUSY; - goto done; + if (!gr->firmware) { + /* Trigger a context unload by unsetting the "next channel valid" bit + * and faking a context switch interrupt. 
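Before generation, gf100_grctx_generate() points the instance block at the context buffer by writing the low word at 0x210 and the high word at 0x214, and the cleanup path added by this patch zeroes both again on the way out. A sketch of that split write; the 4 OR'd into the low word is copied verbatim from the code, whose meaning the patch doesn't spell out:

#include <stdint.h>
#include <stdio.h>

static void inst_set_ctx_ptr(uint32_t *inst, uint64_t ctx_addr)
{
        inst[0x210 / 4] = (uint32_t)ctx_addr | 4; /* low word + flag bits */
        inst[0x214 / 4] = (uint32_t)(ctx_addr >> 32);
}

int main(void)
{
        uint32_t inst[0x400 / 4] = { 0 };

        inst_set_ctx_ptr(inst, 0x1b0080000ull);
        printf("0x210=%08x 0x214=%08x\n", inst[0x210 / 4], inst[0x214 / 4]);
        return 0;
}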
+ */ + nvkm_mask(device, 0x409b04, 0x80000000, 0x00000000); + nvkm_wr32(device, 0x409000, 0x00000100); + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x409b00) & 0x80000000)) + break; + ) < 0) { + ret = -EBUSY; + goto done_inst; + } + } else { + ret = gf100_gr_fecs_wfi_golden_save(gr, 0x80000000 | addr); + if (ret) + goto done_inst; + + nvkm_mask(device, 0x409b00, 0x80000000, 0x00000000); } gr->data = kmalloc(gr->size, GFP_KERNEL); @@ -1566,12 +1546,14 @@ gf100_grctx_generate(struct gf100_gr *gr) ret = -ENOMEM; } +done_inst: + nvkm_kmap(inst); + nvkm_wo32(inst, 0x0210, 0); + nvkm_wo32(inst, 0x0214, 0); + nvkm_done(inst); done: - nvkm_vmm_put(vmm, &ctx); + nvkm_vmm_put(chan->vmm, &ctx); nvkm_memory_unref(&data); - nvkm_vmm_part(vmm, inst); - nvkm_vmm_unref(&vmm); - nvkm_memory_unref(&inst); return ret; } @@ -1590,6 +1572,8 @@ gf100_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h index 32bbddc0993e..00dbeda7e346 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -3,27 +3,12 @@ #define __NVKM_GRCTX_NVC0_H__ #include "gf100.h" -struct gf100_grctx { - struct gf100_gr *gr; - struct gf100_gr_data *data; - struct gf100_gr_mmio *mmio; - int buffer_nr; - u64 buffer[4]; - u64 addr; -}; - -int gf100_grctx_mmio_data(struct gf100_grctx *, u32 size, u32 align, bool priv); -void gf100_grctx_mmio_item(struct gf100_grctx *, u32 addr, u32 data, int s, int); - -#define mmio_vram(a,b,c,d) gf100_grctx_mmio_data((a), (b), (c), (d)) -#define mmio_refn(a,b,c,d,e) gf100_grctx_mmio_item((a), (b), (c), (d), (e)) -#define mmio_skip(a,b,c) mmio_refn((a), (b), (c), -1, -1) -#define mmio_wr32(a,b,c) mmio_refn((a), (b), (c), 0, -1) +void gf100_grctx_patch_wr32(struct gf100_gr_chan *, u32 addr, u32 data); struct gf100_grctx_func { void (*unkn88c)(struct gf100_gr *, bool on); /* main context generation function */ - void (*main)(struct gf100_gr *, struct gf100_grctx *); + void (*main)(struct gf100_gr_chan *); /* context-specific modify-on-first-load list generation function */ void (*unkn)(struct gf100_gr *); /* mmio context data */ @@ -37,23 +22,29 @@ struct gf100_grctx_func { const struct gf100_gr_pack *icmd; const struct gf100_gr_pack *mthd; const struct gf100_gr_pack *sw_veid_bundle_init; + const struct gf100_gr_pack *sw_bundle64_init; /* bundle circular buffer */ - void (*bundle)(struct gf100_grctx *); + void (*bundle)(struct gf100_gr_chan *, u64 addr, u32 size); u32 bundle_size; u32 bundle_min_gpm_fifo_depth; u32 bundle_token_limit; /* pagepool */ - void (*pagepool)(struct gf100_grctx *); + void (*pagepool)(struct gf100_gr_chan *, u64 addr); u32 pagepool_size; /* attribute(/alpha) circular buffer */ - void (*attrib)(struct gf100_grctx *); + u32 (*attrib_cb_size)(struct gf100_gr *); + void (*attrib_cb)(struct gf100_gr_chan *, u64 addr, u32 size); + void (*attrib)(struct gf100_gr_chan *); u32 attrib_nr_max; u32 attrib_nr; u32 alpha_nr_max; u32 alpha_nr; u32 gfxp_nr; + /* some other context buffer */ + void (*unknown)(struct gf100_gr_chan *, u64 addr, u32 size); + u32 unknown_size; /* other patch buffer stuff */ - void (*patch_ltc)(struct gf100_grctx *); + void (*patch_ltc)(struct gf100_gr_chan 
*); /* floorsweeping */ void (*sm_id)(struct gf100_gr *, int gpc, int tpc, int sm); void (*tpc_nr)(struct gf100_gr *, int gpc); @@ -78,14 +69,17 @@ struct gf100_grctx_func { void (*r419a3c)(struct gf100_gr *); void (*r408840)(struct gf100_gr *); void (*r419c0c)(struct gf100_gr *); + void (*r419ea8)(struct gf100_gr *); }; extern const struct gf100_grctx_func gf100_grctx; -int gf100_grctx_generate(struct gf100_gr *); -void gf100_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *); -void gf100_grctx_generate_bundle(struct gf100_grctx *); -void gf100_grctx_generate_pagepool(struct gf100_grctx *); -void gf100_grctx_generate_attrib(struct gf100_grctx *); +int gf100_grctx_generate(struct gf100_gr *, struct gf100_gr_chan *, struct nvkm_gpuobj *inst); +void gf100_grctx_generate_main(struct gf100_gr_chan *); +void gf100_grctx_generate_pagepool(struct gf100_gr_chan *, u64); +void gf100_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32); +u32 gf100_grctx_generate_attrib_cb_size(struct gf100_gr *); +void gf100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32); +void gf100_grctx_generate_attrib(struct gf100_gr_chan *); void gf100_grctx_generate_unkn(struct gf100_gr *); void gf100_grctx_generate_floorsweep(struct gf100_gr *); void gf100_grctx_generate_sm_id(struct gf100_gr *, int, int, int); @@ -97,14 +91,14 @@ void gf100_grctx_generate_max_ways_evict(struct gf100_gr *); void gf100_grctx_generate_r419cb8(struct gf100_gr *); extern const struct gf100_grctx_func gf108_grctx; -void gf108_grctx_generate_attrib(struct gf100_grctx *); +void gf108_grctx_generate_attrib(struct gf100_gr_chan *); void gf108_grctx_generate_unkn(struct gf100_gr *); extern const struct gf100_grctx_func gf104_grctx; extern const struct gf100_grctx_func gf110_grctx; extern const struct gf100_grctx_func gf117_grctx; -void gf117_grctx_generate_attrib(struct gf100_grctx *); +void gf117_grctx_generate_attrib(struct gf100_gr_chan *); void gf117_grctx_generate_rop_mapping(struct gf100_gr *); void gf117_grctx_generate_dist_skip_table(struct gf100_gr *); @@ -115,9 +109,9 @@ void gk104_grctx_generate_alpha_beta_tables(struct gf100_gr *); void gk104_grctx_generate_gpc_tpc_nr(struct gf100_gr *); extern const struct gf100_grctx_func gk20a_grctx; -void gk104_grctx_generate_bundle(struct gf100_grctx *); -void gk104_grctx_generate_pagepool(struct gf100_grctx *); -void gk104_grctx_generate_patch_ltc(struct gf100_grctx *); +void gk104_grctx_generate_pagepool(struct gf100_gr_chan *, u64); +void gk104_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32); +void gk104_grctx_generate_patch_ltc(struct gf100_gr_chan *); void gk104_grctx_generate_unkn(struct gf100_gr *); void gk104_grctx_generate_r418800(struct gf100_gr *); @@ -128,9 +122,10 @@ extern const struct gf100_grctx_func gk110b_grctx; extern const struct gf100_grctx_func gk208_grctx; extern const struct gf100_grctx_func gm107_grctx; -void gm107_grctx_generate_bundle(struct gf100_grctx *); -void gm107_grctx_generate_pagepool(struct gf100_grctx *); -void gm107_grctx_generate_attrib(struct gf100_grctx *); +void gm107_grctx_generate_pagepool(struct gf100_gr_chan *, u64); +void gm107_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32); +void gm107_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32); +void gm107_grctx_generate_attrib(struct gf100_gr_chan *); void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int); extern const struct gf100_grctx_func gm200_grctx; @@ -143,11 +138,13 @@ void gm200_grctx_generate_r419a3c(struct gf100_gr *); extern const struct 
gf100_grctx_func gm20b_grctx; extern const struct gf100_grctx_func gp100_grctx; -void gp100_grctx_generate_pagepool(struct gf100_grctx *); +void gp100_grctx_generate_pagepool(struct gf100_gr_chan *, u64); +void gp100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32); void gp100_grctx_generate_smid_config(struct gf100_gr *); extern const struct gf100_grctx_func gp102_grctx; -void gp102_grctx_generate_attrib(struct gf100_grctx *); +u32 gp102_grctx_generate_attrib_cb_size(struct gf100_gr *); +void gp102_grctx_generate_attrib(struct gf100_gr_chan *); extern const struct gf100_grctx_func gp104_grctx; @@ -158,11 +155,15 @@ extern const struct gf100_grctx_func gv100_grctx; extern const struct gf100_grctx_func tu102_grctx; void gv100_grctx_unkn88c(struct gf100_gr *, bool); void gv100_grctx_generate_unkn(struct gf100_gr *); -extern const struct gf100_gr_init gv100_grctx_init_sw_veid_bundle_init_0[]; -void gv100_grctx_generate_attrib(struct gf100_grctx *); +void gv100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32); +void gv100_grctx_generate_attrib(struct gf100_gr_chan *); void gv100_grctx_generate_rop_mapping(struct gf100_gr *); void gv100_grctx_generate_r400088(struct gf100_gr *, bool); +void tu102_grctx_generate_unknown(struct gf100_gr_chan *, u64, u32); + +extern const struct gf100_grctx_func ga102_grctx; + /* context init value lists */ extern const struct gf100_gr_pack gf100_grctx_pack_icmd[]; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c index 7a0564b6e3c7..ba63a3b46518 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c @@ -94,6 +94,8 @@ gf104_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c index dda2c32e6232..0bc2eab6ad98 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c @@ -733,25 +733,20 @@ gf108_grctx_pack_tpc[] = { ******************************************************************************/ void -gf108_grctx_generate_attrib(struct gf100_grctx *info) +gf108_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 beta = grctx->attrib_nr; - const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int timeslice_mode = 1; const int max_batches = 0xffff; u32 bo = 0; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; int gpc, tpc; - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_wr32(info, 0x405830, (beta << 16) | alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, (beta << 16) | alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { @@ -759,10 +754,10 @@ gf108_grctx_generate_attrib(struct gf100_grctx *info) const u32 b = 
beta; const u32 t = timeslice_mode; const u32 o = TPC_UNIT(gpc, tpc, 0x500); - mmio_skip(info, o + 0x20, (t << 28) | (b << 16) | ++bo); - mmio_wr32(info, o + 0x20, (t << 28) | (b << 16) | --bo); + + gf100_grctx_patch_wr32(chan, o + 0x20, (t << 28) | (b << 16) | bo); bo += grctx->attrib_nr_max; - mmio_wr32(info, o + 0x44, (a << 16) | ao); + gf100_grctx_patch_wr32(chan, o + 0x44, (a << 16) | ao); ao += grctx->alpha_nr_max; } } @@ -795,6 +790,8 @@ gf108_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf108_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c index f5cca5e6a4f2..64b723b0afb5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c @@ -342,6 +342,8 @@ gf110_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c index 276c282d19aa..e34c5da2a9ff 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c @@ -241,38 +241,34 @@ gf117_grctx_generate_rop_mapping(struct gf100_gr *gr) } void -gf117_grctx_generate_attrib(struct gf100_grctx *info) +gf117_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 beta = grctx->attrib_nr; - const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int timeslice_mode = 1; const int max_batches = 0xffff; u32 bo = 0; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; int gpc, ppc; - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_wr32(info, 0x405830, (beta << 16) | alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, (beta << 16) | alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) { const u32 a = alpha * gr->ppc_tpc_nr[gpc][ppc]; const u32 b = beta * gr->ppc_tpc_nr[gpc][ppc]; const u32 t = timeslice_mode; const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_skip(info, o + 0xc0, (t << 28) | (b << 16) | ++bo); - mmio_wr32(info, o + 0xc0, (t << 28) | (b << 16) | --bo); + + gf100_grctx_patch_wr32(chan, o + 0xc0, (t << 28) | (b << 16) | bo); bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, o + 0xe4, (a << 16) | ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, (a << 16) | ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; } } @@ -294,6 +290,8 @@ gf117_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = 
gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c index 0cfe46366af6..426ad1b8d426 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c @@ -510,6 +510,8 @@ gf119_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf108_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c index 304e9d268bad..94233d0119df 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c @@ -861,43 +861,33 @@ gk104_grctx_generate_r418800(struct gf100_gr *gr) } void -gk104_grctx_generate_patch_ltc(struct gf100_grctx *info) +gk104_grctx_generate_patch_ltc(struct gf100_gr_chan *chan) { - struct nvkm_device *device = info->gr->base.engine.subdev.device; + struct nvkm_device *device = chan->gr->base.engine.subdev.device; u32 data0 = nvkm_rd32(device, 0x17e91c); u32 data1 = nvkm_rd32(device, 0x17e920); + /*XXX: Figure out how to modify this correctly! */ - mmio_wr32(info, 0x17e91c, data0); - mmio_wr32(info, 0x17e920, data1); + gf100_grctx_patch_wr32(chan, 0x17e91c, data0); + gf100_grctx_patch_wr32(chan, 0x17e920, data1); } void -gk104_grctx_generate_bundle(struct gf100_grctx *info) +gk104_grctx_generate_bundle(struct gf100_gr_chan *chan, u64 addr, u32 size) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, - grctx->bundle_size / 0x20); + const struct gf100_grctx_func *grctx = chan->gr->func->grctx; + const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, size / 0x20); const u32 token_limit = grctx->bundle_token_limit; - const int s = 8; - const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true); - mmio_refn(info, 0x408004, 0x00000000, s, b); - mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); - mmio_refn(info, 0x418808, 0x00000000, s, b); - mmio_wr32(info, 0x41880c, 0x80000000 | (grctx->bundle_size >> s)); - mmio_wr32(info, 0x4064c8, (state_limit << 16) | token_limit); + + gf100_grctx_generate_bundle(chan, addr, size); + gf100_grctx_patch_wr32(chan, 0x4064c8, (state_limit << 16) | token_limit); } void -gk104_grctx_generate_pagepool(struct gf100_grctx *info) +gk104_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); - mmio_refn(info, 0x40800c, 0x00000000, s, b); - mmio_wr32(info, 0x408010, 0x80000000); - mmio_refn(info, 0x419004, 0x00000000, s, b); - mmio_wr32(info, 0x419008, 0x00000000); - mmio_wr32(info, 0x4064cc, 0x80000000); + gf100_grctx_generate_pagepool(chan, addr); + gf100_grctx_patch_wr32(chan, 0x4064cc, 0x80000000); } void @@ -991,6 +981,8 @@ gk104_grctx = { .bundle_token_limit = 0x600, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, 
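/* A note on the bundle-CB limits used by gk104_grctx_generate_bundle() above:
 * the value written to 0x4064c8 packs two limits into one word — bits 31:16
 * carry the state limit, clamped so it never exceeds the number of 0x20-byte
 * slots in the bundle buffer, and bits 15:0 carry the token limit.  A minimal
 * stand-alone sketch of that packing (illustrative only; the helper name is
 * assumed and is not driver API):
 */
#include <stdint.h>

static uint32_t
bundle_cb_limits(uint32_t min_gpm_fifo_depth, uint32_t size, uint32_t token_limit)
{
	/* state_limit = min(bundle_min_gpm_fifo_depth, size / 0x20) */
	uint32_t state_limit = min_gpm_fifo_depth < size / 0x20 ?
			       min_gpm_fifo_depth : size / 0x20;

	return (state_limit << 16) | token_limit;
}

/* e.g. gk104: bundle_size = 0x1800 gives 0xc0 slots; token_limit = 0x600. */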
.attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c index 86547cfc38dc..4391458e1fb2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c @@ -838,6 +838,8 @@ gk110_grctx = { .bundle_token_limit = 0x7c0, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c index ebb947bd1446..7b9a34f9ec3c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c @@ -87,6 +87,8 @@ gk110b_grctx = { .bundle_token_limit = 0x600, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c index 4d40512b5c99..c78d07a8bb7d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c @@ -553,6 +553,8 @@ gk208_grctx = { .bundle_token_limit = 0x200, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c index c0d36bc601f9..ac5fdcb5cd3f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c @@ -25,8 +25,9 @@ #include <subdev/mc.h> static void -gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +gk20a_grctx_generate_main(struct gf100_gr_chan *chan) { + struct gf100_gr *gr = chan->gr; struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; u32 idle_timeout; @@ -38,7 +39,8 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - grctx->attrib(info); + grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr)); + grctx->attrib(chan); grctx->unkn(gr); @@ -60,8 +62,8 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_gr_wait_idle(gr); gf100_gr_icmd(gr, gr->bundle); - grctx->pagepool(info); - grctx->bundle(info); + grctx->pagepool(chan, chan->pagepool->addr); + grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size); } const struct gf100_grctx_func @@ -74,6 +76,8 @@ gk20a_grctx = { .bundle_token_limit = 0x100, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x240, .attrib_nr = 0x240, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c index 0b3964e6b36e..beac66eb2a80 100644 --- 
a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c @@ -876,75 +876,70 @@ gm107_grctx_generate_r419e00(struct gf100_gr *gr) } void -gm107_grctx_generate_bundle(struct gf100_grctx *info) +gm107_grctx_generate_bundle(struct gf100_gr_chan *chan, u64 addr, u32 size) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, - grctx->bundle_size / 0x20); + const struct gf100_grctx_func *grctx = chan->gr->func->grctx; + const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, size / 0x20); const u32 token_limit = grctx->bundle_token_limit; - const int s = 8; - const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true); - mmio_refn(info, 0x408004, 0x00000000, s, b); - mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); - mmio_refn(info, 0x418e24, 0x00000000, s, b); - mmio_wr32(info, 0x418e28, 0x80000000 | (grctx->bundle_size >> s)); - mmio_wr32(info, 0x4064c8, (state_limit << 16) | token_limit); + + gf100_grctx_patch_wr32(chan, 0x408004, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408008, 0x80000000 | (size >> 8)); + gf100_grctx_patch_wr32(chan, 0x418e24, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x418e28, 0x80000000 | (size >> 8)); + gf100_grctx_patch_wr32(chan, 0x4064c8, (state_limit << 16) | token_limit); } void -gm107_grctx_generate_pagepool(struct gf100_grctx *info) +gm107_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); - mmio_refn(info, 0x40800c, 0x00000000, s, b); - mmio_wr32(info, 0x408010, 0x80000000); - mmio_refn(info, 0x419004, 0x00000000, s, b); - mmio_wr32(info, 0x419008, 0x00000000); - mmio_wr32(info, 0x4064cc, 0x80000000); - mmio_wr32(info, 0x418e30, 0x80000000); /* guess at it being related */ + gk104_grctx_generate_pagepool(chan, addr); + gf100_grctx_patch_wr32(chan, 0x418e30, 0x80000000); } void -gm107_grctx_generate_attrib(struct gf100_grctx *info) +gm107_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; - const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int max_batches = 0xffff; u32 bo = 0; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; int gpc, ppc, n = 0; - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_refn(info, 0x419c2c, 0x10000000, s, b); - mmio_wr32(info, 0x405830, (attrib << 16) | alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, (attrib << 16) | alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) { const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc]; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, bs); - mmio_wr32(info, o + 0xf4, bo); + + gf100_grctx_patch_wr32(chan, o + 0xc0, bs); + 
gf100_grctx_patch_wr32(chan, o + 0xf4, bo); bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, o + 0xe4, as); - mmio_wr32(info, o + 0xf8, ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, as); + gf100_grctx_patch_wr32(chan, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, u, ((bs / 3) << 16) | bs); + gf100_grctx_patch_wr32(chan, u, ((bs / 3) << 16) | bs); } } } +void +gm107_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size) +{ + gf100_grctx_generate_attrib_cb(chan, addr, size); + + gf100_grctx_patch_wr32(chan, 0x419c2c, 0x10000000 | addr >> 12); +} + static void gm107_grctx_generate_r406500(struct gf100_gr *gr) { @@ -978,6 +973,8 @@ gm107_grctx = { .bundle_token_limit = 0x2c0, .pagepool = gm107_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gm107_grctx_generate_attrib_cb, .attrib = gm107_grctx_generate_attrib, .attrib_nr_max = 0xff0, .attrib_nr = 0xaa0, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c index 013d05a0f0f6..175da8ac656c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c @@ -87,7 +87,7 @@ gm200_grctx_generate_dist_skip_table(struct gf100_gr *gr) int gpc, ppc, i; for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) { u8 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc]; u8 ppc_tpcm = gr->ppc_tpc_mask[gpc][ppc]; while (ppc_tpcs-- > gr->ppc_tpc_min) @@ -111,6 +111,8 @@ gm200_grctx = { .bundle_token_limit = 0x780, .pagepool = gm107_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gm107_grctx_generate_attrib_cb, .attrib = gm107_grctx_generate_attrib, .attrib_nr_max = 0x600, .attrib_nr = 0x400, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c index 6b92f8aa18a3..b8edccfada58 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c @@ -22,8 +22,9 @@ #include "ctxgf100.h" static void -gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +gm20b_grctx_generate_main(struct gf100_gr_chan *chan) { + struct gf100_gr *gr = chan->gr; struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; u32 idle_timeout; @@ -35,7 +36,8 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - grctx->attrib(info); + grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr)); + grctx->attrib(chan); grctx->unkn(gr); @@ -63,8 +65,8 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_gr_wait_idle(gr); gf100_gr_icmd(gr, gr->bundle); - grctx->pagepool(info); - grctx->bundle(info); + grctx->pagepool(chan, chan->pagepool->addr); + grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size); } const struct gf100_grctx_func @@ -77,6 +79,8 @@ gm20b_grctx = { .bundle_token_limit = 0x1c0, .pagepool = gm107_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gm107_grctx_generate_attrib_cb, .attrib = gm107_grctx_generate_attrib, .attrib_nr_max = 0x600, .attrib_nr = 0x400, diff --git 
a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c index 0b3326262e12..8485aaeae7a9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c @@ -30,66 +30,76 @@ ******************************************************************************/ void -gp100_grctx_generate_pagepool(struct gf100_grctx *info) +gp100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); - mmio_refn(info, 0x40800c, 0x00000000, s, b); - mmio_wr32(info, 0x408010, 0x8007d800); - mmio_refn(info, 0x419004, 0x00000000, s, b); - mmio_wr32(info, 0x419008, 0x00000000); + gf100_grctx_patch_wr32(chan, 0x40800c, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408010, 0x8007d800); + gf100_grctx_patch_wr32(chan, 0x419004, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x419008, 0x00000000); } static void -gp100_grctx_generate_attrib(struct gf100_grctx *info) +gp100_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; - const int s = 12; const int max_batches = 0xffff; u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 ao = 0; u32 bo = ao + size; - int gpc, ppc, b, n = 0; + int gpc, ppc, n = 0; - for (gpc = 0; gpc < gr->gpc_nr; gpc++) - size += grctx->attrib_nr_max * gr->ppc_nr[gpc] * gr->ppc_tpc_max; - size = ((size * 0x20) + 128) & ~127; - b = mmio_vram(info, size, (1 << s), false); - - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_refn(info, 0x419c2c, 0x10000000, s, b); - mmio_refn(info, 0x419b00, 0x00000000, s, b); - mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7); - mmio_wr32(info, 0x405830, attrib); - mmio_wr32(info, 0x40585c, alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, attrib); + gf100_grctx_patch_wr32(chan, 0x40585c, alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) { const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; const u32 bs = attrib * gr->ppc_tpc_max; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, bs); - mmio_wr32(info, o + 0xf4, bo); - mmio_wr32(info, o + 0xf0, bs); + + gf100_grctx_patch_wr32(chan, o + 0xc0, bs); + gf100_grctx_patch_wr32(chan, o + 0xf4, bo); + gf100_grctx_patch_wr32(chan, o + 0xf0, bs); bo += grctx->attrib_nr_max * gr->ppc_tpc_max; - mmio_wr32(info, o + 0xe4, as); - mmio_wr32(info, o + 0xf8, ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, as); + gf100_grctx_patch_wr32(chan, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, u, bs); + gf100_grctx_patch_wr32(chan, u, bs); } } - mmio_wr32(info, 0x418eec, 0x00000000); - mmio_wr32(info, 0x41befc, 0x00000000); + gf100_grctx_patch_wr32(chan, 0x418eec, 0x00000000); + gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000000); +} + +void +gp100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size) +{ + gm107_grctx_generate_attrib_cb(chan, addr, size); + 
+ gf100_grctx_patch_wr32(chan, 0x419b00, 0x00000000 | addr >> 12); + gf100_grctx_patch_wr32(chan, 0x419b04, 0x80000000 | size >> 7); +} + +static u32 +gp100_grctx_generate_attrib_cb_size(struct gf100_gr *gr) +{ + const struct gf100_grctx_func *grctx = gr->func->grctx; + u32 size = grctx->alpha_nr_max * gr->tpc_total; + int gpc; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) + size += grctx->attrib_nr_max * gr->func->ppc_nr * gr->ppc_tpc_max; + + return ((size * 0x20) + 128) & ~127; } void @@ -123,6 +133,8 @@ gp100_grctx = { .bundle_token_limit = 0x1080, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp100_grctx_generate_attrib_cb_size, + .attrib_cb = gp100_grctx_generate_attrib_cb, .attrib = gp100_grctx_generate_attrib, .attrib_nr_max = 0x660, .attrib_nr = 0x440, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c index daee17bf7d0d..7537979a5492 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c @@ -37,58 +37,62 @@ gp102_grctx_generate_r408840(struct gf100_gr *gr) } void -gp102_grctx_generate_attrib(struct gf100_grctx *info) +gp102_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; const u32 gfxp = grctx->gfxp_nr; - const int s = 12; const int max_batches = 0xffff; u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 ao = 0; u32 bo = ao + size; - int gpc, ppc, b, n = 0; + int gpc, ppc, n = 0; - for (gpc = 0; gpc < gr->gpc_nr; gpc++) - size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max; - size = ((size * 0x20) + 128) & ~127; - b = mmio_vram(info, size, (1 << s), false); - - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_refn(info, 0x419c2c, 0x10000000, s, b); - mmio_refn(info, 0x419b00, 0x00000000, s, b); - mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7); - mmio_wr32(info, 0x405830, attrib); - mmio_wr32(info, 0x40585c, alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, attrib); + gf100_grctx_patch_wr32(chan, 0x40585c, alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) { const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; const u32 bs = attrib * gr->ppc_tpc_max; const u32 gs = gfxp * gr->ppc_tpc_max; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); const u32 p = GPC_UNIT(gpc, 0xc44 + (ppc * 4)); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, gs); - mmio_wr32(info, p, bs); - mmio_wr32(info, o + 0xf4, bo); - mmio_wr32(info, o + 0xf0, bs); + + gf100_grctx_patch_wr32(chan, o + 0xc0, gs); + gf100_grctx_patch_wr32(chan, p, bs); + gf100_grctx_patch_wr32(chan, o + 0xf4, bo); + gf100_grctx_patch_wr32(chan, o + 0xf0, bs); bo += gs; - mmio_wr32(info, o + 0xe4, as); - mmio_wr32(info, o + 0xf8, ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, as); + gf100_grctx_patch_wr32(chan, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, u, bs); + gf100_grctx_patch_wr32(chan, u, bs); } } - mmio_wr32(info, 0x4181e4, 0x00000100); - mmio_wr32(info, 0x41befc, 0x00000100); + 
gf100_grctx_patch_wr32(chan, 0x4181e4, 0x00000100); + gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000100); +} + +u32 +gp102_grctx_generate_attrib_cb_size(struct gf100_gr *gr) +{ + const struct gf100_grctx_func *grctx = gr->func->grctx; + u32 size = grctx->alpha_nr_max * gr->tpc_total; + int gpc; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) + size += grctx->gfxp_nr * gr->func->ppc_nr * gr->ppc_tpc_max; + + return ((size * 0x20) + 127) & ~127; } const struct gf100_grctx_func @@ -101,6 +105,8 @@ gp102_grctx = { .bundle_token_limit = 0x900, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gp100_grctx_generate_attrib_cb, .attrib = gp102_grctx_generate_attrib, .attrib_nr_max = 0x4b0, .attrib_nr = 0x320, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c index 3b85e3d326b2..90b5f793e567 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c @@ -31,6 +31,8 @@ gp104_grctx = { .bundle_token_limit = 0x900, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gp100_grctx_generate_attrib_cb, .attrib = gp102_grctx_generate_attrib, .attrib_nr_max = 0x4b0, .attrib_nr = 0x320, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c index 5060c5ee5ce0..d191761a0471 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c @@ -39,6 +39,8 @@ gp107_grctx = { .bundle_token_limit = 0x300, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gp100_grctx_generate_attrib_cb, .attrib = gp102_grctx_generate_attrib, .attrib_nr_max = 0x15de, .attrib_nr = 0x540, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c index 39553d55d3f3..957ea9d6bad4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c @@ -25,7 +25,7 @@ * PGRAPH context implementation ******************************************************************************/ -const struct gf100_gr_init +static const struct gf100_gr_init gv100_grctx_init_sw_veid_bundle_init_0[] = { { 0x00001000, 64, 0x00100000, 0x00000008 }, { 0x00000941, 64, 0x00100000, 0x00000000 }, @@ -59,67 +59,70 @@ gv100_grctx_pack_sw_veid_bundle_init[] = { }; void -gv100_grctx_generate_attrib(struct gf100_grctx *info) +gv100_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; const u32 gfxp = grctx->gfxp_nr; - const int s = 12; + const int max_batches = 0xffff; u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 ao = 0; u32 bo = ao + size; - int gpc, ppc, b, n = 0; + int gpc, ppc, n = 0; - for (gpc = 0; gpc < gr->gpc_nr; gpc++) - size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max; - size = ((size * 0x20) + 127) & ~127; - b = mmio_vram(info, size, (1 << s), false); - - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_refn(info, 0x419c2c, 0x10000000, s, b); - mmio_refn(info, 0x419e00, 0x00000000, s, b); - mmio_wr32(info, 
0x419e04, 0x80000000 | size >> 7); - mmio_wr32(info, 0x405830, attrib); - mmio_wr32(info, 0x40585c, alpha); + gf100_grctx_patch_wr32(chan, 0x405830, attrib); + gf100_grctx_patch_wr32(chan, 0x40585c, alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) { const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; const u32 bs = attrib * gr->ppc_tpc_max; const u32 gs = gfxp * gr->ppc_tpc_max; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, gs); - mmio_wr32(info, o + 0xf4, bo); - mmio_wr32(info, o + 0xf0, bs); + + gf100_grctx_patch_wr32(chan, o + 0xc0, gs); + gf100_grctx_patch_wr32(chan, o + 0xf4, bo); + gf100_grctx_patch_wr32(chan, o + 0xf0, bs); bo += gs; - mmio_wr32(info, o + 0xe4, as); - mmio_wr32(info, o + 0xf8, ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, as); + gf100_grctx_patch_wr32(chan, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, u, bs); + gf100_grctx_patch_wr32(chan, u, bs); } } - mmio_wr32(info, 0x4181e4, 0x00000100); - mmio_wr32(info, 0x41befc, 0x00000100); + gf100_grctx_patch_wr32(chan, 0x4181e4, 0x00000100); + gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000100); +} + +void +gv100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size) +{ + gm107_grctx_generate_attrib_cb(chan, addr, size); + + gf100_grctx_patch_wr32(chan, 0x419e00, 0x00000000 | addr >> 12); + gf100_grctx_patch_wr32(chan, 0x419e04, 0x80000000 | size >> 7); } void gv100_grctx_generate_rop_mapping(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 mapregs = DIV_ROUND_UP(gr->func->gpc_nr * gr->func->tpc_nr, 6); u32 data; int i, j; /* Pack tile map into register format. 
*/ nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) | gr->screen_tile_row_offset); - for (i = 0; i < 11; i++) { + for (i = 0; i < mapregs; i++) { for (data = 0, j = 0; j < 6; j++) data |= (gr->tile[i * 6 + j] & 0x1f) << (j * 5); nvkm_wr32(device, 0x418b08 + (i * 4), data); @@ -157,6 +160,9 @@ static void gv100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) { struct nvkm_device *device = gr->base.engine.subdev.device; + + tpc = gv100_gr_nonpes_aware_tpc(gr, gpc, tpc); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x608), sm); nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm); nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm); @@ -198,6 +204,8 @@ gv100_grctx = { .bundle_token_limit = 0x1680, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gv100_grctx_generate_attrib_cb, .attrib = gv100_grctx_generate_attrib, .attrib_nr_max = 0x6c0, .attrib_nr = 0x480, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c index 2299ca07d04a..542ab0c78be6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c @@ -34,6 +34,9 @@ static void tu102_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) { struct nvkm_device *device = gr->base.engine.subdev.device; + + tpc = gv100_gr_nonpes_aware_tpc(gr, gpc, tpc); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x608), sm); nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm); } @@ -47,42 +50,38 @@ tu102_grctx_init_unknown_bundle_init_0[] = { }; static const struct gf100_gr_pack -tu102_grctx_pack_sw_veid_bundle_init[] = { - { gv100_grctx_init_sw_veid_bundle_init_0 }, - { tu102_grctx_init_unknown_bundle_init_0 }, +tu102_grctx_pack_sw_bundle64_init[] = { + { tu102_grctx_init_unknown_bundle_init_0, .type = 64 }, {} }; -static void -tu102_grctx_generate_attrib(struct gf100_grctx *info) +void +tu102_grctx_generate_unknown(struct gf100_gr_chan *chan, u64 addr, u32 size) { - const u64 size = 0x80000; /*XXX: educated guess */ - const int s = 8; - const int b = mmio_vram(info, size, (1 << s), true); - - gv100_grctx_generate_attrib(info); - - mmio_refn(info, 0x408070, 0x00000000, s, b); - mmio_wr32(info, 0x408074, size >> s); /*XXX: guess */ - mmio_refn(info, 0x419034, 0x00000000, s, b); - mmio_wr32(info, 0x408078, 0x00000000); + gf100_grctx_patch_wr32(chan, 0x408070, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408074, size >> 8); /*XXX: guess */ + gf100_grctx_patch_wr32(chan, 0x419034, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408078, 0x00000000); } const struct gf100_grctx_func tu102_grctx = { - .unkn88c = gv100_grctx_unkn88c, .main = gf100_grctx_generate_main, .unkn = gv100_grctx_generate_unkn, - .sw_veid_bundle_init = tu102_grctx_pack_sw_veid_bundle_init, + .sw_bundle64_init = tu102_grctx_pack_sw_bundle64_init, .bundle = gm107_grctx_generate_bundle, .bundle_size = 0x3000, .bundle_min_gpm_fifo_depth = 0x180, .bundle_token_limit = 0xa80, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, - .attrib = tu102_grctx_generate_attrib, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gv100_grctx_generate_attrib_cb, + .attrib = gv100_grctx_generate_attrib, .attrib_nr_max = 0x800, .attrib_nr = 0x700, + .unknown_size = 0x80000, + .unknown = tu102_grctx_generate_unknown, .alpha_nr_max = 0xc00, .alpha_nr = 0x800, .gfxp_nr = 0xfa8, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c 
b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c new file mode 100644 index 000000000000..a5b5ac2755a2 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c @@ -0,0 +1,347 @@ +/* + * Copyright 2019 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "gf100.h" +#include "ctxgf100.h" + +#include <core/firmware.h> +#include <subdev/acr.h> +#include <subdev/timer.h> +#include <subdev/vfn.h> + +#include <nvfw/flcn.h> + +#include <nvif/class.h> + +static void +ga102_gr_zbc_clear_color(struct gf100_gr *gr, int zbc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 invalid[] = { 0, 0, 0, 0 }, *color; + + if (gr->zbc_color[zbc].format) + color = gr->zbc_color[zbc].l2; + else + color = invalid; + + nvkm_mask(device, 0x41bcb4, 0x0000001f, zbc); + nvkm_wr32(device, 0x41bcec, color[0]); + nvkm_wr32(device, 0x41bcf0, color[1]); + nvkm_wr32(device, 0x41bcf4, color[2]); + nvkm_wr32(device, 0x41bcf8, color[3]); +} + +static const struct gf100_gr_func_zbc +ga102_gr_zbc = { + .clear_color = ga102_gr_zbc_clear_color, + .clear_depth = gp100_gr_zbc_clear_depth, + .stencil_get = gp102_gr_zbc_stencil_get, + .clear_stencil = gp102_gr_zbc_clear_stencil, +}; + +static void +ga102_gr_gpccs_reset(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x41a610, 0x00000000); + nvkm_msec(device, 1, NVKM_DELAY); + nvkm_wr32(device, 0x41a610, 0x00000001); +} + +static const struct nvkm_acr_lsf_func +ga102_gr_gpccs_acr = { + .flags = NVKM_ACR_LSF_FORCE_PRIV_LOAD, + .bl_entry = 0x3400, + .bld_size = sizeof(struct flcn_bl_dmem_desc_v2), + .bld_write = gp108_gr_acr_bld_write, + .bld_patch = gp108_gr_acr_bld_patch, +}; + +static void +ga102_gr_fecs_reset(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409614, 0x00000010); + nvkm_wr32(device, 0x41a614, 0x00000020); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_wr32(device, 0x409614, 0x00000110); + nvkm_wr32(device, 0x41a614, 0x00000a20); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_rd32(device, 0x409614); + nvkm_rd32(device, 0x41a614); +} + +static const struct nvkm_acr_lsf_func +ga102_gr_fecs_acr = { + .bl_entry = 0x7e00, + .bld_size = sizeof(struct flcn_bl_dmem_desc_v2), + .bld_write = gp108_gr_acr_bld_write, + .bld_patch = gp108_gr_acr_bld_patch, +}; + +static void +ga102_gr_init_rop_exceptions(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + 
+ nvkm_wr32(device, 0x41bcbc, 0x40000000); + nvkm_wr32(device, 0x41bc38, 0x40000000); + nvkm_wr32(device, 0x41ac94, nvkm_rd32(device, 0x502c94)); +} + +static void +ga102_gr_init_40a790(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x40a790, 0xc0000000); +} + +static void +ga102_gr_init_gpc_mmu(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x418880, nvkm_rd32(device, 0x100c80) & 0xf8001fff); + nvkm_wr32(device, 0x418894, 0x00000000); + + nvkm_wr32(device, 0x4188b4, nvkm_rd32(device, 0x100cc8)); + nvkm_wr32(device, 0x4188b8, nvkm_rd32(device, 0x100ccc)); + nvkm_wr32(device, 0x4188b0, nvkm_rd32(device, 0x100cc4)); +} + +static struct nvkm_intr * +ga102_gr_oneinit_intr(struct gf100_gr *gr, enum nvkm_intr_type *pvector) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + *pvector = nvkm_rd32(device, 0x400154) & 0x00000fff; + return &device->vfn->intr; +} + +static const struct gf100_gr_func +ga102_gr = { + .oneinit_intr = ga102_gr_oneinit_intr, + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gv100_gr_oneinit_sm_id, + .init = gf100_gr_init, + .init_419bd8 = gv100_gr_init_419bd8, + .init_gpc_mmu = ga102_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = tu102_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask, + .init_fs = tu102_gr_init_fs, + .init_fecs_exceptions = tu102_gr_init_fecs_exceptions, + .init_40a790 = ga102_gr_init_40a790, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_504430 = gv100_gr_init_504430, + .init_shader_exceptions = gv100_gr_init_shader_exceptions, + .init_rop_exceptions = ga102_gr_init_rop_exceptions, + .init_4188a4 = gv100_gr_init_4188a4, + .trap_mp = gv100_gr_trap_mp, + .fecs.reset = ga102_gr_fecs_reset, + .gpccs.reset = ga102_gr_gpccs_reset, + .rops = gm200_gr_rops, + .gpc_nr = 7, + .tpc_nr = 6, + .ppc_nr = 3, + .grctx = &ga102_grctx, + .zbc = &ga102_gr_zbc, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, AMPERE_B, &gf100_fermi }, + { -1, -1, AMPERE_COMPUTE_B }, + {} + } +}; + +MODULE_FIRMWARE("nvidia/ga102/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga102/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga102/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga102/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga102/gr/NET_img.bin"); + +MODULE_FIRMWARE("nvidia/ga103/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga103/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga103/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga103/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga103/gr/NET_img.bin"); + +MODULE_FIRMWARE("nvidia/ga104/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga104/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga104/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga104/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga104/gr/NET_img.bin"); + +MODULE_FIRMWARE("nvidia/ga106/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga106/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga106/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga106/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga106/gr/NET_img.bin"); + +MODULE_FIRMWARE("nvidia/ga107/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga107/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga107/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga107/gr/gpccs_sig.bin"); 
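/* The MODULE_FIRMWARE() lines above advertise one file set per chipset; at
 * runtime the firmware loader composes the path from the chip name, so
 * ga102_gr_load()'s request for "gr/NET_img" resolves to e.g.
 * "nvidia/ga102/gr/NET_img.bin".  A rough stand-alone sketch of that naming
 * convention, inferred from the paths listed here (the exact format string
 * used by nvkm/core/firmware.c is an assumption):
 */
#include <stdio.h>

static int
fw_path(char *buf, size_t len, const char *chip, const char *name)
{
	/* "nvidia/<chipset>/<name>.bin", e.g. chip = "ga102", name = "gr/NET_img" */
	int n = snprintf(buf, len, "nvidia/%s/%s.bin", chip, name);

	return (n < 0 || (size_t)n >= len) ? -1 : 0;
}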
+MODULE_FIRMWARE("nvidia/ga107/gr/NET_img.bin"); + +struct netlist_region { + u32 region_id; + u32 data_size; + u32 data_offset; +}; + +struct netlist_image_header { + u32 version; + u32 regions; +}; + +struct netlist_image { + struct netlist_image_header header; + struct netlist_region regions[]; +}; + +struct netlist_av64 { + u32 addr; + u32 data_hi; + u32 data_lo; +}; + +static int +ga102_gr_av64_to_init(struct nvkm_blob *blob, struct gf100_gr_pack **ppack) +{ + struct gf100_gr_init *init; + struct gf100_gr_pack *pack; + int nent; + int i; + + nent = (blob->size / sizeof(struct netlist_av64)); + pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1))); + if (!pack) + return -ENOMEM; + + init = (void *)(pack + 2); + pack[0].init = init; + pack[0].type = 64; + + for (i = 0; i < nent; i++) { + struct gf100_gr_init *ent = &init[i]; + struct netlist_av64 *av = &((struct netlist_av64 *)blob->data)[i]; + + ent->addr = av->addr; + ent->data = ((u64)av->data_hi << 32) | av->data_lo; + ent->count = 1; + ent->pitch = 1; + } + + *ppack = pack; + return 0; +} + +static int +ga102_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + const struct firmware *fw; + const struct netlist_image *net; + const struct netlist_region *fecs_inst = NULL; + const struct netlist_region *fecs_data = NULL; + const struct netlist_region *gpccs_inst = NULL; + const struct netlist_region *gpccs_data = NULL; + int ret, i; + + ret = nvkm_firmware_get(subdev, "gr/NET_img", 0, &fw); + if (ret) + return ret; + + net = (const void *)fw->data; + nvkm_debug(subdev, "netlist version %d, %d regions\n", + net->header.version, net->header.regions); + + for (i = 0; i < net->header.regions; i++) { + const struct netlist_region *reg = &net->regions[i]; + struct nvkm_blob blob = { + .data = (void *)fw->data + reg->data_offset, + .size = reg->data_size, + }; + + nvkm_debug(subdev, "\t%2d: %08x %08x\n", + reg->region_id, reg->data_offset, reg->data_size); + + switch (reg->region_id) { + case 0: fecs_data = reg; break; + case 1: fecs_inst = reg; break; + case 2: gpccs_data = reg; break; + case 3: gpccs_inst = reg; break; + case 4: gk20a_gr_av_to_init(&blob, &gr->bundle); break; + case 5: gk20a_gr_aiv_to_init(&blob, &gr->sw_ctx); break; + case 7: gk20a_gr_av_to_method(&blob, &gr->method); break; + case 28: tu102_gr_av_to_init_veid(&blob, &gr->bundle_veid); break; + case 34: ga102_gr_av64_to_init(&blob, &gr->bundle64); break; + case 48: gk20a_gr_av_to_init(&blob, &gr->sw_nonctx1); break; + case 49: gk20a_gr_av_to_init(&blob, &gr->sw_nonctx2); break; + case 50: gk20a_gr_av_to_init(&blob, &gr->sw_nonctx3); break; + case 51: gk20a_gr_av_to_init(&blob, &gr->sw_nonctx4); break; + default: + break; + } + } + + ret = nvkm_acr_lsfw_load_bl_sig_net(subdev, &gr->fecs.falcon, NVKM_ACR_LSF_FECS, + "gr/fecs_", ver, fwif->fecs, + fw->data + fecs_inst->data_offset, + fecs_inst->data_size, + fw->data + fecs_data->data_offset, + fecs_data->data_size); + if (ret) + return ret; + + ret = nvkm_acr_lsfw_load_bl_sig_net(subdev, &gr->gpccs.falcon, NVKM_ACR_LSF_GPCCS, + "gr/gpccs_", ver, fwif->gpccs, + fw->data + gpccs_inst->data_offset, + gpccs_inst->data_size, + fw->data + gpccs_data->data_offset, + gpccs_data->data_size); + if (ret) + return ret; + + gr->firmware = true; + + nvkm_firmware_put(fw); + return 0; +} + +static const struct gf100_gr_fwif +ga102_gr_fwif[] = { + { 0, ga102_gr_load, &ga102_gr, &ga102_gr_fecs_acr, &ga102_gr_gpccs_acr }, + { -1, gm200_gr_nofw }, + {} 
+}; + +int +ga102_gr_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(ga102_gr_fwif, device, type, inst, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index f16eabf4f642..5f20079c3660 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -67,7 +67,7 @@ gf100_gr_zbc_color_get(struct gf100_gr *gr, int format, struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; int zbc = -ENOSPC, i; - for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { + for (i = ltc->zbc_color_min; i <= ltc->zbc_color_max; i++) { if (gr->zbc_color[i].format) { if (gr->zbc_color[i].format != format) continue; @@ -114,7 +114,7 @@ gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format, struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; int zbc = -ENOSPC, i; - for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { + for (i = ltc->zbc_depth_min; i <= ltc->zbc_depth_max; i++) { if (gr->zbc_depth[i].format) { if (gr->zbc_depth[i].format != format) continue; @@ -355,15 +355,14 @@ static void * gf100_gr_chan_dtor(struct nvkm_object *object) { struct gf100_gr_chan *chan = gf100_gr_chan(object); - int i; - - for (i = 0; i < ARRAY_SIZE(chan->data); i++) { - nvkm_vmm_put(chan->vmm, &chan->data[i].vma); - nvkm_memory_unref(&chan->data[i].mem); - } nvkm_vmm_put(chan->vmm, &chan->mmio_vma); nvkm_memory_unref(&chan->mmio); + + nvkm_vmm_put(chan->vmm, &chan->attrib_cb); + nvkm_vmm_put(chan->vmm, &chan->unknown); + nvkm_vmm_put(chan->vmm, &chan->bundle_cb); + nvkm_vmm_put(chan->vmm, &chan->pagepool); nvkm_vmm_unref(&chan->vmm); return chan; } @@ -380,12 +379,10 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, struct nvkm_object **pobject) { struct gf100_gr *gr = gf100_gr(base); - struct gf100_gr_data *data = gr->mmio_data; - struct gf100_gr_mmio *mmio = gr->mmio_list; struct gf100_gr_chan *chan; struct gf100_vmm_map_v0 args = { .priv = 1 }; struct nvkm_device *device = gr->base.engine.subdev.device; - int ret, i; + int ret; if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) return -ENOMEM; @@ -394,63 +391,91 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, chan->vmm = nvkm_vmm_ref(fifoch->vmm); *pobject = &chan->object; - /* allocate memory for a "mmio list" buffer that's used by the HUB - * fuc to modify some per-context register settings on first load - * of the context. - */ - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100, - false, &chan->mmio); + /* Map pagepool. */ + ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->pagepool), &chan->pagepool); if (ret) return ret; - ret = nvkm_vmm_get(fifoch->vmm, 12, 0x1000, &chan->mmio_vma); + ret = nvkm_memory_map(gr->pagepool, 0, chan->vmm, chan->pagepool, &args, sizeof(args)); if (ret) return ret; - ret = nvkm_memory_map(chan->mmio, 0, fifoch->vmm, - chan->mmio_vma, &args, sizeof(args)); + /* Map bundle circular buffer. */ + ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->bundle_cb), &chan->bundle_cb); + if (ret) + return ret; + + ret = nvkm_memory_map(gr->bundle_cb, 0, chan->vmm, chan->bundle_cb, &args, sizeof(args)); + if (ret) + return ret; + + /* Map attribute circular buffer. 
*/ + ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->attrib_cb), &chan->attrib_cb); if (ret) return ret; - /* allocate buffers referenced by mmio list */ - for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) { - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - data->size, data->align, false, - &chan->data[i].mem); + if (device->card_type < GP100) { + ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb, NULL, 0); if (ret) return ret; - - ret = nvkm_vmm_get(fifoch->vmm, 12, - nvkm_memory_size(chan->data[i].mem), - &chan->data[i].vma); + } else { + ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb, + &args, sizeof(args)); if (ret) return ret; + } - args.priv = data->priv; + /* Map some context buffer of unknown purpose. */ + if (gr->func->grctx->unknown_size) { + ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->unknown), &chan->unknown); + if (ret) + return ret; - ret = nvkm_memory_map(chan->data[i].mem, 0, chan->vmm, - chan->data[i].vma, &args, sizeof(args)); + ret = nvkm_memory_map(gr->unknown, 0, chan->vmm, chan->unknown, + &args, sizeof(args)); if (ret) return ret; + } - data++; + /* Generate golden context image. */ + mutex_lock(&gr->fecs.mutex); + if (gr->data == NULL) { + ret = gf100_grctx_generate(gr, chan, fifoch->inst); + if (ret) { + nvkm_error(&base->engine.subdev, "failed to construct context\n"); + mutex_unlock(&gr->fecs.mutex); + return ret; + } } + mutex_unlock(&gr->fecs.mutex); - /* finally, fill in the mmio list and point the context at it */ - nvkm_kmap(chan->mmio); - for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) { - u32 addr = mmio->addr; - u32 data = mmio->data; + /* allocate memory for a "mmio list" buffer that's used by the HUB + * fuc to modify some per-context register settings on first load + * of the context.
+ */ + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100, + false, &chan->mmio); + if (ret) + return ret; - if (mmio->buffer >= 0) { - u64 info = chan->data[mmio->buffer].vma->addr; - data |= info >> mmio->shift; - } + ret = nvkm_vmm_get(fifoch->vmm, 12, 0x1000, &chan->mmio_vma); + if (ret) + return ret; - nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr); - nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data); - mmio++; - } + ret = nvkm_memory_map(chan->mmio, 0, fifoch->vmm, + chan->mmio_vma, &args, sizeof(args)); + if (ret) + return ret; + + /* finally, fill in the mmio list and point the context at it */ + nvkm_kmap(chan->mmio); + gr->func->grctx->pagepool(chan, chan->pagepool->addr); + gr->func->grctx->bundle(chan, chan->bundle_cb->addr, gr->func->grctx->bundle_size); + gr->func->grctx->attrib_cb(chan, chan->attrib_cb->addr, gr->func->grctx->attrib_cb_size(gr)); + gr->func->grctx->attrib(chan); + if (gr->func->grctx->patch_ltc) + gr->func->grctx->patch_ltc(chan); + if (gr->func->grctx->unknown_size) + gr->func->grctx->unknown(chan, chan->unknown->addr, gr->func->grctx->unknown_size); nvkm_done(chan->mmio); return 0; } @@ -727,7 +752,7 @@ gf100_gr_fecs_ctrl_ctxsw(struct gf100_gr *gr, u32 mthd) struct nvkm_device *device = gr->base.engine.subdev.device; nvkm_wr32(device, 0x409804, 0xffffffff); - nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409800, 0x00000000); nvkm_wr32(device, 0x409500, 0xffffffff); nvkm_wr32(device, 0x409504, mthd); nvkm_msec(device, 2000, @@ -771,12 +796,45 @@ gf100_gr_fecs_stop_ctxsw(struct nvkm_gr *base) return ret; } +static int +gf100_gr_fecs_halt_pipeline(struct gf100_gr *gr) +{ + int ret = 0; + + if (gr->firmware) { + mutex_lock(&gr->fecs.mutex); + ret = gf100_gr_fecs_ctrl_ctxsw(gr, 0x04); + mutex_unlock(&gr->fecs.mutex); + } + + return ret; +} + +int +gf100_gr_fecs_wfi_golden_save(struct gf100_gr *gr, u32 inst) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_mask(device, 0x409800, 0x00000003, 0x00000000); + nvkm_wr32(device, 0x409500, inst); + nvkm_wr32(device, 0x409504, 0x00000009); + nvkm_msec(device, 2000, + u32 stat = nvkm_rd32(device, 0x409800); + if (stat & 0x00000002) + return -EIO; + if (stat & 0x00000001) + return 0; + ); + + return -ETIMEDOUT; +} + int gf100_gr_fecs_bind_pointer(struct gf100_gr *gr, u32 inst) { struct nvkm_device *device = gr->base.engine.subdev.device; - nvkm_wr32(device, 0x409840, 0x00000030); + nvkm_mask(device, 0x409800, 0x00000030, 0x00000000); nvkm_wr32(device, 0x409500, inst); nvkm_wr32(device, 0x409504, 0x00000003); nvkm_msec(device, 2000, @@ -867,7 +925,7 @@ gf100_gr_fecs_discover_pm_image_size(struct gf100_gr *gr, u32 *psize) { struct nvkm_device *device = gr->base.engine.subdev.device; - nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409800, 0x00000000); nvkm_wr32(device, 0x409500, 0x00000000); nvkm_wr32(device, 0x409504, 0x00000025); nvkm_msec(device, 2000, @@ -883,7 +941,7 @@ gf100_gr_fecs_discover_zcull_image_size(struct gf100_gr *gr, u32 *psize) { struct nvkm_device *device = gr->base.engine.subdev.device; - nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409800, 0x00000000); nvkm_wr32(device, 0x409500, 0x00000000); nvkm_wr32(device, 0x409504, 0x00000016); nvkm_msec(device, 2000, @@ -899,7 +957,7 @@ gf100_gr_fecs_discover_image_size(struct gf100_gr *gr, u32 *psize) { struct nvkm_device *device = gr->base.engine.subdev.device; - nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409800, 0x00000000); nvkm_wr32(device, 
0x409500, 0x00000000); nvkm_wr32(device, 0x409504, 0x00000010); nvkm_msec(device, 2000, @@ -915,7 +973,7 @@ gf100_gr_fecs_set_watchdog_timeout(struct gf100_gr *gr, u32 timeout) { struct nvkm_device *device = gr->base.engine.subdev.device; - nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409800, 0x00000000); nvkm_wr32(device, 0x409500, timeout); nvkm_wr32(device, 0x409504, 0x00000021); } @@ -955,7 +1013,7 @@ gf100_gr_zbc_init(struct gf100_gr *gr) const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; - int index, c = ltc->zbc_min, d = ltc->zbc_min, s = ltc->zbc_min; + int index, c = ltc->zbc_color_min, d = ltc->zbc_depth_min, s = ltc->zbc_depth_min; if (!gr->zbc_color[0].format) { gf100_gr_zbc_color_get(gr, 1, & zero[0], &zero[4]); c++; @@ -971,13 +1029,13 @@ gf100_gr_zbc_init(struct gf100_gr *gr) } } - for (index = c; index <= ltc->zbc_max; index++) + for (index = c; index <= ltc->zbc_color_max; index++) gr->func->zbc->clear_color(gr, index); - for (index = d; index <= ltc->zbc_max; index++) + for (index = d; index <= ltc->zbc_depth_max; index++) gr->func->zbc->clear_depth(gr, index); if (gr->func->zbc->clear_stencil) { - for (index = s; index <= ltc->zbc_max; index++) + for (index = s; index <= ltc->zbc_depth_max; index++) gr->func->zbc->clear_stencil(gr, index); } } @@ -1003,7 +1061,7 @@ gf100_gr_wait_idle(struct gf100_gr *gr) nvkm_rd32(device, 0x400700); gr_enabled = nvkm_rd32(device, 0x200) & 0x1000; - ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000; + ctxsw_active = nvkm_fifo_ctxsw_in_progress(&gr->base.engine); gr_busy = nvkm_rd32(device, 0x40060c) & 0x1; if (!gr_enabled || (!gr_busy && !ctxsw_active)) @@ -1039,7 +1097,7 @@ gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p) struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_gr_pack *pack; const struct gf100_gr_init *init; - u32 data = 0; + u64 data = 0; nvkm_wr32(device, 0x400208, 0x80000000); @@ -1049,6 +1107,8 @@ gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p) if ((pack == p && init == p->init) || data != init->data) { nvkm_wr32(device, 0x400204, init->data); + if (pack->type == 64) + nvkm_wr32(device, 0x40020c, upper_32_bits(init->data)); data = init->data; } @@ -1542,13 +1602,13 @@ gf100_gr_ctxctl_isr(struct gf100_gr *gr) } } -static void -gf100_gr_intr(struct nvkm_gr *base) +static irqreturn_t +gf100_gr_intr(struct nvkm_inth *inth) { - struct gf100_gr *gr = gf100_gr(base); + struct gf100_gr *gr = container_of(inth, typeof(*gr), base.engine.subdev.inth); struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; - struct nvkm_fifo_chan *chan; + struct nvkm_chan *chan; unsigned long flags; u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff; u32 stat = nvkm_rd32(device, 0x400100); @@ -1561,10 +1621,10 @@ gf100_gr_intr(struct nvkm_gr *base) const char *name = "unknown"; int chid = -1; - chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags); + chan = nvkm_chan_get_inst(&gr->base.engine, (u64)inst << 12, &flags); if (chan) { - name = chan->object.client->name; - chid = chan->chid; + name = chan->name; + chid = chan->id; } if (device->card_type < NV_E0 || subc < 4) @@ -1631,7 +1691,8 @@ gf100_gr_intr(struct nvkm_gr *base) } nvkm_wr32(device, 0x400500, 0x00010001); - nvkm_fifo_chan_put(device->fifo, flags, &chan); + nvkm_chan_put(&chan, flags); + return IRQ_HANDLED; } static void @@ 
-1721,7 +1782,7 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) nvkm_mc_unk260(device, 1); /* start both of them running */ - nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409800, 0x00000000); nvkm_wr32(device, 0x41a10c, 0x00000000); nvkm_wr32(device, 0x40910c, 0x00000000); @@ -1763,15 +1824,6 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) return ret; } - /* Generate golden context image. */ - if (gr->data == NULL) { - int ret = gf100_grctx_generate(gr); - if (ret) { - nvkm_error(subdev, "failed to construct context\n"); - return ret; - } - } - return 0; } @@ -1823,14 +1875,6 @@ gf100_gr_init_ctxctl_int(struct gf100_gr *gr) } gr->size = nvkm_rd32(device, 0x409804); - if (gr->data == NULL) { - int ret = gf100_grctx_generate(gr); - if (ret) { - nvkm_error(subdev, "failed to construct context\n"); - return ret; - } - } - return 0; } @@ -1847,10 +1891,11 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr) return ret; } -void +int gf100_gr_oneinit_sm_id(struct gf100_gr *gr) { int tpc, gpc; + for (tpc = 0; tpc < gr->tpc_max; tpc++) { for (gpc = 0; gpc < gr->gpc_nr; gpc++) { if (tpc < gr->tpc_nr[gpc]) { @@ -1860,6 +1905,8 @@ gf100_gr_oneinit_sm_id(struct gf100_gr *gr) } } } + + return 0; } void @@ -1944,7 +1991,17 @@ gf100_gr_oneinit(struct nvkm_gr *base) struct gf100_gr *gr = gf100_gr(base); struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; - int i, j; + struct nvkm_intr *intr = &device->mc->intr; + enum nvkm_intr_type intr_type = NVKM_INTR_SUBDEV; + int ret, i, j; + + if (gr->func->oneinit_intr) + intr = gr->func->oneinit_intr(gr, &intr_type); + + ret = nvkm_inth_add(intr, intr_type, NVKM_INTR_PRIO_NORMAL, &gr->base.engine.subdev, + gf100_gr_intr, &gr->base.engine.subdev.inth); + if (ret) + return ret; nvkm_pmu_pgob(device->pmu, false); @@ -1954,12 +2011,14 @@ gf100_gr_oneinit(struct nvkm_gr *base) gr->tpc_nr[i] = nvkm_rd32(device, GPC_UNIT(i, 0x2608)); gr->tpc_max = max(gr->tpc_max, gr->tpc_nr[i]); gr->tpc_total += gr->tpc_nr[i]; - gr->ppc_nr[i] = gr->func->ppc_nr; - for (j = 0; j < gr->ppc_nr[i]; j++) { + for (j = 0; j < gr->func->ppc_nr; j++) { gr->ppc_tpc_mask[i][j] = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4))); if (gr->ppc_tpc_mask[i][j] == 0) continue; + + gr->ppc_nr[i]++; + gr->ppc_mask[i] |= (1 << j); gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]); if (gr->ppc_tpc_min == 0 || @@ -1968,12 +2027,37 @@ gf100_gr_oneinit(struct nvkm_gr *base) if (gr->ppc_tpc_max < gr->ppc_tpc_nr[i][j]) gr->ppc_tpc_max = gr->ppc_tpc_nr[i][j]; } + + gr->ppc_total += gr->ppc_nr[i]; + } + + /* Allocate global context buffers. 
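+ *
+ * These buffers (pagepool, bundle CB, attrib CB, plus the optional
+ * "unknown" buffer) are now allocated once per engine and shared by
+ * every channel; gf100_gr_chan_new() above only maps them into each
+ * channel's VMM, roughly:
+ *
+ *	ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->pagepool), &chan->pagepool);
+ *	ret = nvkm_memory_map(gr->pagepool, 0, chan->vmm, chan->pagepool, &args, sizeof(args));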
*/ + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->pagepool_size, + 0x100, false, &gr->pagepool); + if (ret) + return ret; + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->bundle_size, + 0x100, false, &gr->bundle_cb); + if (ret) + return ret; + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->attrib_cb_size(gr), + 0x1000, false, &gr->attrib_cb); + if (ret) + return ret; + + if (gr->func->grctx->unknown_size) { + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->unknown_size, + 0x100, false, &gr->unknown); + if (ret) + return ret; } memset(gr->tile, 0xff, sizeof(gr->tile)); gr->func->oneinit_tiles(gr); - gr->func->oneinit_sm_id(gr); - return 0; + + return gr->func->oneinit_sm_id(gr); } static int @@ -1983,7 +2067,7 @@ gf100_gr_init_(struct nvkm_gr *base) struct nvkm_subdev *subdev = &base->engine.subdev; struct nvkm_device *device = subdev->device; bool reset = device->chipset == 0x137 || device->chipset == 0x138; - u32 ret; + int ret; /* On certain GP107/GP108 boards, we trigger a weird issue where * GR will stop responding to PRI accesses after we've asked the @@ -2019,7 +2103,12 @@ gf100_gr_init_(struct nvkm_gr *base) if (ret) return ret; - return gr->func->init(gr); + ret = gr->func->init(gr); + if (ret) + return ret; + + nvkm_inth_allow(&subdev->inth); + return 0; } static int @@ -2027,6 +2116,9 @@ gf100_gr_fini(struct nvkm_gr *base, bool suspend) { struct gf100_gr *gr = gf100_gr(base); struct nvkm_subdev *subdev = &gr->base.engine.subdev; + + nvkm_inth_block(&subdev->inth); + nvkm_falcon_put(&gr->gpccs.falcon, subdev); nvkm_falcon_put(&gr->fecs.falcon, subdev); return 0; @@ -2039,6 +2131,11 @@ gf100_gr_dtor(struct nvkm_gr *base) kfree(gr->data); + nvkm_memory_unref(&gr->unknown); + nvkm_memory_unref(&gr->attrib_cb); + nvkm_memory_unref(&gr->bundle_cb); + nvkm_memory_unref(&gr->pagepool); + nvkm_falcon_dtor(&gr->gpccs.falcon); nvkm_falcon_dtor(&gr->fecs.falcon); @@ -2047,81 +2144,27 @@ gf100_gr_dtor(struct nvkm_gr *base) nvkm_blob_dtor(&gr->gpccs.inst); nvkm_blob_dtor(&gr->gpccs.data); + vfree(gr->bundle64); + vfree(gr->bundle_veid); vfree(gr->bundle); vfree(gr->method); vfree(gr->sw_ctx); vfree(gr->sw_nonctx); + vfree(gr->sw_nonctx1); + vfree(gr->sw_nonctx2); + vfree(gr->sw_nonctx3); + vfree(gr->sw_nonctx4); return gr; } -static const struct nvkm_gr_func -gf100_gr_ = { - .dtor = gf100_gr_dtor, - .oneinit = gf100_gr_oneinit, - .init = gf100_gr_init_, - .fini = gf100_gr_fini, - .intr = gf100_gr_intr, - .units = gf100_gr_units, - .chan_new = gf100_gr_chan_new, - .object_get = gf100_gr_object_get, - .chsw_load = gf100_gr_chsw_load, - .ctxsw.pause = gf100_gr_fecs_stop_ctxsw, - .ctxsw.resume = gf100_gr_fecs_start_ctxsw, - .ctxsw.inst = gf100_gr_ctxsw_inst, -}; - static const struct nvkm_falcon_func gf100_gr_flcn = { - .fbif = 0x600, .load_imem = nvkm_falcon_v1_load_imem, .load_dmem = nvkm_falcon_v1_load_dmem, - .read_dmem = nvkm_falcon_v1_read_dmem, - .bind_context = nvkm_falcon_v1_bind_context, - .wait_for_halt = nvkm_falcon_v1_wait_for_halt, - .clear_interrupt = nvkm_falcon_v1_clear_interrupt, - .set_start_addr = nvkm_falcon_v1_set_start_addr, .start = nvkm_falcon_v1_start, - .enable = nvkm_falcon_v1_enable, - .disable = nvkm_falcon_v1_disable, }; -int -gf100_gr_new_(const struct gf100_gr_fwif *fwif, struct nvkm_device *device, - enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr) -{ - struct gf100_gr *gr; - int ret; - - if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL))) - return -ENOMEM; - *pgr = 
&gr->base; - - ret = nvkm_gr_ctor(&gf100_gr_, device, type, inst, true, &gr->base); - if (ret) - return ret; - - fwif = nvkm_firmware_load(&gr->base.engine.subdev, fwif, "Gr", gr); - if (IS_ERR(fwif)) - return PTR_ERR(fwif); - - gr->func = fwif->func; - - ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev, - "fecs", 0x409000, &gr->fecs.falcon); - if (ret) - return ret; - - mutex_init(&gr->fecs.mutex); - - ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev, - "gpccs", 0x41a000, &gr->gpccs.falcon); - if (ret) - return ret; - - return 0; -} - void gf100_gr_init_num_tpc_per_gpc(struct gf100_gr *gr, bool pd, bool ds) { @@ -2146,6 +2189,29 @@ gf100_gr_init_400054(struct gf100_gr *gr) } void +gf100_gr_init_exception2(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x40011c, 0xffffffff); + nvkm_wr32(device, 0x400134, 0xffffffff); +} + +void +gf100_gr_init_rop_exceptions(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int rop; + + for (rop = 0; rop < gr->rop_nr; rop++) { + nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); + nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); + } +} + +void gf100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -2252,21 +2318,47 @@ gf100_gr_init_vsc_stream_master(struct gf100_gr *gr) nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001); } +static int +gf100_gr_reset(struct nvkm_gr *base) +{ + struct nvkm_subdev *subdev = &base->engine.subdev; + struct nvkm_device *device = subdev->device; + struct gf100_gr *gr = gf100_gr(base); + + nvkm_mask(device, 0x400500, 0x00000001, 0x00000000); + + WARN_ON(gf100_gr_fecs_halt_pipeline(gr)); + + subdev->func->fini(subdev, false); + nvkm_mc_disable(device, subdev->type, subdev->inst); + if (gr->func->gpccs.reset) + gr->func->gpccs.reset(gr); + + nvkm_mc_enable(device, subdev->type, subdev->inst); + return subdev->func->init(subdev); +} + int gf100_gr_init(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - int gpc, tpc, rop; + int gpc, tpc; - if (gr->func->init_419bd8) - gr->func->init_419bd8(gr); + nvkm_mask(device, 0x400500, 0x00010001, 0x00000000); gr->func->init_gpc_mmu(gr); - if (gr->sw_nonctx) + if (gr->sw_nonctx1) { + gf100_gr_mmio(gr, gr->sw_nonctx1); + gf100_gr_mmio(gr, gr->sw_nonctx2); + gf100_gr_mmio(gr, gr->sw_nonctx3); + gf100_gr_mmio(gr, gr->sw_nonctx4); + } else + if (gr->sw_nonctx) { gf100_gr_mmio(gr, gr->sw_nonctx); - else + } else { gf100_gr_mmio(gr, gr->func->mmio); + } gf100_gr_wait_idle(gr); @@ -2298,6 +2390,10 @@ gf100_gr_init(struct gf100_gr *gr) nvkm_wr32(device, 0x400124, 0x00000002); gr->func->init_fecs_exceptions(gr); + + if (gr->func->init_40a790) + gr->func->init_40a790(gr); + if (gr->func->init_ds_hww_esr_2) gr->func->init_ds_hww_esr_2(gr); @@ -2346,19 +2442,14 @@ gf100_gr_init(struct gf100_gr *gr) nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); } - for (rop = 0; rop < gr->rop_nr; rop++) { - nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); - nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); - } + gr->func->init_rop_exceptions(gr); nvkm_wr32(device, 0x400108, 0xffffffff); nvkm_wr32(device, 0x400138, 0xffffffff); 
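/* The 0xffffffff writes to 0x4001xx around here appear to acknowledge
 * and unmask the remaining exception groups; the ROP and "exception2"
 * registers that used to be written inline simply moved behind
 * per-chip hooks, wired up in each func table as:
 *
 *	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
 *	.init_exception2 = gf100_gr_init_exception2,
 */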
nvkm_wr32(device, 0x400118, 0xffffffff); nvkm_wr32(device, 0x400130, 0xffffffff); - nvkm_wr32(device, 0x40011c, 0xffffffff); - nvkm_wr32(device, 0x400134, 0xffffffff); + if (gr->func->init_exception2) + gr->func->init_exception2(gr); if (gr->func->init_400054) gr->func->init_400054(gr); @@ -2371,6 +2462,18 @@ gf100_gr_init(struct gf100_gr *gr) return gf100_gr_init_ctxctl(gr); } +void +gf100_gr_fecs_reset(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409614, 0x00000070); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_mask(device, 0x409614, 0x00000700, 0x00000700); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_rd32(device, 0x409614); +} + #include "fuc/hubgf100.fuc3.h" struct gf100_gr_ucode @@ -2391,6 +2494,22 @@ gf100_gr_gpccs_ucode = { .data.size = sizeof(gf100_grgpc_data), }; +static const struct nvkm_gr_func +gf100_gr_ = { + .dtor = gf100_gr_dtor, + .oneinit = gf100_gr_oneinit, + .init = gf100_gr_init_, + .fini = gf100_gr_fini, + .reset = gf100_gr_reset, + .units = gf100_gr_units, + .chan_new = gf100_gr_chan_new, + .object_get = gf100_gr_object_get, + .chsw_load = gf100_gr_chsw_load, + .ctxsw.pause = gf100_gr_fecs_stop_ctxsw, + .ctxsw.resume = gf100_gr_fecs_start_ctxsw, + .ctxsw.inst = gf100_gr_ctxsw_inst, +}; + static const struct gf100_gr_func gf100_gr = { .oneinit_tiles = gf100_gr_oneinit_tiles, @@ -2406,10 +2525,13 @@ gf100_gr = { .init_419eb4 = gf100_gr_init_419eb4, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gf100_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf100_grctx, @@ -2483,6 +2605,42 @@ gf100_gr_fwif[] = { }; int +gf100_gr_new_(const struct gf100_gr_fwif *fwif, struct nvkm_device *device, + enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr) +{ + struct gf100_gr *gr; + int ret; + + if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL))) + return -ENOMEM; + *pgr = &gr->base; + + ret = nvkm_gr_ctor(&gf100_gr_, device, type, inst, true, &gr->base); + if (ret) + return ret; + + fwif = nvkm_firmware_load(&gr->base.engine.subdev, fwif, "Gr", gr); + if (IS_ERR(fwif)) + return PTR_ERR(fwif); + + gr->func = fwif->func; + + ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev, + "fecs", 0x409000, &gr->fecs.falcon); + if (ret) + return ret; + + mutex_init(&gr->fecs.mutex); + + ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev, + "gpccs", 0x41a000, &gr->gpccs.falcon); + if (ret) + return ret; + + return 0; +} + +int gf100_gr_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr) { return gf100_gr_new_(gf100_gr_fwif, device, type, inst, pgr); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index c0038f906135..94ca7ac16acf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -44,19 +44,6 @@ struct nvkm_acr_lsfw; #define PPC_UNIT(t, m, r) (0x503000 + (t) * 0x8000 + (m) * 0x200 + (r)) #define TPC_UNIT(t, m, r) (0x504000 + (t) * 0x8000 + (m) * 0x800 + (r)) -struct gf100_gr_data { - u32 size; - u32 align; - bool priv; -}; - -struct gf100_gr_mmio { - u32 addr; - u32 data; - u32 shift; - int buffer; -}; - struct 
gf100_gr_zbc_color { u32 format; u32 ds[4]; @@ -101,13 +88,19 @@ struct gf100_gr { * using hardcoded arrays. To be allocated with vzalloc(). */ struct gf100_gr_pack *sw_nonctx; + struct gf100_gr_pack *sw_nonctx1; + struct gf100_gr_pack *sw_nonctx2; + struct gf100_gr_pack *sw_nonctx3; + struct gf100_gr_pack *sw_nonctx4; struct gf100_gr_pack *sw_ctx; struct gf100_gr_pack *bundle; + struct gf100_gr_pack *bundle_veid; + struct gf100_gr_pack *bundle64; struct gf100_gr_pack *method; - struct gf100_gr_zbc_color zbc_color[NVKM_LTC_MAX_ZBC_CNT]; - struct gf100_gr_zbc_depth zbc_depth[NVKM_LTC_MAX_ZBC_CNT]; - struct gf100_gr_zbc_stencil zbc_stencil[NVKM_LTC_MAX_ZBC_CNT]; + struct gf100_gr_zbc_color zbc_color[NVKM_LTC_MAX_ZBC_COLOR_CNT]; + struct gf100_gr_zbc_depth zbc_depth[NVKM_LTC_MAX_ZBC_DEPTH_CNT]; + struct gf100_gr_zbc_stencil zbc_stencil[NVKM_LTC_MAX_ZBC_DEPTH_CNT]; u8 rop_nr; u8 gpc_nr; @@ -120,6 +113,12 @@ struct gf100_gr { u8 ppc_tpc_nr[GPC_MAX][4]; u8 ppc_tpc_min; u8 ppc_tpc_max; + u8 ppc_total; + + struct nvkm_memory *pagepool; + struct nvkm_memory *bundle_cb; + struct nvkm_memory *attrib_cb; + struct nvkm_memory *unknown; u8 screen_tile_row_offset; u8 tile[TPC_MAX]; @@ -130,8 +129,6 @@ struct gf100_gr { } sm[TPC_MAX]; u8 sm_nr; - struct gf100_gr_data mmio_data[4]; - struct gf100_gr_mmio mmio_list[4096/8]; u32 size; u32 *data; u32 size_zcull; @@ -139,6 +136,7 @@ struct gf100_gr { }; int gf100_gr_fecs_bind_pointer(struct gf100_gr *, u32 inst); +int gf100_gr_fecs_wfi_golden_save(struct gf100_gr *, u32 inst); struct gf100_gr_func_zbc { void (*clear_color)(struct gf100_gr *, int zbc); @@ -149,8 +147,9 @@ struct gf100_gr_func_zbc { }; struct gf100_gr_func { + struct nvkm_intr *(*oneinit_intr)(struct gf100_gr *, enum nvkm_intr_type *); void (*oneinit_tiles)(struct gf100_gr *); - void (*oneinit_sm_id)(struct gf100_gr *); + int (*oneinit_sm_id)(struct gf100_gr *); int (*init)(struct gf100_gr *); void (*init_419bd8)(struct gf100_gr *); void (*init_gpc_mmu)(struct gf100_gr *); @@ -164,6 +163,7 @@ struct gf100_gr_func { void (*init_swdx_pes_mask)(struct gf100_gr *); void (*init_fs)(struct gf100_gr *); void (*init_fecs_exceptions)(struct gf100_gr *); + void (*init_40a790)(struct gf100_gr *); void (*init_ds_hww_esr_2)(struct gf100_gr *); void (*init_40601c)(struct gf100_gr *); void (*init_sked_hww_esr)(struct gf100_gr *); @@ -174,6 +174,8 @@ struct gf100_gr_func { void (*init_tex_hww_esr)(struct gf100_gr *, int gpc, int tpc); void (*init_504430)(struct gf100_gr *, int gpc, int tpc); void (*init_shader_exceptions)(struct gf100_gr *, int gpc, int tpc); + void (*init_rop_exceptions)(struct gf100_gr *); + void (*init_exception2)(struct gf100_gr *); void (*init_400054)(struct gf100_gr *); void (*init_4188a4)(struct gf100_gr *); void (*trap_mp)(struct gf100_gr *, int gpc, int tpc); @@ -181,9 +183,11 @@ struct gf100_gr_func { const struct gf100_gr_pack *mmio; struct { struct gf100_gr_ucode *ucode; + void (*reset)(struct gf100_gr *); } fecs; struct { struct gf100_gr_ucode *ucode; + void (*reset)(struct gf100_gr *); } gpccs; int (*rops)(struct gf100_gr *); int gpc_nr; @@ -197,7 +201,7 @@ struct gf100_gr_func { int gf100_gr_rops(struct gf100_gr *); void gf100_gr_oneinit_tiles(struct gf100_gr *); -void gf100_gr_oneinit_sm_id(struct gf100_gr *); +int gf100_gr_oneinit_sm_id(struct gf100_gr *); int gf100_gr_init(struct gf100_gr *); void gf100_gr_init_vsc_stream_master(struct gf100_gr *); void gf100_gr_init_zcull(struct gf100_gr *); @@ -208,9 +212,12 @@ void gf100_gr_init_419cc0(struct gf100_gr *); void 
gf100_gr_init_419eb4(struct gf100_gr *); void gf100_gr_init_tex_hww_esr(struct gf100_gr *, int, int); void gf100_gr_init_shader_exceptions(struct gf100_gr *, int, int); +void gf100_gr_init_rop_exceptions(struct gf100_gr *); +void gf100_gr_init_exception2(struct gf100_gr *); void gf100_gr_init_400054(struct gf100_gr *); void gf100_gr_init_num_tpc_per_gpc(struct gf100_gr *, bool, bool); extern const struct gf100_gr_func_zbc gf100_gr_zbc; +void gf100_gr_fecs_reset(struct gf100_gr *); void gf117_gr_init_zcull(struct gf100_gr *); @@ -226,9 +233,13 @@ void gm107_gr_init_shader_exceptions(struct gf100_gr *, int, int); void gm107_gr_init_400054(struct gf100_gr *); int gk20a_gr_init(struct gf100_gr *); +int gk20a_gr_av_to_init_(struct nvkm_blob *, u8 count, u32 pitch, struct gf100_gr_pack **); +int gk20a_gr_av_to_init(struct nvkm_blob *, struct gf100_gr_pack **); +int gk20a_gr_aiv_to_init(struct nvkm_blob *, struct gf100_gr_pack **); +int gk20a_gr_av_to_method(struct nvkm_blob *, struct gf100_gr_pack **); void gm200_gr_oneinit_tiles(struct gf100_gr *); -void gm200_gr_oneinit_sm_id(struct gf100_gr *); +int gm200_gr_oneinit_sm_id(struct gf100_gr *); int gm200_gr_rops(struct gf100_gr *); void gm200_gr_init_num_active_ltcs(struct gf100_gr *); void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *); @@ -242,14 +253,24 @@ extern const struct gf100_gr_func_zbc gp100_gr_zbc; void gp102_gr_init_swdx_pes_mask(struct gf100_gr *); extern const struct gf100_gr_func_zbc gp102_gr_zbc; +int gp102_gr_zbc_stencil_get(struct gf100_gr *, int, const u32, const u32); +void gp102_gr_zbc_clear_stencil(struct gf100_gr *, int); extern const struct gf100_gr_func gp107_gr; +int gv100_gr_oneinit_sm_id(struct gf100_gr *); +u32 gv100_gr_nonpes_aware_tpc(struct gf100_gr *gr, u32 gpc, u32 tpc); void gv100_gr_init_419bd8(struct gf100_gr *); void gv100_gr_init_504430(struct gf100_gr *, int, int); void gv100_gr_init_shader_exceptions(struct gf100_gr *, int, int); +void gv100_gr_init_4188a4(struct gf100_gr *); void gv100_gr_trap_mp(struct gf100_gr *, int, int); +int tu102_gr_av_to_init_veid(struct nvkm_blob *, struct gf100_gr_pack **); +void tu102_gr_init_zcull(struct gf100_gr *); +void tu102_gr_init_fs(struct gf100_gr *); +void tu102_gr_init_fecs_exceptions(struct gf100_gr *); + #define gf100_gr_chan(p) container_of((p), struct gf100_gr_chan, object) #include <core/object.h> @@ -258,14 +279,14 @@ struct gf100_gr_chan { struct gf100_gr *gr; struct nvkm_vmm *vmm; + struct nvkm_vma *pagepool; + struct nvkm_vma *bundle_cb; + struct nvkm_vma *attrib_cb; + struct nvkm_vma *unknown; + struct nvkm_memory *mmio; struct nvkm_vma *mmio_vma; int mmio_nr; - - struct { - struct nvkm_memory *mem; - struct nvkm_vma *vma; - } data[4]; }; void gf100_gr_ctxctl_debug(struct gf100_gr *); @@ -279,7 +300,7 @@ struct gf100_gr_init { u32 addr; u8 count; u32 pitch; - u32 data; + u64 data; }; struct gf100_gr_pack { @@ -403,6 +424,9 @@ int gf100_gr_load(struct gf100_gr *, int, const struct gf100_gr_fwif *); int gf100_gr_nofw(struct gf100_gr *, int, const struct gf100_gr_fwif *); int gk20a_gr_load_sw(struct gf100_gr *, const char *path, int ver); +int gk20a_gr_load_net(struct gf100_gr *, const char *, const char *, int, + int (*)(struct nvkm_blob *, struct gf100_gr_pack **), + struct gf100_gr_pack **); int gm200_gr_nofw(struct gf100_gr *, int, const struct gf100_gr_fwif *); int gm200_gr_load(struct gf100_gr *, int, const struct gf100_gr_fwif *); @@ -415,6 +439,8 @@ void gm20b_gr_acr_bld_patch(struct nvkm_acr *, u32, s64); extern const struct nvkm_acr_lsf_func 
gp108_gr_gpccs_acr; extern const struct nvkm_acr_lsf_func gp108_gr_fecs_acr; +void gp108_gr_acr_bld_write(struct nvkm_acr *, u32, struct nvkm_acr_lsfw *); +void gp108_gr_acr_bld_patch(struct nvkm_acr *, u32, s64); int gf100_gr_new_(const struct gf100_gr_fwif *, struct nvkm_device *, enum nvkm_subdev_type, int, struct nvkm_gr **); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c index 3acd99c306f2..63bd29c22fe1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c @@ -127,10 +127,13 @@ gf104_gr = { .init_419eb4 = gf100_gr_init_419eb4, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gf104_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf104_grctx, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c index ab3760e804b8..495a844f925f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c @@ -125,10 +125,13 @@ gf108_gr = { .init_419eb4 = gf100_gr_init_419eb4, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gf108_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf108_grctx, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c index 616e2def1865..70fad235d161 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c @@ -99,10 +99,13 @@ gf110_gr = { .init_419eb4 = gf100_gr_init_419eb4, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gf110_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf110_grctx, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c index 669e7536970e..f12728248048 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c @@ -125,7 +125,9 @@ gf117_gr_init_zcull(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - const u8 tile_nr = ALIGN(gr->tpc_total, 32); + /*TODO: fill in litter vals for gf117-gm2xx */ + const u8 tile_nr = !gr->func->gpc_nr ? 
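/* Until those litter values are filled in, funcs that leave gpc_nr
 * at zero keep the old conservative upper bound - e.g. 8 TPCs total
 * gives ALIGN(8, 32) = 32 tiles - while funcs that declare
 * gpc_nr/tpc_nr get the exact gpc_nr * tpc_nr product: */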
ALIGN(gr->tpc_total, 32) : + (gr->func->gpc_nr * gr->func->tpc_nr); u8 bank[GPC_MAX] = {}, gpc, i, j; u32 data; @@ -163,10 +165,13 @@ gf117_gr = { .init_419eb4 = gf100_gr_init_419eb4, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gf117_gr_pack_mmio, .fecs.ucode = &gf117_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gf117_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 1, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c index 5b09bda8110c..75ceb514c06e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c @@ -190,10 +190,13 @@ gf119_gr = { .init_419eb4 = gf100_gr_init_419eb4, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gf119_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf119_grctx, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c index b680eaa0f350..e53ade24ad23 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c @@ -418,7 +418,7 @@ gk104_gr_init_ppc_exceptions(struct gf100_gr *gr) int gpc, ppc; for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) { if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; nvkm_wr32(device, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000); @@ -470,10 +470,13 @@ gk104_gr = { .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gk104_gr_pack_mmio, .fecs.ucode = &gk104_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gk104_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 1, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c index 103e06a77e65..c7e1c5dbc6a9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c @@ -366,10 +366,13 @@ gk110_gr = { .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gk110_gr_pack_mmio, .fecs.ucode = &gk110_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gk110_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 2, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c index 034d0b11a17d..458abae571bf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c +++ 
b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c @@ -118,10 +118,13 @@ gk110b_gr = { .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gk110b_gr_pack_mmio, .fecs.ucode = &gk110_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gk110_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 2, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c index 116d682f9f96..d3f6b65c21d2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c @@ -176,10 +176,13 @@ gk208_gr = { .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gk208_gr_pack_mmio, .fecs.ucode = &gk208_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gk208_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 1, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c index be0b2cefd8e8..035ea213f543 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c @@ -33,47 +33,40 @@ struct gk20a_fw_av u32 data; }; -static int -gk20a_gr_av_to_init(struct gf100_gr *gr, const char *path, const char *name, - int ver, struct gf100_gr_pack **ppack) +int +gk20a_gr_av_to_init_(struct nvkm_blob *blob, u8 count, u32 pitch, struct gf100_gr_pack **ppack) { - struct nvkm_subdev *subdev = &gr->base.engine.subdev; - struct nvkm_blob blob; struct gf100_gr_init *init; struct gf100_gr_pack *pack; int nent; - int ret; int i; - ret = nvkm_firmware_load_blob(subdev, path, name, ver, &blob); - if (ret) - return ret; - - nent = (blob.size / sizeof(struct gk20a_fw_av)); + nent = (blob->size / sizeof(struct gk20a_fw_av)); pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1))); - if (!pack) { - ret = -ENOMEM; - goto end; - } + if (!pack) + return -ENOMEM; init = (void *)(pack + 2); pack[0].init = init; for (i = 0; i < nent; i++) { struct gf100_gr_init *ent = &init[i]; - struct gk20a_fw_av *av = &((struct gk20a_fw_av *)blob.data)[i]; + struct gk20a_fw_av *av = &((struct gk20a_fw_av *)blob->data)[i]; ent->addr = av->addr; ent->data = av->data; - ent->count = 1; - ent->pitch = 1; + ent->count = ((ent->addr & 0xffff) != 0xe100) ? 
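/* Callers pick count/pitch to match the list format: the plain AV
 * lists use (1, 1) via gk20a_gr_av_to_init(), while the Volta/Turing
 * VEID bundle lists pass (64, 0x00100000) via
 * tu102_gr_av_to_init_veid() in tu102.c, which reads as one write
 * per VEID at a 0x00100000 register stride; registers whose low
 * bits are 0xe100 are only written once: */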
count : 1; + ent->pitch = pitch; } *ppack = pack; + return 0; +} -end: - nvkm_blob_dtor(&blob); - return ret; +int +gk20a_gr_av_to_init(struct nvkm_blob *blob, struct gf100_gr_pack **ppack) +{ + return gk20a_gr_av_to_init_(blob, 1, 1, ppack); } struct gk20a_fw_aiv @@ -83,35 +76,25 @@ struct gk20a_fw_aiv u32 data; }; -static int -gk20a_gr_aiv_to_init(struct gf100_gr *gr, const char *path, const char *name, - int ver, struct gf100_gr_pack **ppack) +int +gk20a_gr_aiv_to_init(struct nvkm_blob *blob, struct gf100_gr_pack **ppack) { - struct nvkm_subdev *subdev = &gr->base.engine.subdev; - struct nvkm_blob blob; struct gf100_gr_init *init; struct gf100_gr_pack *pack; int nent; - int ret; int i; - ret = nvkm_firmware_load_blob(subdev, path, name, ver, &blob); - if (ret) - return ret; - - nent = (blob.size / sizeof(struct gk20a_fw_aiv)); + nent = (blob->size / sizeof(struct gk20a_fw_aiv)); pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1))); - if (!pack) { - ret = -ENOMEM; - goto end; - } + if (!pack) + return -ENOMEM; init = (void *)(pack + 2); pack[0].init = init; for (i = 0; i < nent; i++) { struct gf100_gr_init *ent = &init[i]; - struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)blob.data)[i]; + struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)blob->data)[i]; ent->addr = av->addr; ent->data = av->data; @@ -120,44 +103,30 @@ gk20a_gr_aiv_to_init(struct gf100_gr *gr, const char *path, const char *name, } *ppack = pack; - -end: - nvkm_blob_dtor(&blob); - return ret; + return 0; } -static int -gk20a_gr_av_to_method(struct gf100_gr *gr, const char *path, const char *name, - int ver, struct gf100_gr_pack **ppack) +int +gk20a_gr_av_to_method(struct nvkm_blob *blob, struct gf100_gr_pack **ppack) { - struct nvkm_subdev *subdev = &gr->base.engine.subdev; - struct nvkm_blob blob; struct gf100_gr_init *init; struct gf100_gr_pack *pack; /* We don't suppose we will initialize more than 16 classes here... 
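 *
 * Each gk20a_fw_av entry packs its target as ((mthd >> 2) << 16) | class,
 * so a hypothetical entry of 0x035ec597 decodes below (as 'class' and
 * 'addr') to method 0x0d78 on class 0xc597.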
*/ static const unsigned int max_classes = 16; u32 classidx = 0, prevclass = 0; int nent; - int ret; int i; - ret = nvkm_firmware_load_blob(subdev, path, name, ver, &blob); - if (ret) - return ret; - - nent = (blob.size / sizeof(struct gk20a_fw_av)); - + nent = (blob->size / sizeof(struct gk20a_fw_av)); pack = vzalloc((sizeof(*pack) * (max_classes + 1)) + (sizeof(*init) * (nent + max_classes + 1))); - if (!pack) { - ret = -ENOMEM; - goto end; - } + if (!pack) + return -ENOMEM; init = (void *)(pack + max_classes + 1); for (i = 0; i < nent; i++, init++) { - struct gk20a_fw_av *av = &((struct gk20a_fw_av *)blob.data)[i]; + struct gk20a_fw_av *av = &((struct gk20a_fw_av *)blob->data)[i]; u32 class = av->addr & 0xffff; u32 addr = (av->addr & 0xffff0000) >> 14; @@ -169,8 +138,7 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *path, const char *name, prevclass = class; if (++classidx >= max_classes) { vfree(pack); - ret = -ENOSPC; - goto end; + return -ENOSPC; } } @@ -181,10 +149,7 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *path, const char *name, } *ppack = pack; - -end: - nvkm_blob_dtor(&blob); - return ret; + return 0; } static int @@ -294,6 +259,7 @@ gk20a_gr = { .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, .trap_mp = gf100_gr_trap_mp, .set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask, + .fecs.reset = gf100_gr_fecs_reset, .rops = gf100_gr_rops, .ppc_nr = 1, .grctx = &gk20a_grctx, @@ -308,12 +274,29 @@ gk20a_gr = { }; int +gk20a_gr_load_net(struct gf100_gr *gr, const char *path, const char *name, int ver, + int (*load)(struct nvkm_blob *, struct gf100_gr_pack **), + struct gf100_gr_pack **ppack) +{ + struct nvkm_blob blob; + int ret; + + ret = nvkm_firmware_load_blob(&gr->base.engine.subdev, path, name, ver, &blob); + if (ret) + return ret; + + ret = load(&blob, ppack); + nvkm_blob_dtor(&blob); + return ret; +} + +int gk20a_gr_load_sw(struct gf100_gr *gr, const char *path, int ver) { - if (gk20a_gr_av_to_init(gr, path, "sw_nonctx", ver, &gr->sw_nonctx) || - gk20a_gr_aiv_to_init(gr, path, "sw_ctx", ver, &gr->sw_ctx) || - gk20a_gr_av_to_init(gr, path, "sw_bundle_init", ver, &gr->bundle) || - gk20a_gr_av_to_method(gr, path, "sw_method_init", ver, &gr->method)) + if (gk20a_gr_load_net(gr, path, "sw_nonctx", ver, gk20a_gr_av_to_init, &gr->sw_nonctx) || + gk20a_gr_load_net(gr, path, "sw_ctx", ver, gk20a_gr_aiv_to_init, &gr->sw_ctx) || + gk20a_gr_load_net(gr, path, "sw_bundle_init", ver, gk20a_gr_av_to_init, &gr->bundle) || + gk20a_gr_load_net(gr, path, "sw_method_init", ver, gk20a_gr_av_to_method, &gr->method)) return -ENOENT; return 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c index 310987174cb5..797b828a943b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c @@ -411,10 +411,13 @@ gm107_gr = { .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gm107_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gm107_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gm107_gr_pack_mmio, .fecs.ucode = &gm107_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gm107_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 2, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c index 385cfd91b266..b5210b31c1b2 100644 --- 
a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c @@ -148,11 +148,11 @@ gm200_gr_tile_map_2_8[] = { 0, 1, 1, 0, 0, 1, 1, 0, }; -void +int gm200_gr_oneinit_sm_id(struct gf100_gr *gr) { /*XXX: There's a different algorithm here I've not yet figured out. */ - gf100_gr_oneinit_sm_id(gr); + return gf100_gr_oneinit_sm_id(gr); } void @@ -199,8 +199,11 @@ gm200_gr = { .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gm107_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gm107_gr_init_400054, .trap_mp = gf100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .tpc_nr = 4, .ppc_nr = 2, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c index ec1c46e47e00..458cd1a00d3f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c @@ -123,6 +123,7 @@ gm20b_gr = { .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, .trap_mp = gf100_gr_trap_mp, .set_hww_esr_report_mask = gm20b_gr_set_hww_esr_report_mask, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .ppc_nr = 1, .grctx = &gm20b_grctx, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c index 0550dd6f46f1..851e743d2cab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c @@ -87,7 +87,7 @@ gp100_gr_init_419c9c(struct gf100_gr *gr) void gp100_gr_init_fecs_exceptions(struct gf100_gr *gr) { - nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x000f0002); + nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x000e0002); } void @@ -119,7 +119,10 @@ gp100_gr = { .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .trap_mp = gf100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .gpc_nr = 6, .tpc_nr = 5, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c index 5b001f374be0..0e223b7b5f0e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c @@ -26,7 +26,7 @@ #include <nvif/class.h> -static void +void gp102_gr_zbc_clear_stencil(struct gf100_gr *gr, int zbc) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -40,14 +40,14 @@ gp102_gr_zbc_clear_stencil(struct gf100_gr *gr, int zbc) gr->zbc_stencil[zbc].format << ((znum % 4) * 7)); } -static int +int gp102_gr_zbc_stencil_get(struct gf100_gr *gr, int format, const u32 ds, const u32 l2) { struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; int zbc = -ENOSPC, i; - for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { + for (i = ltc->zbc_depth_min; i <= ltc->zbc_depth_max; i++) { if (gr->zbc_stencil[i].format) { if (gr->zbc_stencil[i].format != format) continue; @@ -115,7 +115,10 @@ gp102_gr = { .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .trap_mp = gf100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = 
gm200_gr_rops, .gpc_nr = 6, .tpc_nr = 5, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c index 2655574ec63b..6802cb9b199f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c @@ -43,7 +43,10 @@ gp104_gr = { .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .trap_mp = gf100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .gpc_nr = 6, .tpc_nr = 5, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c index adabc04d4f3a..cc2bb0d0a987 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c @@ -45,7 +45,10 @@ gp107_gr = { .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .trap_mp = gf100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .gpc_nr = 2, .tpc_nr = 3, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c index 7310f0466bb7..311f703439e4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c @@ -25,7 +25,7 @@ #include <nvfw/flcn.h> -static void +void gp108_gr_acr_bld_patch(struct nvkm_acr *acr, u32 bld, s64 adjust) { struct flcn_bl_dmem_desc_v2 hdr; @@ -36,7 +36,7 @@ gp108_gr_acr_bld_patch(struct nvkm_acr *acr, u32 bld, s64 adjust) flcn_bl_dmem_desc_v2_dump(&acr->subdev, &hdr); } -static void +void gp108_gr_acr_bld_write(struct nvkm_acr *acr, u32 bld, struct nvkm_acr_lsfw *lsfw) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c index e13683b6e7b1..5008881ca079 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c @@ -55,7 +55,10 @@ gp10b_gr = { .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .trap_mp = gf100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .gpc_nr = 1, .tpc_nr = 2, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c index 4d043c1173ea..7f7404a76140 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c @@ -52,10 +52,11 @@ gv100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc) gv100_gr_trap_sm(gr, gpc, tpc, 1); } -static void +void gv100_gr_init_4188a4(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x4188a4, 0x03000000, 0x03000000); } @@ -65,7 +66,6 @@ gv100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc) struct nvkm_device *device = gr->base.engine.subdev.device; int sm; for (sm = 0; sm < 0x100; sm += 0x80) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x728 + sm), 0x0085eb64); nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x610), 0x00000001); nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x72c + sm), 
0x00000004); } @@ -85,10 +85,202 @@ gv100_gr_init_419bd8(struct gf100_gr *gr) nvkm_mask(device, 0x419bd8, 0x00000700, 0x00000000); } +u32 +gv100_gr_nonpes_aware_tpc(struct gf100_gr *gr, u32 gpc, u32 tpc) +{ + u32 pes, temp, tpc_new = 0; + + for (pes = 0; pes < gr->ppc_nr[gpc]; pes++) { + if (gr->ppc_tpc_mask[gpc][pes] & BIT(tpc)) + break; + + tpc_new += gr->ppc_tpc_nr[gpc][pes]; + } + + temp = (BIT(tpc) - 1) & gr->ppc_tpc_mask[gpc][pes]; + temp = hweight32(temp); + return tpc_new + temp; +} + +static int +gv100_gr_scg_estimate_perf(struct gf100_gr *gr, unsigned long *gpc_tpc_mask, + u32 disable_gpc, u32 disable_tpc, int *perf) +{ + const u32 scale_factor = 512UL; /* Use fx23.9 */ + const u32 pix_scale = 1024*1024UL; /* Pix perf in [29:20] */ + const u32 world_scale = 1024UL; /* World performance in [19:10] */ + const u32 tpc_scale = 1; /* TPC balancing in [9:0] */ + u32 scg_num_pes = 0; + u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */ + u32 average_tpcs = 0; /* Average of # of TPCs per GPC */ + u32 deviation; /* absolute diff between TPC# and average_tpcs, averaged across GPCs */ + u32 norm_tpc_deviation; /* deviation/max_tpc_per_gpc */ + u32 tpc_balance; + u32 scg_gpc_pix_perf; + u32 scg_world_perf; + u32 gpc; + u32 pes; + int diff; + bool tpc_removed_gpc = false; + bool tpc_removed_pes = false; + u32 max_tpc_gpc = 0; + u32 num_tpc_mask; + u32 *num_tpc_gpc; + int ret = -EINVAL; + + if (!(num_tpc_gpc = kcalloc(gr->gpc_nr, sizeof(*num_tpc_gpc), GFP_KERNEL))) + return -ENOMEM; + + /* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */ + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + num_tpc_mask = gpc_tpc_mask[gpc]; + + if ((gpc == disable_gpc) && num_tpc_mask & BIT(disable_tpc)) { + /* Safety check if a TPC is removed twice */ + if (WARN_ON(tpc_removed_gpc)) + goto done; + + /* Remove logical TPC from set */ + num_tpc_mask &= ~BIT(disable_tpc); + tpc_removed_gpc = true; + } + + /* track balancing of tpcs across gpcs */ + num_tpc_gpc[gpc] = hweight32(num_tpc_mask); + average_tpcs += num_tpc_gpc[gpc]; + + /* save the maximum number of TPCs found on any one GPC */ + max_tpc_gpc = num_tpc_gpc[gpc] > max_tpc_gpc ? 
num_tpc_gpc[gpc] : max_tpc_gpc; + + /* + * Calculate the ratio of post-SCG to post-FS TPC counts; + * + * the ratio represents the relative throughput of the GPC + */ + scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc] / gr->tpc_nr[gpc]; + if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) + min_scg_gpc_pix_perf = scg_gpc_pix_perf; + + /* Calculate # of surviving PES */ + for (pes = 0; pes < gr->ppc_nr[gpc]; pes++) { + /* Count the number of TPCs in the set */ + num_tpc_mask = gr->ppc_tpc_mask[gpc][pes] & gpc_tpc_mask[gpc]; + + if ((gpc == disable_gpc) && (num_tpc_mask & BIT(disable_tpc))) { + if (WARN_ON(tpc_removed_pes)) + goto done; + + num_tpc_mask &= ~BIT(disable_tpc); + tpc_removed_pes = true; + } + + if (hweight32(num_tpc_mask)) + scg_num_pes++; + } + } + + if (WARN_ON(!tpc_removed_gpc || !tpc_removed_pes)) + goto done; + + if (max_tpc_gpc == 0) { + *perf = 0; + goto done_ok; + } + + /* Now calculate perf */ + scg_world_perf = (scale_factor * scg_num_pes) / gr->ppc_total; + deviation = 0; + average_tpcs = scale_factor * average_tpcs / gr->gpc_nr; + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + diff = average_tpcs - scale_factor * num_tpc_gpc[gpc]; + if (diff < 0) + diff = -diff; + + deviation += diff; + } + + deviation /= gr->gpc_nr; + + norm_tpc_deviation = deviation / max_tpc_gpc; + + tpc_balance = scale_factor - norm_tpc_deviation; + + if ((tpc_balance > scale_factor) || + (scg_world_perf > scale_factor) || + (min_scg_gpc_pix_perf > scale_factor) || + (norm_tpc_deviation > scale_factor)) { + WARN_ON(1); + goto done; + } + + *perf = (pix_scale * min_scg_gpc_pix_perf) + + (world_scale * scg_world_perf) + + (tpc_scale * tpc_balance); +done_ok: + ret = 0; +done: + kfree(num_tpc_gpc); + return ret; +} + +int +gv100_gr_oneinit_sm_id(struct gf100_gr *gr) +{ + unsigned long *gpc_tpc_mask; + u32 *tpc_table, *gpc_table; + u32 gpc, tpc, pes, gtpc; + int perf, maxperf, ret = 0; + + gpc_tpc_mask = kcalloc(gr->gpc_nr, sizeof(*gpc_tpc_mask), GFP_KERNEL); + gpc_table = kcalloc(gr->tpc_total, sizeof(*gpc_table), GFP_KERNEL); + tpc_table = kcalloc(gr->tpc_total, sizeof(*tpc_table), GFP_KERNEL); + if (!gpc_table || !tpc_table || !gpc_tpc_mask) { + ret = -ENOMEM; + goto done; + } + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (pes = 0; pes < gr->ppc_nr[gpc]; pes++) + gpc_tpc_mask[gpc] |= gr->ppc_tpc_mask[gpc][pes]; + } + + for (gtpc = 0; gtpc < gr->tpc_total; gtpc++) { + for (maxperf = -1, gpc = 0; gpc < gr->gpc_nr; gpc++) { + for_each_set_bit(tpc, &gpc_tpc_mask[gpc], gr->tpc_nr[gpc]) { + ret = gv100_gr_scg_estimate_perf(gr, gpc_tpc_mask, gpc, tpc, &perf); + if (ret) + goto done; + + /* nvgpu does ">=" here, but this gets us RM's numbers. */ + if (perf > maxperf) { + maxperf = perf; + gpc_table[gtpc] = gpc; + tpc_table[gtpc] = tpc; + } + } + } + + gpc_tpc_mask[gpc_table[gtpc]] &= ~BIT(tpc_table[gtpc]); + } + + /*TODO: build table for sm_per_tpc != 1, don't use yet, but might need later? 
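+ *
+ * Note the estimator above packs its score as
+ *	perf = pix_scale * min_scg_gpc_pix_perf    (bits [29:20])
+ *	     + world_scale * scg_world_perf        (bits [19:10])
+ *	     + tpc_scale * tpc_balance             (bits [9:0])
+ * with each term bounded by scale_factor (512), so minimum pixel
+ * throughput dominates the choice, then world throughput, then TPC
+ * balance.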
*/ + for (gtpc = 0; gtpc < gr->tpc_total; gtpc++) { + gr->sm[gtpc].gpc = gpc_table[gtpc]; + gr->sm[gtpc].tpc = tpc_table[gtpc]; + gr->sm_nr++; + } + +done: + kfree(gpc_table); + kfree(tpc_table); + kfree(gpc_tpc_mask); + return ret; +} + static const struct gf100_gr_func gv100_gr = { .oneinit_tiles = gm200_gr_oneinit_tiles, - .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .oneinit_sm_id = gv100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_419bd8 = gv100_gr_init_419bd8, .init_gpc_mmu = gm200_gr_init_gpc_mmu, @@ -103,11 +295,14 @@ gv100_gr = { .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_504430 = gv100_gr_init_504430, .init_shader_exceptions = gv100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_4188a4 = gv100_gr_init_4188a4, .trap_mp = gv100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .gpc_nr = 6, - .tpc_nr = 5, + .tpc_nr = 7, .ppc_nr = 3, .grctx = &gv100_grctx, .zbc = &gp102_gr_zbc, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c index 0bc1a238de43..81bd682c2102 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c @@ -1192,7 +1192,7 @@ nv04_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv04_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; *ctx_reg(chan, NV04_PGRAPH_DEBUG_3) = 0xfad4ff31; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c index 942450b33bc6..7fe6e58f6bab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c @@ -1011,7 +1011,7 @@ nv10_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv10_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; NV_WRITE_CTX(0x00400e88, 0x08000000); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c index 6bff10cee71b..75434f5de7ad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c @@ -83,7 +83,7 @@ nv20_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv20_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; ret = nvkm_memory_new(gr->base.engine.subdev.device, @@ -182,7 +182,7 @@ nv20_gr_intr(struct nvkm_gr *base) struct nv20_gr *gr = nv20_gr(base); struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; - struct nvkm_fifo_chan *chan; + struct nvkm_chan *chan; u32 stat = nvkm_rd32(device, NV03_PGRAPH_INTR); u32 nsource = nvkm_rd32(device, NV03_PGRAPH_NSOURCE); u32 nstatus = nvkm_rd32(device, NV03_PGRAPH_NSTATUS); @@ -196,7 +196,7 @@ nv20_gr_intr(struct nvkm_gr *base) char msg[128], src[128], sta[128]; unsigned long flags; - chan = nvkm_fifo_chan_chid(device->fifo, chid, &flags); + chan = nvkm_chan_get_chid(&gr->base.engine, chid, &flags); nvkm_wr32(device, NV03_PGRAPH_INTR, stat); nvkm_wr32(device, NV04_PGRAPH_FIFO, 0x00000001); @@ -209,11 +209,11 @@ nv20_gr_intr(struct nvkm_gr *base) "nstatus %08x [%s] ch %d [%s] subc %d " "class %04x mthd %04x data %08x\n", 
show, msg, nsource, src, nstatus, sta, chid, - chan ? chan->object.client->name : "unknown", + chan ? chan->name : "unknown", subc, class, mthd, data); } - nvkm_fifo_chan_put(device->fifo, flags, &chan); + nvkm_chan_put(&chan, flags); } int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c index f3a56f17d94a..94685e4d4f87 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c @@ -29,7 +29,7 @@ nv25_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv25_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; ret = nvkm_memory_new(gr->base.engine.subdev.device, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c index f268d2642d29..2d6273675291 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c @@ -29,7 +29,7 @@ nv2a_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv2a_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; ret = nvkm_memory_new(gr->base.engine.subdev.device, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c index e5737cdf2fa1..647bd6fede04 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c @@ -30,7 +30,7 @@ nv30_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv30_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; ret = nvkm_memory_new(gr->base.engine.subdev.device, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c index 1ab2da8ebf4e..2eae3fe4ef4e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c @@ -29,7 +29,7 @@ nv34_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv34_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; ret = nvkm_memory_new(gr->base.engine.subdev.device, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c index 591260f5676b..657d7cdba369 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c @@ -29,7 +29,7 @@ nv35_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv35_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; ret = nvkm_memory_new(gr->base.engine.subdev.device, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c index 67f3535ff97e..d2df097a6cf6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c @@ -275,8 +275,8 @@ nv40_gr_intr(struct nvkm_gr *base) "nstatus %08x [%s] ch %d [%08x %s] subc %d " "class %04x mthd %04x data %08x\n", show, msg, nsource, src, nstatus, sta, - chan ? chan->fifo->chid : -1, inst << 4, - chan ? 
chan->fifo->object.client->name : "unknown", + chan ? chan->fifo->id : -1, inst << 4, + chan ? chan->fifo->name : "unknown", subc, class, mthd, data); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c index 563a10097e95..1ba18a8e380f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c @@ -622,7 +622,7 @@ nv50_gr_intr(struct nvkm_gr *base) struct nv50_gr *gr = nv50_gr(base); struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; - struct nvkm_fifo_chan *chan; + struct nvkm_chan *chan; u32 stat = nvkm_rd32(device, 0x400100); u32 inst = nvkm_rd32(device, 0x40032c) & 0x0fffffff; u32 addr = nvkm_rd32(device, 0x400704); @@ -637,10 +637,10 @@ nv50_gr_intr(struct nvkm_gr *base) char msg[128]; int chid = -1; - chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags); + chan = nvkm_chan_get_inst(&gr->base.engine, (u64)inst << 12, &flags); if (chan) { - name = chan->object.client->name; - chid = chan->chid; + name = chan->name; + chid = chan->id; } if (show & 0x00100000) { @@ -672,7 +672,7 @@ nv50_gr_intr(struct nvkm_gr *base) if (nvkm_rd32(device, 0x400824) & (1 << 31)) nvkm_wr32(device, 0x400824, nvkm_rd32(device, 0x400824) & ~(1 << 31)); - nvkm_fifo_chan_put(device->fifo, flags, &chan); + nvkm_chan_put(&chan, flags); } int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h index 9b2c66e8be90..08d5c96e6458 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h @@ -17,6 +17,7 @@ struct nvkm_gr_func { int (*oneinit)(struct nvkm_gr *); int (*init)(struct nvkm_gr *); int (*fini)(struct nvkm_gr *, bool); + int (*reset)(struct nvkm_gr *); void (*intr)(struct nvkm_gr *); void (*tile)(struct nvkm_gr *, int region, struct nvkm_fb_tile *); int (*tlb_flush)(struct nvkm_gr *); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c index 1a8a21844e12..3b6c8100a242 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c @@ -24,13 +24,13 @@ #include <nvif/class.h> -static void +void tu102_gr_init_fecs_exceptions(struct gf100_gr *gr) { - nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x006f0002); + nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x006e0003); } -static void +void tu102_gr_init_fs(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -40,20 +40,21 @@ tu102_gr_init_fs(struct gf100_gr *gr) gk104_grctx_generate_gpc_tpc_nr(gr); for (sm = 0; sm < gr->sm_nr; sm++) { - nvkm_wr32(device, GPC_UNIT(gr->sm[sm].gpc, 0x0c10 + - gr->sm[sm].tpc * 4), sm); + int tpc = gv100_gr_nonpes_aware_tpc(gr, gr->sm[sm].gpc, gr->sm[sm].tpc); + + nvkm_wr32(device, GPC_UNIT(gr->sm[sm].gpc, 0x0c10 + tpc * 4), sm); } gm200_grctx_generate_dist_skip_table(gr); gf100_gr_init_num_tpc_per_gpc(gr, true, true); } -static void +void tu102_gr_init_zcull(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - const u8 tile_nr = ALIGN(gr->tpc_total, 64); + const u8 tile_nr = gr->func->gpc_nr * gr->func->tpc_nr; u8 bank[GPC_MAX] = {}, gpc, i, j; u32 data; @@ -93,7 +94,7 @@ tu102_gr_init_gpc_mmu(struct gf100_gr *gr) static const struct gf100_gr_func tu102_gr = { .oneinit_tiles = gm200_gr_oneinit_tiles, - .oneinit_sm_id = 
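[Editor's sketch] tu102_gr_init_zcull() in the hunk above derives magicgpc918 with a round-up division over the TPC total, and now sizes the tile table from the theoretical gpc_nr * tpc_nr instead of ALIGN(gr->tpc_total, 64). A standalone check of that arithmetic, using the tu102 values visible in this diff and assuming no floorswept TPCs:

#include <stdio.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	const uint32_t gpc_nr = 6, tpc_nr = 6;       /* tu102 values from the diff */
	const uint32_t tpc_total = gpc_nr * tpc_nr;  /* assumes nothing floorswept */
	const uint32_t magicgpc918 = DIV_ROUND_UP(0x00800000, tpc_total);
	const uint32_t tile_nr = gpc_nr * tpc_nr;

	printf("magicgpc918 = 0x%08x, tile_nr = %u\n", magicgpc918, tile_nr);
	return 0;
}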
gm200_gr_oneinit_sm_id, + .oneinit_sm_id = gv100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_419bd8 = gv100_gr_init_419bd8, .init_gpc_mmu = tu102_gr_init_gpc_mmu, @@ -109,10 +110,14 @@ tu102_gr = { .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_504430 = gv100_gr_init_504430, .init_shader_exceptions = gv100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, + .init_4188a4 = gv100_gr_init_4188a4, .trap_mp = gv100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .gpc_nr = 6, - .tpc_nr = 5, + .tpc_nr = 6, .ppc_nr = 3, .grctx = &tu102_grctx, .zbc = &gp102_gr_zbc, @@ -137,6 +142,7 @@ MODULE_FIRMWARE("nvidia/tu102/gr/sw_ctx.bin"); MODULE_FIRMWARE("nvidia/tu102/gr/sw_nonctx.bin"); MODULE_FIRMWARE("nvidia/tu102/gr/sw_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu102/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/tu102/gr/sw_veid_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu104/gr/fecs_bl.bin"); MODULE_FIRMWARE("nvidia/tu104/gr/fecs_inst.bin"); @@ -150,6 +156,7 @@ MODULE_FIRMWARE("nvidia/tu104/gr/sw_ctx.bin"); MODULE_FIRMWARE("nvidia/tu104/gr/sw_nonctx.bin"); MODULE_FIRMWARE("nvidia/tu104/gr/sw_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu104/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/tu104/gr/sw_veid_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu106/gr/fecs_bl.bin"); MODULE_FIRMWARE("nvidia/tu106/gr/fecs_inst.bin"); @@ -163,6 +170,7 @@ MODULE_FIRMWARE("nvidia/tu106/gr/sw_ctx.bin"); MODULE_FIRMWARE("nvidia/tu106/gr/sw_nonctx.bin"); MODULE_FIRMWARE("nvidia/tu106/gr/sw_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu106/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/tu106/gr/sw_veid_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu117/gr/fecs_bl.bin"); MODULE_FIRMWARE("nvidia/tu117/gr/fecs_inst.bin"); @@ -176,6 +184,7 @@ MODULE_FIRMWARE("nvidia/tu117/gr/sw_ctx.bin"); MODULE_FIRMWARE("nvidia/tu117/gr/sw_nonctx.bin"); MODULE_FIRMWARE("nvidia/tu117/gr/sw_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu117/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/sw_veid_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu116/gr/fecs_bl.bin"); MODULE_FIRMWARE("nvidia/tu116/gr/fecs_inst.bin"); @@ -189,6 +198,26 @@ MODULE_FIRMWARE("nvidia/tu116/gr/sw_ctx.bin"); MODULE_FIRMWARE("nvidia/tu116/gr/sw_nonctx.bin"); MODULE_FIRMWARE("nvidia/tu116/gr/sw_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu116/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/tu116/gr/sw_veid_bundle_init.bin"); + +int +tu102_gr_av_to_init_veid(struct nvkm_blob *blob, struct gf100_gr_pack **ppack) +{ + return gk20a_gr_av_to_init_(blob, 64, 0x00100000, ppack); +} + +int +tu102_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif) +{ + int ret; + + ret = gm200_gr_load(gr, ver, fwif); + if (ret) + return ret; + + return gk20a_gr_load_net(gr, "gr/", "sw_veid_bundle_init", ver, tu102_gr_av_to_init_veid, + &gr->bundle_veid); +} static const struct gf100_gr_fwif tu102_gr_fwif[] = { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c index b1054db4c1b8..cb0c3991b2ad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c @@ -213,8 +213,8 @@ nv31_mpeg_intr(struct nvkm_engine *engine) if (show) { nvkm_error(subdev, "ch %d [%s] %08x %08x %08x %08x\n", - mpeg->chan ? mpeg->chan->fifo->chid : -1, - mpeg->chan ? mpeg->chan->object.client->name : + mpeg->chan ? mpeg->chan->fifo->id : -1, + mpeg->chan ? 
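[Editor's sketch] tu102_gr_load() in this hunk pulls the new "sw_veid_bundle_init" firmware through the gk20a "av" path, which walks a flat blob of { addr, data } u32 pairs. A minimal parser for that layout (struct and function names hypothetical; the count/pitch grouping the real gk20a_gr_av_to_init_() performs is omitted):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct av { uint32_t addr, data; };

static void av_parse(const void *blob, size_t size)
{
	const struct av *av = blob;
	size_t nr = size / sizeof(*av);

	for (size_t i = 0; i < nr; i++)
		printf("mmio %08x <- %08x\n", av[i].addr, av[i].data);
}

int main(void)
{
	/* demo records; real contents come from the firmware blob */
	const struct av demo[] = { { 0x418800, 0x00000001 }, { 0x418808, 0x00000000 } };

	av_parse(demo, sizeof(demo));
	return 0;
}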
mpeg->chan->fifo->name : "unknown", stat, type, mthd, data); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c index 521ce43a2871..0890a279458e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c @@ -182,8 +182,8 @@ nv44_mpeg_intr(struct nvkm_engine *engine) if (show) { nvkm_error(subdev, "ch %d [%08x %s] %08x %08x %08x %08x\n", - chan ? chan->fifo->chid : -1, inst << 4, - chan ? chan->object.client->name : "unknown", + chan ? chan->fifo->id : -1, inst << 4, + chan ? chan->fifo->name : "unknown", stat, type, mthd, data); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild index 9a0fd9812750..f05e79670d22 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild @@ -1,3 +1,4 @@ # SPDX-License-Identifier: MIT nvkm-y += nvkm/engine/nvdec/base.o nvkm-y += nvkm/engine/nvdec/gm107.o +nvkm-y += nvkm/engine/nvdec/ga102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c index b0181cc5953b..1f6e3b32ba16 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c @@ -37,7 +37,7 @@ nvkm_nvdec = { int nvkm_nvdec_new_(const struct nvkm_nvdec_fwif *fwif, struct nvkm_device *device, - enum nvkm_subdev_type type, int inst, struct nvkm_nvdec **pnvdec) + enum nvkm_subdev_type type, int inst, u32 addr, struct nvkm_nvdec **pnvdec) { struct nvkm_nvdec *nvdec; int ret; @@ -57,5 +57,5 @@ nvkm_nvdec_new_(const struct nvkm_nvdec_fwif *fwif, struct nvkm_device *device, nvdec->func = fwif->func; return nvkm_falcon_ctor(nvdec->func->flcn, &nvdec->engine.subdev, - nvdec->engine.subdev.name, 0, &nvdec->falcon); + nvdec->engine.subdev.name, addr, &nvdec->falcon); }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/ga102.c index 9acaec5c271e..37d8c3c0f3ab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/ga102.c @@ -1,5 +1,5 @@ /* - * Copyright 2012 Red Hat Inc. + * Copyright 2021 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,45 +18,44 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
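[Editor's sketch] The nvdec base.c hunk above threads a u32 addr through nvkm_nvdec_new_() into nvkm_falcon_ctor(), so GA102 can place its NVDEC falcon at a new PRI base while GM107 keeps passing 0. A minimal sketch of that pattern, with hypothetical names:

#include <stdio.h>
#include <stdint.h>

struct falcon { uint32_t addr; };

/* Constructor takes the unit's PRI base so per-generation callers can
 * override it; 0 stands for the legacy default location. */
static void falcon_ctor(struct falcon *falcon, uint32_t addr)
{
	falcon->addr = addr;
}

int main(void)
{
	struct falcon gm107, ga102;

	falcon_ctor(&gm107, 0);        /* as in gm107_nvdec_new() */
	falcon_ctor(&ga102, 0x848000); /* as in ga102_nvdec_new() */
	printf("gm107 @ %06x, ga102 @ %06x\n", gm107.addr, ga102.addr);
	return 0;
}

Keeping the base address a constructor argument rather than a per-chip #define lets one base implementation serve every generation.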
- * - * Authors: Ben Skeggs */ #include "priv.h" -#include "head.h" - -#include <core/client.h> -#include <nvif/cl0046.h> -#include <nvif/unpack.h> +#include <subdev/mc.h> +#include <subdev/timer.h> -int -nv04_disp_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size) -{ - struct nvkm_disp *disp = nvkm_disp(object->engine); - union { - struct nv04_disp_mthd_v0 v0; - } *args = data; - struct nvkm_head *head; - int id, ret = -ENOSYS; +static const struct nvkm_falcon_func +ga102_nvdec_flcn = { + .disable = gm200_flcn_disable, + .enable = gm200_flcn_enable, + .addr2 = 0x1c00, + .reset_pmc = true, + .reset_prep = ga102_flcn_reset_prep, + .reset_wait_mem_scrubbing = ga102_flcn_reset_wait_mem_scrubbing, + .imem_dma = &ga102_flcn_dma, + .dmem_dma = &ga102_flcn_dma, +}; - nvif_ioctl(object, "disp mthd size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) { - nvif_ioctl(object, "disp mthd vers %d mthd %02x head %d\n", - args->v0.version, args->v0.method, args->v0.head); - mthd = args->v0.method; - id = args->v0.head; - } else - return ret; +static const struct nvkm_nvdec_func +ga102_nvdec = { + .flcn = &ga102_nvdec_flcn, +}; - if (!(head = nvkm_head_find(disp, id))) - return -ENXIO; +static int +ga102_nvdec_nofw(struct nvkm_nvdec *nvdec, int ver, const struct nvkm_nvdec_fwif *fwif) +{ + return 0; +} - switch (mthd) { - case NV04_DISP_SCANOUTPOS: - return nvkm_head_mthd_scanoutpos(object, head, data, size); - default: - break; - } +static const struct nvkm_nvdec_fwif +ga102_nvdec_fwif[] = { + { -1, ga102_nvdec_nofw, &ga102_nvdec }, + {} +}; - return -EINVAL; +int +ga102_nvdec_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, + struct nvkm_nvdec **pnvdec) +{ + return nvkm_nvdec_new_(ga102_nvdec_fwif, device, type, inst, 0x848000, pnvdec); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gm107.c index 8c44ce44a6d7..564f7e8960a2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gm107.c @@ -23,18 +23,13 @@ static const struct nvkm_falcon_func gm107_nvdec_flcn = { + .disable = gm200_flcn_disable, + .enable = gm200_flcn_enable, + .reset_pmc = true, + .reset_wait_mem_scrubbing = gm200_flcn_reset_wait_mem_scrubbing, .debug = 0xd00, - .fbif = 0x600, - .load_imem = nvkm_falcon_v1_load_imem, - .load_dmem = nvkm_falcon_v1_load_dmem, - .read_dmem = nvkm_falcon_v1_read_dmem, - .bind_context = nvkm_falcon_v1_bind_context, - .wait_for_halt = nvkm_falcon_v1_wait_for_halt, - .clear_interrupt = nvkm_falcon_v1_clear_interrupt, - .set_start_addr = nvkm_falcon_v1_set_start_addr, - .start = nvkm_falcon_v1_start, - .enable = nvkm_falcon_v1_enable, - .disable = nvkm_falcon_v1_disable, + .imem_pio = &gm200_flcn_imem_pio, + .dmem_pio = &gm200_flcn_dmem_pio, }; static const struct nvkm_nvdec_func @@ -59,5 +54,5 @@ int gm107_nvdec_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_nvdec **pnvdec) { - return nvkm_nvdec_new_(gm107_nvdec_fwif, device, type, inst, pnvdec); + return nvkm_nvdec_new_(gm107_nvdec_fwif, device, type, inst, 0, pnvdec); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h index 0920f6a887e2..61e1f7aaa509 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h @@ -15,5 +15,5 @@ struct nvkm_nvdec_fwif { }; int nvkm_nvdec_new_(const struct nvkm_nvdec_fwif *fwif, struct 
nvkm_device *, - enum nvkm_subdev_type, int, struct nvkm_nvdec **); + enum nvkm_subdev_type, int, u32 addr, struct nvkm_nvdec **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/gm107.c index f44d41bf2034..ad27d8b97569 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/gm107.c @@ -24,17 +24,6 @@ static const struct nvkm_falcon_func gm107_nvenc_flcn = { - .fbif = 0x800, - .load_imem = nvkm_falcon_v1_load_imem, - .load_dmem = nvkm_falcon_v1_load_dmem, - .read_dmem = nvkm_falcon_v1_read_dmem, - .bind_context = nvkm_falcon_v1_bind_context, - .wait_for_halt = nvkm_falcon_v1_wait_for_halt, - .clear_interrupt = nvkm_falcon_v1_clear_interrupt, - .set_start_addr = nvkm_falcon_v1_set_start_addr, - .start = nvkm_falcon_v1_start, - .enable = nvkm_falcon_v1_enable, - .disable = nvkm_falcon_v1_disable, }; static const struct nvkm_nvenc_func diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec/g98.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec/g98.c index 1b87df03c823..c15b2cbf506b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec/g98.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec/g98.c @@ -40,7 +40,7 @@ static const struct nvkm_enum g98_sec_isr_error_name[] = { }; static void -g98_sec_intr(struct nvkm_falcon *sec, struct nvkm_fifo_chan *chan) +g98_sec_intr(struct nvkm_falcon *sec, struct nvkm_chan *chan) { struct nvkm_subdev *subdev = &sec->engine.subdev; struct nvkm_device *device = subdev->device; @@ -54,9 +54,9 @@ g98_sec_intr(struct nvkm_falcon *sec, struct nvkm_fifo_chan *chan) nvkm_error(subdev, "DISPATCH_ERROR %04x [%s] ch %d [%010llx %s] " "subc %d mthd %04x data %08x\n", ssta, - en ? en->name : "UNKNOWN", chan ? chan->chid : -1, + en ? en->name : "UNKNOWN", chan ? chan->id : -1, chan ? chan->inst->addr : 0, - chan ? chan->object.client->name : "unknown", + chan ? 
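[Editor's sketch] The gm107 vs ga102 falcon tables in the nvdec hunks above swap whole method sets (PIO transfer ops on GM107, DMA ops plus reset hooks on GA102) behind one const ops struct. The dispatch itself is plain function-pointer indirection; a toy version:

#include <stdio.h>

struct flcn_func {
	void (*enable)(void);
	void (*load_code)(const char *name);
};

static void enable_generic(void) { puts("enable falcon"); }
static void load_pio(const char *name) { printf("%s: load via PIO\n", name); }
static void load_dma(const char *name) { printf("%s: load via DMA\n", name); }

static const struct flcn_func gm107_like = { enable_generic, load_pio };
static const struct flcn_func ga102_like = { enable_generic, load_dma };

static void boot(const struct flcn_func *func, const char *name)
{
	func->enable();
	func->load_code(name);
}

int main(void)
{
	boot(&gm107_like, "gm107");
	boot(&ga102_like, "ga102");
	return 0;
}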
chan->name : "unknown", subc, mthd, data); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild index 63cd2be3de08..19feadb1f67b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild @@ -3,3 +3,4 @@ nvkm-y += nvkm/engine/sec2/base.o nvkm-y += nvkm/engine/sec2/gp102.o nvkm-y += nvkm/engine/sec2/gp108.o nvkm-y += nvkm/engine/sec2/tu102.o +nvkm-y += nvkm/engine/sec2/ga102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c index 092c6d0b8e01..f2c60da5d1e8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c @@ -22,53 +22,99 @@ #include "priv.h" #include <core/firmware.h> -#include <subdev/top.h> +#include <subdev/mc.h> +#include <subdev/timer.h> -static void -nvkm_sec2_recv(struct work_struct *work) +#include <nvfw/sec2.h> + +static int +nvkm_sec2_finimsg(void *priv, struct nvfw_falcon_msg *hdr) +{ + struct nvkm_sec2 *sec2 = priv; + + atomic_set(&sec2->running, 0); + return 0; +} + +static int +nvkm_sec2_fini(struct nvkm_engine *engine, bool suspend) { - struct nvkm_sec2 *sec2 = container_of(work, typeof(*sec2), work); + struct nvkm_sec2 *sec2 = nvkm_sec2(engine); + struct nvkm_subdev *subdev = &sec2->engine.subdev; + struct nvkm_falcon *falcon = &sec2->falcon; + struct nvkm_falcon_cmdq *cmdq = sec2->cmdq; + struct nvfw_falcon_cmd cmd = { + .unit_id = sec2->func->unit_unload, + .size = sizeof(cmd), + }; + int ret; - if (!sec2->initmsg_received) { - int ret = sec2->func->initmsg(sec2); - if (ret) { - nvkm_error(&sec2->engine.subdev, - "error parsing init message: %d\n", ret); - return; - } + if (!subdev->use.enabled) + return 0; - sec2->initmsg_received = true; + if (atomic_read(&sec2->initmsg) == 1) { + ret = nvkm_falcon_cmdq_send(cmdq, &cmd, nvkm_sec2_finimsg, sec2, + msecs_to_jiffies(1000)); + WARN_ON(ret); + + nvkm_msec(subdev->device, 2000, + if (nvkm_falcon_rd32(falcon, 0x100) & 0x00000010) + break; + ); } - nvkm_falcon_msgq_recv(sec2->msgq); + nvkm_inth_block(&subdev->inth); + + nvkm_falcon_cmdq_fini(cmdq); + falcon->func->disable(falcon); + nvkm_falcon_put(falcon, subdev); + return 0; } -static void -nvkm_sec2_intr(struct nvkm_engine *engine) +static int +nvkm_sec2_init(struct nvkm_engine *engine) { struct nvkm_sec2 *sec2 = nvkm_sec2(engine); - sec2->func->intr(sec2); + struct nvkm_subdev *subdev = &sec2->engine.subdev; + struct nvkm_falcon *falcon = &sec2->falcon; + int ret; + + ret = nvkm_falcon_get(falcon, subdev); + if (ret) + return ret; + + nvkm_falcon_wr32(falcon, 0x014, 0xffffffff); + atomic_set(&sec2->initmsg, 0); + atomic_set(&sec2->running, 1); + nvkm_inth_allow(&subdev->inth); + + nvkm_falcon_start(falcon); + return 0; } static int -nvkm_sec2_fini(struct nvkm_engine *engine, bool suspend) +nvkm_sec2_oneinit(struct nvkm_engine *engine) { struct nvkm_sec2 *sec2 = nvkm_sec2(engine); - - flush_work(&sec2->work); - - if (suspend) { - nvkm_falcon_cmdq_fini(sec2->cmdq); - sec2->initmsg_received = false; + struct nvkm_subdev *subdev = &sec2->engine.subdev; + struct nvkm_intr *intr = &sec2->engine.subdev.device->mc->intr; + enum nvkm_intr_type type = NVKM_INTR_SUBDEV; + + if (sec2->func->intr_vector) { + intr = sec2->func->intr_vector(sec2, &type); + if (IS_ERR(intr)) + return PTR_ERR(intr); } - return 0; + return nvkm_inth_add(intr, type, NVKM_INTR_PRIO_NORMAL, subdev, sec2->func->intr, + &subdev->inth); } static void * nvkm_sec2_dtor(struct 
nvkm_engine *engine) { struct nvkm_sec2 *sec2 = nvkm_sec2(engine); + nvkm_falcon_msgq_del(&sec2->msgq); nvkm_falcon_cmdq_del(&sec2->cmdq); nvkm_falcon_qmgr_del(&sec2->qmgr); @@ -79,8 +125,9 @@ nvkm_sec2_dtor(struct nvkm_engine *engine) static const struct nvkm_engine_func nvkm_sec2 = { .dtor = nvkm_sec2_dtor, + .oneinit = nvkm_sec2_oneinit, + .init = nvkm_sec2_init, .fini = nvkm_sec2_fini, - .intr = nvkm_sec2_intr, }; int @@ -113,6 +160,5 @@ nvkm_sec2_new_(const struct nvkm_sec2_fwif *fwif, struct nvkm_device *device, (ret = nvkm_falcon_msgq_new(sec2->qmgr, "msgq", &sec2->msgq))) return ret; - INIT_WORK(&sec2->work, nvkm_sec2_recv); return 0; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/ga102.c new file mode 100644 index 000000000000..945abb8156d7 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/ga102.c @@ -0,0 +1,197 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
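[Editor's sketch] The reworked nvkm_sec2_fini() earlier in this hunk sends a unit_unload command, lets the nvkm_sec2_finimsg() completion clear the running flag, then polls a falcon register for the halt bit before blocking the interrupt and tearing down the command queue. A userspace approximation of that handshake, with the register and reply simulated:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int running = 1;
static unsigned int flcn_0x100;	/* stands in for the polled falcon register */

/* Completion the message queue would invoke for the unload reply. */
static void finimsg(void)
{
	atomic_store(&running, 0);
	flcn_0x100 |= 0x00000010;	/* falcon reports halted */
}

static int sec2_fini(void)
{
	/* send the unload command; the reply arrives asynchronously */
	finimsg();			/* simulated immediate reply */

	for (int ms = 0; ms < 2000; ms++) {	/* nvkm_msec-style bounded poll */
		if (flcn_0x100 & 0x00000010)
			break;
	}
	return atomic_load(&running) ? -1 : 0;
}

int main(void)
{
	printf("fini -> %d\n", sec2_fini());
	return 0;
}

Waiting for the halt bit rather than just the queue reply guards against suspending while the RTOS is still mid-teardown.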
+ */ +#include "priv.h" +#include <subdev/acr.h> +#include <subdev/vfn.h> + +#include <nvfw/flcn.h> +#include <nvfw/sec2.h> + +static int +ga102_sec2_initmsg(struct nvkm_sec2 *sec2) +{ + struct nv_sec2_init_msg_v1 msg; + int ret, i; + + ret = nvkm_falcon_msgq_recv_initmsg(sec2->msgq, &msg, sizeof(msg)); + if (ret) + return ret; + + if (msg.hdr.unit_id != NV_SEC2_UNIT_INIT || + msg.msg_type != NV_SEC2_INIT_MSG_INIT) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(msg.queue_info); i++) { + if (msg.queue_info[i].id == NV_SEC2_INIT_MSG_QUEUE_ID_MSGQ) { + nvkm_falcon_msgq_init(sec2->msgq, msg.queue_info[i].index, + msg.queue_info[i].offset, + msg.queue_info[i].size); + } else { + nvkm_falcon_cmdq_init(sec2->cmdq, msg.queue_info[i].index, + msg.queue_info[i].offset, + msg.queue_info[i].size); + } + } + + return 0; +} + +static struct nvkm_intr * +ga102_sec2_intr_vector(struct nvkm_sec2 *sec2, enum nvkm_intr_type *pvector) +{ + struct nvkm_device *device = sec2->engine.subdev.device; + struct nvkm_falcon *falcon = &sec2->falcon; + int ret; + + ret = ga102_flcn_select(falcon); + if (ret) + return ERR_PTR(ret); + + *pvector = nvkm_rd32(device, 0x8403e0) & 0x000000ff; + return &device->vfn->intr; +} + +static int +ga102_sec2_acr_bootstrap_falcon_callback(void *priv, struct nvfw_falcon_msg *hdr) +{ + struct nv_sec2_acr_bootstrap_falcon_msg_v1 *msg = + container_of(hdr, typeof(*msg), msg.hdr); + struct nvkm_subdev *subdev = priv; + const char *name = nvkm_acr_lsf_id(msg->falcon_id); + + if (msg->error_code) { + nvkm_error(subdev, "ACR_BOOTSTRAP_FALCON failed for falcon %d [%s]: %08x %08x\n", + msg->falcon_id, name, msg->error_code, msg->unkn08); + return -EINVAL; + } + + nvkm_debug(subdev, "%s booted\n", name); + return 0; +} + +static int +ga102_sec2_acr_bootstrap_falcon(struct nvkm_falcon *falcon, enum nvkm_acr_lsf_id id) +{ + struct nvkm_sec2 *sec2 = container_of(falcon, typeof(*sec2), falcon); + struct nv_sec2_acr_bootstrap_falcon_cmd_v1 cmd = { + .cmd.hdr.unit_id = sec2->func->unit_acr, + .cmd.hdr.size = sizeof(cmd), + .cmd.cmd_type = NV_SEC2_ACR_CMD_BOOTSTRAP_FALCON, + .flags = NV_SEC2_ACR_BOOTSTRAP_FALCON_FLAGS_RESET_YES, + .falcon_id = id, + }; + + return nvkm_falcon_cmdq_send(sec2->cmdq, &cmd.cmd.hdr, + ga102_sec2_acr_bootstrap_falcon_callback, + &sec2->engine.subdev, + msecs_to_jiffies(1000)); +} + +static const struct nvkm_acr_lsf_func +ga102_sec2_acr_0 = { + .bld_size = sizeof(struct flcn_bl_dmem_desc_v2), + .bld_write = gp102_sec2_acr_bld_write_1, + .bld_patch = gp102_sec2_acr_bld_patch_1, + .bootstrap_falcons = BIT_ULL(NVKM_ACR_LSF_FECS) | + BIT_ULL(NVKM_ACR_LSF_GPCCS) | + BIT_ULL(NVKM_ACR_LSF_SEC2), + .bootstrap_falcon = ga102_sec2_acr_bootstrap_falcon, +}; + +static const struct nvkm_falcon_func +ga102_sec2_flcn = { + .disable = gm200_flcn_disable, + .enable = gm200_flcn_enable, + .select = ga102_flcn_select, + .addr2 = 0x1000, + .reset_pmc = true, + .reset_eng = gp102_flcn_reset_eng, + .reset_prep = ga102_flcn_reset_prep, + .reset_wait_mem_scrubbing = ga102_flcn_reset_wait_mem_scrubbing, + .imem_dma = &ga102_flcn_dma, + .dmem_pio = &gm200_flcn_dmem_pio, + .dmem_dma = &ga102_flcn_dma, + .emem_addr = 0x01000000, + .emem_pio = &gp102_flcn_emem_pio, + .start = nvkm_falcon_v1_start, + .cmdq = { 0xc00, 0xc04, 8 }, + .msgq = { 0xc80, 0xc84, 8 }, +}; + +static const struct nvkm_sec2_func +ga102_sec2 = { + .flcn = &ga102_sec2_flcn, + .intr_vector = ga102_sec2_intr_vector, + .intr = gp102_sec2_intr, + .initmsg = ga102_sec2_initmsg, + .unit_acr = NV_SEC2_UNIT_V2_ACR, + .unit_unload = 
NV_SEC2_UNIT_V2_UNLOAD, +}; + +MODULE_FIRMWARE("nvidia/ga102/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/ga102/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/ga102/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/ga102/sec2/hs_bl_sig.bin"); + +MODULE_FIRMWARE("nvidia/ga103/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/ga103/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/ga103/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/ga103/sec2/hs_bl_sig.bin"); + +MODULE_FIRMWARE("nvidia/ga104/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/ga104/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/ga104/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/ga104/sec2/hs_bl_sig.bin"); + +MODULE_FIRMWARE("nvidia/ga106/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/ga106/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/ga106/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/ga106/sec2/hs_bl_sig.bin"); + +MODULE_FIRMWARE("nvidia/ga107/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/ga107/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/ga107/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/ga107/sec2/hs_bl_sig.bin"); + +static int +ga102_sec2_load(struct nvkm_sec2 *sec2, int ver, + const struct nvkm_sec2_fwif *fwif) +{ + return nvkm_acr_lsfw_load_sig_image_desc_v2(&sec2->engine.subdev, &sec2->falcon, + NVKM_ACR_LSF_SEC2, "sec2/", ver, fwif->acr); +} + +static const struct nvkm_sec2_fwif +ga102_sec2_fwif[] = { + { 0, ga102_sec2_load, &ga102_sec2, &ga102_sec2_acr_0 }, + { -1, gp102_sec2_nofw, &ga102_sec2 } +}; + +int +ga102_sec2_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, + struct nvkm_sec2 **psec2) +{ + /* TOP info wasn't updated on Turing to reflect the PRI + * address change for some reason. We override it here. + */ + return nvkm_sec2_new_(ga102_sec2_fwif, device, type, inst, 0x840000, psec2); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c index 44e39f5743d5..c64013d10500 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c @@ -74,16 +74,6 @@ gp102_sec2_acr_bootstrap_falcon(struct nvkm_falcon *falcon, msecs_to_jiffies(1000)); } -static int -gp102_sec2_acr_boot(struct nvkm_falcon *falcon) -{ - struct nv_sec2_args args = {}; - nvkm_falcon_load_dmem(falcon, &args, - falcon->func->emem_addr, sizeof(args), 0); - nvkm_falcon_start(falcon); - return 0; -} - static void gp102_sec2_acr_bld_patch(struct nvkm_acr *acr, u32 bld, s64 adjust) { @@ -122,7 +112,6 @@ gp102_sec2_acr_0 = { .bld_size = sizeof(struct loader_config_v1), .bld_write = gp102_sec2_acr_bld_write, .bld_patch = gp102_sec2_acr_bld_patch, - .boot = gp102_sec2_acr_boot, .bootstrap_falcons = BIT_ULL(NVKM_ACR_LSF_FECS) | BIT_ULL(NVKM_ACR_LSF_GPCCS) | BIT_ULL(NVKM_ACR_LSF_SEC2), @@ -160,89 +149,68 @@ gp102_sec2_initmsg(struct nvkm_sec2 *sec2) return 0; } -void -gp102_sec2_intr(struct nvkm_sec2 *sec2) +irqreturn_t +gp102_sec2_intr(struct nvkm_inth *inth) { + struct nvkm_sec2 *sec2 = container_of(inth, typeof(*sec2), engine.subdev.inth); struct nvkm_subdev *subdev = &sec2->engine.subdev; struct nvkm_falcon *falcon = &sec2->falcon; u32 disp = nvkm_falcon_rd32(falcon, 0x01c); u32 intr = nvkm_falcon_rd32(falcon, 0x008) & disp & ~(disp >> 16); if (intr & 0x00000040) { - schedule_work(&sec2->work); + if (unlikely(atomic_read(&sec2->initmsg) == 0)) { + int ret = sec2->func->initmsg(sec2); + + if (ret) + nvkm_error(subdev, "error parsing init message: %d\n", ret); + + atomic_set(&sec2->initmsg, ret ?: 1); + } + + if (atomic_read(&sec2->initmsg) > 0) { + if (!nvkm_falcon_msgq_empty(sec2->msgq)) + 
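/* [Editor's sketch] The gp102_sec2_intr() rework in this hunk keys off an
 * atomic tri-state: 0 means the INIT message has not been parsed yet, 1
 * means it parsed cleanly, negative holds the parse error, and the message
 * queue is only drained once the state is positive.  A compact model:
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int initmsg;	/* 0 = pending, 1 = ok, <0 = errno-style */

static int parse_initmsg(void) { return 0; }	/* pretend it succeeds */

static void msg_intr(void)
{
	if (atomic_load(&initmsg) == 0) {
		int ret = parse_initmsg();

		atomic_store(&initmsg, ret ? ret : 1);
	}
	if (atomic_load(&initmsg) > 0)
		puts("drain message queue");
}

int main(void)
{
	msg_intr();	/* first interrupt parses INIT, then drains */
	msg_intr();	/* later interrupts only drain */
	return 0;
}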
nvkm_falcon_msgq_recv(sec2->msgq); + } + nvkm_falcon_wr32(falcon, 0x004, 0x00000040); intr &= ~0x00000040; } + if (intr & 0x00000010) { + if (atomic_read(&sec2->running)) { + FLCN_ERR(falcon, "halted"); + gm200_flcn_tracepc(falcon); + } + + nvkm_falcon_wr32(falcon, 0x004, 0x00000010); + intr &= ~0x00000010; + } + if (intr) { nvkm_error(subdev, "unhandled intr %08x\n", intr); nvkm_falcon_wr32(falcon, 0x004, intr); } -} -int -gp102_sec2_flcn_enable(struct nvkm_falcon *falcon) -{ - nvkm_falcon_mask(falcon, 0x3c0, 0x00000001, 0x00000001); - udelay(10); - nvkm_falcon_mask(falcon, 0x3c0, 0x00000001, 0x00000000); - return nvkm_falcon_v1_enable(falcon); -} - -void -gp102_sec2_flcn_bind_context(struct nvkm_falcon *falcon, - struct nvkm_memory *ctx) -{ - struct nvkm_device *device = falcon->owner->device; - - nvkm_falcon_v1_bind_context(falcon, ctx); - if (!ctx) - return; - - /* Not sure if this is a WAR for a HW issue, or some additional - * programming sequence that's needed to properly complete the - * context switch we trigger above. - * - * Fixes unreliability of booting the SEC2 RTOS on Quadro P620, - * particularly when resuming from suspend. - * - * Also removes the need for an odd workaround where we needed - * to program SEC2's FALCON_CPUCTL_ALIAS_STARTCPU twice before - * the SEC2 RTOS would begin executing. - */ - nvkm_msec(device, 10, - u32 irqstat = nvkm_falcon_rd32(falcon, 0x008); - u32 flcn0dc = nvkm_falcon_rd32(falcon, 0x0dc); - if ((irqstat & 0x00000008) && - (flcn0dc & 0x00007000) == 0x00005000) - break; - ); - - nvkm_falcon_mask(falcon, 0x004, 0x00000008, 0x00000008); - nvkm_falcon_mask(falcon, 0x058, 0x00000002, 0x00000002); - - nvkm_msec(device, 10, - u32 flcn0dc = nvkm_falcon_rd32(falcon, 0x0dc); - if ((flcn0dc & 0x00007000) == 0x00000000) - break; - ); + return IRQ_HANDLED; } static const struct nvkm_falcon_func gp102_sec2_flcn = { + .disable = gm200_flcn_disable, + .enable = gm200_flcn_enable, + .reset_pmc = true, + .reset_eng = gp102_flcn_reset_eng, + .reset_wait_mem_scrubbing = gm200_flcn_reset_wait_mem_scrubbing, .debug = 0x408, - .fbif = 0x600, - .load_imem = nvkm_falcon_v1_load_imem, - .load_dmem = nvkm_falcon_v1_load_dmem, - .read_dmem = nvkm_falcon_v1_read_dmem, + .bind_inst = gm200_flcn_bind_inst, + .bind_stat = gm200_flcn_bind_stat, + .bind_intr = true, + .imem_pio = &gm200_flcn_imem_pio, + .dmem_pio = &gm200_flcn_dmem_pio, .emem_addr = 0x01000000, - .bind_context = gp102_sec2_flcn_bind_context, - .wait_for_halt = nvkm_falcon_v1_wait_for_halt, - .clear_interrupt = nvkm_falcon_v1_clear_interrupt, - .set_start_addr = nvkm_falcon_v1_set_start_addr, + .emem_pio = &gp102_flcn_emem_pio, .start = nvkm_falcon_v1_start, - .enable = gp102_sec2_flcn_enable, - .disable = nvkm_falcon_v1_disable, .cmdq = { 0xa00, 0xa04, 8 }, .msgq = { 0xa30, 0xa34, 8 }, }; @@ -250,6 +218,7 @@ gp102_sec2_flcn = { const struct nvkm_sec2_func gp102_sec2 = { .flcn = &gp102_sec2_flcn, + .unit_unload = NV_SEC2_UNIT_UNLOAD, .unit_acr = NV_SEC2_UNIT_ACR, .intr = gp102_sec2_intr, .initmsg = gp102_sec2_initmsg, @@ -268,7 +237,7 @@ MODULE_FIRMWARE("nvidia/gp107/sec2/desc.bin"); MODULE_FIRMWARE("nvidia/gp107/sec2/image.bin"); MODULE_FIRMWARE("nvidia/gp107/sec2/sig.bin"); -static void +void gp102_sec2_acr_bld_patch_1(struct nvkm_acr *acr, u32 bld, s64 adjust) { struct flcn_bl_dmem_desc_v2 hdr; @@ -279,7 +248,7 @@ gp102_sec2_acr_bld_patch_1(struct nvkm_acr *acr, u32 bld, s64 adjust) flcn_bl_dmem_desc_v2_dump(&acr->subdev, &hdr); } -static void +void gp102_sec2_acr_bld_write_1(struct nvkm_acr *acr, u32 bld, 
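/* [Editor's sketch] gp102_sec2_acr_bld_patch_1() above follows the usual ACR
 * pattern: read the bootloader descriptor out of the write-protected region,
 * slide its DMA bases by a signed adjustment, and write it back.  A
 * self-contained sketch with the descriptor reduced to its two bases
 * (field names match flcn_bl_dmem_desc_v2; the WPR round-trip is elided):
 */
#include <stdio.h>
#include <stdint.h>

struct bl_desc { uint64_t code_dma_base, data_dma_base; };

static void bld_patch(struct bl_desc *hdr, int64_t adjust)
{
	/* read-modify-write; in the driver this goes through the WPR object */
	hdr->code_dma_base += adjust;
	hdr->data_dma_base += adjust;
}

int main(void)
{
	struct bl_desc hdr = { 0x1000, 0x8000 };

	bld_patch(&hdr, 0x100000);
	printf("code %#llx data %#llx\n",
	       (unsigned long long)hdr.code_dma_base,
	       (unsigned long long)hdr.data_dma_base);
	return 0;
}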
struct nvkm_acr_lsfw *lsfw) { @@ -304,7 +273,6 @@ gp102_sec2_acr_1 = { .bld_size = sizeof(struct flcn_bl_dmem_desc_v2), .bld_write = gp102_sec2_acr_bld_write_1, .bld_patch = gp102_sec2_acr_bld_patch_1, - .boot = gp102_sec2_acr_boot, .bootstrap_falcons = BIT_ULL(NVKM_ACR_LSF_FECS) | BIT_ULL(NVKM_ACR_LSF_GPCCS) | BIT_ULL(NVKM_ACR_LSF_SEC2), diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h index af19229e885d..172d2705c199 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h @@ -2,15 +2,18 @@ #ifndef __NVKM_SEC2_PRIV_H__ #define __NVKM_SEC2_PRIV_H__ #include <engine/sec2.h> +struct nvkm_acr_lsfw; struct nvkm_sec2_func { const struct nvkm_falcon_func *flcn; + u8 unit_unload; u8 unit_acr; - void (*intr)(struct nvkm_sec2 *); + struct nvkm_intr *(*intr_vector)(struct nvkm_sec2 *, enum nvkm_intr_type *); + irqreturn_t (*intr)(struct nvkm_inth *); int (*initmsg)(struct nvkm_sec2 *); }; -void gp102_sec2_intr(struct nvkm_sec2 *); +irqreturn_t gp102_sec2_intr(struct nvkm_inth *); int gp102_sec2_initmsg(struct nvkm_sec2 *); struct nvkm_sec2_fwif { @@ -24,6 +27,8 @@ int gp102_sec2_nofw(struct nvkm_sec2 *, int, const struct nvkm_sec2_fwif *); int gp102_sec2_load(struct nvkm_sec2 *, int, const struct nvkm_sec2_fwif *); extern const struct nvkm_sec2_func gp102_sec2; extern const struct nvkm_acr_lsf_func gp102_sec2_acr_1; +void gp102_sec2_acr_bld_write_1(struct nvkm_acr *, u32, struct nvkm_acr_lsfw *); +void gp102_sec2_acr_bld_patch_1(struct nvkm_acr *, u32, s64); int nvkm_sec2_new_(const struct nvkm_sec2_fwif *, struct nvkm_device *, enum nvkm_subdev_type, int, u32 addr, struct nvkm_sec2 **); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c index f3faeb705575..0afc4b2fa529 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c @@ -22,21 +22,24 @@ #include "priv.h" #include <subdev/acr.h> +#include <nvfw/sec2.h> + static const struct nvkm_falcon_func tu102_sec2_flcn = { + .disable = gm200_flcn_disable, + .enable = gm200_flcn_enable, + .reset_pmc = true, + .reset_eng = gp102_flcn_reset_eng, + .reset_wait_mem_scrubbing = gm200_flcn_reset_wait_mem_scrubbing, .debug = 0x408, - .fbif = 0x600, - .load_imem = nvkm_falcon_v1_load_imem, - .load_dmem = nvkm_falcon_v1_load_dmem, - .read_dmem = nvkm_falcon_v1_read_dmem, + .bind_inst = gm200_flcn_bind_inst, + .bind_stat = gm200_flcn_bind_stat, + .bind_intr = true, + .imem_pio = &gm200_flcn_imem_pio, + .dmem_pio = &gm200_flcn_dmem_pio, .emem_addr = 0x01000000, - .bind_context = gp102_sec2_flcn_bind_context, - .wait_for_halt = nvkm_falcon_v1_wait_for_halt, - .clear_interrupt = nvkm_falcon_v1_clear_interrupt, - .set_start_addr = nvkm_falcon_v1_set_start_addr, + .emem_pio = &gp102_flcn_emem_pio, .start = nvkm_falcon_v1_start, - .enable = nvkm_falcon_v1_enable, - .disable = nvkm_falcon_v1_disable, .cmdq = { 0xc00, 0xc04, 8 }, .msgq = { 0xc80, 0xc84, 8 }, }; @@ -44,7 +47,8 @@ tu102_sec2_flcn = { static const struct nvkm_sec2_func tu102_sec2 = { .flcn = &tu102_sec2_flcn, - .unit_acr = 0x07, + .unit_unload = NV_SEC2_UNIT_V2_UNLOAD, + .unit_acr = NV_SEC2_UNIT_V2_ACR, .intr = gp102_sec2_intr, .initmsg = gp102_sec2_initmsg, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c index 14871d0bd746..a9d464db6974 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c +++ 
b/drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c @@ -35,7 +35,7 @@ nvkm_sw_mthd(struct nvkm_sw *sw, int chid, int subc, u32 mthd, u32 data) spin_lock_irqsave(&sw->engine.lock, flags); list_for_each_entry(chan, &sw->chan, head) { - if (chan->fifo->chid == chid) { + if (chan->fifo->id == chid) { handled = nvkm_sw_chan_mthd(chan, subc, mthd, data); list_del(&chan->head); list_add(&chan->head, &sw->chan); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c index f28967065639..834b8cbed51d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c @@ -23,7 +23,6 @@ */ #include "chan.h" -#include <core/notify.h> #include <engine/fifo.h> #include <nvif/event.h> @@ -36,7 +35,7 @@ nvkm_sw_chan_mthd(struct nvkm_sw_chan *chan, int subc, u32 mthd, u32 data) case 0x0000: return true; case 0x0500: - nvkm_event_send(&chan->event, 1, 0, NULL, 0); + nvkm_event_ntfy(&chan->event, 0, NVKM_SW_CHAN_EVENT_PAGE_FLIP); return true; default: if (chan->func->mthd) @@ -46,27 +45,8 @@ nvkm_sw_chan_mthd(struct nvkm_sw_chan *chan, int subc, u32 mthd, u32 data) return false; } -static int -nvkm_sw_chan_event_ctor(struct nvkm_object *object, void *data, u32 size, - struct nvkm_notify *notify) -{ - union { - struct nvif_notify_uevent_req none; - } *req = data; - int ret = -ENOSYS; - - if (!(ret = nvif_unvers(ret, &data, &size, req->none))) { - notify->size = sizeof(struct nvif_notify_uevent_rep); - notify->types = 1; - notify->index = 0; - } - - return ret; -} - static const struct nvkm_event_func nvkm_sw_chan_event = { - .ctor = nvkm_sw_chan_event_ctor, }; static void * @@ -107,5 +87,5 @@ nvkm_sw_chan_ctor(const struct nvkm_sw_chan_func *func, struct nvkm_sw *sw, list_add(&chan->head, &sw->chan); spin_unlock_irqrestore(&sw->engine.lock, flags); - return nvkm_event_init(&nvkm_sw_chan_event, 1, 1, &chan->event); + return nvkm_event_init(&nvkm_sw_chan_event, &sw->engine.subdev, 1, 1, &chan->event); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h index 32de53427aa4..67b2e5ea93d9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h @@ -14,6 +14,7 @@ struct nvkm_sw_chan { struct nvkm_fifo_chan *fifo; struct list_head head; +#define NVKM_SW_CHAN_EVENT_PAGE_FLIP BIT(0) struct nvkm_event event; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c index 55abf839f29d..c3cf6f2ff86c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c @@ -36,10 +36,10 @@ ******************************************************************************/ static int -gf100_sw_chan_vblsem_release(struct nvkm_notify *notify) +gf100_sw_chan_vblsem_release(struct nvkm_event_ntfy *notify, u32 bits) { struct nv50_sw_chan *chan = - container_of(notify, typeof(*chan), vblank.notify[notify->index]); + container_of(notify, typeof(*chan), vblank.notify[notify->id]); struct nvkm_sw *sw = chan->base.sw; struct nvkm_device *device = sw->engine.subdev.device; u32 inst = chan->base.fifo->inst->addr >> 12; @@ -50,7 +50,7 @@ gf100_sw_chan_vblsem_release(struct nvkm_notify *notify) nvkm_wr32(device, 0x060010, lower_32_bits(chan->vblank.offset)); nvkm_wr32(device, 0x060014, chan->vblank.value); - return NVKM_NOTIFY_DROP; + return NVKM_EVENT_DROP; } static bool @@ -73,7 +73,7 @@ gf100_sw_chan_mthd(struct nvkm_sw_chan *base, int subc, u32 
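[Editor's sketch] The sw/chan.c hunk above replaces the old nvkm_notify machinery with nvkm_event_ntfy(): a producer raises a bitmask on an event index, and every armed notifier whose mask intersects it runs. A toy equivalent of that fan-out:

#include <stdio.h>
#include <stdint.h>

#define PAGE_FLIP (1u << 0)	/* mirrors NVKM_SW_CHAN_EVENT_PAGE_FLIP */

struct ntfy { uint32_t bits; void (*func)(uint32_t bits); };

static void on_flip(uint32_t bits) { printf("notified, bits %#x\n", bits); }

static void event_ntfy(struct ntfy *list, int nr, uint32_t bits)
{
	for (int i = 0; i < nr; i++) {
		if (list[i].bits & bits)
			list[i].func(bits);
	}
}

int main(void)
{
	struct ntfy list[] = { { PAGE_FLIP, on_flip } };

	event_ntfy(list, 1, PAGE_FLIP);	/* method 0x0500 in the driver */
	return 0;
}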
mthd, u32 data) return true; case 0x040c: if (data < device->disp->vblank.index_nr) { - nvkm_notify_get(&chan->vblank.notify[data]); + nvkm_event_ntfy_allow(&chan->vblank.notify[data]); return true; } break; @@ -120,16 +120,8 @@ gf100_sw_chan_new(struct nvkm_sw *sw, struct nvkm_fifo_chan *fifoch, return ret; for (i = 0; disp && i < disp->vblank.index_nr; i++) { - ret = nvkm_notify_init(NULL, &disp->vblank, - gf100_sw_chan_vblsem_release, false, - &(struct nvif_notify_head_req_v0) { - .head = i, - }, - sizeof(struct nvif_notify_head_req_v0), - sizeof(struct nvif_notify_head_rep_v0), - &chan->vblank.notify[i]); - if (ret) - return ret; + nvkm_event_ntfy_add(&disp->vblank, i, NVKM_DISP_HEAD_EVENT_VBLANK, true, + gf100_sw_chan_vblsem_release, &chan->vblank.notify[i]); } return 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c index 1fdd094c8b7e..9d7a9b7d5be3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c @@ -36,10 +36,10 @@ ******************************************************************************/ static int -nv50_sw_chan_vblsem_release(struct nvkm_notify *notify) +nv50_sw_chan_vblsem_release(struct nvkm_event_ntfy *notify, u32 bits) { struct nv50_sw_chan *chan = - container_of(notify, typeof(*chan), vblank.notify[notify->index]); + container_of(notify, typeof(*chan), vblank.notify[notify->id]); struct nvkm_sw *sw = chan->base.sw; struct nvkm_device *device = sw->engine.subdev.device; @@ -55,7 +55,7 @@ nv50_sw_chan_vblsem_release(struct nvkm_notify *notify) nvkm_wr32(device, 0x060014, chan->vblank.value); } - return NVKM_NOTIFY_DROP; + return NVKM_EVENT_DROP; } static bool @@ -70,7 +70,7 @@ nv50_sw_chan_mthd(struct nvkm_sw_chan *base, int subc, u32 mthd, u32 data) case 0x0404: chan->vblank.value = data; return true; case 0x0408: if (data < device->disp->vblank.index_nr) { - nvkm_notify_get(&chan->vblank.notify[data]); + nvkm_event_ntfy_allow(&chan->vblank.notify[data]); return true; } break; @@ -85,8 +85,10 @@ nv50_sw_chan_dtor(struct nvkm_sw_chan *base) { struct nv50_sw_chan *chan = nv50_sw_chan(base); int i; + for (i = 0; i < ARRAY_SIZE(chan->vblank.notify); i++) - nvkm_notify_fini(&chan->vblank.notify[i]); + nvkm_event_ntfy_del(&chan->vblank.notify[i]); + return chan; } @@ -113,16 +115,8 @@ nv50_sw_chan_new(struct nvkm_sw *sw, struct nvkm_fifo_chan *fifoch, return ret; for (i = 0; disp && i < disp->vblank.index_nr; i++) { - ret = nvkm_notify_init(NULL, &disp->vblank, - nv50_sw_chan_vblsem_release, false, - &(struct nvif_notify_head_req_v0) { - .head = i, - }, - sizeof(struct nvif_notify_head_req_v0), - sizeof(struct nvif_notify_head_rep_v0), - &chan->vblank.notify[i]); - if (ret) - return ret; + nvkm_event_ntfy_add(&disp->vblank, i, NVKM_DISP_HEAD_EVENT_VBLANK, true, + nv50_sw_chan_vblsem_release, &chan->vblank.notify[i]); } return 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h index 6d364d7b406a..b42289ce8826 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h @@ -5,12 +5,12 @@ #include "priv.h" #include "chan.h" #include "nvsw.h" -#include <core/notify.h> +#include <core/event.h> struct nv50_sw_chan { struct nvkm_sw_chan base; struct { - struct nvkm_notify notify[4]; + struct nvkm_event_ntfy notify[4]; u32 ctxdma; u64 offset; u32 value; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.c 
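[Editor's sketch] The nv50.c and gf100.c conversions above arm a per-head vblank notifier with nvkm_event_ntfy_allow() when the channel programs the semaphore method, and the handler returns NVKM_EVENT_DROP so it disarms after one firing. A one-shot notifier reduced to its essentials:

#include <stdbool.h>
#include <stdio.h>

enum { EVENT_KEEP, EVENT_DROP };

struct ntfy { bool allowed; int (*func)(void); };

static int vblsem_release(void)
{
	puts("release semaphore");	/* writes offset/value in the driver */
	return EVENT_DROP;		/* one-shot: disarm after firing */
}

static void vblank_fire(struct ntfy *n)
{
	if (n->allowed && n->func() == EVENT_DROP)
		n->allowed = false;
}

int main(void)
{
	struct ntfy n = { .allowed = false, .func = vblsem_release };

	n.allowed = true;	/* nvkm_event_ntfy_allow(), method 0x0408 */
	vblank_fire(&n);	/* fires once */
	vblank_fire(&n);	/* already dropped: no output */
	return 0;
}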
index 33dd03fff3c4..f5affa1c8f34 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.c @@ -27,33 +27,34 @@ #include <nvif/if0004.h> static int -nvkm_nvsw_mthd_(struct nvkm_object *object, u32 mthd, void *data, u32 size) +nvkm_nvsw_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_uevent *uevent) { - struct nvkm_nvsw *nvsw = nvkm_nvsw(object); - if (nvsw->func->mthd) - return nvsw->func->mthd(nvsw, mthd, data, size); - return -ENODEV; + union nv04_nvsw_event_args *args = argv; + + if (!uevent) + return 0; + if (argc != sizeof(args->vn)) + return -ENOSYS; + + return nvkm_uevent_add(uevent, &nvkm_nvsw(object)->chan->event, 0, + NVKM_SW_CHAN_EVENT_PAGE_FLIP, NULL); } static int -nvkm_nvsw_ntfy_(struct nvkm_object *object, u32 mthd, - struct nvkm_event **pevent) +nvkm_nvsw_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size) { struct nvkm_nvsw *nvsw = nvkm_nvsw(object); - switch (mthd) { - case NV04_NVSW_NTFY_UEVENT: - *pevent = &nvsw->chan->event; - return 0; - default: - break; - } - return -EINVAL; + + if (nvsw->func->mthd) + return nvsw->func->mthd(nvsw, mthd, data, size); + + return -ENODEV; } static const struct nvkm_object_func nvkm_nvsw_ = { - .mthd = nvkm_nvsw_mthd_, - .ntfy = nvkm_nvsw_ntfy_, + .mthd = nvkm_nvsw_mthd, + .uevent = nvkm_nvsw_uevent, }; int |
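[Editor's sketch] nvkm_nvsw_uevent() above validates that the caller passed the (empty) nv04 event arguments before attaching the user's notifier to the channel's event with the PAGE_FLIP bit. The shape of that check-and-forward, simplified to the argc test:

#include <stdio.h>

#define ENOSYS 38

/* argc must match the empty nv04 request struct, mirroring the
 * "argc != sizeof(args->vn)" check in the hunk above. */
static int nvsw_uevent(unsigned int argc)
{
	if (argc != 0)
		return -ENOSYS;
	puts("attach notifier to chan->event, bit PAGE_FLIP");
	return 0;
}

int main(void)
{
	printf("good args -> %d\n", nvsw_uevent(0));
	printf("bad args  -> %d\n", nvsw_uevent(4));
	return 0;
}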