diff options
Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/engine/gr')
59 files changed, 1562 insertions, 661 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild index 558c86fd8e82..b5418f05ccd8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild @@ -40,6 +40,7 @@ nvkm-y += nvkm/engine/gr/gp108.o nvkm-y += nvkm/engine/gr/gp10b.o nvkm-y += nvkm/engine/gr/gv100.o nvkm-y += nvkm/engine/gr/tu102.o +nvkm-y += nvkm/engine/gr/ga102.o nvkm-y += nvkm/engine/gr/ctxnv40.o nvkm-y += nvkm/engine/gr/ctxnv50.o @@ -63,3 +64,4 @@ nvkm-y += nvkm/engine/gr/ctxgp104.o nvkm-y += nvkm/engine/gr/ctxgp107.o nvkm-y += nvkm/engine/gr/ctxgv100.o nvkm-y += nvkm/engine/gr/ctxtu102.o +nvkm-y += nvkm/engine/gr/ctxga102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c index 61759f54406e..71b824e6da9d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c @@ -136,6 +136,17 @@ nvkm_gr_oneinit(struct nvkm_engine *engine) } static int +nvkm_gr_reset(struct nvkm_engine *engine) +{ + struct nvkm_gr *gr = nvkm_gr(engine); + + if (gr->func->reset) + return gr->func->reset(gr); + + return -ENOSYS; +} + +static int nvkm_gr_init(struct nvkm_engine *engine) { struct nvkm_gr *gr = nvkm_gr(engine); @@ -166,6 +177,7 @@ nvkm_gr = { .oneinit = nvkm_gr_oneinit, .init = nvkm_gr_init, .fini = nvkm_gr_fini, + .reset = nvkm_gr_reset, .intr = nvkm_gr_intr, .tile = nvkm_gr_tile, .chsw_load = nvkm_gr_chsw_load, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxga102.c new file mode 100644 index 000000000000..11461adf5036 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxga102.c @@ -0,0 +1,77 @@ +/* + * Copyright 2019 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "ctxgf100.h" + +static void +ga102_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + tpc = gv100_gr_nonpes_aware_tpc(gr, gpc, tpc); + + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x608), sm); +} + +static void +ga102_grctx_generate_unkn(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_mask(device, 0x41980c, 0x00000010, 0x00000010); + nvkm_mask(device, 0x41be08, 0x00000004, 0x00000004); +} + +static void +ga102_grctx_generate_r419ea8(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x419ea8, nvkm_rd32(device, 0x504728) | 0x08000000); +} + +const struct gf100_grctx_func +ga102_grctx = { + .main = gf100_grctx_generate_main, + .unkn = ga102_grctx_generate_unkn, + .bundle = gm107_grctx_generate_bundle, + .bundle_size = 0x3000, + .bundle_min_gpm_fifo_depth = 0x180, + .bundle_token_limit = 0x1140, + .pagepool = gp100_grctx_generate_pagepool, + .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gv100_grctx_generate_attrib_cb, + .attrib = gv100_grctx_generate_attrib, + .attrib_nr_max = 0x800, + .attrib_nr = 0x4a1, + .alpha_nr_max = 0xc00, + .alpha_nr = 0x800, + .unknown_size = 0x80000, + .unknown = tu102_grctx_generate_unknown, + .gfxp_nr = 0xd28, + .sm_id = ga102_grctx_generate_sm_id, + .skip_pd_num_tpc_per_gpc = true, + .rop_mapping = gv100_grctx_generate_rop_mapping, + .r406500 = gm200_grctx_generate_r406500, + .r400088 = gv100_grctx_generate_r400088, + .r419ea8 = ga102_grctx_generate_r419ea8, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index 297915719bf2..cb390e0134a2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -26,6 +26,7 @@ #include <subdev/fb.h> #include <subdev/mc.h> 
#include <subdev/timer.h> +#include <engine/fifo.h> /******************************************************************************* * PGRAPH context register lists @@ -990,43 +991,16 @@ gf100_grctx_pack_tpc[] = { * PGRAPH context implementation ******************************************************************************/ -int -gf100_grctx_mmio_data(struct gf100_grctx *info, u32 size, u32 align, bool priv) -{ - if (info->data) { - info->buffer[info->buffer_nr] = round_up(info->addr, align); - info->addr = info->buffer[info->buffer_nr] + size; - info->data->size = size; - info->data->align = align; - info->data->priv = priv; - info->data++; - return info->buffer_nr++; - } - return -1; -} - void -gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data, - int shift, int buffer) +gf100_grctx_patch_wr32(struct gf100_gr_chan *chan, u32 addr, u32 data) { - struct nvkm_device *device = info->gr->base.engine.subdev.device; - if (info->data) { - if (shift >= 0) { - info->mmio->addr = addr; - info->mmio->data = data; - info->mmio->shift = shift; - info->mmio->buffer = buffer; - if (buffer >= 0) - data |= info->buffer[buffer] >> shift; - info->mmio++; - } else - return; - } else { - if (buffer >= 0) - return; + if (unlikely(!chan->mmio)) { + nvkm_wr32(chan->gr->base.engine.subdev.device, addr, data); + return; } - nvkm_wr32(device, addr, data); + nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr); + nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data); } void @@ -1037,56 +1011,60 @@ gf100_grctx_generate_r419cb8(struct gf100_gr *gr) } void -gf100_grctx_generate_bundle(struct gf100_grctx *info) +gf100_grctx_generate_bundle(struct gf100_gr_chan *chan, u64 addr, u32 size) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const int s = 8; - const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true); - mmio_refn(info, 0x408004, 0x00000000, s, b); - mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); - mmio_refn(info, 0x418808, 
0x00000000, s, b); - mmio_wr32(info, 0x41880c, 0x80000000 | (grctx->bundle_size >> s)); + gf100_grctx_patch_wr32(chan, 0x408004, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408008, 0x80000000 | (size >> 8)); + gf100_grctx_patch_wr32(chan, 0x418808, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x41880c, 0x80000000 | (size >> 8)); } void -gf100_grctx_generate_pagepool(struct gf100_grctx *info) +gf100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); - mmio_refn(info, 0x40800c, 0x00000000, s, b); - mmio_wr32(info, 0x408010, 0x80000000); - mmio_refn(info, 0x419004, 0x00000000, s, b); - mmio_wr32(info, 0x419008, 0x00000000); + gf100_grctx_patch_wr32(chan, 0x40800c, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408010, 0x80000000); + gf100_grctx_patch_wr32(chan, 0x419004, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x419008, 0x00000000); } void -gf100_grctx_generate_attrib(struct gf100_grctx *info) +gf100_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 attrib = grctx->attrib_nr; - const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); int gpc, tpc; u32 bo = 0; - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_wr32(info, 0x405830, (attrib << 16)); + gf100_grctx_patch_wr32(chan, 0x405830, (attrib << 16)); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { const u32 o = TPC_UNIT(gpc, tpc, 0x0520); - mmio_skip(info, o, (attrib << 16) | ++bo); - mmio_wr32(info, o, (attrib << 16) | --bo); + + gf100_grctx_patch_wr32(chan, o, (attrib << 16) | bo); bo += grctx->attrib_nr_max; } } } void 
+gf100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size) +{ + gf100_grctx_patch_wr32(chan, 0x418810, 0x80000000 | addr >> 12); + gf100_grctx_patch_wr32(chan, 0x419848, 0x10000000 | addr >> 12); +} + +u32 +gf100_grctx_generate_attrib_cb_size(struct gf100_gr *gr) +{ + const struct gf100_grctx_func *grctx = gr->func->grctx; + + return 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max) * gr->tpc_total; +} + +void gf100_grctx_generate_unkn(struct gf100_gr *gr) { } @@ -1361,8 +1339,9 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr) } void -gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +gf100_grctx_generate_main(struct gf100_gr_chan *chan) { + struct gf100_gr *gr = chan->gr; struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; u32 idle_timeout; @@ -1380,15 +1359,23 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_gr_mmio(gr, gr->sw_ctx); } + if (gr->func->init_419bd8) + gr->func->init_419bd8(gr); + if (grctx->r419ea8) + grctx->r419ea8(gr); + gf100_gr_wait_idle(gr); idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - grctx->pagepool(info); - grctx->bundle(info); - grctx->attrib(info); + grctx->pagepool(chan, chan->pagepool->addr); + grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size); + grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr)); + grctx->attrib(chan); if (grctx->patch_ltc) - grctx->patch_ltc(info); + grctx->patch_ltc(chan); + if (grctx->unknown_size) + grctx->unknown(chan, chan->unknown->addr, grctx->unknown_size); grctx->unkn(gr); gf100_grctx_generate_floorsweep(gr); @@ -1396,12 +1383,23 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_gr_wait_idle(gr); if (grctx->r400088) grctx->r400088(gr, false); + if (gr->bundle) gf100_gr_icmd(gr, gr->bundle); else gf100_gr_icmd(gr, grctx->icmd); - if (grctx->sw_veid_bundle_init) + + if 
(gr->bundle_veid) + gf100_gr_icmd(gr, gr->bundle_veid); + else gf100_gr_icmd(gr, grctx->sw_veid_bundle_init); + + if (gr->bundle64) + gf100_gr_icmd(gr, gr->bundle64); + else + if (grctx->sw_bundle64_init) + gf100_gr_icmd(gr, grctx->sw_bundle64_init); + if (grctx->r400088) grctx->r400088(gr, true); nvkm_wr32(device, 0x404154, idle_timeout); @@ -1428,21 +1426,20 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) grctx->r408840(gr); if (grctx->r419c0c) grctx->r419c0c(gr); + + gf100_gr_wait_idle(gr); } #define CB_RESERVED 0x80000 int -gf100_grctx_generate(struct gf100_gr *gr) +gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvkm_gpuobj *inst) { const struct gf100_grctx_func *grctx = gr->func->grctx; struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; - struct nvkm_memory *inst = NULL; struct nvkm_memory *data = NULL; - struct nvkm_vmm *vmm = NULL; struct nvkm_vma *ctx = NULL; - struct gf100_grctx info; int ret, i; u64 addr; @@ -1457,72 +1454,47 @@ gf100_grctx_generate(struct gf100_gr *gr) grctx->unkn88c(gr, true); /* Reset FECS. */ - nvkm_wr32(device, 0x409614, 0x00000070); - nvkm_usec(device, 10, NVKM_DELAY); - nvkm_mask(device, 0x409614, 0x00000700, 0x00000700); - nvkm_usec(device, 10, NVKM_DELAY); - nvkm_rd32(device, 0x409614); + gr->func->fecs.reset(gr); if (grctx->unkn88c) grctx->unkn88c(gr, false); /* NV_PGRAPH_FE_PWR_MODE_AUTO. */ nvkm_wr32(device, 0x404170, 0x00000010); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x404170) & 0x00000010)) + break; + ); /* Init SCC RAM. */ nvkm_wr32(device, 0x40802c, 0x00000001); - /* Allocate memory to for a "channel", which we'll use to generate - * the default context values. 
- */ - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - 0x1000, 0x1000, true, &inst); - if (ret) - goto done; - - ret = nvkm_vmm_new(device, 0, 0, NULL, 0, NULL, "grctx", &vmm); - if (ret) - goto done; - - vmm->debug = subdev->debug; - - ret = nvkm_vmm_join(vmm, inst); - if (ret) - goto done; - + /* Allocate memory to store context, and dummy global context buffers. */ ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, CB_RESERVED + gr->size, 0, true, &data); if (ret) goto done; - ret = nvkm_vmm_get(vmm, 0, nvkm_memory_size(data), &ctx); + ret = nvkm_vmm_get(chan->vmm, 0, nvkm_memory_size(data), &ctx); if (ret) goto done; - ret = nvkm_memory_map(data, 0, vmm, ctx, NULL, 0); + ret = nvkm_memory_map(data, 0, chan->vmm, ctx, NULL, 0); if (ret) goto done; - /* Setup context pointer. */ nvkm_kmap(inst); nvkm_wo32(inst, 0x0210, lower_32_bits(ctx->addr + CB_RESERVED) | 4); nvkm_wo32(inst, 0x0214, upper_32_bits(ctx->addr + CB_RESERVED)); nvkm_done(inst); - /* Setup default state for mmio list construction. */ - info.gr = gr; - info.data = gr->mmio_data; - info.mmio = gr->mmio_list; - info.addr = ctx->addr; - info.buffer_nr = 0; - /* Make channel current. */ - addr = nvkm_memory_addr(inst) >> 12; + addr = inst->addr >> 12; if (gr->firmware) { ret = gf100_gr_fecs_bind_pointer(gr, 0x80000000 | addr); if (ret) - goto done; + goto done_inst; nvkm_kmap(data); nvkm_wo32(data, 0x1c, 1); @@ -1540,19 +1512,27 @@ gf100_grctx_generate(struct gf100_gr *gr) ); } - grctx->main(gr, &info); + grctx->main(chan); - /* Trigger a context unload by unsetting the "next channel valid" bit - * and faking a context switch interrupt. 
- */ - nvkm_mask(device, 0x409b04, 0x80000000, 0x00000000); - nvkm_wr32(device, 0x409000, 0x00000100); - if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x409b00) & 0x80000000)) - break; - ) < 0) { - ret = -EBUSY; - goto done; + if (!gr->firmware) { + /* Trigger a context unload by unsetting the "next channel valid" bit + * and faking a context switch interrupt. + */ + nvkm_mask(device, 0x409b04, 0x80000000, 0x00000000); + nvkm_wr32(device, 0x409000, 0x00000100); + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x409b00) & 0x80000000)) + break; + ) < 0) { + ret = -EBUSY; + goto done_inst; + } + } else { + ret = gf100_gr_fecs_wfi_golden_save(gr, 0x80000000 | addr); + if (ret) + goto done_inst; + + nvkm_mask(device, 0x409b00, 0x80000000, 0x00000000); } gr->data = kmalloc(gr->size, GFP_KERNEL); @@ -1566,12 +1546,14 @@ gf100_grctx_generate(struct gf100_gr *gr) ret = -ENOMEM; } +done_inst: + nvkm_kmap(inst); + nvkm_wo32(inst, 0x0210, 0); + nvkm_wo32(inst, 0x0214, 0); + nvkm_done(inst); done: - nvkm_vmm_put(vmm, &ctx); + nvkm_vmm_put(chan->vmm, &ctx); nvkm_memory_unref(&data); - nvkm_vmm_part(vmm, inst); - nvkm_vmm_unref(&vmm); - nvkm_memory_unref(&inst); return ret; } @@ -1590,6 +1572,8 @@ gf100_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h index 32bbddc0993e..00dbeda7e346 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -3,27 +3,12 @@ #define __NVKM_GRCTX_NVC0_H__ #include "gf100.h" -struct gf100_grctx { - struct gf100_gr *gr; - struct gf100_gr_data *data; - struct gf100_gr_mmio *mmio; - int buffer_nr; - u64 buffer[4]; - u64 addr; -}; 
- -int gf100_grctx_mmio_data(struct gf100_grctx *, u32 size, u32 align, bool priv); -void gf100_grctx_mmio_item(struct gf100_grctx *, u32 addr, u32 data, int s, int); - -#define mmio_vram(a,b,c,d) gf100_grctx_mmio_data((a), (b), (c), (d)) -#define mmio_refn(a,b,c,d,e) gf100_grctx_mmio_item((a), (b), (c), (d), (e)) -#define mmio_skip(a,b,c) mmio_refn((a), (b), (c), -1, -1) -#define mmio_wr32(a,b,c) mmio_refn((a), (b), (c), 0, -1) +void gf100_grctx_patch_wr32(struct gf100_gr_chan *, u32 addr, u32 data); struct gf100_grctx_func { void (*unkn88c)(struct gf100_gr *, bool on); /* main context generation function */ - void (*main)(struct gf100_gr *, struct gf100_grctx *); + void (*main)(struct gf100_gr_chan *); /* context-specific modify-on-first-load list generation function */ void (*unkn)(struct gf100_gr *); /* mmio context data */ @@ -37,23 +22,29 @@ struct gf100_grctx_func { const struct gf100_gr_pack *icmd; const struct gf100_gr_pack *mthd; const struct gf100_gr_pack *sw_veid_bundle_init; + const struct gf100_gr_pack *sw_bundle64_init; /* bundle circular buffer */ - void (*bundle)(struct gf100_grctx *); + void (*bundle)(struct gf100_gr_chan *, u64 addr, u32 size); u32 bundle_size; u32 bundle_min_gpm_fifo_depth; u32 bundle_token_limit; /* pagepool */ - void (*pagepool)(struct gf100_grctx *); + void (*pagepool)(struct gf100_gr_chan *, u64 addr); u32 pagepool_size; /* attribute(/alpha) circular buffer */ - void (*attrib)(struct gf100_grctx *); + u32 (*attrib_cb_size)(struct gf100_gr *); + void (*attrib_cb)(struct gf100_gr_chan *, u64 addr, u32 size); + void (*attrib)(struct gf100_gr_chan *); u32 attrib_nr_max; u32 attrib_nr; u32 alpha_nr_max; u32 alpha_nr; u32 gfxp_nr; + /* some other context buffer */ + void (*unknown)(struct gf100_gr_chan *, u64 addr, u32 size); + u32 unknown_size; /* other patch buffer stuff */ - void (*patch_ltc)(struct gf100_grctx *); + void (*patch_ltc)(struct gf100_gr_chan *); /* floorsweeping */ void (*sm_id)(struct gf100_gr *, int gpc, int 
tpc, int sm); void (*tpc_nr)(struct gf100_gr *, int gpc); @@ -78,14 +69,17 @@ struct gf100_grctx_func { void (*r419a3c)(struct gf100_gr *); void (*r408840)(struct gf100_gr *); void (*r419c0c)(struct gf100_gr *); + void (*r419ea8)(struct gf100_gr *); }; extern const struct gf100_grctx_func gf100_grctx; -int gf100_grctx_generate(struct gf100_gr *); -void gf100_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *); -void gf100_grctx_generate_bundle(struct gf100_grctx *); -void gf100_grctx_generate_pagepool(struct gf100_grctx *); -void gf100_grctx_generate_attrib(struct gf100_grctx *); +int gf100_grctx_generate(struct gf100_gr *, struct gf100_gr_chan *, struct nvkm_gpuobj *inst); +void gf100_grctx_generate_main(struct gf100_gr_chan *); +void gf100_grctx_generate_pagepool(struct gf100_gr_chan *, u64); +void gf100_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32); +u32 gf100_grctx_generate_attrib_cb_size(struct gf100_gr *); +void gf100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32); +void gf100_grctx_generate_attrib(struct gf100_gr_chan *); void gf100_grctx_generate_unkn(struct gf100_gr *); void gf100_grctx_generate_floorsweep(struct gf100_gr *); void gf100_grctx_generate_sm_id(struct gf100_gr *, int, int, int); @@ -97,14 +91,14 @@ void gf100_grctx_generate_max_ways_evict(struct gf100_gr *); void gf100_grctx_generate_r419cb8(struct gf100_gr *); extern const struct gf100_grctx_func gf108_grctx; -void gf108_grctx_generate_attrib(struct gf100_grctx *); +void gf108_grctx_generate_attrib(struct gf100_gr_chan *); void gf108_grctx_generate_unkn(struct gf100_gr *); extern const struct gf100_grctx_func gf104_grctx; extern const struct gf100_grctx_func gf110_grctx; extern const struct gf100_grctx_func gf117_grctx; -void gf117_grctx_generate_attrib(struct gf100_grctx *); +void gf117_grctx_generate_attrib(struct gf100_gr_chan *); void gf117_grctx_generate_rop_mapping(struct gf100_gr *); void gf117_grctx_generate_dist_skip_table(struct gf100_gr *); @@ -115,9 
+109,9 @@ void gk104_grctx_generate_alpha_beta_tables(struct gf100_gr *); void gk104_grctx_generate_gpc_tpc_nr(struct gf100_gr *); extern const struct gf100_grctx_func gk20a_grctx; -void gk104_grctx_generate_bundle(struct gf100_grctx *); -void gk104_grctx_generate_pagepool(struct gf100_grctx *); -void gk104_grctx_generate_patch_ltc(struct gf100_grctx *); +void gk104_grctx_generate_pagepool(struct gf100_gr_chan *, u64); +void gk104_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32); +void gk104_grctx_generate_patch_ltc(struct gf100_gr_chan *); void gk104_grctx_generate_unkn(struct gf100_gr *); void gk104_grctx_generate_r418800(struct gf100_gr *); @@ -128,9 +122,10 @@ extern const struct gf100_grctx_func gk110b_grctx; extern const struct gf100_grctx_func gk208_grctx; extern const struct gf100_grctx_func gm107_grctx; -void gm107_grctx_generate_bundle(struct gf100_grctx *); -void gm107_grctx_generate_pagepool(struct gf100_grctx *); -void gm107_grctx_generate_attrib(struct gf100_grctx *); +void gm107_grctx_generate_pagepool(struct gf100_gr_chan *, u64); +void gm107_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32); +void gm107_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32); +void gm107_grctx_generate_attrib(struct gf100_gr_chan *); void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int); extern const struct gf100_grctx_func gm200_grctx; @@ -143,11 +138,13 @@ void gm200_grctx_generate_r419a3c(struct gf100_gr *); extern const struct gf100_grctx_func gm20b_grctx; extern const struct gf100_grctx_func gp100_grctx; -void gp100_grctx_generate_pagepool(struct gf100_grctx *); +void gp100_grctx_generate_pagepool(struct gf100_gr_chan *, u64); +void gp100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32); void gp100_grctx_generate_smid_config(struct gf100_gr *); extern const struct gf100_grctx_func gp102_grctx; -void gp102_grctx_generate_attrib(struct gf100_grctx *); +u32 gp102_grctx_generate_attrib_cb_size(struct gf100_gr *); +void 
gp102_grctx_generate_attrib(struct gf100_gr_chan *); extern const struct gf100_grctx_func gp104_grctx; @@ -158,11 +155,15 @@ extern const struct gf100_grctx_func gv100_grctx; extern const struct gf100_grctx_func tu102_grctx; void gv100_grctx_unkn88c(struct gf100_gr *, bool); void gv100_grctx_generate_unkn(struct gf100_gr *); -extern const struct gf100_gr_init gv100_grctx_init_sw_veid_bundle_init_0[]; -void gv100_grctx_generate_attrib(struct gf100_grctx *); +void gv100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32); +void gv100_grctx_generate_attrib(struct gf100_gr_chan *); void gv100_grctx_generate_rop_mapping(struct gf100_gr *); void gv100_grctx_generate_r400088(struct gf100_gr *, bool); +void tu102_grctx_generate_unknown(struct gf100_gr_chan *, u64, u32); + +extern const struct gf100_grctx_func ga102_grctx; + /* context init value lists */ extern const struct gf100_gr_pack gf100_grctx_pack_icmd[]; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c index 7a0564b6e3c7..ba63a3b46518 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c @@ -94,6 +94,8 @@ gf104_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c index dda2c32e6232..0bc2eab6ad98 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c @@ -733,25 +733,20 @@ gf108_grctx_pack_tpc[] = { ******************************************************************************/ void -gf108_grctx_generate_attrib(struct gf100_grctx *info) +gf108_grctx_generate_attrib(struct 
gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 beta = grctx->attrib_nr; - const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int timeslice_mode = 1; const int max_batches = 0xffff; u32 bo = 0; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; int gpc, tpc; - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_wr32(info, 0x405830, (beta << 16) | alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, (beta << 16) | alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { @@ -759,10 +754,10 @@ gf108_grctx_generate_attrib(struct gf100_grctx *info) const u32 b = beta; const u32 t = timeslice_mode; const u32 o = TPC_UNIT(gpc, tpc, 0x500); - mmio_skip(info, o + 0x20, (t << 28) | (b << 16) | ++bo); - mmio_wr32(info, o + 0x20, (t << 28) | (b << 16) | --bo); + + gf100_grctx_patch_wr32(chan, o + 0x20, (t << 28) | (b << 16) | bo); bo += grctx->attrib_nr_max; - mmio_wr32(info, o + 0x44, (a << 16) | ao); + gf100_grctx_patch_wr32(chan, o + 0x44, (a << 16) | ao); ao += grctx->alpha_nr_max; } } @@ -795,6 +790,8 @@ gf108_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf108_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c index f5cca5e6a4f2..64b723b0afb5 100644 --- 
a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c @@ -342,6 +342,8 @@ gf110_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c index 276c282d19aa..e34c5da2a9ff 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c @@ -241,38 +241,34 @@ gf117_grctx_generate_rop_mapping(struct gf100_gr *gr) } void -gf117_grctx_generate_attrib(struct gf100_grctx *info) +gf117_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 beta = grctx->attrib_nr; - const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int timeslice_mode = 1; const int max_batches = 0xffff; u32 bo = 0; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; int gpc, ppc; - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_wr32(info, 0x405830, (beta << 16) | alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, (beta << 16) | alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) { const u32 a = alpha * gr->ppc_tpc_nr[gpc][ppc]; const u32 b = beta * gr->ppc_tpc_nr[gpc][ppc]; const u32 t = timeslice_mode; 
const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_skip(info, o + 0xc0, (t << 28) | (b << 16) | ++bo); - mmio_wr32(info, o + 0xc0, (t << 28) | (b << 16) | --bo); + + gf100_grctx_patch_wr32(chan, o + 0xc0, (t << 28) | (b << 16) | bo); bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, o + 0xe4, (a << 16) | ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, (a << 16) | ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; } } @@ -294,6 +290,8 @@ gf117_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c index 0cfe46366af6..426ad1b8d426 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c @@ -510,6 +510,8 @@ gf119_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf108_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c index 304e9d268bad..94233d0119df 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c @@ -861,43 +861,33 @@ gk104_grctx_generate_r418800(struct gf100_gr *gr) } void -gk104_grctx_generate_patch_ltc(struct gf100_grctx *info) +gk104_grctx_generate_patch_ltc(struct gf100_gr_chan *chan) { - struct nvkm_device *device = info->gr->base.engine.subdev.device; + struct nvkm_device *device = chan->gr->base.engine.subdev.device; u32 
data0 = nvkm_rd32(device, 0x17e91c); u32 data1 = nvkm_rd32(device, 0x17e920); + /*XXX: Figure out how to modify this correctly! */ - mmio_wr32(info, 0x17e91c, data0); - mmio_wr32(info, 0x17e920, data1); + gf100_grctx_patch_wr32(chan, 0x17e91c, data0); + gf100_grctx_patch_wr32(chan, 0x17e920, data1); } void -gk104_grctx_generate_bundle(struct gf100_grctx *info) +gk104_grctx_generate_bundle(struct gf100_gr_chan *chan, u64 addr, u32 size) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, - grctx->bundle_size / 0x20); + const struct gf100_grctx_func *grctx = chan->gr->func->grctx; + const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, size / 0x20); const u32 token_limit = grctx->bundle_token_limit; - const int s = 8; - const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true); - mmio_refn(info, 0x408004, 0x00000000, s, b); - mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); - mmio_refn(info, 0x418808, 0x00000000, s, b); - mmio_wr32(info, 0x41880c, 0x80000000 | (grctx->bundle_size >> s)); - mmio_wr32(info, 0x4064c8, (state_limit << 16) | token_limit); + + gf100_grctx_generate_bundle(chan, addr, size); + gf100_grctx_patch_wr32(chan, 0x4064c8, (state_limit << 16) | token_limit); } void -gk104_grctx_generate_pagepool(struct gf100_grctx *info) +gk104_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); - mmio_refn(info, 0x40800c, 0x00000000, s, b); - mmio_wr32(info, 0x408010, 0x80000000); - mmio_refn(info, 0x419004, 0x00000000, s, b); - mmio_wr32(info, 0x419008, 0x00000000); - mmio_wr32(info, 0x4064cc, 0x80000000); + gf100_grctx_generate_pagepool(chan, addr); + gf100_grctx_patch_wr32(chan, 0x4064cc, 0x80000000); } void @@ -991,6 +981,8 @@ gk104_grctx = { .bundle_token_limit = 0x600, .pagepool = 
gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c index 86547cfc38dc..4391458e1fb2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c @@ -838,6 +838,8 @@ gk110_grctx = { .bundle_token_limit = 0x7c0, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c index ebb947bd1446..7b9a34f9ec3c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c @@ -87,6 +87,8 @@ gk110b_grctx = { .bundle_token_limit = 0x600, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c index 4d40512b5c99..c78d07a8bb7d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c @@ -553,6 +553,8 @@ gk208_grctx = { .bundle_token_limit = 0x200, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 
0x218, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c index c0d36bc601f9..ac5fdcb5cd3f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c @@ -25,8 +25,9 @@ #include <subdev/mc.h> static void -gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +gk20a_grctx_generate_main(struct gf100_gr_chan *chan) { + struct gf100_gr *gr = chan->gr; struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; u32 idle_timeout; @@ -38,7 +39,8 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - grctx->attrib(info); + grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr)); + grctx->attrib(chan); grctx->unkn(gr); @@ -60,8 +62,8 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_gr_wait_idle(gr); gf100_gr_icmd(gr, gr->bundle); - grctx->pagepool(info); - grctx->bundle(info); + grctx->pagepool(chan, chan->pagepool->addr); + grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size); } const struct gf100_grctx_func @@ -74,6 +76,8 @@ gk20a_grctx = { .bundle_token_limit = 0x100, .pagepool = gk104_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf117_grctx_generate_attrib, .attrib_nr_max = 0x240, .attrib_nr = 0x240, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c index 0b3964e6b36e..beac66eb2a80 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c @@ -876,75 +876,70 @@ gm107_grctx_generate_r419e00(struct gf100_gr *gr) } void -gm107_grctx_generate_bundle(struct gf100_grctx *info) 
+gm107_grctx_generate_bundle(struct gf100_gr_chan *chan, u64 addr, u32 size) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, - grctx->bundle_size / 0x20); + const struct gf100_grctx_func *grctx = chan->gr->func->grctx; + const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, size / 0x20); const u32 token_limit = grctx->bundle_token_limit; - const int s = 8; - const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true); - mmio_refn(info, 0x408004, 0x00000000, s, b); - mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); - mmio_refn(info, 0x418e24, 0x00000000, s, b); - mmio_wr32(info, 0x418e28, 0x80000000 | (grctx->bundle_size >> s)); - mmio_wr32(info, 0x4064c8, (state_limit << 16) | token_limit); + + gf100_grctx_patch_wr32(chan, 0x408004, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408008, 0x80000000 | (size >> 8)); + gf100_grctx_patch_wr32(chan, 0x418e24, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x418e28, 0x80000000 | (size >> 8)); + gf100_grctx_patch_wr32(chan, 0x4064c8, (state_limit << 16) | token_limit); } void -gm107_grctx_generate_pagepool(struct gf100_grctx *info) +gm107_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); - mmio_refn(info, 0x40800c, 0x00000000, s, b); - mmio_wr32(info, 0x408010, 0x80000000); - mmio_refn(info, 0x419004, 0x00000000, s, b); - mmio_wr32(info, 0x419008, 0x00000000); - mmio_wr32(info, 0x4064cc, 0x80000000); - mmio_wr32(info, 0x418e30, 0x80000000); /* guess at it being related */ + gk104_grctx_generate_pagepool(chan, addr); + gf100_grctx_patch_wr32(chan, 0x418e30, 0x80000000); } void -gm107_grctx_generate_attrib(struct gf100_grctx *info) +gm107_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = 
chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; - const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false); const int max_batches = 0xffff; u32 bo = 0; u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; int gpc, ppc, n = 0; - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_refn(info, 0x419c2c, 0x10000000, s, b); - mmio_wr32(info, 0x405830, (attrib << 16) | alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, (attrib << 16) | alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) { const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc]; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, bs); - mmio_wr32(info, o + 0xf4, bo); + + gf100_grctx_patch_wr32(chan, o + 0xc0, bs); + gf100_grctx_patch_wr32(chan, o + 0xf4, bo); bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, o + 0xe4, as); - mmio_wr32(info, o + 0xf8, ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, as); + gf100_grctx_patch_wr32(chan, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, u, ((bs / 3) << 16) | bs); + gf100_grctx_patch_wr32(chan, u, ((bs / 3) << 16) | bs); } } } +void +gm107_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size) +{ + gf100_grctx_generate_attrib_cb(chan, addr, size); + + gf100_grctx_patch_wr32(chan, 0x419c2c, 0x10000000 | addr >> 12); +} + static void gm107_grctx_generate_r406500(struct gf100_gr 
*gr) { @@ -978,6 +973,8 @@ gm107_grctx = { .bundle_token_limit = 0x2c0, .pagepool = gm107_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gm107_grctx_generate_attrib_cb, .attrib = gm107_grctx_generate_attrib, .attrib_nr_max = 0xff0, .attrib_nr = 0xaa0, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c index 013d05a0f0f6..175da8ac656c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c @@ -87,7 +87,7 @@ gm200_grctx_generate_dist_skip_table(struct gf100_gr *gr) int gpc, ppc, i; for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) { u8 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc]; u8 ppc_tpcm = gr->ppc_tpc_mask[gpc][ppc]; while (ppc_tpcs-- > gr->ppc_tpc_min) @@ -111,6 +111,8 @@ gm200_grctx = { .bundle_token_limit = 0x780, .pagepool = gm107_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gm107_grctx_generate_attrib_cb, .attrib = gm107_grctx_generate_attrib, .attrib_nr_max = 0x600, .attrib_nr = 0x400, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c index 6b92f8aa18a3..b8edccfada58 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c @@ -22,8 +22,9 @@ #include "ctxgf100.h" static void -gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +gm20b_grctx_generate_main(struct gf100_gr_chan *chan) { + struct gf100_gr *gr = chan->gr; struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; u32 idle_timeout; @@ -35,7 +36,8 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) idle_timeout = 
nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - grctx->attrib(info); + grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr)); + grctx->attrib(chan); grctx->unkn(gr); @@ -63,8 +65,8 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_gr_wait_idle(gr); gf100_gr_icmd(gr, gr->bundle); - grctx->pagepool(info); - grctx->bundle(info); + grctx->pagepool(chan, chan->pagepool->addr); + grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size); } const struct gf100_grctx_func @@ -77,6 +79,8 @@ gm20b_grctx = { .bundle_token_limit = 0x1c0, .pagepool = gm107_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gm107_grctx_generate_attrib_cb, .attrib = gm107_grctx_generate_attrib, .attrib_nr_max = 0x600, .attrib_nr = 0x400, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c index 0b3326262e12..8485aaeae7a9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c @@ -30,66 +30,76 @@ ******************************************************************************/ void -gp100_grctx_generate_pagepool(struct gf100_grctx *info) +gp100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); - mmio_refn(info, 0x40800c, 0x00000000, s, b); - mmio_wr32(info, 0x408010, 0x8007d800); - mmio_refn(info, 0x419004, 0x00000000, s, b); - mmio_wr32(info, 0x419008, 0x00000000); + gf100_grctx_patch_wr32(chan, 0x40800c, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408010, 0x8007d800); + gf100_grctx_patch_wr32(chan, 0x419004, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x419008, 0x00000000); } static void -gp100_grctx_generate_attrib(struct gf100_grctx *info) +gp100_grctx_generate_attrib(struct 
gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; - const int s = 12; const int max_batches = 0xffff; u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 ao = 0; u32 bo = ao + size; - int gpc, ppc, b, n = 0; + int gpc, ppc, n = 0; - for (gpc = 0; gpc < gr->gpc_nr; gpc++) - size += grctx->attrib_nr_max * gr->ppc_nr[gpc] * gr->ppc_tpc_max; - size = ((size * 0x20) + 128) & ~127; - b = mmio_vram(info, size, (1 << s), false); - - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_refn(info, 0x419c2c, 0x10000000, s, b); - mmio_refn(info, 0x419b00, 0x00000000, s, b); - mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7); - mmio_wr32(info, 0x405830, attrib); - mmio_wr32(info, 0x40585c, alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, attrib); + gf100_grctx_patch_wr32(chan, 0x40585c, alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) { const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; const u32 bs = attrib * gr->ppc_tpc_max; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, bs); - mmio_wr32(info, o + 0xf4, bo); - mmio_wr32(info, o + 0xf0, bs); + + gf100_grctx_patch_wr32(chan, o + 0xc0, bs); + gf100_grctx_patch_wr32(chan, o + 0xf4, bo); + gf100_grctx_patch_wr32(chan, o + 0xf0, bs); bo += grctx->attrib_nr_max * gr->ppc_tpc_max; - mmio_wr32(info, o + 0xe4, as); - mmio_wr32(info, o + 0xf8, ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, as); + gf100_grctx_patch_wr32(chan, o + 0xf8, ao); ao += grctx->alpha_nr_max * 
gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, u, bs); + gf100_grctx_patch_wr32(chan, u, bs); } } - mmio_wr32(info, 0x418eec, 0x00000000); - mmio_wr32(info, 0x41befc, 0x00000000); + gf100_grctx_patch_wr32(chan, 0x418eec, 0x00000000); + gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000000); +} + +void +gp100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size) +{ + gm107_grctx_generate_attrib_cb(chan, addr, size); + + gf100_grctx_patch_wr32(chan, 0x419b00, 0x00000000 | addr >> 12); + gf100_grctx_patch_wr32(chan, 0x419b04, 0x80000000 | size >> 7); +} + +static u32 +gp100_grctx_generate_attrib_cb_size(struct gf100_gr *gr) +{ + const struct gf100_grctx_func *grctx = gr->func->grctx; + u32 size = grctx->alpha_nr_max * gr->tpc_total; + int gpc; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) + size += grctx->attrib_nr_max * gr->func->ppc_nr * gr->ppc_tpc_max; + + return ((size * 0x20) + 128) & ~127; } void @@ -123,6 +133,8 @@ gp100_grctx = { .bundle_token_limit = 0x1080, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp100_grctx_generate_attrib_cb_size, + .attrib_cb = gp100_grctx_generate_attrib_cb, .attrib = gp100_grctx_generate_attrib, .attrib_nr_max = 0x660, .attrib_nr = 0x440, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c index daee17bf7d0d..7537979a5492 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c @@ -37,58 +37,62 @@ gp102_grctx_generate_r408840(struct gf100_gr *gr) } void -gp102_grctx_generate_attrib(struct gf100_grctx *info) +gp102_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; const u32 gfxp = grctx->gfxp_nr; - const int s = 12; const int max_batches = 0xffff; u32 size = 
grctx->alpha_nr_max * gr->tpc_total; u32 ao = 0; u32 bo = ao + size; - int gpc, ppc, b, n = 0; + int gpc, ppc, n = 0; - for (gpc = 0; gpc < gr->gpc_nr; gpc++) - size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max; - size = ((size * 0x20) + 128) & ~127; - b = mmio_vram(info, size, (1 << s), false); - - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_refn(info, 0x419c2c, 0x10000000, s, b); - mmio_refn(info, 0x419b00, 0x00000000, s, b); - mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7); - mmio_wr32(info, 0x405830, attrib); - mmio_wr32(info, 0x40585c, alpha); - mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + gf100_grctx_patch_wr32(chan, 0x405830, attrib); + gf100_grctx_patch_wr32(chan, 0x40585c, alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) { const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; const u32 bs = attrib * gr->ppc_tpc_max; const u32 gs = gfxp * gr->ppc_tpc_max; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); const u32 p = GPC_UNIT(gpc, 0xc44 + (ppc * 4)); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, gs); - mmio_wr32(info, p, bs); - mmio_wr32(info, o + 0xf4, bo); - mmio_wr32(info, o + 0xf0, bs); + + gf100_grctx_patch_wr32(chan, o + 0xc0, gs); + gf100_grctx_patch_wr32(chan, p, bs); + gf100_grctx_patch_wr32(chan, o + 0xf4, bo); + gf100_grctx_patch_wr32(chan, o + 0xf0, bs); bo += gs; - mmio_wr32(info, o + 0xe4, as); - mmio_wr32(info, o + 0xf8, ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, as); + gf100_grctx_patch_wr32(chan, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, u, bs); + gf100_grctx_patch_wr32(chan, u, bs); } } - mmio_wr32(info, 0x4181e4, 0x00000100); - mmio_wr32(info, 0x41befc, 0x00000100); + 
gf100_grctx_patch_wr32(chan, 0x4181e4, 0x00000100); + gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000100); +} + +u32 +gp102_grctx_generate_attrib_cb_size(struct gf100_gr *gr) +{ + const struct gf100_grctx_func *grctx = gr->func->grctx; + u32 size = grctx->alpha_nr_max * gr->tpc_total; + int gpc; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) + size += grctx->gfxp_nr * gr->func->ppc_nr * gr->ppc_tpc_max; + + return ((size * 0x20) + 127) & ~127; } const struct gf100_grctx_func @@ -101,6 +105,8 @@ gp102_grctx = { .bundle_token_limit = 0x900, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gp100_grctx_generate_attrib_cb, .attrib = gp102_grctx_generate_attrib, .attrib_nr_max = 0x4b0, .attrib_nr = 0x320, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c index 3b85e3d326b2..90b5f793e567 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c @@ -31,6 +31,8 @@ gp104_grctx = { .bundle_token_limit = 0x900, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gp100_grctx_generate_attrib_cb, .attrib = gp102_grctx_generate_attrib, .attrib_nr_max = 0x4b0, .attrib_nr = 0x320, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c index 5060c5ee5ce0..d191761a0471 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c @@ -39,6 +39,8 @@ gp107_grctx = { .bundle_token_limit = 0x300, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gp100_grctx_generate_attrib_cb, .attrib = gp102_grctx_generate_attrib, .attrib_nr_max = 0x15de, .attrib_nr = 0x540, diff --git 
a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c index 39553d55d3f3..957ea9d6bad4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c @@ -25,7 +25,7 @@ * PGRAPH context implementation ******************************************************************************/ -const struct gf100_gr_init +static const struct gf100_gr_init gv100_grctx_init_sw_veid_bundle_init_0[] = { { 0x00001000, 64, 0x00100000, 0x00000008 }, { 0x00000941, 64, 0x00100000, 0x00000000 }, @@ -59,67 +59,70 @@ gv100_grctx_pack_sw_veid_bundle_init[] = { }; void -gv100_grctx_generate_attrib(struct gf100_grctx *info) +gv100_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; const u32 gfxp = grctx->gfxp_nr; - const int s = 12; + const int max_batches = 0xffff; u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 ao = 0; u32 bo = ao + size; - int gpc, ppc, b, n = 0; + int gpc, ppc, n = 0; - for (gpc = 0; gpc < gr->gpc_nr; gpc++) - size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max; - size = ((size * 0x20) + 127) & ~127; - b = mmio_vram(info, size, (1 << s), false); - - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_refn(info, 0x419c2c, 0x10000000, s, b); - mmio_refn(info, 0x419e00, 0x00000000, s, b); - mmio_wr32(info, 0x419e04, 0x80000000 | size >> 7); - mmio_wr32(info, 0x405830, attrib); - mmio_wr32(info, 0x40585c, alpha); + gf100_grctx_patch_wr32(chan, 0x405830, attrib); + gf100_grctx_patch_wr32(chan, 0x40585c, alpha); + gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, 
n++) { const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; const u32 bs = attrib * gr->ppc_tpc_max; const u32 gs = gfxp * gr->ppc_tpc_max; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, gs); - mmio_wr32(info, o + 0xf4, bo); - mmio_wr32(info, o + 0xf0, bs); + + gf100_grctx_patch_wr32(chan, o + 0xc0, gs); + gf100_grctx_patch_wr32(chan, o + 0xf4, bo); + gf100_grctx_patch_wr32(chan, o + 0xf0, bs); bo += gs; - mmio_wr32(info, o + 0xe4, as); - mmio_wr32(info, o + 0xf8, ao); + gf100_grctx_patch_wr32(chan, o + 0xe4, as); + gf100_grctx_patch_wr32(chan, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; - mmio_wr32(info, u, bs); + gf100_grctx_patch_wr32(chan, u, bs); } } - mmio_wr32(info, 0x4181e4, 0x00000100); - mmio_wr32(info, 0x41befc, 0x00000100); + gf100_grctx_patch_wr32(chan, 0x4181e4, 0x00000100); + gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000100); +} + +void +gv100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size) +{ + gm107_grctx_generate_attrib_cb(chan, addr, size); + + gf100_grctx_patch_wr32(chan, 0x419e00, 0x00000000 | addr >> 12); + gf100_grctx_patch_wr32(chan, 0x419e04, 0x80000000 | size >> 7); } void gv100_grctx_generate_rop_mapping(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 mapregs = DIV_ROUND_UP(gr->func->gpc_nr * gr->func->tpc_nr, 6); u32 data; int i, j; /* Pack tile map into register format. 
*/ nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) | gr->screen_tile_row_offset); - for (i = 0; i < 11; i++) { + for (i = 0; i < mapregs; i++) { for (data = 0, j = 0; j < 6; j++) data |= (gr->tile[i * 6 + j] & 0x1f) << (j * 5); nvkm_wr32(device, 0x418b08 + (i * 4), data); @@ -157,6 +160,9 @@ static void gv100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) { struct nvkm_device *device = gr->base.engine.subdev.device; + + tpc = gv100_gr_nonpes_aware_tpc(gr, gpc, tpc); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x608), sm); nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm); nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm); @@ -198,6 +204,8 @@ gv100_grctx = { .bundle_token_limit = 0x1680, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gv100_grctx_generate_attrib_cb, .attrib = gv100_grctx_generate_attrib, .attrib_nr_max = 0x6c0, .attrib_nr = 0x480, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c index 2299ca07d04a..542ab0c78be6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c @@ -34,6 +34,9 @@ static void tu102_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) { struct nvkm_device *device = gr->base.engine.subdev.device; + + tpc = gv100_gr_nonpes_aware_tpc(gr, gpc, tpc); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x608), sm); nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm); } @@ -47,42 +50,38 @@ tu102_grctx_init_unknown_bundle_init_0[] = { }; static const struct gf100_gr_pack -tu102_grctx_pack_sw_veid_bundle_init[] = { - { gv100_grctx_init_sw_veid_bundle_init_0 }, - { tu102_grctx_init_unknown_bundle_init_0 }, +tu102_grctx_pack_sw_bundle64_init[] = { + { tu102_grctx_init_unknown_bundle_init_0, .type = 64 }, {} }; -static void -tu102_grctx_generate_attrib(struct gf100_grctx *info) +void 
+tu102_grctx_generate_unknown(struct gf100_gr_chan *chan, u64 addr, u32 size) { - const u64 size = 0x80000; /*XXX: educated guess */ - const int s = 8; - const int b = mmio_vram(info, size, (1 << s), true); - - gv100_grctx_generate_attrib(info); - - mmio_refn(info, 0x408070, 0x00000000, s, b); - mmio_wr32(info, 0x408074, size >> s); /*XXX: guess */ - mmio_refn(info, 0x419034, 0x00000000, s, b); - mmio_wr32(info, 0x408078, 0x00000000); + gf100_grctx_patch_wr32(chan, 0x408070, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408074, size >> 8); /*XXX: guess */ + gf100_grctx_patch_wr32(chan, 0x419034, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408078, 0x00000000); } const struct gf100_grctx_func tu102_grctx = { - .unkn88c = gv100_grctx_unkn88c, .main = gf100_grctx_generate_main, .unkn = gv100_grctx_generate_unkn, - .sw_veid_bundle_init = tu102_grctx_pack_sw_veid_bundle_init, + .sw_bundle64_init = tu102_grctx_pack_sw_bundle64_init, .bundle = gm107_grctx_generate_bundle, .bundle_size = 0x3000, .bundle_min_gpm_fifo_depth = 0x180, .bundle_token_limit = 0xa80, .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, - .attrib = tu102_grctx_generate_attrib, + .attrib_cb_size = gp102_grctx_generate_attrib_cb_size, + .attrib_cb = gv100_grctx_generate_attrib_cb, + .attrib = gv100_grctx_generate_attrib, .attrib_nr_max = 0x800, .attrib_nr = 0x700, + .unknown_size = 0x80000, + .unknown = tu102_grctx_generate_unknown, .alpha_nr_max = 0xc00, .alpha_nr = 0x800, .gfxp_nr = 0xfa8, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c new file mode 100644 index 000000000000..a5b5ac2755a2 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c @@ -0,0 +1,347 @@ +/* + * Copyright 2019 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "gf100.h" +#include "ctxgf100.h" + +#include <core/firmware.h> +#include <subdev/acr.h> +#include <subdev/timer.h> +#include <subdev/vfn.h> + +#include <nvfw/flcn.h> + +#include <nvif/class.h> + +static void +ga102_gr_zbc_clear_color(struct gf100_gr *gr, int zbc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 invalid[] = { 0, 0, 0, 0 }, *color; + + if (gr->zbc_color[zbc].format) + color = gr->zbc_color[zbc].l2; + else + color = invalid; + + nvkm_mask(device, 0x41bcb4, 0x0000001f, zbc); + nvkm_wr32(device, 0x41bcec, color[0]); + nvkm_wr32(device, 0x41bcf0, color[1]); + nvkm_wr32(device, 0x41bcf4, color[2]); + nvkm_wr32(device, 0x41bcf8, color[3]); +} + +static const struct gf100_gr_func_zbc +ga102_gr_zbc = { + .clear_color = ga102_gr_zbc_clear_color, + .clear_depth = gp100_gr_zbc_clear_depth, + .stencil_get = gp102_gr_zbc_stencil_get, + .clear_stencil = gp102_gr_zbc_clear_stencil, +}; + +static void +ga102_gr_gpccs_reset(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x41a610, 0x00000000); + nvkm_msec(device, 1, NVKM_DELAY); + nvkm_wr32(device, 0x41a610, 0x00000001); +} + +static const struct nvkm_acr_lsf_func +ga102_gr_gpccs_acr = { + .flags = NVKM_ACR_LSF_FORCE_PRIV_LOAD, + .bl_entry = 0x3400, + .bld_size = sizeof(struct flcn_bl_dmem_desc_v2), + .bld_write = gp108_gr_acr_bld_write, + .bld_patch = gp108_gr_acr_bld_patch, +}; + +static void +ga102_gr_fecs_reset(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409614, 0x00000010); + nvkm_wr32(device, 0x41a614, 0x00000020); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_wr32(device, 0x409614, 0x00000110); + nvkm_wr32(device, 0x41a614, 0x00000a20); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_rd32(device, 0x409614); + nvkm_rd32(device, 0x41a614); +} + +static const struct nvkm_acr_lsf_func +ga102_gr_fecs_acr = { + .bl_entry = 0x7e00, + .bld_size = sizeof(struct 
flcn_bl_dmem_desc_v2), + .bld_write = gp108_gr_acr_bld_write, + .bld_patch = gp108_gr_acr_bld_patch, +}; + +static void +ga102_gr_init_rop_exceptions(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x41bcbc, 0x40000000); + nvkm_wr32(device, 0x41bc38, 0x40000000); + nvkm_wr32(device, 0x41ac94, nvkm_rd32(device, 0x502c94)); +} + +static void +ga102_gr_init_40a790(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x40a790, 0xc0000000); +} + +static void +ga102_gr_init_gpc_mmu(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x418880, nvkm_rd32(device, 0x100c80) & 0xf8001fff); + nvkm_wr32(device, 0x418894, 0x00000000); + + nvkm_wr32(device, 0x4188b4, nvkm_rd32(device, 0x100cc8)); + nvkm_wr32(device, 0x4188b8, nvkm_rd32(device, 0x100ccc)); + nvkm_wr32(device, 0x4188b0, nvkm_rd32(device, 0x100cc4)); +} + +static struct nvkm_intr * +ga102_gr_oneinit_intr(struct gf100_gr *gr, enum nvkm_intr_type *pvector) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + *pvector = nvkm_rd32(device, 0x400154) & 0x00000fff; + return &device->vfn->intr; +} + +static const struct gf100_gr_func +ga102_gr = { + .oneinit_intr = ga102_gr_oneinit_intr, + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gv100_gr_oneinit_sm_id, + .init = gf100_gr_init, + .init_419bd8 = gv100_gr_init_419bd8, + .init_gpc_mmu = ga102_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = tu102_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask, + .init_fs = tu102_gr_init_fs, + .init_fecs_exceptions = tu102_gr_init_fecs_exceptions, + .init_40a790 = ga102_gr_init_40a790, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_504430 = 
gv100_gr_init_504430, + .init_shader_exceptions = gv100_gr_init_shader_exceptions, + .init_rop_exceptions = ga102_gr_init_rop_exceptions, + .init_4188a4 = gv100_gr_init_4188a4, + .trap_mp = gv100_gr_trap_mp, + .fecs.reset = ga102_gr_fecs_reset, + .gpccs.reset = ga102_gr_gpccs_reset, + .rops = gm200_gr_rops, + .gpc_nr = 7, + .tpc_nr = 6, + .ppc_nr = 3, + .grctx = &ga102_grctx, + .zbc = &ga102_gr_zbc, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, AMPERE_B, &gf100_fermi }, + { -1, -1, AMPERE_COMPUTE_B }, + {} + } +}; + +MODULE_FIRMWARE("nvidia/ga102/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga102/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga102/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga102/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga102/gr/NET_img.bin"); + +MODULE_FIRMWARE("nvidia/ga103/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga103/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga103/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga103/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga103/gr/NET_img.bin"); + +MODULE_FIRMWARE("nvidia/ga104/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga104/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga104/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga104/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga104/gr/NET_img.bin"); + +MODULE_FIRMWARE("nvidia/ga106/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga106/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga106/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga106/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga106/gr/NET_img.bin"); + +MODULE_FIRMWARE("nvidia/ga107/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga107/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga107/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/ga107/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/ga107/gr/NET_img.bin"); + +struct netlist_region { + u32 region_id; + u32 data_size; + u32 data_offset; +}; + +struct netlist_image_header { + u32 version; + u32 regions; +}; + +struct netlist_image 
{ + struct netlist_image_header header; + struct netlist_region regions[]; +}; + +struct netlist_av64 { + u32 addr; + u32 data_hi; + u32 data_lo; +}; + +static int +ga102_gr_av64_to_init(struct nvkm_blob *blob, struct gf100_gr_pack **ppack) +{ + struct gf100_gr_init *init; + struct gf100_gr_pack *pack; + int nent; + int i; + + nent = (blob->size / sizeof(struct netlist_av64)); + pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1))); + if (!pack) + return -ENOMEM; + + init = (void *)(pack + 2); + pack[0].init = init; + pack[0].type = 64; + + for (i = 0; i < nent; i++) { + struct gf100_gr_init *ent = &init[i]; + struct netlist_av64 *av = &((struct netlist_av64 *)blob->data)[i]; + + ent->addr = av->addr; + ent->data = ((u64)av->data_hi << 32) | av->data_lo; + ent->count = 1; + ent->pitch = 1; + } + + *ppack = pack; + return 0; +} + +static int +ga102_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + const struct firmware *fw; + const struct netlist_image *net; + const struct netlist_region *fecs_inst = NULL; + const struct netlist_region *fecs_data = NULL; + const struct netlist_region *gpccs_inst = NULL; + const struct netlist_region *gpccs_data = NULL; + int ret, i; + + ret = nvkm_firmware_get(subdev, "gr/NET_img", 0, &fw); + if (ret) + return ret; + + net = (const void *)fw->data; + nvkm_debug(subdev, "netlist version %d, %d regions\n", + net->header.version, net->header.regions); + + for (i = 0; i < net->header.regions; i++) { + const struct netlist_region *reg = &net->regions[i]; + struct nvkm_blob blob = { + .data = (void *)fw->data + reg->data_offset, + .size = reg->data_size, + }; + + nvkm_debug(subdev, "\t%2d: %08x %08x\n", + reg->region_id, reg->data_offset, reg->data_size); + + switch (reg->region_id) { + case 0: fecs_data = reg; break; + case 1: fecs_inst = reg; break; + case 2: gpccs_data = reg; break; + case 3: gpccs_inst = reg; break; + case 4: 
gk20a_gr_av_to_init(&blob, &gr->bundle); break; + case 5: gk20a_gr_aiv_to_init(&blob, &gr->sw_ctx); break; + case 7: gk20a_gr_av_to_method(&blob, &gr->method); break; + case 28: tu102_gr_av_to_init_veid(&blob, &gr->bundle_veid); break; + case 34: ga102_gr_av64_to_init(&blob, &gr->bundle64); break; + case 48: gk20a_gr_av_to_init(&blob, &gr->sw_nonctx1); break; + case 49: gk20a_gr_av_to_init(&blob, &gr->sw_nonctx2); break; + case 50: gk20a_gr_av_to_init(&blob, &gr->sw_nonctx3); break; + case 51: gk20a_gr_av_to_init(&blob, &gr->sw_nonctx4); break; + default: + break; + } + } + + ret = nvkm_acr_lsfw_load_bl_sig_net(subdev, &gr->fecs.falcon, NVKM_ACR_LSF_FECS, + "gr/fecs_", ver, fwif->fecs, + fw->data + fecs_inst->data_offset, + fecs_inst->data_size, + fw->data + fecs_data->data_offset, + fecs_data->data_size); + if (ret) + return ret; + + ret = nvkm_acr_lsfw_load_bl_sig_net(subdev, &gr->gpccs.falcon, NVKM_ACR_LSF_GPCCS, + "gr/gpccs_", ver, fwif->gpccs, + fw->data + gpccs_inst->data_offset, + gpccs_inst->data_size, + fw->data + gpccs_data->data_offset, + gpccs_data->data_size); + if (ret) + return ret; + + gr->firmware = true; + + nvkm_firmware_put(fw); + return 0; +} + +static const struct gf100_gr_fwif +ga102_gr_fwif[] = { + { 0, ga102_gr_load, &ga102_gr, &ga102_gr_fecs_acr, &ga102_gr_gpccs_acr }, + { -1, gm200_gr_nofw }, + {} +}; + +int +ga102_gr_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(ga102_gr_fwif, device, type, inst, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index f16eabf4f642..5f20079c3660 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -67,7 +67,7 @@ gf100_gr_zbc_color_get(struct gf100_gr *gr, int format, struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; int zbc = -ENOSPC, i; - for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { + for (i 
= ltc->zbc_color_min; i <= ltc->zbc_color_max; i++) { if (gr->zbc_color[i].format) { if (gr->zbc_color[i].format != format) continue; @@ -114,7 +114,7 @@ gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format, struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; int zbc = -ENOSPC, i; - for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { + for (i = ltc->zbc_depth_min; i <= ltc->zbc_depth_max; i++) { if (gr->zbc_depth[i].format) { if (gr->zbc_depth[i].format != format) continue; @@ -355,15 +355,14 @@ static void * gf100_gr_chan_dtor(struct nvkm_object *object) { struct gf100_gr_chan *chan = gf100_gr_chan(object); - int i; - - for (i = 0; i < ARRAY_SIZE(chan->data); i++) { - nvkm_vmm_put(chan->vmm, &chan->data[i].vma); - nvkm_memory_unref(&chan->data[i].mem); - } nvkm_vmm_put(chan->vmm, &chan->mmio_vma); nvkm_memory_unref(&chan->mmio); + + nvkm_vmm_put(chan->vmm, &chan->attrib_cb); + nvkm_vmm_put(chan->vmm, &chan->unknown); + nvkm_vmm_put(chan->vmm, &chan->bundle_cb); + nvkm_vmm_put(chan->vmm, &chan->pagepool); nvkm_vmm_unref(&chan->vmm); return chan; } @@ -380,12 +379,10 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, struct nvkm_object **pobject) { struct gf100_gr *gr = gf100_gr(base); - struct gf100_gr_data *data = gr->mmio_data; - struct gf100_gr_mmio *mmio = gr->mmio_list; struct gf100_gr_chan *chan; struct gf100_vmm_map_v0 args = { .priv = 1 }; struct nvkm_device *device = gr->base.engine.subdev.device; - int ret, i; + int ret; if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) return -ENOMEM; @@ -394,63 +391,91 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, chan->vmm = nvkm_vmm_ref(fifoch->vmm); *pobject = &chan->object; - /* allocate memory for a "mmio list" buffer that's used by the HUB - * fuc to modify some per-context register settings on first load - * of the context. - */ - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100, - false, &chan->mmio); + /* Map pagepool. 
*/ + ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->pagepool), &chan->pagepool); if (ret) return ret; - ret = nvkm_vmm_get(fifoch->vmm, 12, 0x1000, &chan->mmio_vma); + ret = nvkm_memory_map(gr->pagepool, 0, chan->vmm, chan->pagepool, &args, sizeof(args)); if (ret) return ret; - ret = nvkm_memory_map(chan->mmio, 0, fifoch->vmm, - chan->mmio_vma, &args, sizeof(args)); + /* Map bundle circular buffer. */ + ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->bundle_cb), &chan->bundle_cb); + if (ret) + return ret; + + ret = nvkm_memory_map(gr->bundle_cb, 0, chan->vmm, chan->bundle_cb, &args, sizeof(args)); + if (ret) + return ret; + + /* Map attribute circular buffer. */ + ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->attrib_cb), &chan->attrib_cb); if (ret) return ret; - /* allocate buffers referenced by mmio list */ - for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) { - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - data->size, data->align, false, - &chan->data[i].mem); + if (device->card_type < GP100) { + ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb, NULL, 0); if (ret) return ret; - - ret = nvkm_vmm_get(fifoch->vmm, 12, - nvkm_memory_size(chan->data[i].mem), - &chan->data[i].vma); + } else { + ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb, + &args, sizeof(args));; if (ret) return ret; + } - args.priv = data->priv; + /* Map some context buffer of unknown purpose. */ + if (gr->func->grctx->unknown_size) { + ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->unknown), &chan->unknown); + if (ret) + return ret; - ret = nvkm_memory_map(chan->data[i].mem, 0, chan->vmm, - chan->data[i].vma, &args, sizeof(args)); + ret = nvkm_memory_map(gr->unknown, 0, chan->vmm, chan->unknown, + &args, sizeof(args)); if (ret) return ret; + } - data++; + /* Generate golden context image. 
*/ + mutex_lock(&gr->fecs.mutex); + if (gr->data == NULL) { + ret = gf100_grctx_generate(gr, chan, fifoch->inst); + if (ret) { + nvkm_error(&base->engine.subdev, "failed to construct context\n"); + return ret; + } } + mutex_unlock(&gr->fecs.mutex); - /* finally, fill in the mmio list and point the context at it */ - nvkm_kmap(chan->mmio); - for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) { - u32 addr = mmio->addr; - u32 data = mmio->data; + /* allocate memory for a "mmio list" buffer that's used by the HUB + * fuc to modify some per-context register settings on first load + * of the context. + */ + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100, + false, &chan->mmio); + if (ret) + return ret; - if (mmio->buffer >= 0) { - u64 info = chan->data[mmio->buffer].vma->addr; - data |= info >> mmio->shift; - } + ret = nvkm_vmm_get(fifoch->vmm, 12, 0x1000, &chan->mmio_vma); + if (ret) + return ret; - nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr); - nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data); - mmio++; - } + ret = nvkm_memory_map(chan->mmio, 0, fifoch->vmm, + chan->mmio_vma, &args, sizeof(args)); + if (ret) + return ret; + + /* finally, fill in the mmio list and point the context at it */ + nvkm_kmap(chan->mmio); + gr->func->grctx->pagepool(chan, chan->pagepool->addr); + gr->func->grctx->bundle(chan, chan->bundle_cb->addr, gr->func->grctx->bundle_size); + gr->func->grctx->attrib_cb(chan, chan->attrib_cb->addr, gr->func->grctx->attrib_cb_size(gr)); + gr->func->grctx->attrib(chan); + if (gr->func->grctx->patch_ltc) + gr->func->grctx->patch_ltc(chan); + if (gr->func->grctx->unknown_size) + gr->func->grctx->unknown(chan, chan->unknown->addr, gr->func->grctx->unknown_size); nvkm_done(chan->mmio); return 0; } @@ -727,7 +752,7 @@ gf100_gr_fecs_ctrl_ctxsw(struct gf100_gr *gr, u32 mthd) struct nvkm_device *device = gr->base.engine.subdev.device; nvkm_wr32(device, 0x409804, 0xffffffff); - nvkm_wr32(device, 0x409840, 0xffffffff); + 
nvkm_wr32(device, 0x409800, 0x00000000); nvkm_wr32(device, 0x409500, 0xffffffff); nvkm_wr32(device, 0x409504, mthd); nvkm_msec(device, 2000, @@ -771,12 +796,45 @@ gf100_gr_fecs_stop_ctxsw(struct nvkm_gr *base) return ret; } +static int +gf100_gr_fecs_halt_pipeline(struct gf100_gr *gr) +{ + int ret = 0; + + if (gr->firmware) { + mutex_lock(&gr->fecs.mutex); + ret = gf100_gr_fecs_ctrl_ctxsw(gr, 0x04); + mutex_unlock(&gr->fecs.mutex); + } + + return ret; +} + +int +gf100_gr_fecs_wfi_golden_save(struct gf100_gr *gr, u32 inst) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_mask(device, 0x409800, 0x00000003, 0x00000000); + nvkm_wr32(device, 0x409500, inst); + nvkm_wr32(device, 0x409504, 0x00000009); + nvkm_msec(device, 2000, + u32 stat = nvkm_rd32(device, 0x409800); + if (stat & 0x00000002) + return -EIO; + if (stat & 0x00000001) + return 0; + ); + + return -ETIMEDOUT; +} + int gf100_gr_fecs_bind_pointer(struct gf100_gr *gr, u32 inst) { struct nvkm_device *device = gr->base.engine.subdev.device; - nvkm_wr32(device, 0x409840, 0x00000030); + nvkm_mask(device, 0x409800, 0x00000030, 0x00000000); nvkm_wr32(device, 0x409500, inst); nvkm_wr32(device, 0x409504, 0x00000003); nvkm_msec(device, 2000, @@ -867,7 +925,7 @@ gf100_gr_fecs_discover_pm_image_size(struct gf100_gr *gr, u32 *psize) { struct nvkm_device *device = gr->base.engine.subdev.device; - nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409800, 0x00000000); nvkm_wr32(device, 0x409500, 0x00000000); nvkm_wr32(device, 0x409504, 0x00000025); nvkm_msec(device, 2000, @@ -883,7 +941,7 @@ gf100_gr_fecs_discover_zcull_image_size(struct gf100_gr *gr, u32 *psize) { struct nvkm_device *device = gr->base.engine.subdev.device; - nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409800, 0x00000000); nvkm_wr32(device, 0x409500, 0x00000000); nvkm_wr32(device, 0x409504, 0x00000016); nvkm_msec(device, 2000, @@ -899,7 +957,7 @@ gf100_gr_fecs_discover_image_size(struct gf100_gr *gr, u32 
*psize) { struct nvkm_device *device = gr->base.engine.subdev.device; - nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409800, 0x00000000); nvkm_wr32(device, 0x409500, 0x00000000); nvkm_wr32(device, 0x409504, 0x00000010); nvkm_msec(device, 2000, @@ -915,7 +973,7 @@ gf100_gr_fecs_set_watchdog_timeout(struct gf100_gr *gr, u32 timeout) { struct nvkm_device *device = gr->base.engine.subdev.device; - nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409800, 0x00000000); nvkm_wr32(device, 0x409500, timeout); nvkm_wr32(device, 0x409504, 0x00000021); } @@ -955,7 +1013,7 @@ gf100_gr_zbc_init(struct gf100_gr *gr) const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; - int index, c = ltc->zbc_min, d = ltc->zbc_min, s = ltc->zbc_min; + int index, c = ltc->zbc_color_min, d = ltc->zbc_depth_min, s = ltc->zbc_depth_min; if (!gr->zbc_color[0].format) { gf100_gr_zbc_color_get(gr, 1, & zero[0], &zero[4]); c++; @@ -971,13 +1029,13 @@ gf100_gr_zbc_init(struct gf100_gr *gr) } } - for (index = c; index <= ltc->zbc_max; index++) + for (index = c; index <= ltc->zbc_color_max; index++) gr->func->zbc->clear_color(gr, index); - for (index = d; index <= ltc->zbc_max; index++) + for (index = d; index <= ltc->zbc_depth_max; index++) gr->func->zbc->clear_depth(gr, index); if (gr->func->zbc->clear_stencil) { - for (index = s; index <= ltc->zbc_max; index++) + for (index = s; index <= ltc->zbc_depth_max; index++) gr->func->zbc->clear_stencil(gr, index); } } @@ -1003,7 +1061,7 @@ gf100_gr_wait_idle(struct gf100_gr *gr) nvkm_rd32(device, 0x400700); gr_enabled = nvkm_rd32(device, 0x200) & 0x1000; - ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000; + ctxsw_active = nvkm_fifo_ctxsw_in_progress(&gr->base.engine); gr_busy = nvkm_rd32(device, 0x40060c) & 0x1; if (!gr_enabled || (!gr_busy && !ctxsw_active)) @@ -1039,7 +1097,7 @@ gf100_gr_icmd(struct 
gf100_gr *gr, const struct gf100_gr_pack *p) struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_gr_pack *pack; const struct gf100_gr_init *init; - u32 data = 0; + u64 data = 0; nvkm_wr32(device, 0x400208, 0x80000000); @@ -1049,6 +1107,8 @@ gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p) if ((pack == p && init == p->init) || data != init->data) { nvkm_wr32(device, 0x400204, init->data); + if (pack->type == 64) + nvkm_wr32(device, 0x40020c, upper_32_bits(init->data)); data = init->data; } @@ -1542,13 +1602,13 @@ gf100_gr_ctxctl_isr(struct gf100_gr *gr) } } -static void -gf100_gr_intr(struct nvkm_gr *base) +static irqreturn_t +gf100_gr_intr(struct nvkm_inth *inth) { - struct gf100_gr *gr = gf100_gr(base); + struct gf100_gr *gr = container_of(inth, typeof(*gr), base.engine.subdev.inth); struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; - struct nvkm_fifo_chan *chan; + struct nvkm_chan *chan; unsigned long flags; u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff; u32 stat = nvkm_rd32(device, 0x400100); @@ -1561,10 +1621,10 @@ gf100_gr_intr(struct nvkm_gr *base) const char *name = "unknown"; int chid = -1; - chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags); + chan = nvkm_chan_get_inst(&gr->base.engine, (u64)inst << 12, &flags); if (chan) { - name = chan->object.client->name; - chid = chan->chid; + name = chan->name; + chid = chan->id; } if (device->card_type < NV_E0 || subc < 4) @@ -1631,7 +1691,8 @@ gf100_gr_intr(struct nvkm_gr *base) } nvkm_wr32(device, 0x400500, 0x00010001); - nvkm_fifo_chan_put(device->fifo, flags, &chan); + nvkm_chan_put(&chan, flags); + return IRQ_HANDLED; } static void @@ -1721,7 +1782,7 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) nvkm_mc_unk260(device, 1); /* start both of them running */ - nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409800, 0x00000000); nvkm_wr32(device, 0x41a10c, 0x00000000); 
nvkm_wr32(device, 0x40910c, 0x00000000); @@ -1763,15 +1824,6 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) return ret; } - /* Generate golden context image. */ - if (gr->data == NULL) { - int ret = gf100_grctx_generate(gr); - if (ret) { - nvkm_error(subdev, "failed to construct context\n"); - return ret; - } - } - return 0; } @@ -1823,14 +1875,6 @@ gf100_gr_init_ctxctl_int(struct gf100_gr *gr) } gr->size = nvkm_rd32(device, 0x409804); - if (gr->data == NULL) { - int ret = gf100_grctx_generate(gr); - if (ret) { - nvkm_error(subdev, "failed to construct context\n"); - return ret; - } - } - return 0; } @@ -1847,10 +1891,11 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr) return ret; } -void +int gf100_gr_oneinit_sm_id(struct gf100_gr *gr) { int tpc, gpc; + for (tpc = 0; tpc < gr->tpc_max; tpc++) { for (gpc = 0; gpc < gr->gpc_nr; gpc++) { if (tpc < gr->tpc_nr[gpc]) { @@ -1860,6 +1905,8 @@ gf100_gr_oneinit_sm_id(struct gf100_gr *gr) } } } + + return 0; } void @@ -1944,7 +1991,17 @@ gf100_gr_oneinit(struct nvkm_gr *base) struct gf100_gr *gr = gf100_gr(base); struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; - int i, j; + struct nvkm_intr *intr = &device->mc->intr; + enum nvkm_intr_type intr_type = NVKM_INTR_SUBDEV; + int ret, i, j; + + if (gr->func->oneinit_intr) + intr = gr->func->oneinit_intr(gr, &intr_type); + + ret = nvkm_inth_add(intr, intr_type, NVKM_INTR_PRIO_NORMAL, &gr->base.engine.subdev, + gf100_gr_intr, &gr->base.engine.subdev.inth); + if (ret) + return ret; nvkm_pmu_pgob(device->pmu, false); @@ -1954,12 +2011,14 @@ gf100_gr_oneinit(struct nvkm_gr *base) gr->tpc_nr[i] = nvkm_rd32(device, GPC_UNIT(i, 0x2608)); gr->tpc_max = max(gr->tpc_max, gr->tpc_nr[i]); gr->tpc_total += gr->tpc_nr[i]; - gr->ppc_nr[i] = gr->func->ppc_nr; - for (j = 0; j < gr->ppc_nr[i]; j++) { + for (j = 0; j < gr->func->ppc_nr; j++) { gr->ppc_tpc_mask[i][j] = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4))); if (gr->ppc_tpc_mask[i][j] == 0) 
continue; + + gr->ppc_nr[i]++; + gr->ppc_mask[i] |= (1 << j); gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]); if (gr->ppc_tpc_min == 0 || @@ -1968,12 +2027,37 @@ gf100_gr_oneinit(struct nvkm_gr *base) if (gr->ppc_tpc_max < gr->ppc_tpc_nr[i][j]) gr->ppc_tpc_max = gr->ppc_tpc_nr[i][j]; } + + gr->ppc_total += gr->ppc_nr[i]; + } + + /* Allocate global context buffers. */ + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->pagepool_size, + 0x100, false, &gr->pagepool); + if (ret) + return ret; + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->bundle_size, + 0x100, false, &gr->bundle_cb); + if (ret) + return ret; + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->attrib_cb_size(gr), + 0x1000, false, &gr->attrib_cb); + if (ret) + return ret; + + if (gr->func->grctx->unknown_size) { + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->unknown_size, + 0x100, false, &gr->unknown); + if (ret) + return ret; } memset(gr->tile, 0xff, sizeof(gr->tile)); gr->func->oneinit_tiles(gr); - gr->func->oneinit_sm_id(gr); - return 0; + + return gr->func->oneinit_sm_id(gr); } static int @@ -1983,7 +2067,7 @@ gf100_gr_init_(struct nvkm_gr *base) struct nvkm_subdev *subdev = &base->engine.subdev; struct nvkm_device *device = subdev->device; bool reset = device->chipset == 0x137 || device->chipset == 0x138; - u32 ret; + int ret; /* On certain GP107/GP108 boards, we trigger a weird issue where * GR will stop responding to PRI accesses after we've asked the @@ -2019,7 +2103,12 @@ gf100_gr_init_(struct nvkm_gr *base) if (ret) return ret; - return gr->func->init(gr); + ret = gr->func->init(gr); + if (ret) + return ret; + + nvkm_inth_allow(&subdev->inth); + return 0; } static int @@ -2027,6 +2116,9 @@ gf100_gr_fini(struct nvkm_gr *base, bool suspend) { struct gf100_gr *gr = gf100_gr(base); struct nvkm_subdev *subdev = &gr->base.engine.subdev; + + nvkm_inth_block(&subdev->inth); + 
nvkm_falcon_put(&gr->gpccs.falcon, subdev); nvkm_falcon_put(&gr->fecs.falcon, subdev); return 0; @@ -2039,6 +2131,11 @@ gf100_gr_dtor(struct nvkm_gr *base) kfree(gr->data); + nvkm_memory_unref(&gr->unknown); + nvkm_memory_unref(&gr->attrib_cb); + nvkm_memory_unref(&gr->bundle_cb); + nvkm_memory_unref(&gr->pagepool); + nvkm_falcon_dtor(&gr->gpccs.falcon); nvkm_falcon_dtor(&gr->fecs.falcon); @@ -2047,81 +2144,27 @@ gf100_gr_dtor(struct nvkm_gr *base) nvkm_blob_dtor(&gr->gpccs.inst); nvkm_blob_dtor(&gr->gpccs.data); + vfree(gr->bundle64); + vfree(gr->bundle_veid); vfree(gr->bundle); vfree(gr->method); vfree(gr->sw_ctx); vfree(gr->sw_nonctx); + vfree(gr->sw_nonctx1); + vfree(gr->sw_nonctx2); + vfree(gr->sw_nonctx3); + vfree(gr->sw_nonctx4); return gr; } -static const struct nvkm_gr_func -gf100_gr_ = { - .dtor = gf100_gr_dtor, - .oneinit = gf100_gr_oneinit, - .init = gf100_gr_init_, - .fini = gf100_gr_fini, - .intr = gf100_gr_intr, - .units = gf100_gr_units, - .chan_new = gf100_gr_chan_new, - .object_get = gf100_gr_object_get, - .chsw_load = gf100_gr_chsw_load, - .ctxsw.pause = gf100_gr_fecs_stop_ctxsw, - .ctxsw.resume = gf100_gr_fecs_start_ctxsw, - .ctxsw.inst = gf100_gr_ctxsw_inst, -}; - static const struct nvkm_falcon_func gf100_gr_flcn = { - .fbif = 0x600, .load_imem = nvkm_falcon_v1_load_imem, .load_dmem = nvkm_falcon_v1_load_dmem, - .read_dmem = nvkm_falcon_v1_read_dmem, - .bind_context = nvkm_falcon_v1_bind_context, - .wait_for_halt = nvkm_falcon_v1_wait_for_halt, - .clear_interrupt = nvkm_falcon_v1_clear_interrupt, - .set_start_addr = nvkm_falcon_v1_set_start_addr, .start = nvkm_falcon_v1_start, - .enable = nvkm_falcon_v1_enable, - .disable = nvkm_falcon_v1_disable, }; -int -gf100_gr_new_(const struct gf100_gr_fwif *fwif, struct nvkm_device *device, - enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr) -{ - struct gf100_gr *gr; - int ret; - - if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL))) - return -ENOMEM; - *pgr = &gr->base; - - ret = 
nvkm_gr_ctor(&gf100_gr_, device, type, inst, true, &gr->base); - if (ret) - return ret; - - fwif = nvkm_firmware_load(&gr->base.engine.subdev, fwif, "Gr", gr); - if (IS_ERR(fwif)) - return PTR_ERR(fwif); - - gr->func = fwif->func; - - ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev, - "fecs", 0x409000, &gr->fecs.falcon); - if (ret) - return ret; - - mutex_init(&gr->fecs.mutex); - - ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev, - "gpccs", 0x41a000, &gr->gpccs.falcon); - if (ret) - return ret; - - return 0; -} - void gf100_gr_init_num_tpc_per_gpc(struct gf100_gr *gr, bool pd, bool ds) { @@ -2146,6 +2189,29 @@ gf100_gr_init_400054(struct gf100_gr *gr) } void +gf100_gr_init_exception2(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x40011c, 0xffffffff); + nvkm_wr32(device, 0x400134, 0xffffffff); +} + +void +gf100_gr_init_rop_exceptions(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int rop; + + for (rop = 0; rop < gr->rop_nr; rop++) { + nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); + nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); + } +} + +void gf100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -2252,21 +2318,47 @@ gf100_gr_init_vsc_stream_master(struct gf100_gr *gr) nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001); } +static int +gf100_gr_reset(struct nvkm_gr *base) +{ + struct nvkm_subdev *subdev = &base->engine.subdev; + struct nvkm_device *device = subdev->device; + struct gf100_gr *gr = gf100_gr(base); + + nvkm_mask(device, 0x400500, 0x00000001, 0x00000000); + + WARN_ON(gf100_gr_fecs_halt_pipeline(gr)); + + subdev->func->fini(subdev, false); + nvkm_mc_disable(device, subdev->type, subdev->inst); + if 
(gr->func->gpccs.reset) + gr->func->gpccs.reset(gr); + + nvkm_mc_enable(device, subdev->type, subdev->inst); + return subdev->func->init(subdev); +} + int gf100_gr_init(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - int gpc, tpc, rop; + int gpc, tpc; - if (gr->func->init_419bd8) - gr->func->init_419bd8(gr); + nvkm_mask(device, 0x400500, 0x00010001, 0x00000000); gr->func->init_gpc_mmu(gr); - if (gr->sw_nonctx) + if (gr->sw_nonctx1) { + gf100_gr_mmio(gr, gr->sw_nonctx1); + gf100_gr_mmio(gr, gr->sw_nonctx2); + gf100_gr_mmio(gr, gr->sw_nonctx3); + gf100_gr_mmio(gr, gr->sw_nonctx4); + } else + if (gr->sw_nonctx) { gf100_gr_mmio(gr, gr->sw_nonctx); - else + } else { gf100_gr_mmio(gr, gr->func->mmio); + } gf100_gr_wait_idle(gr); @@ -2298,6 +2390,10 @@ gf100_gr_init(struct gf100_gr *gr) nvkm_wr32(device, 0x400124, 0x00000002); gr->func->init_fecs_exceptions(gr); + + if (gr->func->init_40a790) + gr->func->init_40a790(gr); + if (gr->func->init_ds_hww_esr_2) gr->func->init_ds_hww_esr_2(gr); @@ -2346,19 +2442,14 @@ gf100_gr_init(struct gf100_gr *gr) nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); } - for (rop = 0; rop < gr->rop_nr; rop++) { - nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); - nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); - } + gr->func->init_rop_exceptions(gr); nvkm_wr32(device, 0x400108, 0xffffffff); nvkm_wr32(device, 0x400138, 0xffffffff); nvkm_wr32(device, 0x400118, 0xffffffff); nvkm_wr32(device, 0x400130, 0xffffffff); - nvkm_wr32(device, 0x40011c, 0xffffffff); - nvkm_wr32(device, 0x400134, 0xffffffff); + if (gr->func->init_exception2) + gr->func->init_exception2(gr); if (gr->func->init_400054) gr->func->init_400054(gr); @@ -2371,6 +2462,18 @@ gf100_gr_init(struct gf100_gr *gr) return gf100_gr_init_ctxctl(gr); } +void +gf100_gr_fecs_reset(struct gf100_gr *gr) +{ + struct nvkm_device *device = 
gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409614, 0x00000070); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_mask(device, 0x409614, 0x00000700, 0x00000700); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_rd32(device, 0x409614); +} + #include "fuc/hubgf100.fuc3.h" struct gf100_gr_ucode @@ -2391,6 +2494,22 @@ gf100_gr_gpccs_ucode = { .data.size = sizeof(gf100_grgpc_data), }; +static const struct nvkm_gr_func +gf100_gr_ = { + .dtor = gf100_gr_dtor, + .oneinit = gf100_gr_oneinit, + .init = gf100_gr_init_, + .fini = gf100_gr_fini, + .reset = gf100_gr_reset, + .units = gf100_gr_units, + .chan_new = gf100_gr_chan_new, + .object_get = gf100_gr_object_get, + .chsw_load = gf100_gr_chsw_load, + .ctxsw.pause = gf100_gr_fecs_stop_ctxsw, + .ctxsw.resume = gf100_gr_fecs_start_ctxsw, + .ctxsw.inst = gf100_gr_ctxsw_inst, +}; + static const struct gf100_gr_func gf100_gr = { .oneinit_tiles = gf100_gr_oneinit_tiles, @@ -2406,10 +2525,13 @@ gf100_gr = { .init_419eb4 = gf100_gr_init_419eb4, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gf100_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf100_grctx, @@ -2483,6 +2605,42 @@ gf100_gr_fwif[] = { }; int +gf100_gr_new_(const struct gf100_gr_fwif *fwif, struct nvkm_device *device, + enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr) +{ + struct gf100_gr *gr; + int ret; + + if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL))) + return -ENOMEM; + *pgr = &gr->base; + + ret = nvkm_gr_ctor(&gf100_gr_, device, type, inst, true, &gr->base); + if (ret) + return ret; + + fwif = nvkm_firmware_load(&gr->base.engine.subdev, fwif, "Gr", gr); + if (IS_ERR(fwif)) + return PTR_ERR(fwif); + + gr->func = 
fwif->func; + + ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev, + "fecs", 0x409000, &gr->fecs.falcon); + if (ret) + return ret; + + mutex_init(&gr->fecs.mutex); + + ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev, + "gpccs", 0x41a000, &gr->gpccs.falcon); + if (ret) + return ret; + + return 0; +} + +int gf100_gr_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr) { return gf100_gr_new_(gf100_gr_fwif, device, type, inst, pgr); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index c0038f906135..94ca7ac16acf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -44,19 +44,6 @@ struct nvkm_acr_lsfw; #define PPC_UNIT(t, m, r) (0x503000 + (t) * 0x8000 + (m) * 0x200 + (r)) #define TPC_UNIT(t, m, r) (0x504000 + (t) * 0x8000 + (m) * 0x800 + (r)) -struct gf100_gr_data { - u32 size; - u32 align; - bool priv; -}; - -struct gf100_gr_mmio { - u32 addr; - u32 data; - u32 shift; - int buffer; -}; - struct gf100_gr_zbc_color { u32 format; u32 ds[4]; @@ -101,13 +88,19 @@ struct gf100_gr { * using hardcoded arrays. To be allocated with vzalloc(). 
*/ struct gf100_gr_pack *sw_nonctx; + struct gf100_gr_pack *sw_nonctx1; + struct gf100_gr_pack *sw_nonctx2; + struct gf100_gr_pack *sw_nonctx3; + struct gf100_gr_pack *sw_nonctx4; struct gf100_gr_pack *sw_ctx; struct gf100_gr_pack *bundle; + struct gf100_gr_pack *bundle_veid; + struct gf100_gr_pack *bundle64; struct gf100_gr_pack *method; - struct gf100_gr_zbc_color zbc_color[NVKM_LTC_MAX_ZBC_CNT]; - struct gf100_gr_zbc_depth zbc_depth[NVKM_LTC_MAX_ZBC_CNT]; - struct gf100_gr_zbc_stencil zbc_stencil[NVKM_LTC_MAX_ZBC_CNT]; + struct gf100_gr_zbc_color zbc_color[NVKM_LTC_MAX_ZBC_COLOR_CNT]; + struct gf100_gr_zbc_depth zbc_depth[NVKM_LTC_MAX_ZBC_DEPTH_CNT]; + struct gf100_gr_zbc_stencil zbc_stencil[NVKM_LTC_MAX_ZBC_DEPTH_CNT]; u8 rop_nr; u8 gpc_nr; @@ -120,6 +113,12 @@ struct gf100_gr { u8 ppc_tpc_nr[GPC_MAX][4]; u8 ppc_tpc_min; u8 ppc_tpc_max; + u8 ppc_total; + + struct nvkm_memory *pagepool; + struct nvkm_memory *bundle_cb; + struct nvkm_memory *attrib_cb; + struct nvkm_memory *unknown; u8 screen_tile_row_offset; u8 tile[TPC_MAX]; @@ -130,8 +129,6 @@ struct gf100_gr { } sm[TPC_MAX]; u8 sm_nr; - struct gf100_gr_data mmio_data[4]; - struct gf100_gr_mmio mmio_list[4096/8]; u32 size; u32 *data; u32 size_zcull; @@ -139,6 +136,7 @@ struct gf100_gr { }; int gf100_gr_fecs_bind_pointer(struct gf100_gr *, u32 inst); +int gf100_gr_fecs_wfi_golden_save(struct gf100_gr *, u32 inst); struct gf100_gr_func_zbc { void (*clear_color)(struct gf100_gr *, int zbc); @@ -149,8 +147,9 @@ struct gf100_gr_func_zbc { }; struct gf100_gr_func { + struct nvkm_intr *(*oneinit_intr)(struct gf100_gr *, enum nvkm_intr_type *); void (*oneinit_tiles)(struct gf100_gr *); - void (*oneinit_sm_id)(struct gf100_gr *); + int (*oneinit_sm_id)(struct gf100_gr *); int (*init)(struct gf100_gr *); void (*init_419bd8)(struct gf100_gr *); void (*init_gpc_mmu)(struct gf100_gr *); @@ -164,6 +163,7 @@ struct gf100_gr_func { void (*init_swdx_pes_mask)(struct gf100_gr *); void (*init_fs)(struct gf100_gr *); void 
(*init_fecs_exceptions)(struct gf100_gr *); + void (*init_40a790)(struct gf100_gr *); void (*init_ds_hww_esr_2)(struct gf100_gr *); void (*init_40601c)(struct gf100_gr *); void (*init_sked_hww_esr)(struct gf100_gr *); @@ -174,6 +174,8 @@ struct gf100_gr_func { void (*init_tex_hww_esr)(struct gf100_gr *, int gpc, int tpc); void (*init_504430)(struct gf100_gr *, int gpc, int tpc); void (*init_shader_exceptions)(struct gf100_gr *, int gpc, int tpc); + void (*init_rop_exceptions)(struct gf100_gr *); + void (*init_exception2)(struct gf100_gr *); void (*init_400054)(struct gf100_gr *); void (*init_4188a4)(struct gf100_gr *); void (*trap_mp)(struct gf100_gr *, int gpc, int tpc); @@ -181,9 +183,11 @@ struct gf100_gr_func { const struct gf100_gr_pack *mmio; struct { struct gf100_gr_ucode *ucode; + void (*reset)(struct gf100_gr *); } fecs; struct { struct gf100_gr_ucode *ucode; + void (*reset)(struct gf100_gr *); } gpccs; int (*rops)(struct gf100_gr *); int gpc_nr; @@ -197,7 +201,7 @@ struct gf100_gr_func { int gf100_gr_rops(struct gf100_gr *); void gf100_gr_oneinit_tiles(struct gf100_gr *); -void gf100_gr_oneinit_sm_id(struct gf100_gr *); +int gf100_gr_oneinit_sm_id(struct gf100_gr *); int gf100_gr_init(struct gf100_gr *); void gf100_gr_init_vsc_stream_master(struct gf100_gr *); void gf100_gr_init_zcull(struct gf100_gr *); @@ -208,9 +212,12 @@ void gf100_gr_init_419cc0(struct gf100_gr *); void gf100_gr_init_419eb4(struct gf100_gr *); void gf100_gr_init_tex_hww_esr(struct gf100_gr *, int, int); void gf100_gr_init_shader_exceptions(struct gf100_gr *, int, int); +void gf100_gr_init_rop_exceptions(struct gf100_gr *); +void gf100_gr_init_exception2(struct gf100_gr *); void gf100_gr_init_400054(struct gf100_gr *); void gf100_gr_init_num_tpc_per_gpc(struct gf100_gr *, bool, bool); extern const struct gf100_gr_func_zbc gf100_gr_zbc; +void gf100_gr_fecs_reset(struct gf100_gr *); void gf117_gr_init_zcull(struct gf100_gr *); @@ -226,9 +233,13 @@ void 
gm107_gr_init_shader_exceptions(struct gf100_gr *, int, int); void gm107_gr_init_400054(struct gf100_gr *); int gk20a_gr_init(struct gf100_gr *); +int gk20a_gr_av_to_init_(struct nvkm_blob *, u8 count, u32 pitch, struct gf100_gr_pack **); +int gk20a_gr_av_to_init(struct nvkm_blob *, struct gf100_gr_pack **); +int gk20a_gr_aiv_to_init(struct nvkm_blob *, struct gf100_gr_pack **); +int gk20a_gr_av_to_method(struct nvkm_blob *, struct gf100_gr_pack **); void gm200_gr_oneinit_tiles(struct gf100_gr *); -void gm200_gr_oneinit_sm_id(struct gf100_gr *); +int gm200_gr_oneinit_sm_id(struct gf100_gr *); int gm200_gr_rops(struct gf100_gr *); void gm200_gr_init_num_active_ltcs(struct gf100_gr *); void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *); @@ -242,14 +253,24 @@ extern const struct gf100_gr_func_zbc gp100_gr_zbc; void gp102_gr_init_swdx_pes_mask(struct gf100_gr *); extern const struct gf100_gr_func_zbc gp102_gr_zbc; +int gp102_gr_zbc_stencil_get(struct gf100_gr *, int, const u32, const u32); +void gp102_gr_zbc_clear_stencil(struct gf100_gr *, int); extern const struct gf100_gr_func gp107_gr; +int gv100_gr_oneinit_sm_id(struct gf100_gr *); +u32 gv100_gr_nonpes_aware_tpc(struct gf100_gr *gr, u32 gpc, u32 tpc); void gv100_gr_init_419bd8(struct gf100_gr *); void gv100_gr_init_504430(struct gf100_gr *, int, int); void gv100_gr_init_shader_exceptions(struct gf100_gr *, int, int); +void gv100_gr_init_4188a4(struct gf100_gr *); void gv100_gr_trap_mp(struct gf100_gr *, int, int); +int tu102_gr_av_to_init_veid(struct nvkm_blob *, struct gf100_gr_pack **); +void tu102_gr_init_zcull(struct gf100_gr *); +void tu102_gr_init_fs(struct gf100_gr *); +void tu102_gr_init_fecs_exceptions(struct gf100_gr *); + #define gf100_gr_chan(p) container_of((p), struct gf100_gr_chan, object) #include <core/object.h> @@ -258,14 +279,14 @@ struct gf100_gr_chan { struct gf100_gr *gr; struct nvkm_vmm *vmm; + struct nvkm_vma *pagepool; + struct nvkm_vma *bundle_cb; + struct nvkm_vma *attrib_cb; + struct 
nvkm_vma *unknown; + struct nvkm_memory *mmio; struct nvkm_vma *mmio_vma; int mmio_nr; - - struct { - struct nvkm_memory *mem; - struct nvkm_vma *vma; - } data[4]; }; void gf100_gr_ctxctl_debug(struct gf100_gr *); @@ -279,7 +300,7 @@ struct gf100_gr_init { u32 addr; u8 count; u32 pitch; - u32 data; + u64 data; }; struct gf100_gr_pack { @@ -403,6 +424,9 @@ int gf100_gr_load(struct gf100_gr *, int, const struct gf100_gr_fwif *); int gf100_gr_nofw(struct gf100_gr *, int, const struct gf100_gr_fwif *); int gk20a_gr_load_sw(struct gf100_gr *, const char *path, int ver); +int gk20a_gr_load_net(struct gf100_gr *, const char *, const char *, int, + int (*)(struct nvkm_blob *, struct gf100_gr_pack **), + struct gf100_gr_pack **); int gm200_gr_nofw(struct gf100_gr *, int, const struct gf100_gr_fwif *); int gm200_gr_load(struct gf100_gr *, int, const struct gf100_gr_fwif *); @@ -415,6 +439,8 @@ void gm20b_gr_acr_bld_patch(struct nvkm_acr *, u32, s64); extern const struct nvkm_acr_lsf_func gp108_gr_gpccs_acr; extern const struct nvkm_acr_lsf_func gp108_gr_fecs_acr; +void gp108_gr_acr_bld_write(struct nvkm_acr *, u32, struct nvkm_acr_lsfw *); +void gp108_gr_acr_bld_patch(struct nvkm_acr *, u32, s64); int gf100_gr_new_(const struct gf100_gr_fwif *, struct nvkm_device *, enum nvkm_subdev_type, int, struct nvkm_gr **); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c index 3acd99c306f2..63bd29c22fe1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c @@ -127,10 +127,13 @@ gf104_gr = { .init_419eb4 = gf100_gr_init_419eb4, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gf104_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, + 
.fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf104_grctx, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c index ab3760e804b8..495a844f925f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c @@ -125,10 +125,13 @@ gf108_gr = { .init_419eb4 = gf100_gr_init_419eb4, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gf108_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf108_grctx, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c index 616e2def1865..70fad235d161 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c @@ -99,10 +99,13 @@ gf110_gr = { .init_419eb4 = gf100_gr_init_419eb4, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gf110_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf110_grctx, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c index 669e7536970e..f12728248048 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c @@ -125,7 +125,9 @@ gf117_gr_init_zcull(struct gf100_gr 
*gr) { struct nvkm_device *device = gr->base.engine.subdev.device; const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - const u8 tile_nr = ALIGN(gr->tpc_total, 32); + /*TODO: fill in litter vals for gf117-gm2xx */ + const u8 tile_nr = !gr->func->gpc_nr ? ALIGN(gr->tpc_total, 32) : + (gr->func->gpc_nr * gr->func->tpc_nr); u8 bank[GPC_MAX] = {}, gpc, i, j; u32 data; @@ -163,10 +165,13 @@ gf117_gr = { .init_419eb4 = gf100_gr_init_419eb4, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gf117_gr_pack_mmio, .fecs.ucode = &gf117_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gf117_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 1, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c index 5b09bda8110c..75ceb514c06e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c @@ -190,10 +190,13 @@ gf119_gr = { .init_419eb4 = gf100_gr_init_419eb4, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gf119_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf119_grctx, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c index b680eaa0f350..e53ade24ad23 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c @@ -418,7 +418,7 @@ gk104_gr_init_ppc_exceptions(struct 
gf100_gr *gr) int gpc, ppc; for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) { if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; nvkm_wr32(device, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000); @@ -470,10 +470,13 @@ gk104_gr = { .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gk104_gr_pack_mmio, .fecs.ucode = &gk104_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gk104_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 1, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c index 103e06a77e65..c7e1c5dbc6a9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c @@ -366,10 +366,13 @@ gk110_gr = { .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gk110_gr_pack_mmio, .fecs.ucode = &gk110_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gk110_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 2, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c index 034d0b11a17d..458abae571bf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c @@ -118,10 +118,13 @@ gk110b_gr = { .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, 
.init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gk110b_gr_pack_mmio, .fecs.ucode = &gk110_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gk110_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 2, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c index 116d682f9f96..d3f6b65c21d2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c @@ -176,10 +176,13 @@ gk208_gr = { .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gf100_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gk208_gr_pack_mmio, .fecs.ucode = &gk208_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gk208_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 1, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c index be0b2cefd8e8..035ea213f543 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c @@ -33,47 +33,40 @@ struct gk20a_fw_av u32 data; }; -static int -gk20a_gr_av_to_init(struct gf100_gr *gr, const char *path, const char *name, - int ver, struct gf100_gr_pack **ppack) +int +gk20a_gr_av_to_init_(struct nvkm_blob *blob, u8 count, u32 pitch, struct gf100_gr_pack **ppack) { - struct nvkm_subdev *subdev = &gr->base.engine.subdev; - struct nvkm_blob blob; struct gf100_gr_init *init; struct gf100_gr_pack *pack; int nent; - int ret; int i; - ret = nvkm_firmware_load_blob(subdev, path, name, ver, &blob); - if (ret) - return ret; - - 
nent = (blob.size / sizeof(struct gk20a_fw_av)); + nent = (blob->size / sizeof(struct gk20a_fw_av)); pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1))); - if (!pack) { - ret = -ENOMEM; - goto end; - } + if (!pack) + return -ENOMEM; init = (void *)(pack + 2); pack[0].init = init; for (i = 0; i < nent; i++) { struct gf100_gr_init *ent = &init[i]; - struct gk20a_fw_av *av = &((struct gk20a_fw_av *)blob.data)[i]; + struct gk20a_fw_av *av = &((struct gk20a_fw_av *)blob->data)[i]; ent->addr = av->addr; ent->data = av->data; - ent->count = 1; - ent->pitch = 1; + ent->count = ((ent->addr & 0xffff) != 0xe100) ? count : 1; + ent->pitch = pitch; } *ppack = pack; + return 0; +} -end: - nvkm_blob_dtor(&blob); - return ret; +int +gk20a_gr_av_to_init(struct nvkm_blob *blob, struct gf100_gr_pack **ppack) +{ + return gk20a_gr_av_to_init_(blob, 1, 1, ppack); } struct gk20a_fw_aiv @@ -83,35 +76,25 @@ struct gk20a_fw_aiv u32 data; }; -static int -gk20a_gr_aiv_to_init(struct gf100_gr *gr, const char *path, const char *name, - int ver, struct gf100_gr_pack **ppack) +int +gk20a_gr_aiv_to_init(struct nvkm_blob *blob, struct gf100_gr_pack **ppack) { - struct nvkm_subdev *subdev = &gr->base.engine.subdev; - struct nvkm_blob blob; struct gf100_gr_init *init; struct gf100_gr_pack *pack; int nent; - int ret; int i; - ret = nvkm_firmware_load_blob(subdev, path, name, ver, &blob); - if (ret) - return ret; - - nent = (blob.size / sizeof(struct gk20a_fw_aiv)); + nent = (blob->size / sizeof(struct gk20a_fw_aiv)); pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1))); - if (!pack) { - ret = -ENOMEM; - goto end; - } + if (!pack) + return -ENOMEM; init = (void *)(pack + 2); pack[0].init = init; for (i = 0; i < nent; i++) { struct gf100_gr_init *ent = &init[i]; - struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)blob.data)[i]; + struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)blob->data)[i]; ent->addr = av->addr; ent->data = av->data; @@ -120,44 +103,30 @@ 
gk20a_gr_aiv_to_init(struct gf100_gr *gr, const char *path, const char *name, } *ppack = pack; - -end: - nvkm_blob_dtor(&blob); - return ret; + return 0; } -static int -gk20a_gr_av_to_method(struct gf100_gr *gr, const char *path, const char *name, - int ver, struct gf100_gr_pack **ppack) +int +gk20a_gr_av_to_method(struct nvkm_blob *blob, struct gf100_gr_pack **ppack) { - struct nvkm_subdev *subdev = &gr->base.engine.subdev; - struct nvkm_blob blob; struct gf100_gr_init *init; struct gf100_gr_pack *pack; /* We don't suppose we will initialize more than 16 classes here... */ static const unsigned int max_classes = 16; u32 classidx = 0, prevclass = 0; int nent; - int ret; int i; - ret = nvkm_firmware_load_blob(subdev, path, name, ver, &blob); - if (ret) - return ret; - - nent = (blob.size / sizeof(struct gk20a_fw_av)); - + nent = (blob->size / sizeof(struct gk20a_fw_av)); pack = vzalloc((sizeof(*pack) * (max_classes + 1)) + (sizeof(*init) * (nent + max_classes + 1))); - if (!pack) { - ret = -ENOMEM; - goto end; - } + if (!pack) + return -ENOMEM; init = (void *)(pack + max_classes + 1); for (i = 0; i < nent; i++, init++) { - struct gk20a_fw_av *av = &((struct gk20a_fw_av *)blob.data)[i]; + struct gk20a_fw_av *av = &((struct gk20a_fw_av *)blob->data)[i]; u32 class = av->addr & 0xffff; u32 addr = (av->addr & 0xffff0000) >> 14; @@ -169,8 +138,7 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *path, const char *name, prevclass = class; if (++classidx >= max_classes) { vfree(pack); - ret = -ENOSPC; - goto end; + return -ENOSPC; } } @@ -181,10 +149,7 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *path, const char *name, } *ppack = pack; - -end: - nvkm_blob_dtor(&blob); - return ret; + return 0; } static int @@ -294,6 +259,7 @@ gk20a_gr = { .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, .trap_mp = gf100_gr_trap_mp, .set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask, + .fecs.reset = gf100_gr_fecs_reset, .rops = gf100_gr_rops, .ppc_nr = 1, 
.grctx = &gk20a_grctx, @@ -308,12 +274,29 @@ gk20a_gr = { }; int +gk20a_gr_load_net(struct gf100_gr *gr, const char *path, const char *name, int ver, + int (*load)(struct nvkm_blob *, struct gf100_gr_pack **), + struct gf100_gr_pack **ppack) +{ + struct nvkm_blob blob; + int ret; + + ret = nvkm_firmware_load_blob(&gr->base.engine.subdev, path, name, ver, &blob); + if (ret) + return ret; + + ret = load(&blob, ppack); + nvkm_blob_dtor(&blob); + return ret; +} + +int gk20a_gr_load_sw(struct gf100_gr *gr, const char *path, int ver) { - if (gk20a_gr_av_to_init(gr, path, "sw_nonctx", ver, &gr->sw_nonctx) || - gk20a_gr_aiv_to_init(gr, path, "sw_ctx", ver, &gr->sw_ctx) || - gk20a_gr_av_to_init(gr, path, "sw_bundle_init", ver, &gr->bundle) || - gk20a_gr_av_to_method(gr, path, "sw_method_init", ver, &gr->method)) + if (gk20a_gr_load_net(gr, path, "sw_nonctx", ver, gk20a_gr_av_to_init, &gr->sw_nonctx) || + gk20a_gr_load_net(gr, path, "sw_ctx", ver, gk20a_gr_aiv_to_init, &gr->sw_ctx) || + gk20a_gr_load_net(gr, path, "sw_bundle_init", ver, gk20a_gr_av_to_init, &gr->bundle) || + gk20a_gr_load_net(gr, path, "sw_method_init", ver, gk20a_gr_av_to_method, &gr->method)) return -ENOENT; return 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c index 310987174cb5..797b828a943b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c @@ -411,10 +411,13 @@ gm107_gr = { .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gm107_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gm107_gr_init_400054, .trap_mp = gf100_gr_trap_mp, .mmio = gm107_gr_pack_mmio, .fecs.ucode = &gm107_gr_fecs_ucode, + .fecs.reset = gf100_gr_fecs_reset, .gpccs.ucode = &gm107_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 2, diff --git 
a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c index 385cfd91b266..b5210b31c1b2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c @@ -148,11 +148,11 @@ gm200_gr_tile_map_2_8[] = { 0, 1, 1, 0, 0, 1, 1, 0, }; -void +int gm200_gr_oneinit_sm_id(struct gf100_gr *gr) { /*XXX: There's a different algorithm here I've not yet figured out. */ - gf100_gr_oneinit_sm_id(gr); + return gf100_gr_oneinit_sm_id(gr); } void @@ -199,8 +199,11 @@ gm200_gr = { .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gm107_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_400054 = gm107_gr_init_400054, .trap_mp = gf100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .tpc_nr = 4, .ppc_nr = 2, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c index ec1c46e47e00..458cd1a00d3f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c @@ -123,6 +123,7 @@ gm20b_gr = { .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, .trap_mp = gf100_gr_trap_mp, .set_hww_esr_report_mask = gm20b_gr_set_hww_esr_report_mask, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .ppc_nr = 1, .grctx = &gm20b_grctx, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c index 0550dd6f46f1..851e743d2cab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c @@ -87,7 +87,7 @@ gp100_gr_init_419c9c(struct gf100_gr *gr) void gp100_gr_init_fecs_exceptions(struct gf100_gr *gr) { - nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x000f0002); + nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x000e0002); } void @@ 
-119,7 +119,10 @@ gp100_gr = { .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .trap_mp = gf100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .gpc_nr = 6, .tpc_nr = 5, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c index 5b001f374be0..0e223b7b5f0e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c @@ -26,7 +26,7 @@ #include <nvif/class.h> -static void +void gp102_gr_zbc_clear_stencil(struct gf100_gr *gr, int zbc) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -40,14 +40,14 @@ gp102_gr_zbc_clear_stencil(struct gf100_gr *gr, int zbc) gr->zbc_stencil[zbc].format << ((znum % 4) * 7)); } -static int +int gp102_gr_zbc_stencil_get(struct gf100_gr *gr, int format, const u32 ds, const u32 l2) { struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; int zbc = -ENOSPC, i; - for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { + for (i = ltc->zbc_depth_min; i <= ltc->zbc_depth_max; i++) { if (gr->zbc_stencil[i].format) { if (gr->zbc_stencil[i].format != format) continue; @@ -115,7 +115,10 @@ gp102_gr = { .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .trap_mp = gf100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .gpc_nr = 6, .tpc_nr = 5, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c index 2655574ec63b..6802cb9b199f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c @@ -43,7 
+43,10 @@ gp104_gr = { .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .trap_mp = gf100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .gpc_nr = 6, .tpc_nr = 5, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c index adabc04d4f3a..cc2bb0d0a987 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c @@ -45,7 +45,10 @@ gp107_gr = { .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .trap_mp = gf100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .gpc_nr = 2, .tpc_nr = 3, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c index 7310f0466bb7..311f703439e4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c @@ -25,7 +25,7 @@ #include <nvfw/flcn.h> -static void +void gp108_gr_acr_bld_patch(struct nvkm_acr *acr, u32 bld, s64 adjust) { struct flcn_bl_dmem_desc_v2 hdr; @@ -36,7 +36,7 @@ gp108_gr_acr_bld_patch(struct nvkm_acr *acr, u32 bld, s64 adjust) flcn_bl_dmem_desc_v2_dump(&acr->subdev, &hdr); } -static void +void gp108_gr_acr_bld_write(struct nvkm_acr *acr, u32 bld, struct nvkm_acr_lsfw *lsfw) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c index e13683b6e7b1..5008881ca079 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c @@ -55,7 +55,10 @@ gp10b_gr = { .init_tex_hww_esr = 
gf100_gr_init_tex_hww_esr, .init_504430 = gm107_gr_init_504430, .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .trap_mp = gf100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .gpc_nr = 1, .tpc_nr = 2, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c index 4d043c1173ea..7f7404a76140 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c @@ -52,10 +52,11 @@ gv100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc) gv100_gr_trap_sm(gr, gpc, tpc, 1); } -static void +void gv100_gr_init_4188a4(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x4188a4, 0x03000000, 0x03000000); } @@ -65,7 +66,6 @@ gv100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc) struct nvkm_device *device = gr->base.engine.subdev.device; int sm; for (sm = 0; sm < 0x100; sm += 0x80) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x728 + sm), 0x0085eb64); nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x610), 0x00000001); nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x72c + sm), 0x00000004); } @@ -85,10 +85,202 @@ gv100_gr_init_419bd8(struct gf100_gr *gr) nvkm_mask(device, 0x419bd8, 0x00000700, 0x00000000); } +u32 +gv100_gr_nonpes_aware_tpc(struct gf100_gr *gr, u32 gpc, u32 tpc) +{ + u32 pes, temp, tpc_new = 0; + + for (pes = 0; pes < gr->ppc_nr[gpc]; pes++) { + if (gr->ppc_tpc_mask[gpc][pes] & BIT(tpc)) + break; + + tpc_new += gr->ppc_tpc_nr[gpc][pes]; + } + + temp = (BIT(tpc) - 1) & gr->ppc_tpc_mask[gpc][pes]; + temp = hweight32(temp); + return tpc_new + temp; +} + +static int +gv100_gr_scg_estimate_perf(struct gf100_gr *gr, unsigned long *gpc_tpc_mask, + u32 disable_gpc, u32 disable_tpc, int *perf) +{ + const u32 scale_factor = 512UL; /* Use fx23.9 */ + const u32 pix_scale = 1024*1024UL; /* Pix perf in 
[29:20] */ + const u32 world_scale = 1024UL; /* World performance in [19:10] */ + const u32 tpc_scale = 1; /* TPC balancing in [9:0] */ + u32 scg_num_pes = 0; + u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */ + u32 average_tpcs = 0; /* Average of # of TPCs per GPC */ + u32 deviation; /* absolute diff between TPC# and average_tpcs, averaged across GPCs */ + u32 norm_tpc_deviation; /* deviation/max_tpc_per_gpc */ + u32 tpc_balance; + u32 scg_gpc_pix_perf; + u32 scg_world_perf; + u32 gpc; + u32 pes; + int diff; + bool tpc_removed_gpc = false; + bool tpc_removed_pes = false; + u32 max_tpc_gpc = 0; + u32 num_tpc_mask; + u32 *num_tpc_gpc; + int ret = -EINVAL; + + if (!(num_tpc_gpc = kcalloc(gr->gpc_nr, sizeof(*num_tpc_gpc), GFP_KERNEL))) + return -ENOMEM; + + /* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */ + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + num_tpc_mask = gpc_tpc_mask[gpc]; + + if ((gpc == disable_gpc) && num_tpc_mask & BIT(disable_tpc)) { + /* Safety check if a TPC is removed twice */ + if (WARN_ON(tpc_removed_gpc)) + goto done; + + /* Remove logical TPC from set */ + num_tpc_mask &= ~BIT(disable_tpc); + tpc_removed_gpc = true; + } + + /* track balancing of tpcs across gpcs */ + num_tpc_gpc[gpc] = hweight32(num_tpc_mask); + average_tpcs += num_tpc_gpc[gpc]; + + /* save the maximum numer of gpcs */ + max_tpc_gpc = num_tpc_gpc[gpc] > max_tpc_gpc ? 
num_tpc_gpc[gpc] : max_tpc_gpc; + + /* + * Calculate ratio between TPC count and post-FS and post-SCG + * + * ratio represents relative throughput of the GPC + */ + scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc] / gr->tpc_nr[gpc]; + if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) + min_scg_gpc_pix_perf = scg_gpc_pix_perf; + + /* Calculate # of surviving PES */ + for (pes = 0; pes < gr->ppc_nr[gpc]; pes++) { + /* Count the number of TPC on the set */ + num_tpc_mask = gr->ppc_tpc_mask[gpc][pes] & gpc_tpc_mask[gpc]; + + if ((gpc == disable_gpc) && (num_tpc_mask & BIT(disable_tpc))) { + if (WARN_ON(tpc_removed_pes)) + goto done; + + num_tpc_mask &= ~BIT(disable_tpc); + tpc_removed_pes = true; + } + + if (hweight32(num_tpc_mask)) + scg_num_pes++; + } + } + + if (WARN_ON(!tpc_removed_gpc || !tpc_removed_pes)) + goto done; + + if (max_tpc_gpc == 0) { + *perf = 0; + goto done_ok; + } + + /* Now calculate perf */ + scg_world_perf = (scale_factor * scg_num_pes) / gr->ppc_total; + deviation = 0; + average_tpcs = scale_factor * average_tpcs / gr->gpc_nr; + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + diff = average_tpcs - scale_factor * num_tpc_gpc[gpc]; + if (diff < 0) + diff = -diff; + + deviation += diff; + } + + deviation /= gr->gpc_nr; + + norm_tpc_deviation = deviation / max_tpc_gpc; + + tpc_balance = scale_factor - norm_tpc_deviation; + + if ((tpc_balance > scale_factor) || + (scg_world_perf > scale_factor) || + (min_scg_gpc_pix_perf > scale_factor) || + (norm_tpc_deviation > scale_factor)) { + WARN_ON(1); + goto done; + } + + *perf = (pix_scale * min_scg_gpc_pix_perf) + + (world_scale * scg_world_perf) + + (tpc_scale * tpc_balance); +done_ok: + ret = 0; +done: + kfree(num_tpc_gpc); + return ret; +} + +int +gv100_gr_oneinit_sm_id(struct gf100_gr *gr) +{ + unsigned long *gpc_tpc_mask; + u32 *tpc_table, *gpc_table; + u32 gpc, tpc, pes, gtpc; + int perf, maxperf, ret = 0; + + gpc_tpc_mask = kcalloc(gr->gpc_nr, sizeof(*gpc_tpc_mask), GFP_KERNEL); + gpc_table = 
kcalloc(gr->tpc_total, sizeof(*gpc_table), GFP_KERNEL); + tpc_table = kcalloc(gr->tpc_total, sizeof(*tpc_table), GFP_KERNEL); + if (!gpc_table || !tpc_table || !gpc_tpc_mask) { + ret = -ENOMEM; + goto done; + } + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (pes = 0; pes < gr->ppc_nr[gpc]; pes++) + gpc_tpc_mask[gpc] |= gr->ppc_tpc_mask[gpc][pes]; + } + + for (gtpc = 0; gtpc < gr->tpc_total; gtpc++) { + for (maxperf = -1, gpc = 0; gpc < gr->gpc_nr; gpc++) { + for_each_set_bit(tpc, &gpc_tpc_mask[gpc], gr->tpc_nr[gpc]) { + ret = gv100_gr_scg_estimate_perf(gr, gpc_tpc_mask, gpc, tpc, &perf); + if (ret) + goto done; + + /* nvgpu does ">=" here, but this gets us RM's numbers. */ + if (perf > maxperf) { + maxperf = perf; + gpc_table[gtpc] = gpc; + tpc_table[gtpc] = tpc; + } + } + } + + gpc_tpc_mask[gpc_table[gtpc]] &= ~BIT(tpc_table[gtpc]); + } + + /*TODO: build table for sm_per_tpc != 1, don't use yet, but might need later? */ + for (gtpc = 0; gtpc < gr->tpc_total; gtpc++) { + gr->sm[gtpc].gpc = gpc_table[gtpc]; + gr->sm[gtpc].tpc = tpc_table[gtpc]; + gr->sm_nr++; + } + +done: + kfree(gpc_table); + kfree(tpc_table); + kfree(gpc_tpc_mask); + return ret; +} + static const struct gf100_gr_func gv100_gr = { .oneinit_tiles = gm200_gr_oneinit_tiles, - .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .oneinit_sm_id = gv100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_419bd8 = gv100_gr_init_419bd8, .init_gpc_mmu = gm200_gr_init_gpc_mmu, @@ -103,11 +295,14 @@ gv100_gr = { .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_504430 = gv100_gr_init_504430, .init_shader_exceptions = gv100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, .init_4188a4 = gv100_gr_init_4188a4, .trap_mp = gv100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .gpc_nr = 6, - .tpc_nr = 5, + .tpc_nr = 7, .ppc_nr = 3, .grctx = &gv100_grctx, .zbc = &gp102_gr_zbc, diff --git 
a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c index 0bc1a238de43..81bd682c2102 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c @@ -1192,7 +1192,7 @@ nv04_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv04_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; *ctx_reg(chan, NV04_PGRAPH_DEBUG_3) = 0xfad4ff31; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c index 942450b33bc6..7fe6e58f6bab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c @@ -1011,7 +1011,7 @@ nv10_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv10_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; NV_WRITE_CTX(0x00400e88, 0x08000000); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c index 6bff10cee71b..75434f5de7ad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c @@ -83,7 +83,7 @@ nv20_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv20_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; ret = nvkm_memory_new(gr->base.engine.subdev.device, @@ -182,7 +182,7 @@ nv20_gr_intr(struct nvkm_gr *base) struct nv20_gr *gr = nv20_gr(base); struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; - struct nvkm_fifo_chan *chan; + struct nvkm_chan *chan; u32 stat = nvkm_rd32(device, NV03_PGRAPH_INTR); u32 nsource = nvkm_rd32(device, 
NV03_PGRAPH_NSOURCE); u32 nstatus = nvkm_rd32(device, NV03_PGRAPH_NSTATUS); @@ -196,7 +196,7 @@ nv20_gr_intr(struct nvkm_gr *base) char msg[128], src[128], sta[128]; unsigned long flags; - chan = nvkm_fifo_chan_chid(device->fifo, chid, &flags); + chan = nvkm_chan_get_chid(&gr->base.engine, chid, &flags); nvkm_wr32(device, NV03_PGRAPH_INTR, stat); nvkm_wr32(device, NV04_PGRAPH_FIFO, 0x00000001); @@ -209,11 +209,11 @@ nv20_gr_intr(struct nvkm_gr *base) "nstatus %08x [%s] ch %d [%s] subc %d " "class %04x mthd %04x data %08x\n", show, msg, nsource, src, nstatus, sta, chid, - chan ? chan->object.client->name : "unknown", + chan ? chan->name : "unknown", subc, class, mthd, data); } - nvkm_fifo_chan_put(device->fifo, flags, &chan); + nvkm_chan_put(&chan, flags); } int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c index f3a56f17d94a..94685e4d4f87 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c @@ -29,7 +29,7 @@ nv25_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv25_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; ret = nvkm_memory_new(gr->base.engine.subdev.device, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c index f268d2642d29..2d6273675291 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c @@ -29,7 +29,7 @@ nv2a_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv2a_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; ret = nvkm_memory_new(gr->base.engine.subdev.device, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c 
b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c index e5737cdf2fa1..647bd6fede04 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c @@ -30,7 +30,7 @@ nv30_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv30_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; ret = nvkm_memory_new(gr->base.engine.subdev.device, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c index 1ab2da8ebf4e..2eae3fe4ef4e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c @@ -29,7 +29,7 @@ nv34_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv34_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; ret = nvkm_memory_new(gr->base.engine.subdev.device, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c index 591260f5676b..657d7cdba369 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c @@ -29,7 +29,7 @@ nv35_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, return -ENOMEM; nvkm_object_ctor(&nv35_gr_chan, oclass, &chan->object); chan->gr = gr; - chan->chid = fifoch->chid; + chan->chid = fifoch->id; *pobject = &chan->object; ret = nvkm_memory_new(gr->base.engine.subdev.device, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c index 67f3535ff97e..d2df097a6cf6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c @@ -275,8 +275,8 @@ nv40_gr_intr(struct nvkm_gr *base) "nstatus %08x [%s] ch %d [%08x %s] subc %d " "class %04x mthd %04x 
data %08x\n", show, msg, nsource, src, nstatus, sta, - chan ? chan->fifo->chid : -1, inst << 4, - chan ? chan->fifo->object.client->name : "unknown", + chan ? chan->fifo->id : -1, inst << 4, + chan ? chan->fifo->name : "unknown", subc, class, mthd, data); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c index 563a10097e95..1ba18a8e380f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c @@ -622,7 +622,7 @@ nv50_gr_intr(struct nvkm_gr *base) struct nv50_gr *gr = nv50_gr(base); struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; - struct nvkm_fifo_chan *chan; + struct nvkm_chan *chan; u32 stat = nvkm_rd32(device, 0x400100); u32 inst = nvkm_rd32(device, 0x40032c) & 0x0fffffff; u32 addr = nvkm_rd32(device, 0x400704); @@ -637,10 +637,10 @@ nv50_gr_intr(struct nvkm_gr *base) char msg[128]; int chid = -1; - chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags); + chan = nvkm_chan_get_inst(&gr->base.engine, (u64)inst << 12, &flags); if (chan) { - name = chan->object.client->name; - chid = chan->chid; + name = chan->name; + chid = chan->id; } if (show & 0x00100000) { @@ -672,7 +672,7 @@ nv50_gr_intr(struct nvkm_gr *base) if (nvkm_rd32(device, 0x400824) & (1 << 31)) nvkm_wr32(device, 0x400824, nvkm_rd32(device, 0x400824) & ~(1 << 31)); - nvkm_fifo_chan_put(device->fifo, flags, &chan); + nvkm_chan_put(&chan, flags); } int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h index 9b2c66e8be90..08d5c96e6458 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h @@ -17,6 +17,7 @@ struct nvkm_gr_func { int (*oneinit)(struct nvkm_gr *); int (*init)(struct nvkm_gr *); int (*fini)(struct nvkm_gr *, bool); + int (*reset)(struct nvkm_gr *); void (*intr)(struct nvkm_gr *); void (*tile)(struct nvkm_gr 
*, int region, struct nvkm_fb_tile *); int (*tlb_flush)(struct nvkm_gr *); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c index 1a8a21844e12..3b6c8100a242 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c @@ -24,13 +24,13 @@ #include <nvif/class.h> -static void +void tu102_gr_init_fecs_exceptions(struct gf100_gr *gr) { - nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x006f0002); + nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x006e0003); } -static void +void tu102_gr_init_fs(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -40,20 +40,21 @@ tu102_gr_init_fs(struct gf100_gr *gr) gk104_grctx_generate_gpc_tpc_nr(gr); for (sm = 0; sm < gr->sm_nr; sm++) { - nvkm_wr32(device, GPC_UNIT(gr->sm[sm].gpc, 0x0c10 + - gr->sm[sm].tpc * 4), sm); + int tpc = gv100_gr_nonpes_aware_tpc(gr, gr->sm[sm].gpc, gr->sm[sm].tpc); + + nvkm_wr32(device, GPC_UNIT(gr->sm[sm].gpc, 0x0c10 + tpc * 4), sm); } gm200_grctx_generate_dist_skip_table(gr); gf100_gr_init_num_tpc_per_gpc(gr, true, true); } -static void +void tu102_gr_init_zcull(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - const u8 tile_nr = ALIGN(gr->tpc_total, 64); + const u8 tile_nr = gr->func->gpc_nr * gr->func->tpc_nr; u8 bank[GPC_MAX] = {}, gpc, i, j; u32 data; @@ -93,7 +94,7 @@ tu102_gr_init_gpc_mmu(struct gf100_gr *gr) static const struct gf100_gr_func tu102_gr = { .oneinit_tiles = gm200_gr_oneinit_tiles, - .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .oneinit_sm_id = gv100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_419bd8 = gv100_gr_init_419bd8, .init_gpc_mmu = tu102_gr_init_gpc_mmu, @@ -109,10 +110,14 @@ tu102_gr = { .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_504430 = gv100_gr_init_504430, .init_shader_exceptions = 
gv100_gr_init_shader_exceptions, + .init_rop_exceptions = gf100_gr_init_rop_exceptions, + .init_exception2 = gf100_gr_init_exception2, + .init_4188a4 = gv100_gr_init_4188a4, .trap_mp = gv100_gr_trap_mp, + .fecs.reset = gf100_gr_fecs_reset, .rops = gm200_gr_rops, .gpc_nr = 6, - .tpc_nr = 5, + .tpc_nr = 6, .ppc_nr = 3, .grctx = &tu102_grctx, .zbc = &gp102_gr_zbc, @@ -137,6 +142,7 @@ MODULE_FIRMWARE("nvidia/tu102/gr/sw_ctx.bin"); MODULE_FIRMWARE("nvidia/tu102/gr/sw_nonctx.bin"); MODULE_FIRMWARE("nvidia/tu102/gr/sw_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu102/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/tu102/gr/sw_veid_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu104/gr/fecs_bl.bin"); MODULE_FIRMWARE("nvidia/tu104/gr/fecs_inst.bin"); @@ -150,6 +156,7 @@ MODULE_FIRMWARE("nvidia/tu104/gr/sw_ctx.bin"); MODULE_FIRMWARE("nvidia/tu104/gr/sw_nonctx.bin"); MODULE_FIRMWARE("nvidia/tu104/gr/sw_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu104/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/tu104/gr/sw_veid_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu106/gr/fecs_bl.bin"); MODULE_FIRMWARE("nvidia/tu106/gr/fecs_inst.bin"); @@ -163,6 +170,7 @@ MODULE_FIRMWARE("nvidia/tu106/gr/sw_ctx.bin"); MODULE_FIRMWARE("nvidia/tu106/gr/sw_nonctx.bin"); MODULE_FIRMWARE("nvidia/tu106/gr/sw_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu106/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/tu106/gr/sw_veid_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu117/gr/fecs_bl.bin"); MODULE_FIRMWARE("nvidia/tu117/gr/fecs_inst.bin"); @@ -176,6 +184,7 @@ MODULE_FIRMWARE("nvidia/tu117/gr/sw_ctx.bin"); MODULE_FIRMWARE("nvidia/tu117/gr/sw_nonctx.bin"); MODULE_FIRMWARE("nvidia/tu117/gr/sw_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu117/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/sw_veid_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu116/gr/fecs_bl.bin"); MODULE_FIRMWARE("nvidia/tu116/gr/fecs_inst.bin"); @@ -189,6 +198,26 @@ MODULE_FIRMWARE("nvidia/tu116/gr/sw_ctx.bin"); 
MODULE_FIRMWARE("nvidia/tu116/gr/sw_nonctx.bin"); MODULE_FIRMWARE("nvidia/tu116/gr/sw_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu116/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/tu116/gr/sw_veid_bundle_init.bin"); + +int +tu102_gr_av_to_init_veid(struct nvkm_blob *blob, struct gf100_gr_pack **ppack) +{ + return gk20a_gr_av_to_init_(blob, 64, 0x00100000, ppack); +} + +int +tu102_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif) +{ + int ret; + + ret = gm200_gr_load(gr, ver, fwif); + if (ret) + return ret; + + return gk20a_gr_load_net(gr, "gr/", "sw_veid_bundle_init", ver, tu102_gr_av_to_init_veid, + &gr->bundle_veid); +} static const struct gf100_gr_fwif tu102_gr_fwif[] = { |