linux-brain/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c

358 lines
8.6 KiB
C
Raw Normal View History

/*
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <core/tegra.h>
#ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER
#include "priv.h"
drm/nouveau: tegra: Detach from ARM DMA/IOMMU mapping [ Upstream commit b59fb482b52269977ee5de205308e5b236a03917 ] Depending on the kernel configuration, early ARM architecture setup code may have attached the GPU to a DMA/IOMMU mapping that transparently uses the IOMMU to back the DMA API. Tegra requires special handling for IOMMU backed buffers (a special bit in the GPU's MMU page tables indicates the memory path to take: via the SMMU or directly to the memory controller). Transparently backing DMA memory with an IOMMU prevents Nouveau from properly handling such memory accesses and causes memory access faults. As a side-note: buffers other than those allocated in instance memory don't need to be physically contiguous from the GPU's perspective since the GPU can map them into contiguous buffers using its own MMU. Mapping these buffers through the IOMMU is unnecessary and will even lead to performance degradation because of the additional translation. One exception to this are compressible buffers which need large pages. In order to enable these large pages, multiple small pages will have to be combined into one large (I/O virtually contiguous) mapping via the IOMMU. However, that is a topic outside the scope of this fix and isn't currently supported. An implementation will want to explicitly create these large pages in the Nouveau driver, so detaching from a DMA/IOMMU mapping would still be required. Signed-off-by: Thierry Reding <treding@nvidia.com> Acked-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Robin Murphy <robin.murphy@arm.com> Tested-by: Nicolas Chauvet <kwizart@gmail.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-05-30 23:06:25 +09:00
#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
#include <asm/dma-iommu.h>
#endif
static int
nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev)
{
int ret;
ret = regulator_enable(tdev->vdd);
if (ret)
goto err_power;
ret = clk_prepare_enable(tdev->clk);
if (ret)
goto err_clk;
if (tdev->clk_ref) {
ret = clk_prepare_enable(tdev->clk_ref);
if (ret)
goto err_clk_ref;
}
ret = clk_prepare_enable(tdev->clk_pwr);
if (ret)
goto err_clk_pwr;
clk_set_rate(tdev->clk_pwr, 204000000);
udelay(10);
reset_control_assert(tdev->rst);
udelay(10);
ret = tegra_powergate_remove_clamping(TEGRA_POWERGATE_3D);
if (ret)
goto err_clamp;
udelay(10);
reset_control_deassert(tdev->rst);
udelay(10);
return 0;
err_clamp:
clk_disable_unprepare(tdev->clk_pwr);
err_clk_pwr:
if (tdev->clk_ref)
clk_disable_unprepare(tdev->clk_ref);
err_clk_ref:
clk_disable_unprepare(tdev->clk);
err_clk:
regulator_disable(tdev->vdd);
err_power:
return ret;
}
static int
nvkm_device_tegra_power_down(struct nvkm_device_tegra *tdev)
{
reset_control_assert(tdev->rst);
udelay(10);
clk_disable_unprepare(tdev->clk_pwr);
if (tdev->clk_ref)
clk_disable_unprepare(tdev->clk_ref);
clk_disable_unprepare(tdev->clk);
udelay(10);
return regulator_disable(tdev->vdd);
}
static void
nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
{
#if IS_ENABLED(CONFIG_IOMMU_API)
struct device *dev = &tdev->pdev->dev;
unsigned long pgsize_bitmap;
int ret;
drm/nouveau: tegra: Detach from ARM DMA/IOMMU mapping [ Upstream commit b59fb482b52269977ee5de205308e5b236a03917 ] Depending on the kernel configuration, early ARM architecture setup code may have attached the GPU to a DMA/IOMMU mapping that transparently uses the IOMMU to back the DMA API. Tegra requires special handling for IOMMU backed buffers (a special bit in the GPU's MMU page tables indicates the memory path to take: via the SMMU or directly to the memory controller). Transparently backing DMA memory with an IOMMU prevents Nouveau from properly handling such memory accesses and causes memory access faults. As a side-note: buffers other than those allocated in instance memory don't need to be physically contiguous from the GPU's perspective since the GPU can map them into contiguous buffers using its own MMU. Mapping these buffers through the IOMMU is unnecessary and will even lead to performance degradation because of the additional translation. One exception to this are compressible buffers which need large pages. In order to enable these large pages, multiple small pages will have to be combined into one large (I/O virtually contiguous) mapping via the IOMMU. However, that is a topic outside the scope of this fix and isn't currently supported. An implementation will want to explicitly create these large pages in the Nouveau driver, so detaching from a DMA/IOMMU mapping would still be required. Signed-off-by: Thierry Reding <treding@nvidia.com> Acked-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Robin Murphy <robin.murphy@arm.com> Tested-by: Nicolas Chauvet <kwizart@gmail.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-05-30 23:06:25 +09:00
#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
if (dev->archdata.mapping) {
struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
arm_iommu_detach_device(dev);
arm_iommu_release_mapping(mapping);
}
#endif
if (!tdev->func->iommu_bit)
return;
mutex_init(&tdev->iommu.mutex);
if (iommu_present(&platform_bus_type)) {
tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type);
if (IS_ERR(tdev->iommu.domain))
goto error;
/*
* A IOMMU is only usable if it supports page sizes smaller
* or equal to the system's PAGE_SIZE, with a preference if
* both are equal.
*/
pgsize_bitmap = tdev->iommu.domain->ops->pgsize_bitmap;
if (pgsize_bitmap & PAGE_SIZE) {
tdev->iommu.pgshift = PAGE_SHIFT;
} else {
tdev->iommu.pgshift = fls(pgsize_bitmap & ~PAGE_MASK);
if (tdev->iommu.pgshift == 0) {
dev_warn(dev, "unsupported IOMMU page size\n");
goto free_domain;
}
tdev->iommu.pgshift -= 1;
}
ret = iommu_attach_device(tdev->iommu.domain, dev);
if (ret)
goto free_domain;
ret = nvkm_mm_init(&tdev->iommu.mm, 0,
(1ULL << tdev->func->iommu_bit) >>
tdev->iommu.pgshift, 1);
if (ret)
goto detach_device;
}
return;
detach_device:
iommu_detach_device(tdev->iommu.domain, dev);
free_domain:
iommu_domain_free(tdev->iommu.domain);
error:
tdev->iommu.domain = NULL;
tdev->iommu.pgshift = 0;
dev_err(dev, "cannot initialize IOMMU MM\n");
#endif
}
static void
nvkm_device_tegra_remove_iommu(struct nvkm_device_tegra *tdev)
{
#if IS_ENABLED(CONFIG_IOMMU_API)
if (tdev->iommu.domain) {
nvkm_mm_fini(&tdev->iommu.mm);
iommu_detach_device(tdev->iommu.domain, tdev->device.dev);
iommu_domain_free(tdev->iommu.domain);
}
#endif
}
static struct nvkm_device_tegra *
nvkm_device_tegra(struct nvkm_device *device)
{
return container_of(device, struct nvkm_device_tegra, device);
}
static struct resource *
nvkm_device_tegra_resource(struct nvkm_device *device, unsigned bar)
{
struct nvkm_device_tegra *tdev = nvkm_device_tegra(device);
return platform_get_resource(tdev->pdev, IORESOURCE_MEM, bar);
}
static resource_size_t
nvkm_device_tegra_resource_addr(struct nvkm_device *device, unsigned bar)
{
struct resource *res = nvkm_device_tegra_resource(device, bar);
return res ? res->start : 0;
}
static resource_size_t
nvkm_device_tegra_resource_size(struct nvkm_device *device, unsigned bar)
{
struct resource *res = nvkm_device_tegra_resource(device, bar);
return res ? resource_size(res) : 0;
}
static irqreturn_t
nvkm_device_tegra_intr(int irq, void *arg)
{
struct nvkm_device_tegra *tdev = arg;
struct nvkm_device *device = &tdev->device;
bool handled = false;
nvkm_mc_intr_unarm(device);
nvkm_mc_intr(device, &handled);
nvkm_mc_intr_rearm(device);
return handled ? IRQ_HANDLED : IRQ_NONE;
}
static void
nvkm_device_tegra_fini(struct nvkm_device *device, bool suspend)
{
struct nvkm_device_tegra *tdev = nvkm_device_tegra(device);
if (tdev->irq) {
free_irq(tdev->irq, tdev);
tdev->irq = 0;
};
}
static int
nvkm_device_tegra_init(struct nvkm_device *device)
{
struct nvkm_device_tegra *tdev = nvkm_device_tegra(device);
int irq, ret;
irq = platform_get_irq_byname(tdev->pdev, "stall");
if (irq < 0)
return irq;
ret = request_irq(irq, nvkm_device_tegra_intr,
IRQF_SHARED, "nvkm", tdev);
if (ret)
return ret;
tdev->irq = irq;
return 0;
}
static void *
nvkm_device_tegra_dtor(struct nvkm_device *device)
{
struct nvkm_device_tegra *tdev = nvkm_device_tegra(device);
nvkm_device_tegra_power_down(tdev);
nvkm_device_tegra_remove_iommu(tdev);
return tdev;
}
static const struct nvkm_device_func
nvkm_device_tegra_func = {
.tegra = nvkm_device_tegra,
.dtor = nvkm_device_tegra_dtor,
.init = nvkm_device_tegra_init,
.fini = nvkm_device_tegra_fini,
.resource_addr = nvkm_device_tegra_resource_addr,
.resource_size = nvkm_device_tegra_resource_size,
.cpu_coherent = false,
};
int
nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
struct platform_device *pdev,
const char *cfg, const char *dbg,
bool detect, bool mmio, u64 subdev_mask,
struct nvkm_device **pdevice)
{
struct nvkm_device_tegra *tdev;
int ret;
if (!(tdev = kzalloc(sizeof(*tdev), GFP_KERNEL)))
return -ENOMEM;
tdev->func = func;
tdev->pdev = pdev;
tdev->vdd = devm_regulator_get(&pdev->dev, "vdd");
if (IS_ERR(tdev->vdd)) {
ret = PTR_ERR(tdev->vdd);
goto free;
}
tdev->rst = devm_reset_control_get(&pdev->dev, "gpu");
if (IS_ERR(tdev->rst)) {
ret = PTR_ERR(tdev->rst);
goto free;
}
tdev->clk = devm_clk_get(&pdev->dev, "gpu");
if (IS_ERR(tdev->clk)) {
ret = PTR_ERR(tdev->clk);
goto free;
}
if (func->require_ref_clk)
tdev->clk_ref = devm_clk_get(&pdev->dev, "ref");
if (IS_ERR(tdev->clk_ref)) {
ret = PTR_ERR(tdev->clk_ref);
goto free;
}
tdev->clk_pwr = devm_clk_get(&pdev->dev, "pwr");
if (IS_ERR(tdev->clk_pwr)) {
ret = PTR_ERR(tdev->clk_pwr);
goto free;
}
/**
* The IOMMU bit defines the upper limit of the GPU-addressable space.
* This will be refined in nouveau_ttm_init but we need to do it early
* for instmem to behave properly
*/
ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(tdev->func->iommu_bit));
if (ret)
goto free;
nvkm_device_tegra_probe_iommu(tdev);
ret = nvkm_device_tegra_power_up(tdev);
if (ret)
goto remove;
tdev->gpu_speedo = tegra_sku_info.gpu_speedo_value;
tdev->gpu_speedo_id = tegra_sku_info.gpu_speedo_id;
ret = nvkm_device_ctor(&nvkm_device_tegra_func, NULL, &pdev->dev,
NVKM_DEVICE_TEGRA, pdev->id, NULL,
cfg, dbg, detect, mmio, subdev_mask,
&tdev->device);
if (ret)
goto powerdown;
*pdevice = &tdev->device;
return 0;
powerdown:
nvkm_device_tegra_power_down(tdev);
remove:
nvkm_device_tegra_remove_iommu(tdev);
free:
kfree(tdev);
return ret;
}
#else
int
nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
struct platform_device *pdev,
const char *cfg, const char *dbg,
bool detect, bool mmio, u64 subdev_mask,
struct nvkm_device **pdevice)
{
return -ENOSYS;
}
#endif