diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
index 160ce3c060a5..7e4e2959bf4f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
@@ -3,6 +3,8 @@
  * Copyright (C) 2014-2018 Etnaviv Project
  */
 
+#include <drm/drm_drv.h>
+
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_gem.h"
@@ -116,7 +118,9 @@ static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
 	u32 *ptr = buf->vaddr + off;
 
 	dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",
-			ptr, etnaviv_cmdbuf_get_va(buf) + off, size - len * 4 - off);
+			ptr, etnaviv_cmdbuf_get_va(buf,
+			&gpu->mmu_context->cmdbuf_mapping) +
+			off, size - len * 4 - off);
 
 	print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
 			ptr, len * 4, 0);
@@ -149,7 +153,9 @@ static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
 	if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)
 		buffer->user_size = 0;
 
-	return etnaviv_cmdbuf_get_va(buffer) + buffer->user_size;
+	return etnaviv_cmdbuf_get_va(buffer,
+				     &gpu->mmu_context->cmdbuf_mapping) +
+	       buffer->user_size;
 }
 
 u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
@@ -162,8 +168,9 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
 	buffer->user_size = 0;
 
 	CMD_WAIT(buffer);
-	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer) +
-		 buffer->user_size - 4);
+	CMD_LINK(buffer, 2,
+		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
+		 + buffer->user_size - 4);
 
 	return buffer->user_size / 8;
 }
@@ -203,7 +210,7 @@ u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe
 	return buffer->user_size / 8;
 }
 
-u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu)
+u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu, unsigned short id)
 {
 	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
 
@@ -212,7 +219,7 @@ u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu)
 	buffer->user_size = 0;
 
 	CMD_LOAD_STATE(buffer, VIVS_MMUv2_PTA_CONFIG,
-		       VIVS_MMUv2_PTA_CONFIG_INDEX(0));
+		       VIVS_MMUv2_PTA_CONFIG_INDEX(id));
 
 	CMD_END(buffer);
 
@@ -289,8 +296,9 @@ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
 
 	/* Append waitlink */
 	CMD_WAIT(buffer);
-	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer) +
-			    buffer->user_size - 4);
+	CMD_LINK(buffer, 2,
+		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
+		 + buffer->user_size - 4);
 
 	/*
 	 * Kick off the 'sync point' command by replacing the previous
@@ -304,36 +312,41 @@ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
 
 /* Append a command buffer to the ring buffer. */
 void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
-	unsigned int event, struct etnaviv_cmdbuf *cmdbuf)
+	struct etnaviv_iommu_context *mmu_context, unsigned int event,
+	struct etnaviv_cmdbuf *cmdbuf)
 {
 	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
 	unsigned int waitlink_offset = buffer->user_size - 16;
 	u32 return_target, return_dwords;
 	u32 link_target, link_dwords;
 	bool switch_context = gpu->exec_state != exec_state;
+	bool switch_mmu_context = gpu->mmu_context != mmu_context;
+	unsigned int new_flush_seq = READ_ONCE(gpu->mmu_context->flush_seq);
+	bool need_flush = switch_mmu_context || gpu->flush_seq != new_flush_seq;
 
 	lockdep_assert_held(&gpu->lock);
 
 	if (drm_debug & DRM_UT_DRIVER)
 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
 
-	link_target = etnaviv_cmdbuf_get_va(cmdbuf);
+	link_target = etnaviv_cmdbuf_get_va(cmdbuf,
+					    &gpu->mmu_context->cmdbuf_mapping);
 	link_dwords = cmdbuf->size / 8;
 
 	/*
-	 * If we need maintanence prior to submitting this buffer, we will
+	 * If we need maintenance prior to submitting this buffer, we will
 	 * need to append a mmu flush load state, followed by a new
 	 * link to this buffer - a total of four additional words.
 	 */
-	if (gpu->mmu->need_flush || switch_context) {
+	if (need_flush || switch_context) {
 		u32 target, extra_dwords;
 
 		/* link command */
 		extra_dwords = 1;
 
 		/* flush command */
-		if (gpu->mmu->need_flush) {
-			if (gpu->mmu->version == ETNAVIV_IOMMU_V1)
+		if (need_flush) {
+			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1)
 				extra_dwords += 1;
 			else
 				extra_dwords += 3;
@@ -343,11 +356,28 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
 		if (switch_context)
 			extra_dwords += 4;
 
-		target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
+		/* PTA load command */
+		if (switch_mmu_context && gpu->sec_mode == ETNA_SEC_KERNEL)
+			extra_dwords += 1;
 
-		if (gpu->mmu->need_flush) {
+		target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
+		/*
+		 * Switch MMU context if necessary. Must be done after the
+		 * link target has been calculated, as the jump forward in the
+		 * kernel ring still uses the last active MMU context before
+		 * the switch.
+		 */
+		if (switch_mmu_context) {
+			struct etnaviv_iommu_context *old_context = gpu->mmu_context;
+
+			etnaviv_iommu_context_get(mmu_context);
+			gpu->mmu_context = mmu_context;
+			etnaviv_iommu_context_put(old_context);
+		}
+
+		if (need_flush) {
 			/* Add the MMU flush */
-			if (gpu->mmu->version == ETNAVIV_IOMMU_V1) {
+			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1) {
 				CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_MMU,
 					       VIVS_GL_FLUSH_MMU_FLUSH_FEMMU |
 					       VIVS_GL_FLUSH_MMU_FLUSH_UNK1 |
@@ -355,17 +385,30 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
 					       VIVS_GL_FLUSH_MMU_FLUSH_PEMMU |
 					       VIVS_GL_FLUSH_MMU_FLUSH_UNK4);
 			} else {
+				u32 flush = VIVS_MMUv2_CONFIGURATION_MODE_MASK |
+					    VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH;
+
+				if (switch_mmu_context &&
+				    gpu->sec_mode == ETNA_SEC_KERNEL) {
+					unsigned short id =
+						etnaviv_iommuv2_get_pta_id(gpu->mmu_context);
+					CMD_LOAD_STATE(buffer,
+						VIVS_MMUv2_PTA_CONFIG,
+						VIVS_MMUv2_PTA_CONFIG_INDEX(id));
+				}
+
+				if (gpu->sec_mode == ETNA_SEC_NONE)
+					flush |= etnaviv_iommuv2_get_mtlb_addr(gpu->mmu_context);
+
 				CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
-					VIVS_MMUv2_CONFIGURATION_MODE_MASK |
-					VIVS_MMUv2_CONFIGURATION_ADDRESS_MASK |
-					VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH);
+					       flush);
 				CMD_SEM(buffer, SYNC_RECIPIENT_FE,
 					SYNC_RECIPIENT_PE);
 				CMD_STALL(buffer, SYNC_RECIPIENT_FE,
 					SYNC_RECIPIENT_PE);
 			}
 
-			gpu->mmu->need_flush = false;
+			gpu->flush_seq = new_flush_seq;
 		}
 
 		if (switch_context) {
@@ -374,6 +417,8 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
 		}
 
 		/* And the link to the submitted buffer */
+		link_target = etnaviv_cmdbuf_get_va(cmdbuf,
+					&gpu->mmu_context->cmdbuf_mapping);
 		CMD_LINK(buffer, link_dwords, link_target);
 
 		/* Update the link target to point to above instructions */
@@ -410,12 +455,14 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
 	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
 		       VIVS_GL_EVENT_FROM_PE);
 	CMD_WAIT(buffer);
-	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer) +
-			    buffer->user_size - 4);
+	CMD_LINK(buffer, 2,
+		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
+		 + buffer->user_size - 4);
 
 	if (drm_debug & DRM_UT_DRIVER)
 		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
-			return_target, etnaviv_cmdbuf_get_va(cmdbuf),
+			return_target,
+			etnaviv_cmdbuf_get_va(cmdbuf, &gpu->mmu_context->cmdbuf_mapping),
 			cmdbuf->vaddr);
 
 	if (drm_debug & DRM_UT_DRIVER) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
index a3c44f145c1d..9dc20d892c15 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
@@ -3,27 +3,26 @@
  * Copyright (C) 2017-2018 Etnaviv Project
  */
 
+#include <linux/dma-mapping.h>
+
 #include <drm/drm_mm.h>
 
 #include "etnaviv_cmdbuf.h"
+#include "etnaviv_gem.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_mmu.h"
 #include "etnaviv_perfmon.h"
 
-#define SUBALLOC_SIZE		SZ_256K
+#define SUBALLOC_SIZE		SZ_512K
 #define SUBALLOC_GRANULE	SZ_4K
 #define SUBALLOC_GRANULES	(SUBALLOC_SIZE / SUBALLOC_GRANULE)
 
 struct etnaviv_cmdbuf_suballoc {
 	/* suballocated dma buffer properties */
-	struct etnaviv_gpu *gpu;
+	struct device *dev;
 	void *vaddr;
 	dma_addr_t paddr;
 
-	/* GPU mapping */
-	u32 iova;
-	struct drm_mm_node vram_node; /* only used on MMUv2 */
-
 	/* allocation management */
 	struct mutex lock;
 	DECLARE_BITMAP(granule_map, SUBALLOC_GRANULES);
@@ -32,7 +31,7 @@ struct etnaviv_cmdbuf_suballoc {
 };
 
 struct etnaviv_cmdbuf_suballoc *
-etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu)
+etnaviv_cmdbuf_suballoc_new(struct device *dev)
 {
 	struct etnaviv_cmdbuf_suballoc *suballoc;
 	int ret;
@@ -41,36 +40,44 @@ etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu)
 	if (!suballoc)
 		return ERR_PTR(-ENOMEM);
 
-	suballoc->gpu = gpu;
+	suballoc->dev = dev;
 	mutex_init(&suballoc->lock);
 	init_waitqueue_head(&suballoc->free_event);
 
-	suballoc->vaddr = dma_alloc_wc(gpu->dev, SUBALLOC_SIZE,
+	BUILD_BUG_ON(ETNAVIV_SOFTPIN_START_ADDRESS < SUBALLOC_SIZE);
+	suballoc->vaddr = dma_alloc_wc(dev, SUBALLOC_SIZE,
 				       &suballoc->paddr, GFP_KERNEL);
-	if (!suballoc->vaddr)
+	if (!suballoc->vaddr) {
+		ret = -ENOMEM;
 		goto free_suballoc;
-
-	ret = etnaviv_iommu_get_suballoc_va(gpu, suballoc->paddr,
-					    &suballoc->vram_node, SUBALLOC_SIZE,
-					    &suballoc->iova);
-	if (ret)
-		goto free_dma;
+	}
 
 	return suballoc;
 
-free_dma:
-	dma_free_wc(gpu->dev, SUBALLOC_SIZE, suballoc->vaddr, suballoc->paddr);
 free_suballoc:
 	kfree(suballoc);
 
-	return NULL;
+	return ERR_PTR(ret);
+}
+
+int etnaviv_cmdbuf_suballoc_map(struct etnaviv_cmdbuf_suballoc *suballoc,
+				struct etnaviv_iommu_context *context,
+				struct etnaviv_vram_mapping *mapping,
+				u32 memory_base)
+{
+	return etnaviv_iommu_get_suballoc_va(context, mapping, memory_base,
+					     suballoc->paddr, SUBALLOC_SIZE);
+}
+
+void etnaviv_cmdbuf_suballoc_unmap(struct etnaviv_iommu_context *context,
+				   struct etnaviv_vram_mapping *mapping)
+{
+	etnaviv_iommu_put_suballoc_va(context, mapping);
 }
 
 void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc)
 {
-	etnaviv_iommu_put_suballoc_va(suballoc->gpu, &suballoc->vram_node,
-				      SUBALLOC_SIZE, suballoc->iova);
-	dma_free_wc(suballoc->gpu->dev, SUBALLOC_SIZE, suballoc->vaddr,
+	dma_free_wc(suballoc->dev, SUBALLOC_SIZE, suballoc->vaddr,
 		    suballoc->paddr);
 	kfree(suballoc);
 }
@@ -95,7 +102,7 @@ int etnaviv_cmdbuf_init(struct etnaviv_cmdbuf_suballoc *suballoc,
 						       suballoc->free_space,
 						       msecs_to_jiffies(10 * 1000));
 		if (!ret) {
-			dev_err(suballoc->gpu->dev,
+			dev_err(suballoc->dev,
 				"Timeout waiting for cmdbuf space\n");
 			return -ETIMEDOUT;
 		}
@@ -123,9 +130,10 @@ void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
 	wake_up_all(&suballoc->free_event);
 }
 
-u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf)
+u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf,
+			  struct etnaviv_vram_mapping *mapping)
 {
-	return buf->suballoc->iova + buf->suballoc_offset;
+	return mapping->iova + buf->suballoc_offset;
 }
 
 dma_addr_t etnaviv_cmdbuf_get_pa(struct etnaviv_cmdbuf *buf)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
index 4d5d1a77eb2a..ad6fd8eb0378 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
@@ -8,7 +8,9 @@
 
 #include <linux/types.h>
 
-struct etnaviv_gpu;
+struct device;
+struct etnaviv_iommu_context;
+struct etnaviv_vram_mapping;
 struct etnaviv_cmdbuf_suballoc;
 struct etnaviv_perfmon_request;
 
@@ -23,15 +25,22 @@ struct etnaviv_cmdbuf {
 };
 
 struct etnaviv_cmdbuf_suballoc *
-etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu);
+etnaviv_cmdbuf_suballoc_new(struct device *dev);
 void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc);
+int etnaviv_cmdbuf_suballoc_map(struct etnaviv_cmdbuf_suballoc *suballoc,
+				struct etnaviv_iommu_context *context,
+				struct etnaviv_vram_mapping *mapping,
+				u32 memory_base);
+void etnaviv_cmdbuf_suballoc_unmap(struct etnaviv_iommu_context *context,
+				   struct etnaviv_vram_mapping *mapping);
 
 
 int etnaviv_cmdbuf_init(struct etnaviv_cmdbuf_suballoc *suballoc,
 		struct etnaviv_cmdbuf *cmdbuf, u32 size);
 void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf);
 
-u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf);
+u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf,
+			  struct etnaviv_vram_mapping *mapping);
 dma_addr_t etnaviv_cmdbuf_get_pa(struct etnaviv_cmdbuf *buf);
 
 #endif /* __ETNAVIV_CMDBUF_H__ */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 08e033c1758d..1f9c01be40d7 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -4,8 +4,17 @@
  */
 
 #include <linux/component.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
 #include <linux/of_platform.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_ioctl.h>
 #include <drm/drm_of.h>
+#include <drm/drm_prime.h>
 
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
@@ -41,12 +50,19 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct etnaviv_drm_private *priv = dev->dev_private;
 	struct etnaviv_file_private *ctx;
-	int i;
+	int ret, i;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
+	ctx->mmu = etnaviv_iommu_context_init(priv->mmu_global,
+					      priv->cmdbuf_suballoc);
+	if (!ctx->mmu) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
 	for (i = 0; i < ETNA_MAX_PIPES; i++) {
 		struct etnaviv_gpu *gpu = priv->gpu[i];
 		struct drm_sched_rq *rq;
@@ -61,6 +77,10 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
 	file->driver_priv = ctx;
 
 	return 0;
+
+out_free:
+	kfree(ctx);
+	return ret;
 }
 
 static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file)
@@ -76,6 +96,8 @@ static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file)
 			drm_sched_entity_destroy(&ctx->sched_entity[i]);
 	}
 
+	etnaviv_iommu_context_put(ctx->mmu);
+
 	kfree(ctx);
 }
 
@@ -107,12 +129,29 @@ static int etnaviv_mm_show(struct drm_device *dev, struct seq_file *m)
 static int etnaviv_mmu_show(struct etnaviv_gpu *gpu, struct seq_file *m)
 {
 	struct drm_printer p = drm_seq_file_printer(m);
+	struct etnaviv_iommu_context *mmu_context;
 
 	seq_printf(m, "Active Objects (%s):\n", dev_name(gpu->dev));
 
-	mutex_lock(&gpu->mmu->lock);
-	drm_mm_print(&gpu->mmu->mm, &p);
-	mutex_unlock(&gpu->mmu->lock);
+	/*
+	 * Lock the GPU to avoid a MMU context switch just now and elevate
+	 * the refcount of the current context to avoid it disappearing from
+	 * under our feet.
+	 */
+	mutex_lock(&gpu->lock);
+	mmu_context = gpu->mmu_context;
+	if (mmu_context)
+		etnaviv_iommu_context_get(mmu_context);
+	mutex_unlock(&gpu->lock);
+
+	if (!mmu_context)
+		return 0;
+
+	mutex_lock(&mmu_context->lock);
+	drm_mm_print(&mmu_context->mm, &p);
+	mutex_unlock(&mmu_context->lock);
+
+	etnaviv_iommu_context_put(mmu_context);
 
 	return 0;
 }
@@ -486,7 +525,7 @@ static struct drm_driver etnaviv_drm_driver = {
 	.desc               = "etnaviv DRM",
 	.date               = "20151214",
 	.major              = 1,
-	.minor              = 2,
+	.minor              = 3,
 };
 
 /*
@@ -517,23 +556,32 @@ static int etnaviv_bind(struct device *dev)
 	INIT_LIST_HEAD(&priv->gem_list);
 	priv->num_gpus = 0;
 
+	priv->cmdbuf_suballoc = etnaviv_cmdbuf_suballoc_new(drm->dev);
+	if (IS_ERR(priv->cmdbuf_suballoc)) {
+		dev_err(drm->dev, "Failed to create cmdbuf suballocator\n");
+		ret = PTR_ERR(priv->cmdbuf_suballoc);
+		goto out_free_priv;
+	}
+
 	dev_set_drvdata(dev, drm);
 
 	ret = component_bind_all(dev, drm);
 	if (ret < 0)
-		goto out_bind;
+		goto out_destroy_suballoc;
 
 	load_gpu(drm);
 
 	ret = drm_dev_register(drm, 0);
 	if (ret)
-		goto out_register;
+		goto out_unbind;
 
 	return 0;
 
-out_register:
+out_unbind:
 	component_unbind_all(dev, drm);
-out_bind:
+out_destroy_suballoc:
+	etnaviv_cmdbuf_suballoc_destroy(priv->cmdbuf_suballoc);
+out_free_priv:
 	kfree(priv);
 out_put:
 	drm_dev_put(drm);
@@ -552,6 +600,8 @@ static void etnaviv_unbind(struct device *dev)
 
 	dev->dma_parms = NULL;
 
+	etnaviv_cmdbuf_suballoc_destroy(priv->cmdbuf_suballoc);
+
 	drm->dev_private = NULL;
 	kfree(priv);
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index 8798423705e1..32cfa5a48d42 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -6,21 +6,12 @@
 #ifndef __ETNAVIV_DRV_H__
 #define __ETNAVIV_DRV_H__
 
-#include <linux/kernel.h>
-#include <linux/clk.h>
-#include <linux/cpufreq.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/pm.h>
-#include <linux/pm_runtime.h>
-#include <linux/slab.h>
 #include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/sizes.h>
 #include <linux/time64.h>
 #include <linux/types.h>
-#include <linux/sizes.h>
-#include <linux/mm_types.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_gem.h>
 #include <drm/etnaviv_drm.h>
@@ -31,12 +22,12 @@ struct etnaviv_gpu;
 struct etnaviv_mmu;
 struct etnaviv_gem_object;
 struct etnaviv_gem_submit;
+struct etnaviv_iommu_global;
+
+#define ETNAVIV_SOFTPIN_START_ADDRESS	SZ_4M /* must be >= SUBALLOC_SIZE */
 
 struct etnaviv_file_private {
-	/*
-	 * When per-context address spaces are supported we'd keep track of
-	 * the context's page-tables here.
-	 */
+	struct etnaviv_iommu_context	*mmu;
 	struct drm_sched_entity		sched_entity[ETNA_MAX_PIPES];
 };
 
@@ -45,6 +36,9 @@ struct etnaviv_drm_private {
 	struct device_dma_parameters dma_parms;
 	struct etnaviv_gpu *gpu[ETNA_MAX_PIPES];
 
+	struct etnaviv_cmdbuf_suballoc *cmdbuf_suballoc;
+	struct etnaviv_iommu_global *mmu_global;
+
 	/* list of GEM objects: */
 	struct mutex gem_lock;
 	struct list_head gem_list;
@@ -76,10 +70,11 @@ int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
 	uintptr_t ptr, u32 size, u32 flags, u32 *handle);
 u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu);
 u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr);
-u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu);
+u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu, unsigned short id);
 void etnaviv_buffer_end(struct etnaviv_gpu *gpu);
 void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event);
 void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
+	struct etnaviv_iommu_context *mmu,
 	unsigned int event, struct etnaviv_cmdbuf *cmdbuf);
 void etnaviv_validate_init(void);
 bool etnaviv_cmd_validate_one(struct etnaviv_gpu *gpu,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 9a6f5b65488f..698db540972c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -4,6 +4,8 @@
  */
 
 #include <linux/devcoredump.h>
+#include <linux/moduleparam.h>
+
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_dump.h"
 #include "etnaviv_gem.h"
@@ -91,9 +93,9 @@ static void etnaviv_core_dump_registers(struct core_dump_iterator *iter,
 }
 
 static void etnaviv_core_dump_mmu(struct core_dump_iterator *iter,
-	struct etnaviv_gpu *gpu, size_t mmu_size)
+	struct etnaviv_iommu_context *mmu, size_t mmu_size)
 {
-	etnaviv_iommu_dump(gpu->mmu, iter->data);
+	etnaviv_iommu_dump(mmu, iter->data);
 
 	etnaviv_core_dump_header(iter, ETDUMP_BUF_MMU, iter->data + mmu_size);
 }
@@ -108,46 +110,35 @@ static void etnaviv_core_dump_mem(struct core_dump_iterator *iter, u32 type,
 	etnaviv_core_dump_header(iter, type, iter->data + size);
 }
 
-void etnaviv_core_dump(struct etnaviv_gpu *gpu)
+void etnaviv_core_dump(struct etnaviv_gem_submit *submit)
 {
+	struct etnaviv_gpu *gpu = submit->gpu;
 	struct core_dump_iterator iter;
-	struct etnaviv_vram_mapping *vram;
 	struct etnaviv_gem_object *obj;
-	struct etnaviv_gem_submit *submit;
-	struct drm_sched_job *s_job;
 	unsigned int n_obj, n_bomap_pages;
 	size_t file_size, mmu_size;
 	__le64 *bomap, *bomap_start;
+	int i;
 
 	/* Only catch the first event, or when manually re-armed */
 	if (!etnaviv_dump_core)
 		return;
 	etnaviv_dump_core = false;
 
-	mutex_lock(&gpu->mmu->lock);
+	mutex_lock(&gpu->mmu_context->lock);
 
-	mmu_size = etnaviv_iommu_dump_size(gpu->mmu);
+	mmu_size = etnaviv_iommu_dump_size(gpu->mmu_context);
 
-	/* We always dump registers, mmu, ring and end marker */
-	n_obj = 4;
+	/* We always dump registers, mmu, ring, hanging cmdbuf and end marker */
+	n_obj = 5;
 	n_bomap_pages = 0;
 	file_size = ARRAY_SIZE(etnaviv_dump_registers) *
 			sizeof(struct etnaviv_dump_registers) +
-		    mmu_size + gpu->buffer.size;
-
-	/* Add in the active command buffers */
-	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
-		submit = to_etnaviv_submit(s_job);
-		file_size += submit->cmdbuf.size;
-		n_obj++;
-	}
+		    mmu_size + gpu->buffer.size + submit->cmdbuf.size;
 
 	/* Add in the active buffer objects */
-	list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) {
-		if (!vram->use)
-			continue;
-
-		obj = vram->object;
+	for (i = 0; i < submit->nr_bos; i++) {
+		obj = submit->bos[i].obj;
 		file_size += obj->base.size;
 		n_bomap_pages += obj->base.size >> PAGE_SHIFT;
 		n_obj++;
@@ -166,7 +157,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 	iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
 			       PAGE_KERNEL);
 	if (!iter.start) {
-		mutex_unlock(&gpu->mmu->lock);
+		mutex_unlock(&gpu->mmu_context->lock);
 		dev_warn(gpu->dev, "failed to allocate devcoredump file\n");
 		return;
 	}
@@ -178,17 +169,16 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 	memset(iter.hdr, 0, iter.data - iter.start);
 
 	etnaviv_core_dump_registers(&iter, gpu);
-	etnaviv_core_dump_mmu(&iter, gpu, mmu_size);
+	etnaviv_core_dump_mmu(&iter, gpu->mmu_context, mmu_size);
 	etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer.vaddr,
 			      gpu->buffer.size,
-			      etnaviv_cmdbuf_get_va(&gpu->buffer));
+			      etnaviv_cmdbuf_get_va(&gpu->buffer,
+					&gpu->mmu_context->cmdbuf_mapping));
 
-	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
-		submit = to_etnaviv_submit(s_job);
-		etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
-				      submit->cmdbuf.vaddr, submit->cmdbuf.size,
-				      etnaviv_cmdbuf_get_va(&submit->cmdbuf));
-	}
+	etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
+			      submit->cmdbuf.vaddr, submit->cmdbuf.size,
+			      etnaviv_cmdbuf_get_va(&submit->cmdbuf,
+					&gpu->mmu_context->cmdbuf_mapping));
 
 	/* Reserve space for the bomap */
 	if (n_bomap_pages) {
@@ -201,14 +191,13 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 		bomap_start = bomap = NULL;
 	}
 
-	list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) {
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct etnaviv_vram_mapping *vram;
 		struct page **pages;
 		void *vaddr;
 
-		if (vram->use == 0)
-			continue;
-
-		obj = vram->object;
+		obj = submit->bos[i].obj;
+		vram = submit->bos[i].mapping;
 
 		mutex_lock(&obj->lock);
 		pages = etnaviv_gem_get_pages(obj);
@@ -232,7 +221,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 					 obj->base.size);
 	}
 
-	mutex_unlock(&gpu->mmu->lock);
+	mutex_unlock(&gpu->mmu_context->lock);
 
 	etnaviv_core_dump_header(&iter, ETDUMP_BUF_END, iter.data);
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.h b/drivers/gpu/drm/etnaviv/etnaviv_dump.h
index 2d916c2667ee..a125c46b895b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.h
@@ -35,8 +35,8 @@ struct etnaviv_dump_registers {
 };
 
 #ifdef __KERNEL__
-struct etnaviv_gpu;
-void etnaviv_core_dump(struct etnaviv_gpu *gpu);
+struct etnaviv_gem_submit;
+void etnaviv_core_dump(struct etnaviv_gem_submit *submit);
 #endif
 
 #endif
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 7d83e04ec36e..cb1faaac380a 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -3,10 +3,11 @@
  * Copyright (C) 2015-2018 Etnaviv Project
  */
 
-#include <linux/spinlock.h>
+#include <drm/drm_prime.h>
+#include <linux/dma-mapping.h>
 #include <linux/shmem_fs.h>
-#include <linux/sched/mm.h>
-#include <linux/sched/task.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
 
 #include "etnaviv_drv.h"
 #include "etnaviv_gem.h"
@@ -222,30 +223,18 @@ int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset)
 
 static struct etnaviv_vram_mapping *
 etnaviv_gem_get_vram_mapping(struct etnaviv_gem_object *obj,
-			     struct etnaviv_iommu *mmu)
+			     struct etnaviv_iommu_context *context)
 {
 	struct etnaviv_vram_mapping *mapping;
 
 	list_for_each_entry(mapping, &obj->vram_list, obj_node) {
-		if (mapping->mmu == mmu)
+		if (mapping->context == context)
 			return mapping;
 	}
 
 	return NULL;
 }
 
-void etnaviv_gem_mapping_reference(struct etnaviv_vram_mapping *mapping)
-{
-	struct etnaviv_gem_object *etnaviv_obj = mapping->object;
-
-	drm_gem_object_get(&etnaviv_obj->base);
-
-	mutex_lock(&etnaviv_obj->lock);
-	WARN_ON(mapping->use == 0);
-	mapping->use += 1;
-	mutex_unlock(&etnaviv_obj->lock);
-}
-
 void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping)
 {
 	struct etnaviv_gem_object *etnaviv_obj = mapping->object;
@@ -259,7 +248,8 @@ void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping)
 }
 
 struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
-	struct drm_gem_object *obj, struct etnaviv_gpu *gpu)
+	struct drm_gem_object *obj, struct etnaviv_iommu_context *mmu_context,
+	u64 va)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 	struct etnaviv_vram_mapping *mapping;
@@ -267,7 +257,7 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
 	int ret = 0;
 
 	mutex_lock(&etnaviv_obj->lock);
-	mapping = etnaviv_gem_get_vram_mapping(etnaviv_obj, gpu->mmu);
+	mapping = etnaviv_gem_get_vram_mapping(etnaviv_obj, mmu_context);
 	if (mapping) {
 		/*
 		 * Holding the object lock prevents the use count changing
@@ -276,12 +266,12 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
 		 * the MMU owns this mapping to close this race.
 		 */
 		if (mapping->use == 0) {
-			mutex_lock(&gpu->mmu->lock);
-			if (mapping->mmu == gpu->mmu)
+			mutex_lock(&mmu_context->lock);
+			if (mapping->context == mmu_context)
 				mapping->use += 1;
 			else
 				mapping = NULL;
-			mutex_unlock(&gpu->mmu->lock);
+			mutex_unlock(&mmu_context->lock);
 			if (mapping)
 				goto out;
 		} else {
@@ -314,15 +304,19 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
 		list_del(&mapping->obj_node);
 	}
 
-	mapping->mmu = gpu->mmu;
+	etnaviv_iommu_context_get(mmu_context);
+	mapping->context = mmu_context;
 	mapping->use = 1;
 
-	ret = etnaviv_iommu_map_gem(gpu->mmu, etnaviv_obj, gpu->memory_base,
-				    mapping);
-	if (ret < 0)
+	ret = etnaviv_iommu_map_gem(mmu_context, etnaviv_obj,
+				    mmu_context->global->memory_base,
+				    mapping, va);
+	if (ret < 0) {
+		etnaviv_iommu_context_put(mmu_context);
 		kfree(mapping);
-	else
+	} else {
 		list_add_tail(&mapping->obj_node, &etnaviv_obj->vram_list);
+	}
 
 out:
 	mutex_unlock(&etnaviv_obj->lock);
@@ -536,12 +530,14 @@ void etnaviv_gem_free_object(struct drm_gem_object *obj)
 
 	list_for_each_entry_safe(mapping, tmp, &etnaviv_obj->vram_list,
 				 obj_node) {
-		struct etnaviv_iommu *mmu = mapping->mmu;
+		struct etnaviv_iommu_context *context = mapping->context;
 
 		WARN_ON(mapping->use);
 
-		if (mmu)
-			etnaviv_iommu_unmap_gem(mmu, mapping);
+		if (context) {
+			etnaviv_iommu_unmap_gem(context, mapping);
+			etnaviv_iommu_context_put(context);
+		}
 
 		list_del(&mapping->obj_node);
 		kfree(mapping);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index 28379e4df253..d6270acce619 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -25,7 +25,7 @@ struct etnaviv_vram_mapping {
 	struct list_head scan_node;
 	struct list_head mmu_node;
 	struct etnaviv_gem_object *object;
-	struct etnaviv_iommu *mmu;
+	struct etnaviv_iommu_context *context;
 	struct drm_mm_node vram_node;
 	unsigned int use;
 	u32 iova;
@@ -77,6 +77,7 @@ static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
 
 struct etnaviv_gem_submit_bo {
 	u32 flags;
+	u64 va;
 	struct etnaviv_gem_object *obj;
 	struct etnaviv_vram_mapping *mapping;
 	struct dma_fence *excl;
@@ -93,6 +94,7 @@ struct etnaviv_gem_submit {
 	struct kref refcount;
 	struct etnaviv_file_private *ctx;
 	struct etnaviv_gpu *gpu;
+	struct etnaviv_iommu_context *mmu_context, *prev_mmu_context;
 	struct dma_fence *out_fence, *in_fence;
 	int out_fence_id;
 	struct list_head node; /* GPU active submit list */
@@ -118,8 +120,8 @@ struct page **etnaviv_gem_get_pages(struct etnaviv_gem_object *obj);
 void etnaviv_gem_put_pages(struct etnaviv_gem_object *obj);
 
 struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
-	struct drm_gem_object *obj, struct etnaviv_gpu *gpu);
-void etnaviv_gem_mapping_reference(struct etnaviv_vram_mapping *mapping);
+	struct drm_gem_object *obj, struct etnaviv_iommu_context *mmu_context,
+	u64 va);
 void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping);
 
 #endif /* __ETNAVIV_GEM_H__ */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
index a05292e8ed6f..f24dd21c2363 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -3,7 +3,9 @@
  * Copyright (C) 2014-2018 Etnaviv Project
  */
 
+#include <drm/drm_prime.h>
 #include <linux/dma-buf.h>
+
 #include "etnaviv_drv.h"
 #include "etnaviv_gem.h"
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 998c96b40d8a..aa3e4c3b063a 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -3,9 +3,15 @@
  * Copyright (C) 2015 Etnaviv Project
  */
 
+#include <drm/drm_file.h>
 #include <linux/dma-fence-array.h>
+#include <linux/file.h>
+#include <linux/pm_runtime.h>
 #include <linux/dma-resv.h>
 #include <linux/sync_file.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 #include "etnaviv_gpu.h"
@@ -66,6 +72,14 @@ static int submit_lookup_objects(struct etnaviv_gem_submit *submit,
 		}
 
 		submit->bos[i].flags = bo->flags;
+		if (submit->flags & ETNA_SUBMIT_SOFTPIN) {
+			if (bo->presumed < ETNAVIV_SOFTPIN_START_ADDRESS) {
+				DRM_ERROR("invalid softpin address\n");
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			submit->bos[i].va = bo->presumed;
+		}
 
 		/* normally use drm_gem_object_lookup(), but for bulk lookup
 		 * all under single table_lock just hit object_idr directly:
@@ -218,11 +232,17 @@ static int submit_pin_objects(struct etnaviv_gem_submit *submit)
 		struct etnaviv_vram_mapping *mapping;
 
 		mapping = etnaviv_gem_mapping_get(&etnaviv_obj->base,
-						  submit->gpu);
+						  submit->mmu_context,
+						  submit->bos[i].va);
 		if (IS_ERR(mapping)) {
 			ret = PTR_ERR(mapping);
 			break;
 		}
+
+		if ((submit->flags & ETNA_SUBMIT_SOFTPIN) &&
+		     submit->bos[i].va != mapping->iova)
+			return -EINVAL;
+
 		atomic_inc(&etnaviv_obj->gpu_active);
 
 		submit->bos[i].flags |= BO_PINNED;
@@ -255,6 +275,10 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream,
 	u32 *ptr = stream;
 	int ret;
 
+	/* Submits using softpin don't blend with relocs */
+	if ((submit->flags & ETNA_SUBMIT_SOFTPIN) && nr_relocs != 0)
+		return -EINVAL;
+
 	for (i = 0; i < nr_relocs; i++) {
 		const struct drm_etnaviv_gem_submit_reloc *r = relocs + i;
 		struct etnaviv_gem_submit_bo *bo;
@@ -355,6 +379,12 @@ static void submit_cleanup(struct kref *kref)
 	if (submit->cmdbuf.suballoc)
 		etnaviv_cmdbuf_free(&submit->cmdbuf);
 
+	if (submit->mmu_context)
+		etnaviv_iommu_context_put(submit->mmu_context);
+
+	if (submit->prev_mmu_context)
+		etnaviv_iommu_context_put(submit->prev_mmu_context);
+
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
 
@@ -433,6 +463,12 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	if ((args->flags & ETNA_SUBMIT_SOFTPIN) &&
+	    priv->mmu_global->version != ETNAVIV_IOMMU_V2) {
+		DRM_ERROR("softpin requested on incompatible MMU\n");
+		return -EINVAL;
+	}
+
 	/*
 	 * Copy the command submission and bo array to kernel space in
 	 * one go, and do this outside of any locks.
@@ -490,12 +526,14 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		goto err_submit_ww_acquire;
 	}
 
-	ret = etnaviv_cmdbuf_init(gpu->cmdbuf_suballoc, &submit->cmdbuf,
+	ret = etnaviv_cmdbuf_init(priv->cmdbuf_suballoc, &submit->cmdbuf,
 				  ALIGN(args->stream_size, 8) + 8);
 	if (ret)
 		goto err_submit_objects;
 
 	submit->ctx = file->driver_priv;
+	etnaviv_iommu_context_get(submit->ctx->mmu);
+	submit->mmu_context = submit->ctx->mmu;
 	submit->exec_state = args->exec_state;
 	submit->flags = args->flags;
 
@@ -503,7 +541,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (ret)
 		goto err_submit_objects;
 
-	if (!etnaviv_cmd_validate_one(gpu, stream, args->stream_size / 4,
+	if ((priv->mmu_global->version != ETNAVIV_IOMMU_V2) &&
+	    !etnaviv_cmd_validate_one(gpu, stream, args->stream_size / 4,
 				      relocs, args->nr_relocs)) {
 		ret = -EINVAL;
 		goto err_submit_objects;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 5418a1a87b2c..d47d1a8e0219 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -5,9 +5,13 @@
 
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/delay.h>
 #include <linux/dma-fence.h>
-#include <linux/moduleparam.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
 #include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/thermal.h>
 
@@ -38,6 +42,8 @@ static const struct platform_device_id gpu_ids[] = {
 
 int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
 {
+	struct etnaviv_drm_private *priv = gpu->drm->dev_private;
+
 	switch (param) {
 	case ETNAVIV_PARAM_GPU_MODEL:
 		*value = gpu->identity.model;
@@ -143,6 +149,13 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
 		*value = gpu->identity.varyings_count;
 		break;
 
+	case ETNAVIV_PARAM_SOFTPIN_START_ADDR:
+		if (priv->mmu_global->version == ETNAVIV_IOMMU_V2)
+			*value = ETNAVIV_SOFTPIN_START_ADDRESS;
+		else
+			*value = ~0ULL;
+		break;
+
 	default:
 		DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
 		return -EINVAL;
@@ -596,6 +609,21 @@ void etnaviv_gpu_start_fe(struct etnaviv_gpu *gpu, u32 address, u16 prefetch)
 	}
 }
 
+static void etnaviv_gpu_start_fe_idleloop(struct etnaviv_gpu *gpu)
+{
+	u32 address = etnaviv_cmdbuf_get_va(&gpu->buffer,
+				&gpu->mmu_context->cmdbuf_mapping);
+	u16 prefetch;
+
+	/* setup the MMU */
+	etnaviv_iommu_restore(gpu, gpu->mmu_context);
+
+	/* Start command processor */
+	prefetch = etnaviv_buffer_init(gpu);
+
+	etnaviv_gpu_start_fe(gpu, address, prefetch);
+}
+
 static void etnaviv_gpu_setup_pulse_eater(struct etnaviv_gpu *gpu)
 {
 	/*
@@ -629,8 +657,6 @@ static void etnaviv_gpu_setup_pulse_eater(struct etnaviv_gpu *gpu)
 
 static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 {
-	u16 prefetch;
-
 	if ((etnaviv_is_model_rev(gpu, GC320, 0x5007) ||
 	     etnaviv_is_model_rev(gpu, GC320, 0x5220)) &&
 	    gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400) {
@@ -676,19 +702,12 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 	/* setup the pulse eater */
 	etnaviv_gpu_setup_pulse_eater(gpu);
 
-	/* setup the MMU */
-	etnaviv_iommu_restore(gpu);
-
-	/* Start command processor */
-	prefetch = etnaviv_buffer_init(gpu);
-
 	gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U);
-	etnaviv_gpu_start_fe(gpu, etnaviv_cmdbuf_get_va(&gpu->buffer),
-			     prefetch);
 }
 
 int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 {
+	struct etnaviv_drm_private *priv = gpu->drm->dev_private;
 	int ret, i;
 
 	ret = pm_runtime_get_sync(gpu->dev);
@@ -713,28 +732,6 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 		goto fail;
 	}
 
-	/*
-	 * Set the GPU linear window to be at the end of the DMA window, where
-	 * the CMA area is likely to reside. This ensures that we are able to
-	 * map the command buffers while having the linear window overlap as
-	 * much RAM as possible, so we can optimize mappings for other buffers.
-	 *
-	 * For 3D cores only do this if MC2.0 is present, as with MC1.0 it leads
-	 * to different views of the memory on the individual engines.
-	 */
-	if (!(gpu->identity.features & chipFeatures_PIPE_3D) ||
-	    (gpu->identity.minor_features0 & chipMinorFeatures0_MC20)) {
-		u32 dma_mask = (u32)dma_get_required_mask(gpu->dev);
-		if (dma_mask < PHYS_OFFSET + SZ_2G)
-			gpu->memory_base = PHYS_OFFSET;
-		else
-			gpu->memory_base = dma_mask - SZ_2G + 1;
-	} else if (PHYS_OFFSET >= SZ_2G) {
-		dev_info(gpu->dev, "Need to move linear window on MC1.0, disabling TS\n");
-		gpu->memory_base = PHYS_OFFSET;
-		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
-	}
-
 	/*
 	 * On cores with security features supported, we claim control over the
 	 * security states.
@@ -749,34 +746,38 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 		goto fail;
 	}
 
-	gpu->mmu = etnaviv_iommu_new(gpu);
-	if (IS_ERR(gpu->mmu)) {
-		dev_err(gpu->dev, "Failed to instantiate GPU IOMMU\n");
-		ret = PTR_ERR(gpu->mmu);
+	ret = etnaviv_iommu_global_init(gpu);
+	if (ret)
 		goto fail;
-	}
 
-	gpu->cmdbuf_suballoc = etnaviv_cmdbuf_suballoc_new(gpu);
-	if (IS_ERR(gpu->cmdbuf_suballoc)) {
-		dev_err(gpu->dev, "Failed to create cmdbuf suballocator\n");
-		ret = PTR_ERR(gpu->cmdbuf_suballoc);
-		goto destroy_iommu;
+	/*
+	 * Set the GPU linear window to be at the end of the DMA window, where
+	 * the CMA area is likely to reside. This ensures that we are able to
+	 * map the command buffers while having the linear window overlap as
+	 * much RAM as possible, so we can optimize mappings for other buffers.
+	 *
+	 * For 3D cores only do this if MC2.0 is present, as with MC1.0 it leads
+	 * to different views of the memory on the individual engines.
+	 */
+	if (!(gpu->identity.features & chipFeatures_PIPE_3D) ||
+	    (gpu->identity.minor_features0 & chipMinorFeatures0_MC20)) {
+		u32 dma_mask = (u32)dma_get_required_mask(gpu->dev);
+		if (dma_mask < PHYS_OFFSET + SZ_2G)
+			priv->mmu_global->memory_base = PHYS_OFFSET;
+		else
+			priv->mmu_global->memory_base = dma_mask - SZ_2G + 1;
+	} else if (PHYS_OFFSET >= SZ_2G) {
+		dev_info(gpu->dev, "Need to move linear window on MC1.0, disabling TS\n");
+		priv->mmu_global->memory_base = PHYS_OFFSET;
+		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
 	}
 
 	/* Create buffer: */
-	ret = etnaviv_cmdbuf_init(gpu->cmdbuf_suballoc, &gpu->buffer,
+	ret = etnaviv_cmdbuf_init(priv->cmdbuf_suballoc, &gpu->buffer,
 				  PAGE_SIZE);
 	if (ret) {
 		dev_err(gpu->dev, "could not create command buffer\n");
-		goto destroy_suballoc;
-	}
-
-	if (gpu->mmu->version == ETNAVIV_IOMMU_V1 &&
-	    etnaviv_cmdbuf_get_va(&gpu->buffer) > 0x80000000) {
-		ret = -EINVAL;
-		dev_err(gpu->dev,
-			"command buffer outside valid memory window\n");
-		goto free_buffer;
+		goto fail;
 	}
 
 	/* Setup event management */
@@ -795,17 +796,10 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	pm_runtime_mark_last_busy(gpu->dev);
 	pm_runtime_put_autosuspend(gpu->dev);
 
+	gpu->initialized = true;
+
 	return 0;
 
-free_buffer:
-	etnaviv_cmdbuf_free(&gpu->buffer);
-	gpu->buffer.suballoc = NULL;
-destroy_suballoc:
-	etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc);
-	gpu->cmdbuf_suballoc = NULL;
-destroy_iommu:
-	etnaviv_iommu_destroy(gpu->mmu);
-	gpu->mmu = NULL;
 fail:
 	pm_runtime_mark_last_busy(gpu->dev);
 	pm_runtime_put_autosuspend(gpu->dev);
@@ -999,6 +993,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
 
 	etnaviv_gpu_hw_init(gpu);
 	gpu->exec_state = -1;
+	gpu->mmu_context = NULL;
 
 	mutex_unlock(&gpu->lock);
 	pm_runtime_mark_last_busy(gpu->dev);
@@ -1305,6 +1300,15 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 		goto out_unlock;
 	}
 
+	if (!gpu->mmu_context) {
+		etnaviv_iommu_context_get(submit->mmu_context);
+		gpu->mmu_context = submit->mmu_context;
+		etnaviv_gpu_start_fe_idleloop(gpu);
+	} else {
+		etnaviv_iommu_context_get(gpu->mmu_context);
+		submit->prev_mmu_context = gpu->mmu_context;
+	}
+
 	if (submit->nr_pmrs) {
 		gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre;
 		kref_get(&submit->refcount);
@@ -1314,8 +1318,8 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 
 	gpu->event[event[0]].fence = gpu_fence;
 	submit->cmdbuf.user_size = submit->cmdbuf.size - 8;
-	etnaviv_buffer_queue(gpu, submit->exec_state, event[0],
-			     &submit->cmdbuf);
+	etnaviv_buffer_queue(gpu, submit->exec_state, submit->mmu_context,
+			     event[0], &submit->cmdbuf);
 
 	if (submit->nr_pmrs) {
 		gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post;
@@ -1517,7 +1521,7 @@ int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms)
 
 static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
 {
-	if (gpu->buffer.suballoc) {
+	if (gpu->initialized && gpu->mmu_context) {
 		/* Replace the last WAIT with END */
 		mutex_lock(&gpu->lock);
 		etnaviv_buffer_end(gpu);
@@ -1529,8 +1533,13 @@ static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
 		 * we fail, just warn and continue.
 		 */
 		etnaviv_gpu_wait_idle(gpu, 100);
+
+		etnaviv_iommu_context_put(gpu->mmu_context);
+		gpu->mmu_context = NULL;
 	}
 
+	gpu->exec_state = -1;
+
 	return etnaviv_gpu_clk_disable(gpu);
 }
 
@@ -1546,8 +1555,6 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
 	etnaviv_gpu_update_clock(gpu);
 	etnaviv_gpu_hw_init(gpu);
 
-	gpu->exec_state = -1;
-
 	mutex_unlock(&gpu->lock);
 
 	return 0;
@@ -1676,17 +1683,10 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
 	etnaviv_gpu_hw_suspend(gpu);
 #endif
 
-	if (gpu->buffer.suballoc)
+	if (gpu->initialized) {
 		etnaviv_cmdbuf_free(&gpu->buffer);
-
-	if (gpu->cmdbuf_suballoc) {
-		etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc);
-		gpu->cmdbuf_suballoc = NULL;
-	}
-
-	if (gpu->mmu) {
-		etnaviv_iommu_destroy(gpu->mmu);
-		gpu->mmu = NULL;
+		etnaviv_iommu_global_fini(gpu);
+		gpu->initialized = false;
 	}
 
 	gpu->drm = NULL;
@@ -1714,7 +1714,6 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct etnaviv_gpu *gpu;
-	struct resource *res;
 	int err;
 
 	gpu = devm_kzalloc(dev, sizeof(*gpu), GFP_KERNEL);
@@ -1726,8 +1725,7 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev)
 	mutex_init(&gpu->fence_lock);
 
 	/* Map registers: */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	gpu->mmio = devm_ioremap_resource(&pdev->dev, res);
+	gpu->mmio = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(gpu->mmio))
 		return PTR_ERR(gpu->mmio);
 
@@ -1825,7 +1823,7 @@ static int etnaviv_gpu_rpm_resume(struct device *dev)
 		return ret;
 
 	/* Re-initialise the basic hardware state */
-	if (gpu->drm && gpu->buffer.suballoc) {
+	if (gpu->drm && gpu->initialized) {
 		ret = etnaviv_gpu_hw_resume(gpu);
 		if (ret) {
 			etnaviv_gpu_clk_disable(gpu);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 9bcf151f706b..8f9bd4edc96a 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -7,6 +7,8 @@
 #define __ETNAVIV_GPU_H__
 
 #include "etnaviv_cmdbuf.h"
+#include "etnaviv_gem.h"
+#include "etnaviv_mmu.h"
 #include "etnaviv_drv.h"
 
 struct etnaviv_gem_submit;
@@ -84,7 +86,6 @@ struct etnaviv_event {
 };
 
 struct etnaviv_cmdbuf_suballoc;
-struct etnaviv_cmdbuf;
 struct regulator;
 struct clk;
 
@@ -99,14 +100,12 @@ struct etnaviv_gpu {
 	enum etnaviv_sec_mode sec_mode;
 	struct workqueue_struct *wq;
 	struct drm_gpu_scheduler sched;
+	bool initialized;
 
 	/* 'ring'-buffer: */
 	struct etnaviv_cmdbuf buffer;
 	int exec_state;
 
-	/* bus base address of memory  */
-	u32 memory_base;
-
 	/* event management: */
 	DECLARE_BITMAP(event_bitmap, ETNA_NR_EVENTS);
 	struct etnaviv_event event[ETNA_NR_EVENTS];
@@ -134,8 +133,8 @@ struct etnaviv_gpu {
 	void __iomem *mmio;
 	int irq;
 
-	struct etnaviv_iommu *mmu;
-	struct etnaviv_cmdbuf_suballoc *cmdbuf_suballoc;
+	struct etnaviv_iommu_context *mmu_context;
+	unsigned int flush_seq;
 
 	/* Power Control: */
 	struct clk *clk_bus;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
index b163bdbcb880..aac8dbf3ea56 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
@@ -3,15 +3,14 @@
  * Copyright (C) 2014-2018 Etnaviv Project
  */
 
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
-#include <linux/dma-mapping.h>
-#include <linux/bitops.h>
 
 #include "etnaviv_gpu.h"
 #include "etnaviv_mmu.h"
-#include "etnaviv_iommu.h"
 #include "state_hi.xml.h"
 
 #define PT_SIZE		SZ_2M
@@ -19,124 +18,89 @@
 
 #define GPU_MEM_START	0x80000000
 
-struct etnaviv_iommuv1_domain {
-	struct etnaviv_iommu_domain base;
+struct etnaviv_iommuv1_context {
+	struct etnaviv_iommu_context base;
 	u32 *pgtable_cpu;
 	dma_addr_t pgtable_dma;
 };
 
-static struct etnaviv_iommuv1_domain *
-to_etnaviv_domain(struct etnaviv_iommu_domain *domain)
+static struct etnaviv_iommuv1_context *
+to_v1_context(struct etnaviv_iommu_context *context)
 {
-	return container_of(domain, struct etnaviv_iommuv1_domain, base);
+	return container_of(context, struct etnaviv_iommuv1_context, base);
 }
 
-static int __etnaviv_iommu_init(struct etnaviv_iommuv1_domain *etnaviv_domain)
+static void etnaviv_iommuv1_free(struct etnaviv_iommu_context *context)
 {
-	u32 *p;
-	int i;
+	struct etnaviv_iommuv1_context *v1_context = to_v1_context(context);
 
-	etnaviv_domain->base.bad_page_cpu =
-			dma_alloc_wc(etnaviv_domain->base.dev, SZ_4K,
-				     &etnaviv_domain->base.bad_page_dma,
-				     GFP_KERNEL);
-	if (!etnaviv_domain->base.bad_page_cpu)
-		return -ENOMEM;
+	drm_mm_takedown(&context->mm);
 
-	p = etnaviv_domain->base.bad_page_cpu;
-	for (i = 0; i < SZ_4K / 4; i++)
-		*p++ = 0xdead55aa;
+	dma_free_wc(context->global->dev, PT_SIZE, v1_context->pgtable_cpu,
+		    v1_context->pgtable_dma);
 
-	etnaviv_domain->pgtable_cpu = dma_alloc_wc(etnaviv_domain->base.dev,
-						   PT_SIZE,
-						   &etnaviv_domain->pgtable_dma,
-						   GFP_KERNEL);
-	if (!etnaviv_domain->pgtable_cpu) {
-		dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-			    etnaviv_domain->base.bad_page_cpu,
-			    etnaviv_domain->base.bad_page_dma);
-		return -ENOMEM;
-	}
+	context->global->v1.shared_context = NULL;
 
-	memset32(etnaviv_domain->pgtable_cpu, etnaviv_domain->base.bad_page_dma,
-		 PT_ENTRIES);
-
-	return 0;
+	kfree(v1_context);
 }
 
-static void etnaviv_iommuv1_domain_free(struct etnaviv_iommu_domain *domain)
-{
-	struct etnaviv_iommuv1_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
-
-	dma_free_wc(etnaviv_domain->base.dev, PT_SIZE,
-		    etnaviv_domain->pgtable_cpu, etnaviv_domain->pgtable_dma);
-
-	dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-		    etnaviv_domain->base.bad_page_cpu,
-		    etnaviv_domain->base.bad_page_dma);
-
-	kfree(etnaviv_domain);
-}
-
-static int etnaviv_iommuv1_map(struct etnaviv_iommu_domain *domain,
+static int etnaviv_iommuv1_map(struct etnaviv_iommu_context *context,
 			       unsigned long iova, phys_addr_t paddr,
 			       size_t size, int prot)
 {
-	struct etnaviv_iommuv1_domain *etnaviv_domain = to_etnaviv_domain(domain);
+	struct etnaviv_iommuv1_context *v1_context = to_v1_context(context);
 	unsigned int index = (iova - GPU_MEM_START) / SZ_4K;
 
 	if (size != SZ_4K)
 		return -EINVAL;
 
-	etnaviv_domain->pgtable_cpu[index] = paddr;
+	v1_context->pgtable_cpu[index] = paddr;
 
 	return 0;
 }
 
-static size_t etnaviv_iommuv1_unmap(struct etnaviv_iommu_domain *domain,
+static size_t etnaviv_iommuv1_unmap(struct etnaviv_iommu_context *context,
 	unsigned long iova, size_t size)
 {
-	struct etnaviv_iommuv1_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
+	struct etnaviv_iommuv1_context *v1_context = to_v1_context(context);
 	unsigned int index = (iova - GPU_MEM_START) / SZ_4K;
 
 	if (size != SZ_4K)
 		return -EINVAL;
 
-	etnaviv_domain->pgtable_cpu[index] = etnaviv_domain->base.bad_page_dma;
+	v1_context->pgtable_cpu[index] = context->global->bad_page_dma;
 
 	return SZ_4K;
 }
 
-static size_t etnaviv_iommuv1_dump_size(struct etnaviv_iommu_domain *domain)
+static size_t etnaviv_iommuv1_dump_size(struct etnaviv_iommu_context *context)
 {
 	return PT_SIZE;
 }
 
-static void etnaviv_iommuv1_dump(struct etnaviv_iommu_domain *domain, void *buf)
+static void etnaviv_iommuv1_dump(struct etnaviv_iommu_context *context,
+				 void *buf)
 {
-	struct etnaviv_iommuv1_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
+	struct etnaviv_iommuv1_context *v1_context = to_v1_context(context);
 
-	memcpy(buf, etnaviv_domain->pgtable_cpu, PT_SIZE);
+	memcpy(buf, v1_context->pgtable_cpu, PT_SIZE);
 }
 
-void etnaviv_iommuv1_restore(struct etnaviv_gpu *gpu)
+static void etnaviv_iommuv1_restore(struct etnaviv_gpu *gpu,
+			     struct etnaviv_iommu_context *context)
 {
-	struct etnaviv_iommuv1_domain *etnaviv_domain =
-			to_etnaviv_domain(gpu->mmu->domain);
+	struct etnaviv_iommuv1_context *v1_context = to_v1_context(context);
 	u32 pgtable;
 
 	/* set base addresses */
-	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, gpu->memory_base);
-	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, gpu->memory_base);
-	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_TX, gpu->memory_base);
-	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PEZ, gpu->memory_base);
-	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PE, gpu->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, context->global->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, context->global->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_TX, context->global->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PEZ, context->global->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PE, context->global->memory_base);
 
 	/* set page table address in MC */
-	pgtable = (u32)etnaviv_domain->pgtable_dma;
+	pgtable = (u32)v1_context->pgtable_dma;
 
 	gpu_write(gpu, VIVS_MC_MMU_FE_PAGE_TABLE, pgtable);
 	gpu_write(gpu, VIVS_MC_MMU_TX_PAGE_TABLE, pgtable);
@@ -145,39 +109,62 @@ void etnaviv_iommuv1_restore(struct etnaviv_gpu *gpu)
 	gpu_write(gpu, VIVS_MC_MMU_RA_PAGE_TABLE, pgtable);
 }
 
-static const struct etnaviv_iommu_domain_ops etnaviv_iommuv1_ops = {
-	.free = etnaviv_iommuv1_domain_free,
+
+const struct etnaviv_iommu_ops etnaviv_iommuv1_ops = {
+	.free = etnaviv_iommuv1_free,
 	.map = etnaviv_iommuv1_map,
 	.unmap = etnaviv_iommuv1_unmap,
 	.dump_size = etnaviv_iommuv1_dump_size,
 	.dump = etnaviv_iommuv1_dump,
+	.restore = etnaviv_iommuv1_restore,
 };
 
-struct etnaviv_iommu_domain *
-etnaviv_iommuv1_domain_alloc(struct etnaviv_gpu *gpu)
+struct etnaviv_iommu_context *
+etnaviv_iommuv1_context_alloc(struct etnaviv_iommu_global *global)
 {
-	struct etnaviv_iommuv1_domain *etnaviv_domain;
-	struct etnaviv_iommu_domain *domain;
-	int ret;
+	struct etnaviv_iommuv1_context *v1_context;
+	struct etnaviv_iommu_context *context;
 
-	etnaviv_domain = kzalloc(sizeof(*etnaviv_domain), GFP_KERNEL);
-	if (!etnaviv_domain)
+	mutex_lock(&global->lock);
+
+	/*
+	 * MMUv1 does not support switching between different contexts without
+	 * a stop the world operation, so we only support a single shared
+	 * context with this version.
+	 */
+	if (global->v1.shared_context) {
+		context = global->v1.shared_context;
+		etnaviv_iommu_context_get(context);
+		mutex_unlock(&global->lock);
+		return context;
+	}
+
+	v1_context = kzalloc(sizeof(*v1_context), GFP_KERNEL);
+	if (!v1_context)
 		return NULL;
 
-	domain = &etnaviv_domain->base;
-
-	domain->dev = gpu->dev;
-	domain->base = GPU_MEM_START;
-	domain->size = PT_ENTRIES * SZ_4K;
-	domain->ops = &etnaviv_iommuv1_ops;
-
-	ret = __etnaviv_iommu_init(etnaviv_domain);
-	if (ret)
+	v1_context->pgtable_cpu = dma_alloc_wc(global->dev, PT_SIZE,
+					       &v1_context->pgtable_dma,
+					       GFP_KERNEL);
+	if (!v1_context->pgtable_cpu)
 		goto out_free;
 
-	return &etnaviv_domain->base;
+	memset32(v1_context->pgtable_cpu, global->bad_page_dma, PT_ENTRIES);
+
+	context = &v1_context->base;
+	context->global = global;
+	kref_init(&context->refcount);
+	mutex_init(&context->lock);
+	INIT_LIST_HEAD(&context->mappings);
+	drm_mm_init(&context->mm, GPU_MEM_START, PT_ENTRIES * SZ_4K);
+	context->global->v1.shared_context = context;
+
+	mutex_unlock(&global->lock);
+
+	return context;
 
 out_free:
-	kfree(etnaviv_domain);
+	mutex_unlock(&global->lock);
+	kfree(v1_context);
 	return NULL;
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.h b/drivers/gpu/drm/etnaviv/etnaviv_iommu.h
deleted file mode 100644
index b279404ce91a..000000000000
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2014-2018 Etnaviv Project
- */
-
-#ifndef __ETNAVIV_IOMMU_H__
-#define __ETNAVIV_IOMMU_H__
-
-struct etnaviv_gpu;
-struct etnaviv_iommu_domain;
-
-struct etnaviv_iommu_domain *
-etnaviv_iommuv1_domain_alloc(struct etnaviv_gpu *gpu);
-void etnaviv_iommuv1_restore(struct etnaviv_gpu *gpu);
-
-struct etnaviv_iommu_domain *
-etnaviv_iommuv2_domain_alloc(struct etnaviv_gpu *gpu);
-void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu);
-
-#endif /* __ETNAVIV_IOMMU_H__ */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
index f794e04be9e6..043111a1d60c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
@@ -3,16 +3,16 @@
  * Copyright (C) 2016-2018 Etnaviv Project
  */
 
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
-#include <linux/dma-mapping.h>
-#include <linux/bitops.h>
+#include <linux/vmalloc.h>
 
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_mmu.h"
-#include "etnaviv_iommu.h"
 #include "state.xml.h"
 #include "state_hi.xml.h"
 
@@ -27,11 +27,9 @@
 
 #define MMUv2_MAX_STLB_ENTRIES		1024
 
-struct etnaviv_iommuv2_domain {
-	struct etnaviv_iommu_domain base;
-	/* P(age) T(able) A(rray) */
-	u64 *pta_cpu;
-	dma_addr_t pta_dma;
+struct etnaviv_iommuv2_context {
+	struct etnaviv_iommu_context base;
+	unsigned short id;
 	/* M(aster) TLB aka first level pagetable */
 	u32 *mtlb_cpu;
 	dma_addr_t mtlb_dma;
@@ -40,41 +38,62 @@ struct etnaviv_iommuv2_domain {
 	dma_addr_t stlb_dma[MMUv2_MAX_STLB_ENTRIES];
 };
 
-static struct etnaviv_iommuv2_domain *
-to_etnaviv_domain(struct etnaviv_iommu_domain *domain)
+static struct etnaviv_iommuv2_context *
+to_v2_context(struct etnaviv_iommu_context *context)
 {
-	return container_of(domain, struct etnaviv_iommuv2_domain, base);
+	return container_of(context, struct etnaviv_iommuv2_context, base);
 }
 
+static void etnaviv_iommuv2_free(struct etnaviv_iommu_context *context)
+{
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
+	int i;
+
+	drm_mm_takedown(&context->mm);
+
+	for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) {
+		if (v2_context->stlb_cpu[i])
+			dma_free_wc(context->global->dev, SZ_4K,
+				    v2_context->stlb_cpu[i],
+				    v2_context->stlb_dma[i]);
+	}
+
+	dma_free_wc(context->global->dev, SZ_4K, v2_context->mtlb_cpu,
+		    v2_context->mtlb_dma);
+
+	clear_bit(v2_context->id, context->global->v2.pta_alloc);
+
+	vfree(v2_context);
+}
 static int
-etnaviv_iommuv2_ensure_stlb(struct etnaviv_iommuv2_domain *etnaviv_domain,
+etnaviv_iommuv2_ensure_stlb(struct etnaviv_iommuv2_context *v2_context,
 			    int stlb)
 {
-	if (etnaviv_domain->stlb_cpu[stlb])
+	if (v2_context->stlb_cpu[stlb])
 		return 0;
 
-	etnaviv_domain->stlb_cpu[stlb] =
-			dma_alloc_wc(etnaviv_domain->base.dev, SZ_4K,
-				     &etnaviv_domain->stlb_dma[stlb],
+	v2_context->stlb_cpu[stlb] =
+			dma_alloc_wc(v2_context->base.global->dev, SZ_4K,
+				     &v2_context->stlb_dma[stlb],
 				     GFP_KERNEL);
 
-	if (!etnaviv_domain->stlb_cpu[stlb])
+	if (!v2_context->stlb_cpu[stlb])
 		return -ENOMEM;
 
-	memset32(etnaviv_domain->stlb_cpu[stlb], MMUv2_PTE_EXCEPTION,
+	memset32(v2_context->stlb_cpu[stlb], MMUv2_PTE_EXCEPTION,
 		 SZ_4K / sizeof(u32));
 
-	etnaviv_domain->mtlb_cpu[stlb] = etnaviv_domain->stlb_dma[stlb] |
-						      MMUv2_PTE_PRESENT;
+	v2_context->mtlb_cpu[stlb] =
+			v2_context->stlb_dma[stlb] | MMUv2_PTE_PRESENT;
+
 	return 0;
 }
 
-static int etnaviv_iommuv2_map(struct etnaviv_iommu_domain *domain,
+static int etnaviv_iommuv2_map(struct etnaviv_iommu_context *context,
 			       unsigned long iova, phys_addr_t paddr,
 			       size_t size, int prot)
 {
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
 	int mtlb_entry, stlb_entry, ret;
 	u32 entry = lower_32_bits(paddr) | MMUv2_PTE_PRESENT;
 
@@ -90,20 +109,19 @@ static int etnaviv_iommuv2_map(struct etnaviv_iommu_domain *domain,
 	mtlb_entry = (iova & MMUv2_MTLB_MASK) >> MMUv2_MTLB_SHIFT;
 	stlb_entry = (iova & MMUv2_STLB_MASK) >> MMUv2_STLB_SHIFT;
 
-	ret = etnaviv_iommuv2_ensure_stlb(etnaviv_domain, mtlb_entry);
+	ret = etnaviv_iommuv2_ensure_stlb(v2_context, mtlb_entry);
 	if (ret)
 		return ret;
 
-	etnaviv_domain->stlb_cpu[mtlb_entry][stlb_entry] = entry;
+	v2_context->stlb_cpu[mtlb_entry][stlb_entry] = entry;
 
 	return 0;
 }
 
-static size_t etnaviv_iommuv2_unmap(struct etnaviv_iommu_domain *domain,
+static size_t etnaviv_iommuv2_unmap(struct etnaviv_iommu_context *context,
 				    unsigned long iova, size_t size)
 {
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
+	struct etnaviv_iommuv2_context *etnaviv_domain = to_v2_context(context);
 	int mtlb_entry, stlb_entry;
 
 	if (size != SZ_4K)
@@ -117,118 +135,35 @@ static size_t etnaviv_iommuv2_unmap(struct etnaviv_iommu_domain *domain,
 	return SZ_4K;
 }
 
-static int etnaviv_iommuv2_init(struct etnaviv_iommuv2_domain *etnaviv_domain)
+static size_t etnaviv_iommuv2_dump_size(struct etnaviv_iommu_context *context)
 {
-	int ret;
-
-	/* allocate scratch page */
-	etnaviv_domain->base.bad_page_cpu =
-			dma_alloc_wc(etnaviv_domain->base.dev, SZ_4K,
-				     &etnaviv_domain->base.bad_page_dma,
-				     GFP_KERNEL);
-	if (!etnaviv_domain->base.bad_page_cpu) {
-		ret = -ENOMEM;
-		goto fail_mem;
-	}
-
-	memset32(etnaviv_domain->base.bad_page_cpu, 0xdead55aa,
-		 SZ_4K / sizeof(u32));
-
-	etnaviv_domain->pta_cpu = dma_alloc_wc(etnaviv_domain->base.dev,
-					       SZ_4K, &etnaviv_domain->pta_dma,
-					       GFP_KERNEL);
-	if (!etnaviv_domain->pta_cpu) {
-		ret = -ENOMEM;
-		goto fail_mem;
-	}
-
-	etnaviv_domain->mtlb_cpu = dma_alloc_wc(etnaviv_domain->base.dev,
-						SZ_4K, &etnaviv_domain->mtlb_dma,
-						GFP_KERNEL);
-	if (!etnaviv_domain->mtlb_cpu) {
-		ret = -ENOMEM;
-		goto fail_mem;
-	}
-
-	memset32(etnaviv_domain->mtlb_cpu, MMUv2_PTE_EXCEPTION,
-		 MMUv2_MAX_STLB_ENTRIES);
-
-	return 0;
-
-fail_mem:
-	if (etnaviv_domain->base.bad_page_cpu)
-		dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-			    etnaviv_domain->base.bad_page_cpu,
-			    etnaviv_domain->base.bad_page_dma);
-
-	if (etnaviv_domain->pta_cpu)
-		dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-			    etnaviv_domain->pta_cpu, etnaviv_domain->pta_dma);
-
-	if (etnaviv_domain->mtlb_cpu)
-		dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-			    etnaviv_domain->mtlb_cpu, etnaviv_domain->mtlb_dma);
-
-	return ret;
-}
-
-static void etnaviv_iommuv2_domain_free(struct etnaviv_iommu_domain *domain)
-{
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
-	int i;
-
-	dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-		    etnaviv_domain->base.bad_page_cpu,
-		    etnaviv_domain->base.bad_page_dma);
-
-	dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-		    etnaviv_domain->pta_cpu, etnaviv_domain->pta_dma);
-
-	dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-		    etnaviv_domain->mtlb_cpu, etnaviv_domain->mtlb_dma);
-
-	for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) {
-		if (etnaviv_domain->stlb_cpu[i])
-			dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-				    etnaviv_domain->stlb_cpu[i],
-				    etnaviv_domain->stlb_dma[i]);
-	}
-
-	vfree(etnaviv_domain);
-}
-
-static size_t etnaviv_iommuv2_dump_size(struct etnaviv_iommu_domain *domain)
-{
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
 	size_t dump_size = SZ_4K;
 	int i;
 
 	for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++)
-		if (etnaviv_domain->mtlb_cpu[i] & MMUv2_PTE_PRESENT)
+		if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT)
 			dump_size += SZ_4K;
 
 	return dump_size;
 }
 
-static void etnaviv_iommuv2_dump(struct etnaviv_iommu_domain *domain, void *buf)
+static void etnaviv_iommuv2_dump(struct etnaviv_iommu_context *context, void *buf)
 {
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
 	int i;
 
-	memcpy(buf, etnaviv_domain->mtlb_cpu, SZ_4K);
+	memcpy(buf, v2_context->mtlb_cpu, SZ_4K);
 	buf += SZ_4K;
 	for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++, buf += SZ_4K)
-		if (etnaviv_domain->mtlb_cpu[i] & MMUv2_PTE_PRESENT)
-			memcpy(buf, etnaviv_domain->stlb_cpu[i], SZ_4K);
+		if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT)
+			memcpy(buf, v2_context->stlb_cpu[i], SZ_4K);
 }
 
-static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu)
+static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu,
+	struct etnaviv_iommu_context *context)
 {
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-			to_etnaviv_domain(gpu->mmu->domain);
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
 	u16 prefetch;
 
 	/* If the MMU is already enabled the state is still there. */
@@ -236,8 +171,8 @@ static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu)
 		return;
 
 	prefetch = etnaviv_buffer_config_mmuv2(gpu,
-				(u32)etnaviv_domain->mtlb_dma,
-				(u32)etnaviv_domain->base.bad_page_dma);
+				(u32)v2_context->mtlb_dma,
+				(u32)context->global->bad_page_dma);
 	etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(&gpu->buffer),
 			     prefetch);
 	etnaviv_gpu_wait_idle(gpu, 100);
@@ -245,10 +180,10 @@ static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu)
 	gpu_write(gpu, VIVS_MMUv2_CONTROL, VIVS_MMUv2_CONTROL_ENABLE);
 }
 
-static void etnaviv_iommuv2_restore_sec(struct etnaviv_gpu *gpu)
+static void etnaviv_iommuv2_restore_sec(struct etnaviv_gpu *gpu,
+	struct etnaviv_iommu_context *context)
 {
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-				to_etnaviv_domain(gpu->mmu->domain);
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
 	u16 prefetch;
 
 	/* If the MMU is already enabled the state is still there. */
@@ -256,26 +191,26 @@ static void etnaviv_iommuv2_restore_sec(struct etnaviv_gpu *gpu)
 		return;
 
 	gpu_write(gpu, VIVS_MMUv2_PTA_ADDRESS_LOW,
-		  lower_32_bits(etnaviv_domain->pta_dma));
+		  lower_32_bits(context->global->v2.pta_dma));
 	gpu_write(gpu, VIVS_MMUv2_PTA_ADDRESS_HIGH,
-		  upper_32_bits(etnaviv_domain->pta_dma));
+		  upper_32_bits(context->global->v2.pta_dma));
 	gpu_write(gpu, VIVS_MMUv2_PTA_CONTROL, VIVS_MMUv2_PTA_CONTROL_ENABLE);
 
 	gpu_write(gpu, VIVS_MMUv2_NONSEC_SAFE_ADDR_LOW,
-		  lower_32_bits(etnaviv_domain->base.bad_page_dma));
+		  lower_32_bits(context->global->bad_page_dma));
 	gpu_write(gpu, VIVS_MMUv2_SEC_SAFE_ADDR_LOW,
-		  lower_32_bits(etnaviv_domain->base.bad_page_dma));
+		  lower_32_bits(context->global->bad_page_dma));
 	gpu_write(gpu, VIVS_MMUv2_SAFE_ADDRESS_CONFIG,
 		  VIVS_MMUv2_SAFE_ADDRESS_CONFIG_NON_SEC_SAFE_ADDR_HIGH(
-		  upper_32_bits(etnaviv_domain->base.bad_page_dma)) |
+		  upper_32_bits(context->global->bad_page_dma)) |
 		  VIVS_MMUv2_SAFE_ADDRESS_CONFIG_SEC_SAFE_ADDR_HIGH(
-		  upper_32_bits(etnaviv_domain->base.bad_page_dma)));
+		  upper_32_bits(context->global->bad_page_dma)));
 
-	etnaviv_domain->pta_cpu[0] = etnaviv_domain->mtlb_dma |
-				     VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K;
+	context->global->v2.pta_cpu[v2_context->id] = v2_context->mtlb_dma |
+				 	 VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K;
 
 	/* trigger a PTA load through the FE */
-	prefetch = etnaviv_buffer_config_pta(gpu);
+	prefetch = etnaviv_buffer_config_pta(gpu, v2_context->id);
 	etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(&gpu->buffer),
 			     prefetch);
 	etnaviv_gpu_wait_idle(gpu, 100);
@@ -283,14 +218,28 @@ static void etnaviv_iommuv2_restore_sec(struct etnaviv_gpu *gpu)
 	gpu_write(gpu, VIVS_MMUv2_SEC_CONTROL, VIVS_MMUv2_SEC_CONTROL_ENABLE);
 }
 
-void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu)
+u32 etnaviv_iommuv2_get_mtlb_addr(struct etnaviv_iommu_context *context)
+{
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
+
+	return v2_context->mtlb_dma;
+}
+
+unsigned short etnaviv_iommuv2_get_pta_id(struct etnaviv_iommu_context *context)
+{
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
+
+	return v2_context->id;
+}
+static void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu,
+				    struct etnaviv_iommu_context *context)
 {
 	switch (gpu->sec_mode) {
 	case ETNA_SEC_NONE:
-		etnaviv_iommuv2_restore_nonsec(gpu);
+		etnaviv_iommuv2_restore_nonsec(gpu, context);
 		break;
 	case ETNA_SEC_KERNEL:
-		etnaviv_iommuv2_restore_sec(gpu);
+		etnaviv_iommuv2_restore_sec(gpu, context);
 		break;
 	default:
 		WARN(1, "unhandled GPU security mode\n");
@@ -298,39 +247,58 @@ void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu)
 	}
 }
 
-static const struct etnaviv_iommu_domain_ops etnaviv_iommuv2_ops = {
-	.free = etnaviv_iommuv2_domain_free,
+const struct etnaviv_iommu_ops etnaviv_iommuv2_ops = {
+	.free = etnaviv_iommuv2_free,
 	.map = etnaviv_iommuv2_map,
 	.unmap = etnaviv_iommuv2_unmap,
 	.dump_size = etnaviv_iommuv2_dump_size,
 	.dump = etnaviv_iommuv2_dump,
+	.restore = etnaviv_iommuv2_restore,
 };
 
-struct etnaviv_iommu_domain *
-etnaviv_iommuv2_domain_alloc(struct etnaviv_gpu *gpu)
+struct etnaviv_iommu_context *
+etnaviv_iommuv2_context_alloc(struct etnaviv_iommu_global *global)
 {
-	struct etnaviv_iommuv2_domain *etnaviv_domain;
-	struct etnaviv_iommu_domain *domain;
-	int ret;
+	struct etnaviv_iommuv2_context *v2_context;
+	struct etnaviv_iommu_context *context;
 
-	etnaviv_domain = vzalloc(sizeof(*etnaviv_domain));
-	if (!etnaviv_domain)
+	v2_context = vzalloc(sizeof(*v2_context));
+	if (!v2_context)
 		return NULL;
 
-	domain = &etnaviv_domain->base;
-
-	domain->dev = gpu->dev;
-	domain->base = SZ_4K;
-	domain->size = (u64)SZ_1G * 4 - SZ_4K;
-	domain->ops = &etnaviv_iommuv2_ops;
-
-	ret = etnaviv_iommuv2_init(etnaviv_domain);
-	if (ret)
+	mutex_lock(&global->lock);
+	v2_context->id = find_first_zero_bit(global->v2.pta_alloc,
+					     ETNAVIV_PTA_ENTRIES);
+	if (v2_context->id < ETNAVIV_PTA_ENTRIES) {
+		set_bit(v2_context->id, global->v2.pta_alloc);
+	} else {
+		mutex_unlock(&global->lock);
 		goto out_free;
+	}
+	mutex_unlock(&global->lock);
 
-	return &etnaviv_domain->base;
+	v2_context->mtlb_cpu = dma_alloc_wc(global->dev, SZ_4K,
+					    &v2_context->mtlb_dma, GFP_KERNEL);
+	if (!v2_context->mtlb_cpu)
+		goto out_free_id;
 
+	memset32(v2_context->mtlb_cpu, MMUv2_PTE_EXCEPTION,
+		 MMUv2_MAX_STLB_ENTRIES);
+
+	global->v2.pta_cpu[v2_context->id] = v2_context->mtlb_dma;
+
+	context = &v2_context->base;
+	context->global = global;
+	kref_init(&context->refcount);
+	mutex_init(&context->lock);
+	INIT_LIST_HEAD(&context->mappings);
+	drm_mm_init(&context->mm, SZ_4K, (u64)SZ_1G * 4 - SZ_4K);
+
+	return context;
+
+out_free_id:
+	clear_bit(v2_context->id, global->v2.pta_alloc);
 out_free:
-	vfree(etnaviv_domain);
+	vfree(v2_context);
 	return NULL;
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
index 8069f9f36a2e..35ebae6a1be7 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
@@ -3,15 +3,17 @@
  * Copyright (C) 2015-2018 Etnaviv Project
  */
 
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+
 #include "common.xml.h"
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_gpu.h"
-#include "etnaviv_iommu.h"
 #include "etnaviv_mmu.h"
 
-static void etnaviv_domain_unmap(struct etnaviv_iommu_domain *domain,
+static void etnaviv_context_unmap(struct etnaviv_iommu_context *context,
 				 unsigned long iova, size_t size)
 {
 	size_t unmapped_page, unmapped = 0;
@@ -24,7 +26,8 @@ static void etnaviv_domain_unmap(struct etnaviv_iommu_domain *domain,
 	}
 
 	while (unmapped < size) {
-		unmapped_page = domain->ops->unmap(domain, iova, pgsize);
+		unmapped_page = context->global->ops->unmap(context, iova,
+							    pgsize);
 		if (!unmapped_page)
 			break;
 
@@ -33,7 +36,7 @@ static void etnaviv_domain_unmap(struct etnaviv_iommu_domain *domain,
 	}
 }
 
-static int etnaviv_domain_map(struct etnaviv_iommu_domain *domain,
+static int etnaviv_context_map(struct etnaviv_iommu_context *context,
 			      unsigned long iova, phys_addr_t paddr,
 			      size_t size, int prot)
 {
@@ -49,7 +52,8 @@ static int etnaviv_domain_map(struct etnaviv_iommu_domain *domain,
 	}
 
 	while (size) {
-		ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
+		ret = context->global->ops->map(context, iova, paddr, pgsize,
+						prot);
 		if (ret)
 			break;
 
@@ -60,21 +64,19 @@ static int etnaviv_domain_map(struct etnaviv_iommu_domain *domain,
 
 	/* unroll mapping in case something went wrong */
 	if (ret)
-		etnaviv_domain_unmap(domain, orig_iova, orig_size - size);
+		etnaviv_context_unmap(context, orig_iova, orig_size - size);
 
 	return ret;
 }
 
-static int etnaviv_iommu_map(struct etnaviv_iommu *iommu, u32 iova,
+static int etnaviv_iommu_map(struct etnaviv_iommu_context *context, u32 iova,
 			     struct sg_table *sgt, unsigned len, int prot)
-{
-	struct etnaviv_iommu_domain *domain = iommu->domain;
-	struct scatterlist *sg;
+{	struct scatterlist *sg;
 	unsigned int da = iova;
 	unsigned int i, j;
 	int ret;
 
-	if (!domain || !sgt)
+	if (!context || !sgt)
 		return -EINVAL;
 
 	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
@@ -83,7 +85,7 @@ static int etnaviv_iommu_map(struct etnaviv_iommu *iommu, u32 iova,
 
 		VERB("map[%d]: %08x %08x(%zx)", i, iova, pa, bytes);
 
-		ret = etnaviv_domain_map(domain, da, pa, bytes, prot);
+		ret = etnaviv_context_map(context, da, pa, bytes, prot);
 		if (ret)
 			goto fail;
 
@@ -98,16 +100,15 @@ static int etnaviv_iommu_map(struct etnaviv_iommu *iommu, u32 iova,
 	for_each_sg(sgt->sgl, sg, i, j) {
 		size_t bytes = sg_dma_len(sg) + sg->offset;
 
-		etnaviv_domain_unmap(domain, da, bytes);
+		etnaviv_context_unmap(context, da, bytes);
 		da += bytes;
 	}
 	return ret;
 }
 
-static void etnaviv_iommu_unmap(struct etnaviv_iommu *iommu, u32 iova,
+static void etnaviv_iommu_unmap(struct etnaviv_iommu_context *context, u32 iova,
 				struct sg_table *sgt, unsigned len)
 {
-	struct etnaviv_iommu_domain *domain = iommu->domain;
 	struct scatterlist *sg;
 	unsigned int da = iova;
 	int i;
@@ -115,7 +116,7 @@ static void etnaviv_iommu_unmap(struct etnaviv_iommu *iommu, u32 iova,
 	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
 		size_t bytes = sg_dma_len(sg) + sg->offset;
 
-		etnaviv_domain_unmap(domain, da, bytes);
+		etnaviv_context_unmap(context, da, bytes);
 
 		VERB("unmap[%d]: %08x(%zx)", i, iova, bytes);
 
@@ -125,24 +126,24 @@ static void etnaviv_iommu_unmap(struct etnaviv_iommu *iommu, u32 iova,
 	}
 }
 
-static void etnaviv_iommu_remove_mapping(struct etnaviv_iommu *mmu,
+static void etnaviv_iommu_remove_mapping(struct etnaviv_iommu_context *context,
 	struct etnaviv_vram_mapping *mapping)
 {
 	struct etnaviv_gem_object *etnaviv_obj = mapping->object;
 
-	etnaviv_iommu_unmap(mmu, mapping->vram_node.start,
+	etnaviv_iommu_unmap(context, mapping->vram_node.start,
 			    etnaviv_obj->sgt, etnaviv_obj->base.size);
 	drm_mm_remove_node(&mapping->vram_node);
 }
 
-static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
+static int etnaviv_iommu_find_iova(struct etnaviv_iommu_context *context,
 				   struct drm_mm_node *node, size_t size)
 {
 	struct etnaviv_vram_mapping *free = NULL;
 	enum drm_mm_insert_mode mode = DRM_MM_INSERT_LOW;
 	int ret;
 
-	lockdep_assert_held(&mmu->lock);
+	lockdep_assert_held(&context->lock);
 
 	while (1) {
 		struct etnaviv_vram_mapping *m, *n;
@@ -150,17 +151,17 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
 		struct list_head list;
 		bool found;
 
-		ret = drm_mm_insert_node_in_range(&mmu->mm, node,
+		ret = drm_mm_insert_node_in_range(&context->mm, node,
 						  size, 0, 0, 0, U64_MAX, mode);
 		if (ret != -ENOSPC)
 			break;
 
 		/* Try to retire some entries */
-		drm_mm_scan_init(&scan, &mmu->mm, size, 0, 0, mode);
+		drm_mm_scan_init(&scan, &context->mm, size, 0, 0, mode);
 
 		found = 0;
 		INIT_LIST_HEAD(&list);
-		list_for_each_entry(free, &mmu->mappings, mmu_node) {
+		list_for_each_entry(free, &context->mappings, mmu_node) {
 			/* If this vram node has not been used, skip this. */
 			if (!free->vram_node.mm)
 				continue;
@@ -202,8 +203,8 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
 		 * this mapping.
 		 */
 		list_for_each_entry_safe(m, n, &list, scan_node) {
-			etnaviv_iommu_remove_mapping(mmu, m);
-			m->mmu = NULL;
+			etnaviv_iommu_remove_mapping(context, m);
+			m->context = NULL;
 			list_del_init(&m->mmu_node);
 			list_del_init(&m->scan_node);
 		}
@@ -219,9 +220,16 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
 	return ret;
 }
 
-int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
+static int etnaviv_iommu_insert_exact(struct etnaviv_iommu_context *context,
+		   struct drm_mm_node *node, size_t size, u64 va)
+{
+	return drm_mm_insert_node_in_range(&context->mm, node, size, 0, 0, va,
+					   va + size, DRM_MM_INSERT_LOWEST);
+}
+
+int etnaviv_iommu_map_gem(struct etnaviv_iommu_context *context,
 	struct etnaviv_gem_object *etnaviv_obj, u32 memory_base,
-	struct etnaviv_vram_mapping *mapping)
+	struct etnaviv_vram_mapping *mapping, u64 va)
 {
 	struct sg_table *sgt = etnaviv_obj->sgt;
 	struct drm_mm_node *node;
@@ -229,17 +237,17 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
 
 	lockdep_assert_held(&etnaviv_obj->lock);
 
-	mutex_lock(&mmu->lock);
+	mutex_lock(&context->lock);
 
 	/* v1 MMU can optimize single entry (contiguous) scatterlists */
-	if (mmu->version == ETNAVIV_IOMMU_V1 &&
+	if (context->global->version == ETNAVIV_IOMMU_V1 &&
 	    sgt->nents == 1 && !(etnaviv_obj->flags & ETNA_BO_FORCE_MMU)) {
 		u32 iova;
 
 		iova = sg_dma_address(sgt->sgl) - memory_base;
 		if (iova < 0x80000000 - sg_dma_len(sgt->sgl)) {
 			mapping->iova = iova;
-			list_add_tail(&mapping->mmu_node, &mmu->mappings);
+			list_add_tail(&mapping->mmu_node, &context->mappings);
 			ret = 0;
 			goto unlock;
 		}
@@ -247,12 +255,17 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
 
 	node = &mapping->vram_node;
 
-	ret = etnaviv_iommu_find_iova(mmu, node, etnaviv_obj->base.size);
+	if (va)
+		ret = etnaviv_iommu_insert_exact(context, node,
+						 etnaviv_obj->base.size, va);
+	else
+		ret = etnaviv_iommu_find_iova(context, node,
+					      etnaviv_obj->base.size);
 	if (ret < 0)
 		goto unlock;
 
 	mapping->iova = node->start;
-	ret = etnaviv_iommu_map(mmu, node->start, sgt, etnaviv_obj->base.size,
+	ret = etnaviv_iommu_map(context, node->start, sgt, etnaviv_obj->base.size,
 				ETNAVIV_PROT_READ | ETNAVIV_PROT_WRITE);
 
 	if (ret < 0) {
@@ -260,130 +273,233 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
 		goto unlock;
 	}
 
-	list_add_tail(&mapping->mmu_node, &mmu->mappings);
-	mmu->need_flush = true;
+	list_add_tail(&mapping->mmu_node, &context->mappings);
+	context->flush_seq++;
 unlock:
-	mutex_unlock(&mmu->lock);
+	mutex_unlock(&context->lock);
 
 	return ret;
 }
 
-void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu,
+void etnaviv_iommu_unmap_gem(struct etnaviv_iommu_context *context,
 	struct etnaviv_vram_mapping *mapping)
 {
 	WARN_ON(mapping->use);
 
-	mutex_lock(&mmu->lock);
+	mutex_lock(&context->lock);
 
 	/* If the vram node is on the mm, unmap and remove the node */
-	if (mapping->vram_node.mm == &mmu->mm)
-		etnaviv_iommu_remove_mapping(mmu, mapping);
+	if (mapping->vram_node.mm == &context->mm)
+		etnaviv_iommu_remove_mapping(context, mapping);
 
 	list_del(&mapping->mmu_node);
-	mmu->need_flush = true;
-	mutex_unlock(&mmu->lock);
+	context->flush_seq++;
+	mutex_unlock(&context->lock);
 }
 
-void etnaviv_iommu_destroy(struct etnaviv_iommu *mmu)
+static void etnaviv_iommu_context_free(struct kref *kref)
 {
-	drm_mm_takedown(&mmu->mm);
-	mmu->domain->ops->free(mmu->domain);
-	kfree(mmu);
+	struct etnaviv_iommu_context *context =
+		container_of(kref, struct etnaviv_iommu_context, refcount);
+
+	etnaviv_cmdbuf_suballoc_unmap(context, &context->cmdbuf_mapping);
+
+	context->global->ops->free(context);
+}
+void etnaviv_iommu_context_put(struct etnaviv_iommu_context *context)
+{
+	kref_put(&context->refcount, etnaviv_iommu_context_free);
 }
 
-struct etnaviv_iommu *etnaviv_iommu_new(struct etnaviv_gpu *gpu)
+struct etnaviv_iommu_context *
+etnaviv_iommu_context_init(struct etnaviv_iommu_global *global,
+			   struct etnaviv_cmdbuf_suballoc *suballoc)
 {
-	enum etnaviv_iommu_version version;
-	struct etnaviv_iommu *mmu;
+	struct etnaviv_iommu_context *ctx;
+	int ret;
 
-	mmu = kzalloc(sizeof(*mmu), GFP_KERNEL);
-	if (!mmu)
-		return ERR_PTR(-ENOMEM);
-
-	if (!(gpu->identity.minor_features1 & chipMinorFeatures1_MMU_VERSION)) {
-		mmu->domain = etnaviv_iommuv1_domain_alloc(gpu);
-		version = ETNAVIV_IOMMU_V1;
-	} else {
-		mmu->domain = etnaviv_iommuv2_domain_alloc(gpu);
-		version = ETNAVIV_IOMMU_V2;
-	}
-
-	if (!mmu->domain) {
-		dev_err(gpu->dev, "Failed to allocate GPU IOMMU domain\n");
-		kfree(mmu);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	mmu->gpu = gpu;
-	mmu->version = version;
-	mutex_init(&mmu->lock);
-	INIT_LIST_HEAD(&mmu->mappings);
-
-	drm_mm_init(&mmu->mm, mmu->domain->base, mmu->domain->size);
-
-	return mmu;
-}
-
-void etnaviv_iommu_restore(struct etnaviv_gpu *gpu)
-{
-	if (gpu->mmu->version == ETNAVIV_IOMMU_V1)
-		etnaviv_iommuv1_restore(gpu);
+	if (global->version == ETNAVIV_IOMMU_V1)
+		ctx = etnaviv_iommuv1_context_alloc(global);
 	else
-		etnaviv_iommuv2_restore(gpu);
+		ctx = etnaviv_iommuv2_context_alloc(global);
+
+	if (!ctx)
+		return NULL;
+
+	ret = etnaviv_cmdbuf_suballoc_map(suballoc, ctx, &ctx->cmdbuf_mapping,
+					  global->memory_base);
+	if (ret) {
+		global->ops->free(ctx);
+		return NULL;
+	}
+
+	return ctx;
 }
 
-int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr,
-				  struct drm_mm_node *vram_node, size_t size,
-				  u32 *iova)
+void etnaviv_iommu_restore(struct etnaviv_gpu *gpu,
+			   struct etnaviv_iommu_context *context)
 {
-	struct etnaviv_iommu *mmu = gpu->mmu;
+	context->global->ops->restore(gpu, context);
+}
 
-	if (mmu->version == ETNAVIV_IOMMU_V1) {
-		*iova = paddr - gpu->memory_base;
+int etnaviv_iommu_get_suballoc_va(struct etnaviv_iommu_context *context,
+				  struct etnaviv_vram_mapping *mapping,
+				  u32 memory_base, dma_addr_t paddr,
+				  size_t size)
+{
+	mutex_lock(&context->lock);
+
+	if (mapping->use > 0) {
+		mapping->use++;
+		mutex_unlock(&context->lock);
 		return 0;
+	}
+
+	/*
+	 * For MMUv1 we don't add the suballoc region to the pagetables, as
+	 * those GPUs can only work with cmdbufs accessed through the linear
+	 * window. Instead we manufacture a mapping to make it look uniform
+	 * to the upper layers.
+	 */
+	if (context->global->version == ETNAVIV_IOMMU_V1) {
+		mapping->iova = paddr - memory_base;
 	} else {
+		struct drm_mm_node *node = &mapping->vram_node;
 		int ret;
 
-		mutex_lock(&mmu->lock);
-		ret = etnaviv_iommu_find_iova(mmu, vram_node, size);
+		ret = etnaviv_iommu_find_iova(context, node, size);
 		if (ret < 0) {
-			mutex_unlock(&mmu->lock);
+			mutex_unlock(&context->lock);
 			return ret;
 		}
-		ret = etnaviv_domain_map(mmu->domain, vram_node->start, paddr,
-					 size, ETNAVIV_PROT_READ);
-		if (ret < 0) {
-			drm_mm_remove_node(vram_node);
-			mutex_unlock(&mmu->lock);
-			return ret;
-		}
-		gpu->mmu->need_flush = true;
-		mutex_unlock(&mmu->lock);
 
-		*iova = (u32)vram_node->start;
+		mapping->iova = node->start;
+		ret = etnaviv_context_map(context, node->start, paddr, size,
+					  ETNAVIV_PROT_READ);
+		if (ret < 0) {
+			drm_mm_remove_node(node);
+			mutex_unlock(&context->lock);
+			return ret;
+		}
+
+		context->flush_seq++;
+	}
+
+	list_add_tail(&mapping->mmu_node, &context->mappings);
+	mapping->use = 1;
+
+	mutex_unlock(&context->lock);
+
+	return 0;
+}
+
+void etnaviv_iommu_put_suballoc_va(struct etnaviv_iommu_context *context,
+		  struct etnaviv_vram_mapping *mapping)
+{
+	struct drm_mm_node *node = &mapping->vram_node;
+
+	mutex_lock(&context->lock);
+	mapping->use--;
+
+	if (mapping->use > 0 || context->global->version == ETNAVIV_IOMMU_V1) {
+		mutex_unlock(&context->lock);
+		return;
+	}
+
+	etnaviv_context_unmap(context, node->start, node->size);
+	drm_mm_remove_node(node);
+	mutex_unlock(&context->lock);
+}
+
+size_t etnaviv_iommu_dump_size(struct etnaviv_iommu_context *context)
+{
+	return context->global->ops->dump_size(context);
+}
+
+void etnaviv_iommu_dump(struct etnaviv_iommu_context *context, void *buf)
+{
+	context->global->ops->dump(context, buf);
+}
+
+int etnaviv_iommu_global_init(struct etnaviv_gpu *gpu)
+{
+	enum etnaviv_iommu_version version = ETNAVIV_IOMMU_V1;
+	struct etnaviv_drm_private *priv = gpu->drm->dev_private;
+	struct etnaviv_iommu_global *global;
+	struct device *dev = gpu->drm->dev;
+
+	if (gpu->identity.minor_features1 & chipMinorFeatures1_MMU_VERSION)
+		version = ETNAVIV_IOMMU_V2;
+
+	if (priv->mmu_global) {
+		if (priv->mmu_global->version != version) {
+			dev_err(gpu->dev,
+				"MMU version doesn't match global version\n");
+			return -ENXIO;
+		}
+
+		priv->mmu_global->use++;
 		return 0;
 	}
-}
 
-void etnaviv_iommu_put_suballoc_va(struct etnaviv_gpu *gpu,
-				   struct drm_mm_node *vram_node, size_t size,
-				   u32 iova)
-{
-	struct etnaviv_iommu *mmu = gpu->mmu;
+	global = kzalloc(sizeof(*global), GFP_KERNEL);
+	if (!global)
+		return -ENOMEM;
 
-	if (mmu->version == ETNAVIV_IOMMU_V2) {
-		mutex_lock(&mmu->lock);
-		etnaviv_domain_unmap(mmu->domain, iova, size);
-		drm_mm_remove_node(vram_node);
-		mutex_unlock(&mmu->lock);
+	global->bad_page_cpu = dma_alloc_wc(dev, SZ_4K, &global->bad_page_dma,
+					    GFP_KERNEL);
+	if (!global->bad_page_cpu)
+		goto free_global;
+
+	memset32(global->bad_page_cpu, 0xdead55aa, SZ_4K / sizeof(u32));
+
+	if (version == ETNAVIV_IOMMU_V2) {
+		global->v2.pta_cpu = dma_alloc_wc(dev, ETNAVIV_PTA_SIZE,
+					       &global->v2.pta_dma, GFP_KERNEL);
+		if (!global->v2.pta_cpu)
+			goto free_bad_page;
 	}
-}
-size_t etnaviv_iommu_dump_size(struct etnaviv_iommu *iommu)
-{
-	return iommu->domain->ops->dump_size(iommu->domain);
+
+	global->dev = dev;
+	global->version = version;
+	global->use = 1;
+	mutex_init(&global->lock);
+
+	if (version == ETNAVIV_IOMMU_V1)
+		global->ops = &etnaviv_iommuv1_ops;
+	else
+		global->ops = &etnaviv_iommuv2_ops;
+
+	priv->mmu_global = global;
+
+	return 0;
+
+free_bad_page:
+	dma_free_wc(dev, SZ_4K, global->bad_page_cpu, global->bad_page_dma);
+free_global:
+	kfree(global);
+
+	return -ENOMEM;
 }
 
-void etnaviv_iommu_dump(struct etnaviv_iommu *iommu, void *buf)
+void etnaviv_iommu_global_fini(struct etnaviv_gpu *gpu)
 {
-	iommu->domain->ops->dump(iommu->domain, buf);
+	struct etnaviv_drm_private *priv = gpu->drm->dev_private;
+	struct etnaviv_iommu_global *global = priv->mmu_global;
+
+	if (--global->use > 0)
+		return;
+
+	if (global->v2.pta_cpu)
+		dma_free_wc(global->dev, ETNAVIV_PTA_SIZE,
+			    global->v2.pta_cpu, global->v2.pta_dma);
+
+	if (global->bad_page_cpu)
+		dma_free_wc(global->dev, SZ_4K,
+			    global->bad_page_cpu, global->bad_page_dma);
+
+	mutex_destroy(&global->lock);
+	kfree(global);
+
+	priv->mmu_global = NULL;
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
index a0db17ffb686..d1d6902fd13b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
@@ -16,61 +16,109 @@ enum etnaviv_iommu_version {
 
 struct etnaviv_gpu;
 struct etnaviv_vram_mapping;
-struct etnaviv_iommu_domain;
+struct etnaviv_iommu_global;
+struct etnaviv_iommu_context;
 
-struct etnaviv_iommu_domain_ops {
-	void (*free)(struct etnaviv_iommu_domain *);
-	int (*map)(struct etnaviv_iommu_domain *domain, unsigned long iova,
+struct etnaviv_iommu_ops {
+	struct etnaviv_iommu_context *(*init)(struct etnaviv_iommu_global *);
+	void (*free)(struct etnaviv_iommu_context *);
+	int (*map)(struct etnaviv_iommu_context *context, unsigned long iova,
 		   phys_addr_t paddr, size_t size, int prot);
-	size_t (*unmap)(struct etnaviv_iommu_domain *domain, unsigned long iova,
+	size_t (*unmap)(struct etnaviv_iommu_context *context, unsigned long iova,
 			size_t size);
-	size_t (*dump_size)(struct etnaviv_iommu_domain *);
-	void (*dump)(struct etnaviv_iommu_domain *, void *);
+	size_t (*dump_size)(struct etnaviv_iommu_context *);
+	void (*dump)(struct etnaviv_iommu_context *, void *);
+	void (*restore)(struct etnaviv_gpu *, struct etnaviv_iommu_context *);
 };
 
-struct etnaviv_iommu_domain {
+extern const struct etnaviv_iommu_ops etnaviv_iommuv1_ops;
+extern const struct etnaviv_iommu_ops etnaviv_iommuv2_ops;
+
+#define ETNAVIV_PTA_SIZE	SZ_4K
+#define ETNAVIV_PTA_ENTRIES	(ETNAVIV_PTA_SIZE / sizeof(u64))
+
+struct etnaviv_iommu_global {
 	struct device *dev;
+	enum etnaviv_iommu_version version;
+	const struct etnaviv_iommu_ops *ops;
+	unsigned int use;
+	struct mutex lock;
+
 	void *bad_page_cpu;
 	dma_addr_t bad_page_dma;
-	u64 base;
-	u64 size;
 
-	const struct etnaviv_iommu_domain_ops *ops;
+	u32 memory_base;
+
+	/*
+	 * This union holds members needed by either MMUv1 or MMUv2, which
+	 * can not exist at the same time.
+	 */
+	union {
+		struct {
+			struct etnaviv_iommu_context *shared_context;
+		} v1;
+		struct {
+			/* P(age) T(able) A(rray) */
+			u64 *pta_cpu;
+			dma_addr_t pta_dma;
+			struct spinlock pta_lock;
+			DECLARE_BITMAP(pta_alloc, ETNAVIV_PTA_ENTRIES);
+		} v2;
+	};
 };
 
-struct etnaviv_iommu {
-	struct etnaviv_gpu *gpu;
-	struct etnaviv_iommu_domain *domain;
-
-	enum etnaviv_iommu_version version;
+struct etnaviv_iommu_context {
+	struct kref refcount;
+	struct etnaviv_iommu_global *global;
 
 	/* memory manager for GPU address area */
 	struct mutex lock;
 	struct list_head mappings;
 	struct drm_mm mm;
-	bool need_flush;
+	unsigned int flush_seq;
+
+	/* Not part of the context, but needs to have the same lifetime */
+	struct etnaviv_vram_mapping cmdbuf_mapping;
 };
 
+int etnaviv_iommu_global_init(struct etnaviv_gpu *gpu);
+void etnaviv_iommu_global_fini(struct etnaviv_gpu *gpu);
+
 struct etnaviv_gem_object;
 
-int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
+int etnaviv_iommu_map_gem(struct etnaviv_iommu_context *context,
 	struct etnaviv_gem_object *etnaviv_obj, u32 memory_base,
-	struct etnaviv_vram_mapping *mapping);
-void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu,
+	struct etnaviv_vram_mapping *mapping, u64 va);
+void etnaviv_iommu_unmap_gem(struct etnaviv_iommu_context *context,
 	struct etnaviv_vram_mapping *mapping);
 
-int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr,
-				  struct drm_mm_node *vram_node, size_t size,
-				  u32 *iova);
-void etnaviv_iommu_put_suballoc_va(struct etnaviv_gpu *gpu,
-				   struct drm_mm_node *vram_node, size_t size,
-				   u32 iova);
+int etnaviv_iommu_get_suballoc_va(struct etnaviv_iommu_context *ctx,
+				  struct etnaviv_vram_mapping *mapping,
+				  u32 memory_base, dma_addr_t paddr,
+				  size_t size);
+void etnaviv_iommu_put_suballoc_va(struct etnaviv_iommu_context *ctx,
+				   struct etnaviv_vram_mapping *mapping);
 
-size_t etnaviv_iommu_dump_size(struct etnaviv_iommu *iommu);
-void etnaviv_iommu_dump(struct etnaviv_iommu *iommu, void *buf);
+size_t etnaviv_iommu_dump_size(struct etnaviv_iommu_context *ctx);
+void etnaviv_iommu_dump(struct etnaviv_iommu_context *ctx, void *buf);
 
-struct etnaviv_iommu *etnaviv_iommu_new(struct etnaviv_gpu *gpu);
-void etnaviv_iommu_destroy(struct etnaviv_iommu *iommu);
-void etnaviv_iommu_restore(struct etnaviv_gpu *gpu);
+struct etnaviv_iommu_context *
+etnaviv_iommu_context_init(struct etnaviv_iommu_global *global,
+			   struct etnaviv_cmdbuf_suballoc *suballoc);
+static inline void etnaviv_iommu_context_get(struct etnaviv_iommu_context *ctx)
+{
+	kref_get(&ctx->refcount);
+}
+void etnaviv_iommu_context_put(struct etnaviv_iommu_context *ctx);
+void etnaviv_iommu_restore(struct etnaviv_gpu *gpu,
+			   struct etnaviv_iommu_context *ctx);
+
+struct etnaviv_iommu_context *
+etnaviv_iommuv1_context_alloc(struct etnaviv_iommu_global *global);
+struct etnaviv_iommu_context *
+etnaviv_iommuv2_context_alloc(struct etnaviv_iommu_global *global);
+
+u32 etnaviv_iommuv2_get_mtlb_addr(struct etnaviv_iommu_context *context);
+unsigned short etnaviv_iommuv2_get_pta_id(struct etnaviv_iommu_context *context);
 
 #endif /* __ETNAVIV_MMU_H__ */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
index 4227a4006c34..8adbf2861bff 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2017 Zodiac Inflight Innovations
  */
 
+#include "common.xml.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_perfmon.h"
 #include "state_hi.xml.h"
@@ -15,8 +16,8 @@ struct etnaviv_pm_signal {
 	u32 data;
 
 	u32 (*sample)(struct etnaviv_gpu *gpu,
-	              const struct etnaviv_pm_domain *domain,
-	              const struct etnaviv_pm_signal *signal);
+		      const struct etnaviv_pm_domain *domain,
+		      const struct etnaviv_pm_signal *signal);
 };
 
 struct etnaviv_pm_domain {
@@ -35,13 +36,6 @@ struct etnaviv_pm_domain_meta {
 	u32 nr_domains;
 };
 
-static u32 simple_reg_read(struct etnaviv_gpu *gpu,
-	const struct etnaviv_pm_domain *domain,
-	const struct etnaviv_pm_signal *signal)
-{
-	return gpu_read(gpu, signal->data);
-}
-
 static u32 perf_reg_read(struct etnaviv_gpu *gpu,
 	const struct etnaviv_pm_domain *domain,
 	const struct etnaviv_pm_signal *signal)
@@ -75,6 +69,34 @@ static u32 pipe_reg_read(struct etnaviv_gpu *gpu,
 	return value;
 }
 
+static u32 hi_total_cycle_read(struct etnaviv_gpu *gpu,
+	const struct etnaviv_pm_domain *domain,
+	const struct etnaviv_pm_signal *signal)
+{
+	u32 reg = VIVS_HI_PROFILE_TOTAL_CYCLES;
+
+	if (gpu->identity.model == chipModel_GC880 ||
+		gpu->identity.model == chipModel_GC2000 ||
+		gpu->identity.model == chipModel_GC2100)
+		reg = VIVS_MC_PROFILE_CYCLE_COUNTER;
+
+	return gpu_read(gpu, reg);
+}
+
+static u32 hi_total_idle_cycle_read(struct etnaviv_gpu *gpu,
+	const struct etnaviv_pm_domain *domain,
+	const struct etnaviv_pm_signal *signal)
+{
+	u32 reg = VIVS_HI_PROFILE_IDLE_CYCLES;
+
+	if (gpu->identity.model == chipModel_GC880 ||
+		gpu->identity.model == chipModel_GC2000 ||
+		gpu->identity.model == chipModel_GC2100)
+		reg = VIVS_HI_PROFILE_TOTAL_CYCLES;
+
+	return gpu_read(gpu, reg);
+}
+
 static const struct etnaviv_pm_domain doms_3d[] = {
 	{
 		.name = "HI",
@@ -84,13 +106,13 @@ static const struct etnaviv_pm_domain doms_3d[] = {
 		.signal = (const struct etnaviv_pm_signal[]) {
 			{
 				"TOTAL_CYCLES",
-				VIVS_HI_PROFILE_TOTAL_CYCLES,
-				&simple_reg_read
+				0,
+				&hi_total_cycle_read
 			},
 			{
 				"IDLE_CYCLES",
-				VIVS_HI_PROFILE_IDLE_CYCLES,
-				&simple_reg_read
+				0,
+				&hi_total_idle_cycle_read
 			},
 			{
 				"AXI_CYCLES_READ_REQUEST_STALLED",
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index a813c824e154..4e3e95dce6d8 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -3,7 +3,7 @@
  * Copyright (C) 2017 Etnaviv Project
  */
 
-#include <linux/kthread.h>
+#include <linux/moduleparam.h>
 
 #include "etnaviv_drv.h"
 #include "etnaviv_dump.h"
@@ -115,7 +115,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
 		drm_sched_increase_karma(sched_job);
 
 	/* get the GPU back into the init state */
-	etnaviv_core_dump(gpu);
+	etnaviv_core_dump(submit);
 	etnaviv_gpu_recover_hang(gpu);
 
 	drm_sched_resubmit_jobs(&gpu->sched);
diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h
index 0d5c49dc478c..09d0df8b71c5 100644
--- a/include/uapi/drm/etnaviv_drm.h
+++ b/include/uapi/drm/etnaviv_drm.h
@@ -73,6 +73,7 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT         0x18
 #define ETNAVIV_PARAM_GPU_NUM_CONSTANTS             0x19
 #define ETNAVIV_PARAM_GPU_NUM_VARYINGS              0x1a
+#define ETNAVIV_PARAM_SOFTPIN_START_ADDR            0x1b
 
 #define ETNA_MAX_PIPES 4
 
@@ -148,6 +149,11 @@ struct drm_etnaviv_gem_submit_reloc {
  * then patching the cmdstream for this entry is skipped.  This can
  * avoid kernel needing to map/access the cmdstream bo in the common
  * case.
+ * If the submit is a softpin submit (ETNA_SUBMIT_SOFTPIN) the 'presumed'
+ * field is interpreted as the fixed location to map the bo into the gpu
+ * virtual address space. If the kernel is unable to map the buffer at
+ * this location the submit will fail. This means userspace is responsible
+ * for the whole gpu virtual address management.
  */
 #define ETNA_SUBMIT_BO_READ             0x0001
 #define ETNA_SUBMIT_BO_WRITE            0x0002
@@ -177,9 +183,11 @@ struct drm_etnaviv_gem_submit_pmr {
 #define ETNA_SUBMIT_NO_IMPLICIT         0x0001
 #define ETNA_SUBMIT_FENCE_FD_IN         0x0002
 #define ETNA_SUBMIT_FENCE_FD_OUT        0x0004
+#define ETNA_SUBMIT_SOFTPIN             0x0008
 #define ETNA_SUBMIT_FLAGS		(ETNA_SUBMIT_NO_IMPLICIT | \
 					 ETNA_SUBMIT_FENCE_FD_IN | \
-					 ETNA_SUBMIT_FENCE_FD_OUT)
+					 ETNA_SUBMIT_FENCE_FD_OUT| \
+					 ETNA_SUBMIT_SOFTPIN)
 #define ETNA_PIPE_3D      0x00
 #define ETNA_PIPE_2D      0x01
 #define ETNA_PIPE_VG      0x02