add swiotlb support to arm

This fixes a cascade of regressions that originally started with
 the addition of the ia64 port, but only got fatal once we removed
 most uses of block layer bounce buffering in Linux 4.18.
 
 The reason is that while the original i386/PAE code that was the first
 architecture that supported > 4GB of memory without an iommu decided to
 leave bounce buffering to the subsystems, which in those days just mean
 block and networking as no one else consumer arbitrary userspace memory.
 
 Later with ia64, x86_64 and other ports we assumed that either an iommu
 or something that fakes it up ("software IOTLB" in beautiful Intel
 speak) is present and that subsystems can rely on that for dealing with
 addressing limitations in devices.   Except that the ARM LPAE scheme
 that added larger physical address to 32-bit ARM did not follow that
 scheme and thus only worked by chance and only for block and networking
 I/O directly to highmem.
 
 Long story, short fix - add swiotlb support to arm when build for LPAE
 platforms, which actuallys turns out to be pretty trivial with the
 modern dma-direct / swiotlb code to fix the Linux 4.18-ish regression.
 -----BEGIN PGP SIGNATURE-----
 
 iQI/BAABCgApFiEEgdbnc3r/njty3Iq9D55TZVIEUYMFAl1DFj8LHGhjaEBsc3Qu
 ZGUACgkQD55TZVIEUYPFqg/+Oh62VCFCkIK07NAeTq6EmrfHI8I1Wm/SFWPOOB+a
 vm7nMcSG3C8K8PRHzGc6Zk3SC1+RrHghcyKw54yLT1Mhroakv6Um7p2y8S3M4tmZ
 uEg8yYbtzxvuaY9T42s2msZURbBCEELzA2bYbQzgQ1zczRI1zuMI07ssMr91IQ91
 HC1OjAUoxUkp/+2uU/X2k6DvPQLSJSyWvKgbi1bjNpE+FRCKJP+2a2K3psBQuDBe
 aJXiz/kD2L/JNvF/e4c414d5GnGXwtIYs1kbskmnj3LeToS+JjX+6ZcENorpScIP
 c20s/3H6nsb14TFy548rJUlAHdcd9kOdeTw+0oPUliNLCogGs6FKNU4N5gVAo+bC
 AWDP0wMHMWkrVz6lQL9PR78IHrHOxFYS5/uHsqqdKo5YTsgaHnwKEiPxX1aiKQ67
 ovUrOnGRo4R9Y4YwD+BbHY9qw9jFMqazBdLWMivK5NxqltsahOug8w2emTFfXzQn
 m4APJYa0RVJA4mkh3ejcci5qHyyzPOjslyIJn7eaJPV2rknkxRn9UngkgJLnzHfc
 +lKiD1zaRy82nV4auPjYRiOdAoQN40YFB/RT16OVkjkT+jJEE2UAMjqh2SRlRusp
 Ce8vK7pw6VpDNGJRQveQA+1n9OR/jl0Jf8R7GFRrf9c/bM1J8GErJ6xS/EwNPrgI
 5dE=
 =D6Uy
 -----END PGP SIGNATURE-----

Merge tag 'arm-swiotlb-5.3' of git://git.infradead.org/users/hch/dma-mapping

Pull arm swiotlb support from Christoph Hellwig:
 "This fixes a cascade of regressions that originally started with the
  addition of the ia64 port, but only got fatal once we removed most
  uses of block layer bounce buffering in Linux 4.18.

  The reason is that while the original i386/PAE code that was the first
  architecture that supported > 4GB of memory without an iommu decided
  to leave bounce buffering to the subsystems, which in those days just
  mean block and networking as no one else consumed arbitrary userspace
  memory.

  Later with ia64, x86_64 and other ports we assumed that either an
  iommu or something that fakes it up ("software IOTLB" in beautiful
  Intel speak) is present and that subsystems can rely on that for
  dealing with addressing limitations in devices. Except that the ARM
  LPAE scheme that added larger physical address to 32-bit ARM did not
  follow that scheme and thus only worked by chance and only for block
  and networking I/O directly to highmem.

  Long story, short fix - add swiotlb support to arm when build for LPAE
  platforms, which actuallys turns out to be pretty trivial with the
  modern dma-direct / swiotlb code to fix the Linux 4.18-ish regression"

* tag 'arm-swiotlb-5.3' of git://git.infradead.org/users/hch/dma-mapping:
  arm: use swiotlb for bounce buffering on LPAE configs
  dma-mapping: check pfn validity in dma_common_{mmap,get_sgtable}
This commit is contained in:
Linus Torvalds 2019-08-02 08:44:33 -07:00
commit 234172f6bb
5 changed files with 85 additions and 3 deletions

View File

@ -18,7 +18,9 @@ extern const struct dma_map_ops arm_coherent_dma_ops;
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
return IS_ENABLED(CONFIG_MMU) ? &arm_dma_ops : NULL;
if (IS_ENABLED(CONFIG_MMU) && !IS_ENABLED(CONFIG_ARM_LPAE))
return &arm_dma_ops;
return NULL;
}
#ifdef __arch_page_to_dma

View File

@ -663,6 +663,11 @@ config ARM_LPAE
depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \
!CPU_32v4 && !CPU_32v3
select PHYS_ADDR_T_64BIT
select SWIOTLB
select ARCH_HAS_DMA_COHERENT_TO_PFN
select ARCH_HAS_DMA_MMAP_PGPROT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU
help
Say Y if you have an ARMv7 processor supporting the LPAE page
table format and you would like to access memory beyond the

View File

@ -15,6 +15,7 @@
#include <linux/init.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/dma-noncoherent.h>
#include <linux/dma-contiguous.h>
#include <linux/highmem.h>
#include <linux/memblock.h>
@ -1125,6 +1126,19 @@ int arm_dma_supported(struct device *dev, u64 mask)
static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
{
/*
* When CONFIG_ARM_LPAE is set, physical address can extend above
* 32-bits, which then can't be addressed by devices that only support
* 32-bit DMA.
* Use the generic dma-direct / swiotlb ops code in that case, as that
* handles bounce buffering for us.
*
* Note: this checks CONFIG_ARM_LPAE instead of CONFIG_SWIOTLB as the
* latter is also selected by the Xen code, but that code for now relies
* on non-NULL dev_dma_ops. To be cleaned up later.
*/
if (IS_ENABLED(CONFIG_ARM_LPAE))
return NULL;
return coherent ? &arm_coherent_dma_ops : &arm_dma_ops;
}
@ -2329,6 +2343,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct dma_map_ops *dma_ops;
dev->archdata.dma_coherent = coherent;
#ifdef CONFIG_SWIOTLB
dev->dma_coherent = coherent;
#endif
/*
* Don't override the dma_ops if they have already been set. Ideally
@ -2363,3 +2380,47 @@ void arch_teardown_dma_ops(struct device *dev)
/* Let arch_setup_dma_ops() start again from scratch upon re-probe */
set_dma_ops(dev, NULL);
}
#ifdef CONFIG_SWIOTLB
void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
size_t size, enum dma_data_direction dir)
{
__dma_page_cpu_to_dev(phys_to_page(paddr), paddr & (PAGE_SIZE - 1),
size, dir);
}
void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
size_t size, enum dma_data_direction dir)
{
__dma_page_dev_to_cpu(phys_to_page(paddr), paddr & (PAGE_SIZE - 1),
size, dir);
}
long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
dma_addr_t dma_addr)
{
return dma_to_pfn(dev, dma_addr);
}
pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
unsigned long attrs)
{
if (!dev_is_dma_coherent(dev))
return __get_dma_pgprot(attrs, prot);
return prot;
}
void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs)
{
return __dma_alloc(dev, size, dma_handle, gfp,
__get_dma_pgprot(attrs, PAGE_KERNEL), false,
attrs, __builtin_return_address(0));
}
void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_handle, unsigned long attrs)
{
__arm_dma_free(dev, size, cpu_addr, dma_handle, attrs, false);
}
#endif /* CONFIG_SWIOTLB */

View File

@ -21,6 +21,7 @@
#include <linux/dma-contiguous.h>
#include <linux/sizes.h>
#include <linux/stop_machine.h>
#include <linux/swiotlb.h>
#include <asm/cp15.h>
#include <asm/mach-types.h>
@ -463,6 +464,10 @@ static void __init free_highpages(void)
*/
void __init mem_init(void)
{
#ifdef CONFIG_ARM_LPAE
swiotlb_init(1);
#endif
set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
/* this will put all unused low memory onto the freelists */

View File

@ -116,11 +116,16 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
int ret;
if (!dev_is_dma_coherent(dev)) {
unsigned long pfn;
if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN))
return -ENXIO;
page = pfn_to_page(arch_dma_coherent_to_pfn(dev, cpu_addr,
dma_addr));
/* If the PFN is not valid, we do not have a struct page */
pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr);
if (!pfn_valid(pfn))
return -ENXIO;
page = pfn_to_page(pfn);
} else {
page = virt_to_page(cpu_addr);
}
@ -170,7 +175,11 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
if (!dev_is_dma_coherent(dev)) {
if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN))
return -ENXIO;
/* If the PFN is not valid, we do not have a struct page */
pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr);
if (!pfn_valid(pfn))
return -ENXIO;
} else {
pfn = page_to_pfn(virt_to_page(cpu_addr));
}