summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-11-21 21:40:50 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2024-11-21 21:40:50 +0100
commit341d041daae52cd5f014f68c1c7d9039db818fca (patch)
tree87630919e6b6b456c8b7d32c2c5d52de6264b247 /include
parentMerge tag 'dma-mapping-6.13-2024-11-19' of git://git.infradead.org/users/hch/... (diff)
parentiommu/arm-smmu-v3: Import IOMMUFD module namespace (diff)
downloadlinux-341d041daae52cd5f014f68c1c7d9039db818fca.tar.xz
linux-341d041daae52cd5f014f68c1c7d9039db818fca.zip
Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd
Pull iommufd updates from Jason Gunthorpe: "Several new features and uAPI for iommufd: - IOMMU_IOAS_MAP_FILE allows passing in a file descriptor as the backing memory for an iommu mapping. To date VFIO/iommufd have used VMA's and pin_user_pages(), this now allows using memfds and memfd_pin_folios(). Notably this creates a pure folio path from the memfd to the iommu page table where memory is never broken down to PAGE_SIZE. - IOMMU_IOAS_CHANGE_PROCESS moves the pinned page accounting between two processes. Combined with the above this allows iommufd to support a VMM re-start using exec() where something like qemu would exec() a new version of itself and fd pass the memfds/iommufd/etc to the new process. The memfd allows DMA access to the memory to continue while the new process is getting setup, and the CHANGE_PROCESS updates all the accounting. - Support for fault reporting to userspace on non-PRI HW, such as ARM stall-mode embedded devices. - IOMMU_VIOMMU_ALLOC introduces the concept of a HW/driver backed virtual iommu. This will be used by VMMs to access hardware features that are contained with in a VM. The first use is to inform the kernel of the virtual SID to physical SID mapping when issuing SID based invalidation on ARM. Further uses will tie HW features that are directly accessed by the VM, such as invalidation queue assignment and others. - IOMMU_VDEVICE_ALLOC informs the kernel about the mapping of virtual device to physical device within a VIOMMU. Minimially this is used to translate VM issued cache invalidation commands from virtual to physical device IDs. - Enhancements to IOMMU_HWPT_INVALIDATE and IOMMU_HWPT_ALLOC to work with the VIOMMU - ARM SMMuv3 support for nested translation. Using the VIOMMU and VDEVICE the driver can model this HW's behavior for nested translation. This includes a shared branch from Will" * tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd: (51 commits) iommu/arm-smmu-v3: Import IOMMUFD module namespace iommufd: IOMMU_IOAS_CHANGE_PROCESS selftest iommufd: Add IOMMU_IOAS_CHANGE_PROCESS iommufd: Lock all IOAS objects iommufd: Export do_update_pinned iommu/arm-smmu-v3: Support IOMMU_HWPT_INVALIDATE using a VIOMMU object iommu/arm-smmu-v3: Allow ATS for IOMMU_DOMAIN_NESTED iommu/arm-smmu-v3: Use S2FWB for NESTED domains iommu/arm-smmu-v3: Support IOMMU_DOMAIN_NESTED iommu/arm-smmu-v3: Support IOMMU_VIOMMU_ALLOC Documentation: userspace-api: iommufd: Update vDEVICE iommufd/selftest: Add vIOMMU coverage for IOMMU_HWPT_INVALIDATE ioctl iommufd/selftest: Add IOMMU_TEST_OP_DEV_CHECK_CACHE test command iommufd/selftest: Add mock_viommu_cache_invalidate iommufd/viommu: Add iommufd_viommu_find_dev helper iommu: Add iommu_copy_struct_from_full_user_array helper iommufd: Allow hwpt_id to carry viommu_id for IOMMU_HWPT_INVALIDATE iommu/viommu: Add cache_invalidate to iommufd_viommu_ops iommufd/selftest: Add IOMMU_VDEVICE_ALLOC test coverage iommufd/viommu: Add IOMMUFD_OBJ_VDEVICE and IOMMU_VDEVICE_ALLOC ioctl ...
Diffstat (limited to 'include')
-rw-r--r--include/acpi/actbl2.h3
-rw-r--r--include/linux/io-pgtable.h2
-rw-r--r--include/linux/iommu.h67
-rw-r--r--include/linux/iommufd.h108
-rw-r--r--include/linux/mm.h1
-rw-r--r--include/uapi/linux/iommufd.h216
-rw-r--r--include/uapi/linux/vfio.h2
7 files changed, 384 insertions, 15 deletions
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index d3858eebc255..2e917a8f8bca 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -453,7 +453,7 @@ struct acpi_table_ccel {
* IORT - IO Remapping Table
*
* Conforms to "IO Remapping Table System Software on ARM Platforms",
- * Document number: ARM DEN 0049E.e, Sep 2022
+ * Document number: ARM DEN 0049E.f, Apr 2024
*
******************************************************************************/
@@ -524,6 +524,7 @@ struct acpi_iort_memory_access {
#define ACPI_IORT_MF_COHERENCY (1)
#define ACPI_IORT_MF_ATTRIBUTES (1<<1)
+#define ACPI_IORT_MF_CANWBS (1<<2)
/*
* IORT node specific subtables
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index b1ecfc3cd5bc..ce86b09ae80f 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -87,6 +87,7 @@ struct io_pgtable_cfg {
* attributes set in the TCR for a non-coherent page-table walker.
*
* IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable.
+ * IO_PGTABLE_QUIRK_ARM_S2FWB: Use the FWB format for the MemAttrs bits
*/
#define IO_PGTABLE_QUIRK_ARM_NS BIT(0)
#define IO_PGTABLE_QUIRK_NO_PERMS BIT(1)
@@ -95,6 +96,7 @@ struct io_pgtable_cfg {
#define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5)
#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6)
#define IO_PGTABLE_QUIRK_ARM_HD BIT(7)
+ #define IO_PGTABLE_QUIRK_ARM_S2FWB BIT(8)
unsigned long quirks;
unsigned long pgsize_bitmap;
unsigned int ias;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bd722f473635..8b02adbd14f7 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -42,6 +42,8 @@ struct notifier_block;
struct iommu_sva;
struct iommu_dma_cookie;
struct iommu_fault_param;
+struct iommufd_ctx;
+struct iommufd_viommu;
#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
@@ -491,7 +493,9 @@ static inline int __iommu_copy_struct_from_user_array(
* @index: Index to the location in the array to copy user data from
* @min_last: The last member of the data structure @kdst points in the
* initial version.
- * Return 0 for success, otherwise -error.
+ *
+ * Copy a single entry from a user array. Return 0 for success, otherwise
+ * -error.
*/
#define iommu_copy_struct_from_user_array(kdst, user_array, data_type, index, \
min_last) \
@@ -500,6 +504,50 @@ static inline int __iommu_copy_struct_from_user_array(
offsetofend(typeof(*(kdst)), min_last))
/**
+ * iommu_copy_struct_from_full_user_array - Copy iommu driver specific user
+ * space data from an iommu_user_data_array
+ * @kdst: Pointer to an iommu driver specific user data that is defined in
+ * include/uapi/linux/iommufd.h
+ * @kdst_entry_size: sizeof(*kdst)
+ * @user_array: Pointer to a struct iommu_user_data_array for a user space
+ * array
+ * @data_type: The data type of the @kdst. Must match with @user_array->type
+ *
+ * Copy the entire user array. kdst must have room for kdst_entry_size *
+ * user_array->entry_num bytes. Return 0 for success, otherwise -error.
+ */
+static inline int
+iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
+ struct iommu_user_data_array *user_array,
+ unsigned int data_type)
+{
+ unsigned int i;
+ int ret;
+
+ if (user_array->type != data_type)
+ return -EINVAL;
+ if (!user_array->entry_num)
+ return -EINVAL;
+ if (likely(user_array->entry_len == kdst_entry_size)) {
+ if (copy_from_user(kdst, user_array->uptr,
+ user_array->entry_num *
+ user_array->entry_len))
+ return -EFAULT;
+ }
+
+ /* Copy item by item */
+ for (i = 0; i != user_array->entry_num; i++) {
+ ret = copy_struct_from_user(
+ kdst + kdst_entry_size * i, kdst_entry_size,
+ user_array->uptr + user_array->entry_len * i,
+ user_array->entry_len);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+/**
* struct iommu_ops - iommu ops and capabilities
* @capable: check capability
* @hw_info: report iommu hardware information. The data buffer returned by this
@@ -542,6 +590,14 @@ static inline int __iommu_copy_struct_from_user_array(
* @remove_dev_pasid: Remove any translation configurations of a specific
* pasid, so that any DMA transactions with this pasid
* will be blocked by the hardware.
+ * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind
+ * the @dev, as the set of virtualization resources shared/passed
+ * to user space IOMMU instance. And associate it with a nesting
+ * @parent_domain. The @viommu_type must be defined in the header
+ * include/uapi/linux/iommufd.h
+ * It is required to call iommufd_viommu_alloc() helper for
+ * a bundled allocation of the core and the driver structures,
+ * using the given @ictx pointer.
* @pgsize_bitmap: bitmap of all possible supported page sizes
* @owner: Driver module providing these ops
* @identity_domain: An always available, always attachable identity
@@ -591,6 +647,10 @@ struct iommu_ops {
void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid,
struct iommu_domain *domain);
+ struct iommufd_viommu *(*viommu_alloc)(
+ struct device *dev, struct iommu_domain *parent_domain,
+ struct iommufd_ctx *ictx, unsigned int viommu_type);
+
const struct iommu_domain_ops *default_domain_ops;
unsigned long pgsize_bitmap;
struct module *owner;
@@ -635,7 +695,6 @@ struct iommu_ops {
* @enforce_cache_coherency: Prevent any kind of DMA from bypassing IOMMU_CACHE,
* including no-snoop TLPs on PCIe or other platform
* specific mechanisms.
- * @enable_nesting: Enable nesting
* @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*)
* @free: Release the domain after use.
*/
@@ -663,7 +722,6 @@ struct iommu_domain_ops {
dma_addr_t iova);
bool (*enforce_cache_coherency)(struct iommu_domain *domain);
- int (*enable_nesting)(struct iommu_domain *domain);
int (*set_pgtable_quirks)(struct iommu_domain *domain,
unsigned long quirks);
@@ -844,7 +902,6 @@ extern void iommu_group_put(struct iommu_group *group);
extern int iommu_group_id(struct iommu_group *group);
extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *);
-int iommu_enable_nesting(struct iommu_domain *domain);
int iommu_set_pgtable_quirks(struct iommu_domain *domain,
unsigned long quirks);
@@ -994,6 +1051,8 @@ struct iommu_fwspec {
/* ATS is supported */
#define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0)
+/* CANWBS is supported */
+#define IOMMU_FWSPEC_PCI_RC_CANWBS (1 << 1)
/*
* An iommu attach handle represents a relationship between an iommu domain
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 30f832a60ccb..11110c749200 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -8,16 +8,46 @@
#include <linux/err.h>
#include <linux/errno.h>
+#include <linux/refcount.h>
#include <linux/types.h>
+#include <linux/xarray.h>
struct device;
struct file;
struct iommu_group;
+struct iommu_user_data;
+struct iommu_user_data_array;
struct iommufd_access;
struct iommufd_ctx;
struct iommufd_device;
+struct iommufd_viommu_ops;
struct page;
+enum iommufd_object_type {
+ IOMMUFD_OBJ_NONE,
+ IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
+ IOMMUFD_OBJ_DEVICE,
+ IOMMUFD_OBJ_HWPT_PAGING,
+ IOMMUFD_OBJ_HWPT_NESTED,
+ IOMMUFD_OBJ_IOAS,
+ IOMMUFD_OBJ_ACCESS,
+ IOMMUFD_OBJ_FAULT,
+ IOMMUFD_OBJ_VIOMMU,
+ IOMMUFD_OBJ_VDEVICE,
+#ifdef CONFIG_IOMMUFD_TEST
+ IOMMUFD_OBJ_SELFTEST,
+#endif
+ IOMMUFD_OBJ_MAX,
+};
+
+/* Base struct for all objects with a userspace ID handle. */
+struct iommufd_object {
+ refcount_t shortterm_users;
+ refcount_t users;
+ enum iommufd_object_type type;
+ unsigned int id;
+};
+
struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
struct device *dev, u32 *id);
void iommufd_device_unbind(struct iommufd_device *idev);
@@ -54,6 +84,45 @@ void iommufd_access_detach(struct iommufd_access *access);
void iommufd_ctx_get(struct iommufd_ctx *ictx);
+struct iommufd_viommu {
+ struct iommufd_object obj;
+ struct iommufd_ctx *ictx;
+ struct iommu_device *iommu_dev;
+ struct iommufd_hwpt_paging *hwpt;
+
+ const struct iommufd_viommu_ops *ops;
+
+ struct xarray vdevs;
+
+ unsigned int type;
+};
+
+/**
+ * struct iommufd_viommu_ops - vIOMMU specific operations
+ * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory
+ * of the vIOMMU will be free-ed by iommufd core after calling this op
+ * @alloc_domain_nested: Allocate a IOMMU_DOMAIN_NESTED on a vIOMMU that holds a
+ * nesting parent domain (IOMMU_DOMAIN_PAGING). @user_data
+ * must be defined in include/uapi/linux/iommufd.h.
+ * It must fully initialize the new iommu_domain before
+ * returning. Upon failure, ERR_PTR must be returned.
+ * @cache_invalidate: Flush hardware cache used by a vIOMMU. It can be used for
+ * any IOMMU hardware specific cache: TLB and device cache.
+ * The @array passes in the cache invalidation requests, in
+ * form of a driver data structure. A driver must update the
+ * array->entry_num to report the number of handled requests.
+ * The data structure of the array entry must be defined in
+ * include/uapi/linux/iommufd.h
+ */
+struct iommufd_viommu_ops {
+ void (*destroy)(struct iommufd_viommu *viommu);
+ struct iommu_domain *(*alloc_domain_nested)(
+ struct iommufd_viommu *viommu, u32 flags,
+ const struct iommu_user_data *user_data);
+ int (*cache_invalidate)(struct iommufd_viommu *viommu,
+ struct iommu_user_data_array *array);
+};
+
#if IS_ENABLED(CONFIG_IOMMUFD)
struct iommufd_ctx *iommufd_ctx_from_file(struct file *file);
struct iommufd_ctx *iommufd_ctx_from_fd(int fd);
@@ -111,4 +180,43 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
return -EOPNOTSUPP;
}
#endif /* CONFIG_IOMMUFD */
+
+#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE)
+struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
+ size_t size,
+ enum iommufd_object_type type);
+struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
+ unsigned long vdev_id);
+#else /* !CONFIG_IOMMUFD_DRIVER_CORE */
+static inline struct iommufd_object *
+_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size,
+ enum iommufd_object_type type)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline struct device *
+iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id)
+{
+ return NULL;
+}
+#endif /* CONFIG_IOMMUFD_DRIVER_CORE */
+
+/*
+ * Helpers for IOMMU driver to allocate driver structures that will be freed by
+ * the iommufd core. The free op will be called prior to freeing the memory.
+ */
+#define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops) \
+ ({ \
+ drv_struct *ret; \
+ \
+ static_assert(__same_type(struct iommufd_viommu, \
+ ((drv_struct *)NULL)->member)); \
+ static_assert(offsetof(drv_struct, member.obj) == 0); \
+ ret = (drv_struct *)_iommufd_object_alloc( \
+ ictx, sizeof(drv_struct), IOMMUFD_OBJ_VIOMMU); \
+ if (!IS_ERR(ret)) \
+ ret->member.ops = viommu_ops; \
+ ret; \
+ })
#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index feb5c8021bef..673771f34674 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2536,6 +2536,7 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end,
struct folio **folios, unsigned int max_folios,
pgoff_t *offset);
+int folio_add_pins(struct folio *folio, unsigned int pins);
int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages);
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 72010f71c5e4..4ae8b1ee0444 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -51,6 +51,10 @@ enum {
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
+ IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f,
+ IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
+ IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
+ IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
};
/**
@@ -214,6 +218,30 @@ struct iommu_ioas_map {
#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
/**
+ * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
+ * @size: sizeof(struct iommu_ioas_map_file)
+ * @flags: same as for iommu_ioas_map
+ * @ioas_id: same as for iommu_ioas_map
+ * @fd: the memfd to map
+ * @start: byte offset from start of file to map from
+ * @length: same as for iommu_ioas_map
+ * @iova: same as for iommu_ioas_map
+ *
+ * Set an IOVA mapping from a memfd file. All other arguments and semantics
+ * match those of IOMMU_IOAS_MAP.
+ */
+struct iommu_ioas_map_file {
+ __u32 size;
+ __u32 flags;
+ __u32 ioas_id;
+ __s32 fd;
+ __aligned_u64 start;
+ __aligned_u64 length;
+ __aligned_u64 iova;
+};
+#define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE)
+
+/**
* struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
* @size: sizeof(struct iommu_ioas_copy)
* @flags: Combination of enum iommufd_ioas_map_flags
@@ -395,13 +423,35 @@ struct iommu_hwpt_vtd_s1 {
};
/**
+ * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE
+ * (IOMMU_HWPT_DATA_ARM_SMMUV3)
+ *
+ * @ste: The first two double words of the user space Stream Table Entry for
+ * the translation. Must be little-endian.
+ * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
+ * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
+ * - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
+ *
+ * -EIO will be returned if @ste is not legal or contains any non-allowed field.
+ * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass
+ * nested domain will translate the same as the nesting parent. The S1 will
+ * install a Context Descriptor Table pointing at userspace memory translated
+ * by the nesting parent.
+ */
+struct iommu_hwpt_arm_smmuv3 {
+ __aligned_le64 ste[2];
+};
+
+/**
* enum iommu_hwpt_data_type - IOMMU HWPT Data Type
* @IOMMU_HWPT_DATA_NONE: no data
* @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
+ * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table
*/
enum iommu_hwpt_data_type {
IOMMU_HWPT_DATA_NONE = 0,
IOMMU_HWPT_DATA_VTD_S1 = 1,
+ IOMMU_HWPT_DATA_ARM_SMMUV3 = 2,
};
/**
@@ -409,7 +459,7 @@ enum iommu_hwpt_data_type {
* @size: sizeof(struct iommu_hwpt_alloc)
* @flags: Combination of enum iommufd_hwpt_alloc_flags
* @dev_id: The device to allocate this HWPT for
- * @pt_id: The IOAS or HWPT to connect this HWPT to
+ * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to
* @out_hwpt_id: The ID of the new HWPT
* @__reserved: Must be 0
* @data_type: One of enum iommu_hwpt_data_type
@@ -428,11 +478,13 @@ enum iommu_hwpt_data_type {
* IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
* nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
*
- * A user-managed nested HWPT will be created from a given parent HWPT via
- * @pt_id, in which the parent HWPT must be allocated previously via the
- * same ioctl from a given IOAS (@pt_id). In this case, the @data_type
- * must be set to a pre-defined type corresponding to an I/O page table
- * type supported by the underlying IOMMU hardware.
+ * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a
+ * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be
+ * allocated previously via the same ioctl from a given IOAS (@pt_id). In this
+ * case, the @data_type must be set to a pre-defined type corresponding to an
+ * I/O page table type supported by the underlying IOMMU hardware. The device
+ * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU
+ * instance.
*
* If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
* @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
@@ -485,14 +537,49 @@ struct iommu_hw_info_vtd {
};
/**
+ * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
+ * (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
+ *
+ * @flags: Must be set to 0
+ * @__reserved: Must be 0
+ * @idr: Implemented features for ARM SMMU Non-secure programming interface
+ * @iidr: Information about the implementation and implementer of ARM SMMU,
+ * and architecture version supported
+ * @aidr: ARM SMMU architecture version
+ *
+ * For the details of @idr, @iidr and @aidr, please refer to the chapters
+ * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
+ *
+ * User space should read the underlying ARM SMMUv3 hardware information for
+ * the list of supported features.
+ *
+ * Note that these values reflect the raw HW capability, without any insight if
+ * any required kernel driver support is present. Bits may be set indicating the
+ * HW has functionality that is lacking kernel software support, such as BTM. If
+ * a VMM is using this information to construct emulated copies of these
+ * registers it should only forward bits that it knows it can support.
+ *
+ * In future, presence of required kernel support will be indicated in flags.
+ */
+struct iommu_hw_info_arm_smmuv3 {
+ __u32 flags;
+ __u32 __reserved;
+ __u32 idr[6];
+ __u32 iidr;
+ __u32 aidr;
+};
+
+/**
* enum iommu_hw_info_type - IOMMU Hardware Info Types
* @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
* info
* @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
+ * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
*/
enum iommu_hw_info_type {
IOMMU_HW_INFO_TYPE_NONE = 0,
IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
+ IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
};
/**
@@ -627,9 +714,11 @@ struct iommu_hwpt_get_dirty_bitmap {
* enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
* Data Type
* @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
+ * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3
*/
enum iommu_hwpt_invalidate_data_type {
IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
+ IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1,
};
/**
@@ -669,9 +758,31 @@ struct iommu_hwpt_vtd_s1_invalidate {
};
/**
+ * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cahce invalidation
+ * (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
+ * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ.
+ * Must be little-endian.
+ *
+ * Supported command list only when passing in a vIOMMU via @hwpt_id:
+ * CMDQ_OP_TLBI_NSNH_ALL
+ * CMDQ_OP_TLBI_NH_VA
+ * CMDQ_OP_TLBI_NH_VAA
+ * CMDQ_OP_TLBI_NH_ALL
+ * CMDQ_OP_TLBI_NH_ASID
+ * CMDQ_OP_ATC_INV
+ * CMDQ_OP_CFGI_CD
+ * CMDQ_OP_CFGI_CD_ALL
+ *
+ * -EIO will be returned if the command is not supported.
+ */
+struct iommu_viommu_arm_smmuv3_invalidate {
+ __aligned_le64 cmd[2];
+};
+
+/**
* struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
* @size: sizeof(struct iommu_hwpt_invalidate)
- * @hwpt_id: ID of a nested HWPT for cache invalidation
+ * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation
* @data_uptr: User pointer to an array of driver-specific cache invalidation
* data.
* @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
@@ -682,8 +793,11 @@ struct iommu_hwpt_vtd_s1_invalidate {
* Output the number of requests successfully handled by kernel.
* @__reserved: Must be 0.
*
- * Invalidate the iommu cache for user-managed page table. Modifications on a
- * user-managed page table should be followed by this operation to sync cache.
+ * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications
+ * on a user-managed page table should be followed by this operation, if a HWPT
+ * is passed in via @hwpt_id. Other caches, such as device cache or descriptor
+ * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field.
+ *
* Each ioctl can support one or more cache invalidation requests in the array
* that has a total size of @entry_len * @entry_num.
*
@@ -797,4 +911,88 @@ struct iommu_fault_alloc {
__u32 out_fault_fd;
};
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
+
+/**
+ * enum iommu_viommu_type - Virtual IOMMU Type
+ * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use
+ * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type
+ */
+enum iommu_viommu_type {
+ IOMMU_VIOMMU_TYPE_DEFAULT = 0,
+ IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,
+};
+
+/**
+ * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC)
+ * @size: sizeof(struct iommu_viommu_alloc)
+ * @flags: Must be 0
+ * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type
+ * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU
+ * @hwpt_id: ID of a nesting parent HWPT to associate to
+ * @out_viommu_id: Output virtual IOMMU ID for the allocated object
+ *
+ * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's
+ * virtualization support that is a security-isolated slice of the real IOMMU HW
+ * that is unique to a specific VM. Operations global to the IOMMU are connected
+ * to the vIOMMU, such as:
+ * - Security namespace for guest owned ID, e.g. guest-controlled cache tags
+ * - Non-device-affiliated event reporting, e.g. invalidation queue errors
+ * - Access to a sharable nesting parent pagetable across physical IOMMUs
+ * - Virtualization of various platforms IDs, e.g. RIDs and others
+ * - Delivery of paravirtualized invalidation
+ * - Direct assigned invalidation queues
+ * - Direct assigned interrupts
+ */
+struct iommu_viommu_alloc {
+ __u32 size;
+ __u32 flags;
+ __u32 type;
+ __u32 dev_id;
+ __u32 hwpt_id;
+ __u32 out_viommu_id;
+};
+#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
+
+/**
+ * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC)
+ * @size: sizeof(struct iommu_vdevice_alloc)
+ * @viommu_id: vIOMMU ID to associate with the virtual device
+ * @dev_id: The physical device to allocate a virtual instance on the vIOMMU
+ * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTORY
+ * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID
+ * of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table
+ *
+ * Allocate a virtual device instance (for a physical device) against a vIOMMU.
+ * This instance holds the device's information (related to its vIOMMU) in a VM.
+ */
+struct iommu_vdevice_alloc {
+ __u32 size;
+ __u32 viommu_id;
+ __u32 dev_id;
+ __u32 out_vdevice_id;
+ __aligned_u64 virt_id;
+};
+#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)
+
+/**
+ * struct iommu_ioas_change_process - ioctl(VFIO_IOAS_CHANGE_PROCESS)
+ * @size: sizeof(struct iommu_ioas_change_process)
+ * @__reserved: Must be 0
+ *
+ * This transfers pinned memory counts for every memory map in every IOAS
+ * in the context to the current process. This only supports maps created
+ * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present.
+ * If the ioctl returns a failure status, then nothing is changed.
+ *
+ * This API is useful for transferring operation of a device from one process
+ * to another, such as during userland live update.
+ */
+struct iommu_ioas_change_process {
+ __u32 size;
+ __u32 __reserved;
+};
+
+#define IOMMU_IOAS_CHANGE_PROCESS \
+ _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)
+
#endif
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 2b68e6cdf190..c8dbf8219c4f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -35,7 +35,7 @@
#define VFIO_EEH 5
/* Two-stage IOMMU */
-#define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */
+#define __VFIO_RESERVED_TYPE1_NESTING_IOMMU 6 /* Implies v2 */
#define VFIO_SPAPR_TCE_v2_IOMMU 7