[9/9] accel/rocket: Add IOCTLs for synchronizing memory accesses

Message ID 20240612-6-10-rocket-v1-9-060e48eea250@tomeuvizoso.net (mailing list archive)
State Not Applicable
Headers
Series New DRM accel driver for Rockchip's RKNN NPU |

Commit Message

Tomeu Vizoso June 12, 2024, 1:53 p.m. UTC
  The NPU cores have their own access to the memory bus, and this isn't
cache coherent with the CPUs.

Add IOCTLs so userspace can mark when the caches need to be flushed, and
also when a writer job needs to be waited for before the buffer can be
accessed from the CPU.

Initially based on the same IOCTLs from the Etnaviv driver.

Signed-off-by: Tomeu Vizoso <tomeu@tomeuvizoso.net>
---
 drivers/accel/rocket/rocket_drv.c |  2 ++
 drivers/accel/rocket/rocket_gem.c | 68 +++++++++++++++++++++++++++++++++++++++
 drivers/accel/rocket/rocket_gem.h |  7 +++-
 include/uapi/drm/rocket_accel.h   | 20 +++++++++++-
 4 files changed, 95 insertions(+), 2 deletions(-)
  

Comments

Friedrich Vock June 12, 2024, 7:44 p.m. UTC | #1
On 12.06.24 15:53, Tomeu Vizoso wrote:
> The NPU cores have their own access to the memory bus, and this isn't
> cache coherent with the CPUs.
>
> Add IOCTLs so userspace can mark when the caches need to be flushed, and
> also when a writer job needs to be waited for before the buffer can be
> accessed from the CPU.
>
> Initially based on the same IOCTLs from the Etnaviv driver.
>
> Signed-off-by: Tomeu Vizoso <tomeu@tomeuvizoso.net>
> ---
>   drivers/accel/rocket/rocket_drv.c |  2 ++
>   drivers/accel/rocket/rocket_gem.c | 68 +++++++++++++++++++++++++++++++++++++++
>   drivers/accel/rocket/rocket_gem.h |  7 +++-
>   include/uapi/drm/rocket_accel.h   | 20 +++++++++++-
>   4 files changed, 95 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/accel/rocket/rocket_drv.c b/drivers/accel/rocket/rocket_drv.c
> index adcb9a685dd8..d41a4f4b330d 100644
> --- a/drivers/accel/rocket/rocket_drv.c
> +++ b/drivers/accel/rocket/rocket_drv.c
> @@ -73,6 +73,8 @@ static const struct drm_ioctl_desc rocket_drm_driver_ioctls[] = {
>   	DRM_IOCTL_DEF_DRV(ROCKET_##n, rocket_ioctl_##func, 0)
>
>   	ROCKET_IOCTL(CREATE_BO, create_bo),
> +	ROCKET_IOCTL(PREP_BO, prep_bo),
> +	ROCKET_IOCTL(FINI_BO, fini_bo),
>   	ROCKET_IOCTL(SUBMIT, submit),
>   };
>
> diff --git a/drivers/accel/rocket/rocket_gem.c b/drivers/accel/rocket/rocket_gem.c
> index e10eb886f150..afacdf91491e 100644
> --- a/drivers/accel/rocket/rocket_gem.c
> +++ b/drivers/accel/rocket/rocket_gem.c
> @@ -2,7 +2,9 @@
>   /* Copyright 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
>
>   #include <drm/drm_device.h>
> +#include <drm/drm_utils.h>
>   #include <drm/rocket_accel.h>
> +#include <linux/dma-mapping.h>
>
>   #include "rocket_gem.h"
>
> @@ -66,3 +68,69 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *
>
>   	return ret;
>   }
> +
> +static inline enum dma_data_direction rocket_op_to_dma_dir(u32 op)
> +{
> +	if (op & ROCKET_PREP_READ)
> +		return DMA_FROM_DEVICE;
> +	else if (op & ROCKET_PREP_WRITE)
> +		return DMA_TO_DEVICE;
> +	else
> +		return DMA_BIDIRECTIONAL;

Drive-by comment: This logic looks incorrect to me? If op ==
ROCKET_PREP_READ | ROCKET_PREP_WRITE, this code will return
DMA_FROM_DEVICE when it should return DMA_BIDIRECTIONAL.

The logic would work if it was inverted:
if (!(op & ROCKET_PREP_WRITE))
	return DMA_FROM_DEVICE;
else if (!(op & ROCKET_PREP_READ))
	return DMA_TO_DEVICE;
else
	return DMA_BIDIRECTIONAL;

Thanks,
Friedrich

> +}
> +
> +int rocket_ioctl_prep_bo(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +	struct drm_rocket_prep_bo *args = data;
> +	unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
> +	struct drm_gem_object *gem_obj;
> +	struct drm_gem_shmem_object *shmem_obj;
> +	bool write = !!(args->op & ROCKET_PREP_WRITE);
> +	long ret = 0;
> +
> +	if (args->op & ~(ROCKET_PREP_READ | ROCKET_PREP_WRITE))
> +		return -EINVAL;
> +
> +	gem_obj = drm_gem_object_lookup(file, args->handle);
> +	if (!gem_obj)
> +		return -ENOENT;
> +
> +	ret = dma_resv_wait_timeout(gem_obj->resv, dma_resv_usage_rw(write),
> +				    true, timeout);
> +	if (!ret)
> +		ret = timeout ? -ETIMEDOUT : -EBUSY;
> +
> +	shmem_obj = &to_rocket_bo(gem_obj)->base;
> +
> +	dma_sync_sgtable_for_cpu(dev->dev, shmem_obj->sgt, rocket_op_to_dma_dir(args->op));
> +	to_rocket_bo(gem_obj)->last_cpu_prep_op = args->op;
> +
> +	drm_gem_object_put(gem_obj);
> +
> +	return ret;
> +}
> +
> +int rocket_ioctl_fini_bo(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +	struct drm_rocket_fini_bo *args = data;
> +	struct drm_gem_object *gem_obj;
> +	struct rocket_gem_object *rkt_obj;
> +	struct drm_gem_shmem_object *shmem_obj;
> +
> +	gem_obj = drm_gem_object_lookup(file, args->handle);
> +	if (!gem_obj)
> +		return -ENOENT;
> +
> +	rkt_obj = to_rocket_bo(gem_obj);
> +	shmem_obj = &rkt_obj->base;
> +
> +	WARN_ON(rkt_obj->last_cpu_prep_op == 0);
> +
> +	dma_sync_sgtable_for_device(dev->dev, shmem_obj->sgt,
> +				    rocket_op_to_dma_dir(rkt_obj->last_cpu_prep_op));
> +	rkt_obj->last_cpu_prep_op = 0;
> +
> +	drm_gem_object_put(gem_obj);
> +
> +	return 0;
> +}
> diff --git a/drivers/accel/rocket/rocket_gem.h b/drivers/accel/rocket/rocket_gem.h
> index 2cb294f25c19..9b1c485ec600 100644
> --- a/drivers/accel/rocket/rocket_gem.h
> +++ b/drivers/accel/rocket/rocket_gem.h
> @@ -13,16 +13,21 @@ struct rocket_gem_object {
>   	struct mutex mutex;
>   	size_t size;
>   	u32 offset;
> +	u32 last_cpu_prep_op;
>   };
>
>   struct drm_gem_object *rocket_gem_create_object(struct drm_device *dev, size_t size);
>
>   int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *file);
>
> +int rocket_ioctl_prep_bo(struct drm_device *dev, void *data, struct drm_file *file);
> +
> +int rocket_ioctl_fini_bo(struct drm_device *dev, void *data, struct drm_file *file);
> +
>   static inline
>   struct  rocket_gem_object *to_rocket_bo(struct drm_gem_object *obj)
>   {
>   	return container_of(to_drm_gem_shmem_obj(obj), struct rocket_gem_object, base);
>   }
>
> -#endif
> \ No newline at end of file
> +#endif
> diff --git a/include/uapi/drm/rocket_accel.h b/include/uapi/drm/rocket_accel.h
> index 888c9413e4cd..1539af0af4fe 100644
> --- a/include/uapi/drm/rocket_accel.h
> +++ b/include/uapi/drm/rocket_accel.h
> @@ -12,9 +12,13 @@ extern "C" {
>   #endif
>
>   #define DRM_ROCKET_CREATE_BO			0x00
> -#define DRM_ROCKET_SUBMIT			0x01
> +#define DRM_ROCKET_PREP_BO			0x01
> +#define DRM_ROCKET_FINI_BO			0x02
> +#define DRM_ROCKET_SUBMIT			0x03
>
>   #define DRM_IOCTL_ROCKET_CREATE_BO		DRM_IOWR(DRM_COMMAND_BASE + DRM_ROCKET_CREATE_BO, struct drm_rocket_create_bo)
> +#define DRM_IOCTL_ROCKET_PREP_BO		DRM_IOW(DRM_COMMAND_BASE + DRM_ROCKET_PREP_BO, struct drm_rocket_prep_bo)
> +#define DRM_IOCTL_ROCKET_FINI_BO		DRM_IOW(DRM_COMMAND_BASE + DRM_ROCKET_FINI_BO, struct drm_rocket_fini_bo)
>   #define DRM_IOCTL_ROCKET_SUBMIT			DRM_IOW(DRM_COMMAND_BASE + DRM_ROCKET_SUBMIT, struct drm_rocket_submit)
>
>   /**
> @@ -38,6 +42,20 @@ struct drm_rocket_create_bo {
>   	__u64 offset;
>   };
>
> +#define ROCKET_PREP_READ        0x01
> +#define ROCKET_PREP_WRITE       0x02
> +
> +struct drm_rocket_prep_bo {
> +	__u32 handle;		/* in */
> +	__u32 op;		/* in, mask of ROCKET_PREP_x */
> +	__s64 timeout_ns;	/* in */
> +};
> +
> +struct drm_rocket_fini_bo {
> +	__u32 handle;		/* in */
> +	__u32 flags;		/* in, placeholder for now, no defined values */
> +};
> +
>   /**
>    * struct drm_rocket_task - A task to be run on the NPU
>    *
>
  
Jeffrey Hugo June 14, 2024, 4:39 p.m. UTC | #2
On 6/12/2024 7:53 AM, Tomeu Vizoso wrote:
> diff --git a/include/uapi/drm/rocket_accel.h b/include/uapi/drm/rocket_accel.h
> index 888c9413e4cd..1539af0af4fe 100644
> --- a/include/uapi/drm/rocket_accel.h
> +++ b/include/uapi/drm/rocket_accel.h
> @@ -12,9 +12,13 @@ extern "C" {
>   #endif
>   
>   #define DRM_ROCKET_CREATE_BO			0x00
> -#define DRM_ROCKET_SUBMIT			0x01
> +#define DRM_ROCKET_PREP_BO			0x01
> +#define DRM_ROCKET_FINI_BO			0x02
> +#define DRM_ROCKET_SUBMIT			0x03

This looks like a uAPI breaking change.  Shouldn't you have defined 
SUBMIT as 0x03 from the beginning, or put the new BO ioctls after it?
  

Patch

diff --git a/drivers/accel/rocket/rocket_drv.c b/drivers/accel/rocket/rocket_drv.c
index adcb9a685dd8..d41a4f4b330d 100644
--- a/drivers/accel/rocket/rocket_drv.c
+++ b/drivers/accel/rocket/rocket_drv.c
@@ -73,6 +73,8 @@  static const struct drm_ioctl_desc rocket_drm_driver_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(ROCKET_##n, rocket_ioctl_##func, 0)
 
 	ROCKET_IOCTL(CREATE_BO, create_bo),
+	ROCKET_IOCTL(PREP_BO, prep_bo),
+	ROCKET_IOCTL(FINI_BO, fini_bo),
 	ROCKET_IOCTL(SUBMIT, submit),
 };
 
diff --git a/drivers/accel/rocket/rocket_gem.c b/drivers/accel/rocket/rocket_gem.c
index e10eb886f150..afacdf91491e 100644
--- a/drivers/accel/rocket/rocket_gem.c
+++ b/drivers/accel/rocket/rocket_gem.c
@@ -2,7 +2,9 @@ 
 /* Copyright 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
 
 #include <drm/drm_device.h>
+#include <drm/drm_utils.h>
 #include <drm/rocket_accel.h>
+#include <linux/dma-mapping.h>
 
 #include "rocket_gem.h"
 
@@ -66,3 +68,69 @@  int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *
 
 	return ret;
 }
+
+static inline enum dma_data_direction rocket_op_to_dma_dir(u32 op)
+{
+	if (op & ROCKET_PREP_READ)
+		return DMA_FROM_DEVICE;
+	else if (op & ROCKET_PREP_WRITE)
+		return DMA_TO_DEVICE;
+	else
+		return DMA_BIDIRECTIONAL;
+}
+
+int rocket_ioctl_prep_bo(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_rocket_prep_bo *args = data;
+	unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+	struct drm_gem_object *gem_obj;
+	struct drm_gem_shmem_object *shmem_obj;
+	bool write = !!(args->op & ROCKET_PREP_WRITE);
+	long ret = 0;
+
+	if (args->op & ~(ROCKET_PREP_READ | ROCKET_PREP_WRITE))
+		return -EINVAL;
+
+	gem_obj = drm_gem_object_lookup(file, args->handle);
+	if (!gem_obj)
+		return -ENOENT;
+
+	ret = dma_resv_wait_timeout(gem_obj->resv, dma_resv_usage_rw(write),
+				    true, timeout);
+	if (!ret)
+		ret = timeout ? -ETIMEDOUT : -EBUSY;
+
+	shmem_obj = &to_rocket_bo(gem_obj)->base;
+
+	dma_sync_sgtable_for_cpu(dev->dev, shmem_obj->sgt, rocket_op_to_dma_dir(args->op));
+	to_rocket_bo(gem_obj)->last_cpu_prep_op = args->op;
+
+	drm_gem_object_put(gem_obj);
+
+	return ret;
+}
+
+int rocket_ioctl_fini_bo(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_rocket_fini_bo *args = data;
+	struct drm_gem_object *gem_obj;
+	struct rocket_gem_object *rkt_obj;
+	struct drm_gem_shmem_object *shmem_obj;
+
+	gem_obj = drm_gem_object_lookup(file, args->handle);
+	if (!gem_obj)
+		return -ENOENT;
+
+	rkt_obj = to_rocket_bo(gem_obj);
+	shmem_obj = &rkt_obj->base;
+
+	WARN_ON(rkt_obj->last_cpu_prep_op == 0);
+
+	dma_sync_sgtable_for_device(dev->dev, shmem_obj->sgt,
+				    rocket_op_to_dma_dir(rkt_obj->last_cpu_prep_op));
+	rkt_obj->last_cpu_prep_op = 0;
+
+	drm_gem_object_put(gem_obj);
+
+	return 0;
+}
diff --git a/drivers/accel/rocket/rocket_gem.h b/drivers/accel/rocket/rocket_gem.h
index 2cb294f25c19..9b1c485ec600 100644
--- a/drivers/accel/rocket/rocket_gem.h
+++ b/drivers/accel/rocket/rocket_gem.h
@@ -13,16 +13,21 @@  struct rocket_gem_object {
 	struct mutex mutex;
 	size_t size;
 	u32 offset;
+	u32 last_cpu_prep_op;
 };
 
 struct drm_gem_object *rocket_gem_create_object(struct drm_device *dev, size_t size);
 
 int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *file);
 
+int rocket_ioctl_prep_bo(struct drm_device *dev, void *data, struct drm_file *file);
+
+int rocket_ioctl_fini_bo(struct drm_device *dev, void *data, struct drm_file *file);
+
 static inline
 struct  rocket_gem_object *to_rocket_bo(struct drm_gem_object *obj)
 {
 	return container_of(to_drm_gem_shmem_obj(obj), struct rocket_gem_object, base);
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/include/uapi/drm/rocket_accel.h b/include/uapi/drm/rocket_accel.h
index 888c9413e4cd..1539af0af4fe 100644
--- a/include/uapi/drm/rocket_accel.h
+++ b/include/uapi/drm/rocket_accel.h
@@ -12,9 +12,13 @@  extern "C" {
 #endif
 
 #define DRM_ROCKET_CREATE_BO			0x00
-#define DRM_ROCKET_SUBMIT			0x01
+#define DRM_ROCKET_PREP_BO			0x01
+#define DRM_ROCKET_FINI_BO			0x02
+#define DRM_ROCKET_SUBMIT			0x03
 
 #define DRM_IOCTL_ROCKET_CREATE_BO		DRM_IOWR(DRM_COMMAND_BASE + DRM_ROCKET_CREATE_BO, struct drm_rocket_create_bo)
+#define DRM_IOCTL_ROCKET_PREP_BO		DRM_IOW(DRM_COMMAND_BASE + DRM_ROCKET_PREP_BO, struct drm_rocket_prep_bo)
+#define DRM_IOCTL_ROCKET_FINI_BO		DRM_IOW(DRM_COMMAND_BASE + DRM_ROCKET_FINI_BO, struct drm_rocket_fini_bo)
 #define DRM_IOCTL_ROCKET_SUBMIT			DRM_IOW(DRM_COMMAND_BASE + DRM_ROCKET_SUBMIT, struct drm_rocket_submit)
 
 /**
@@ -38,6 +42,20 @@  struct drm_rocket_create_bo {
 	__u64 offset;
 };
 
+#define ROCKET_PREP_READ        0x01
+#define ROCKET_PREP_WRITE       0x02
+
+struct drm_rocket_prep_bo {
+	__u32 handle;		/* in */
+	__u32 op;		/* in, mask of ROCKET_PREP_x */
+	__s64 timeout_ns;	/* in */
+};
+
+struct drm_rocket_fini_bo {
+	__u32 handle;		/* in */
+	__u32 flags;		/* in, placeholder for now, no defined values */
+};
+
 /**
  * struct drm_rocket_task - A task to be run on the NPU
  *