[v2,29/34] staging: vchiq: Add 36-bit address support
Commit Message
From: Phil Elwell <phil@raspberrypi.org>
Conditional on a new compatible string, change the pagelist encoding
such that the top 24 bits are the pfn, leaving 8 bits for run length
(-1).
Signed-off-by: Phil Elwell <phil@raspberrypi.org>
Signed-off-by: Jacopo Mondi <jacopo@jmondi.org>
---
.../interface/vchiq_arm/vchiq_2835_arm.c | 88 ++++++++++++++-----
.../interface/vchiq_arm/vchiq_arm.c | 6 ++
.../interface/vchiq_arm/vchiq_arm.h | 1 +
3 files changed, 74 insertions(+), 21 deletions(-)
Comments
Hi Phil, Laurent,
On Mon, 2020-05-04 at 12:26 +0300, Laurent Pinchart wrote:
> From: Phil Elwell <phil@raspberrypi.org>
>
> Conditional on a new compatible string, change the pagelist encoding
> such that the top 24 bits are the pfn, leaving 8 bits for run length
> (-1).
>
> Signed-off-by: Phil Elwell <phil@raspberrypi.org>
> Signed-off-by: Jacopo Mondi <jacopo@jmondi.org>
> ---
> .../interface/vchiq_arm/vchiq_2835_arm.c | 88 ++++++++++++++-----
> .../interface/vchiq_arm/vchiq_arm.c | 6 ++
> .../interface/vchiq_arm/vchiq_arm.h | 1 +
> 3 files changed, 74 insertions(+), 21 deletions(-)
>
> diff --git
> a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> index 3e422a7eb3f1..ecec84ad4345 100644
> --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> @@ -16,6 +16,8 @@
> #include <soc/bcm2835/raspberrypi-firmware.h>
>
> #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
> +#define VC_SAFE(x) (g_use_36bit_addrs ? ((u32)(x) | 0xc0000000) : (u32)(x))
> +#define IS_VC_SAFE(x) (g_use_36bit_addrs ? !((x) & ~0x3fffffffull) : 1)
As I commented earlier, this is a sign your dma-ranges are wrong, most of the
code below reimplements what is already done by dma-direct (see
kernel/dma/direct.c). Once properly setup, you should be able to use whatever
phys address dmam_alloc_coherent() provides and drop g_use_36bit_addrs.
Note that on arm32+LPAE, dma-direct/swiotlb are the default dma_ops, so this
also applies there.
Regards,
Nicolas
> #include "vchiq_arm.h"
> #include "vchiq_connected.h"
> @@ -62,6 +64,7 @@ static void __iomem *g_regs;
> */
> static unsigned int g_cache_line_size = 32;
> static struct dma_pool *g_dma_pool;
> +static unsigned int g_use_36bit_addrs = 0;
> static unsigned int g_fragments_size;
> static char *g_fragments_base;
> static char *g_free_fragments;
> @@ -104,6 +107,8 @@ int vchiq_platform_init(struct platform_device *pdev,
> struct vchiq_state *state)
> g_cache_line_size = drvdata->cache_line_size;
> g_fragments_size = 2 * g_cache_line_size;
>
> + g_use_36bit_addrs = (dev->dma_pfn_offset == 0);
> +
> /* Allocate space for the channels in coherent memory */
> slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
> frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS);
> @@ -115,14 +120,21 @@ int vchiq_platform_init(struct platform_device *pdev,
> struct vchiq_state *state)
> return -ENOMEM;
> }
>
> + if (!IS_VC_SAFE(slot_phys)) {
> + dev_err(dev, "allocated DMA memory %pad is not VC-safe\n",
> + &slot_phys);
> + return -ENOMEM;
> + }
> +
> WARN_ON(((unsigned long)slot_mem & (PAGE_SIZE - 1)) != 0);
> + channelbase = VC_SAFE(slot_phys);
>
> vchiq_slot_zero = vchiq_init_slots(slot_mem, slot_mem_size);
> if (!vchiq_slot_zero)
> return -EINVAL;
>
> vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] =
> - (int)slot_phys + slot_mem_size;
> + channelbase + slot_mem_size;
> vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
> MAX_FRAGMENTS;
>
> @@ -155,7 +167,6 @@ int vchiq_platform_init(struct platform_device *pdev,
> struct vchiq_state *state)
> }
>
> /* Send the base address of the slots to VideoCore */
> - channelbase = slot_phys;
> err = rpi_firmware_property(fw, RPI_FIRMWARE_VCHIQ_INIT,
> &channelbase, sizeof(channelbase));
> if (err || channelbase) {
> @@ -241,7 +252,7 @@ vchiq_prepare_bulk_data(struct vchiq_bulk *bulk, void
> *offset, int size,
> if (!pagelistinfo)
> return VCHIQ_ERROR;
>
> - bulk->data = (void *)(unsigned long)pagelistinfo->dma_addr;
> + bulk->data = (void *)VC_SAFE(pagelistinfo->dma_addr);
>
> /*
> * Store the pagelistinfo address in remote_data,
> @@ -475,25 +486,60 @@ create_pagelist(char __user *buf, size_t count, unsigned
> short type)
>
> /* Combine adjacent blocks for performance */
> k = 0;
> - for_each_sg(scatterlist, sg, dma_buffers, i) {
> - u32 len = sg_dma_len(sg);
> - u32 addr = sg_dma_address(sg);
> + if (g_use_36bit_addrs) {
> + for_each_sg(scatterlist, sg, dma_buffers, i) {
> + u32 len = sg_dma_len(sg);
> + u64 addr = sg_dma_address(sg);
> + u32 page_id = (u32)((addr >> 4) & ~0xff);
> + u32 sg_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
>
> - /* Note: addrs is the address + page_count - 1
> - * The firmware expects blocks after the first to be page-
> - * aligned and a multiple of the page size
> - */
> - WARN_ON(len == 0);
> - WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
> - WARN_ON(i && (addr & ~PAGE_MASK));
> - if (k > 0 &&
> - ((addrs[k - 1] & PAGE_MASK) +
> - (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT))
> - == (addr & PAGE_MASK))
> - addrs[k - 1] += ((len + PAGE_SIZE - 1) >> PAGE_SHIFT);
> - else
> - addrs[k++] = (addr & PAGE_MASK) |
> - (((len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 1);
> + /* Note: addrs is the address + page_count - 1
> + * The firmware expects blocks after the first to be
> page-
> + * aligned and a multiple of the page size
> + */
> + WARN_ON(len == 0);
> + WARN_ON(i &&
> + (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
> + WARN_ON(i && (addr & ~PAGE_MASK));
> + WARN_ON(upper_32_bits(addr) > 0xf);
> + if (k > 0 &&
> + ((addrs[k - 1] & ~0xff) +
> + (((addrs[k - 1] & 0xff) + 1) << 8)
> + == page_id)) {
> + u32 inc_pages = min(sg_pages,
> + 0xff - (addrs[k - 1] &
> 0xff));
> + addrs[k - 1] += inc_pages;
> + page_id += inc_pages << 8;
> + sg_pages -= inc_pages;
> + }
> + while (sg_pages) {
> + u32 inc_pages = min(sg_pages, 0x100u);
> + addrs[k++] = page_id | (inc_pages - 1);
> + page_id += inc_pages << 8;
> + sg_pages -= inc_pages;
> + }
> + }
> + } else {
> + for_each_sg(scatterlist, sg, dma_buffers, i) {
> + u32 len = sg_dma_len(sg);
> + u32 addr = VC_SAFE(sg_dma_address(sg));
> + u32 new_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
> +
> + /* Note: addrs is the address + page_count - 1
> + * The firmware expects blocks after the first to be
> page-
> + * aligned and a multiple of the page size
> + */
> + WARN_ON(len == 0);
> + WARN_ON(i && (i != (dma_buffers - 1)) && (len &
> ~PAGE_MASK));
> + WARN_ON(i && (addr & ~PAGE_MASK));
> + if (k > 0 &&
> + ((addrs[k - 1] & PAGE_MASK) +
> + (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT))
> + == (addr & PAGE_MASK))
> + addrs[k - 1] += new_pages;
> + else
> + addrs[k++] = (addr & PAGE_MASK) | (new_pages -
> 1);
> + }
> }
>
> /* Partial cache lines (fragments) require special measures */
> diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
> b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
> index d1a556f16499..dd3c8f829daa 100644
> --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
> +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
> @@ -117,6 +117,11 @@ static struct vchiq_drvdata bcm2836_drvdata = {
> .cache_line_size = 64,
> };
>
> +static struct vchiq_drvdata bcm2711_drvdata = {
> + .cache_line_size = 64,
> + .use_36bit_addrs = true,
> +};
> +
> static const char *const ioctl_names[] = {
> "CONNECT",
> "SHUTDOWN",
> @@ -2710,6 +2715,7 @@ void vchiq_platform_conn_state_changed(struct
> vchiq_state *state,
> static const struct of_device_id vchiq_of_match[] = {
> { .compatible = "brcm,bcm2835-vchiq", .data = &bcm2835_drvdata },
> { .compatible = "brcm,bcm2836-vchiq", .data = &bcm2836_drvdata },
> + { .compatible = "brcm,bcm2711-vchiq", .data = &bcm2711_drvdata },
> {},
> };
> MODULE_DEVICE_TABLE(of, vchiq_of_match);
> diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h
> b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h
> index 0784c5002417..f8b1c005af62 100644
> --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h
> +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h
> @@ -52,6 +52,7 @@ struct vchiq_arm_state {
>
> struct vchiq_drvdata {
> const unsigned int cache_line_size;
> + const bool use_36bit_addrs;
> struct rpi_firmware *fw;
> };
>
Hi Nicolas,
On 04/05/2020 18:40, Nicolas Saenz Julienne wrote:
> Hi Phil, Laurent,
>
> On Mon, 2020-05-04 at 12:26 +0300, Laurent Pinchart wrote:
>> From: Phil Elwell <phil@raspberrypi.org>
>>
>> Conditional on a new compatible string, change the pagelist encoding
>> such that the top 24 bits are the pfn, leaving 8 bits for run length
>> (-1).
>>
>> Signed-off-by: Phil Elwell <phil@raspberrypi.org>
>> Signed-off-by: Jacopo Mondi <jacopo@jmondi.org>
>> ---
>> .../interface/vchiq_arm/vchiq_2835_arm.c | 88 ++++++++++++++-----
>> .../interface/vchiq_arm/vchiq_arm.c | 6 ++
>> .../interface/vchiq_arm/vchiq_arm.h | 1 +
>> 3 files changed, 74 insertions(+), 21 deletions(-)
>>
>> diff --git
>> a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
>> b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
>> index 3e422a7eb3f1..ecec84ad4345 100644
>> --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
>> +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
>> @@ -16,6 +16,8 @@
>> #include <soc/bcm2835/raspberrypi-firmware.h>
>> #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
>> +#define VC_SAFE(x) (g_use_36bit_addrs ? ((u32)(x) | 0xc0000000) : (u32)(x))
>> +#define IS_VC_SAFE(x) (g_use_36bit_addrs ? !((x) & ~0x3fffffffull) : 1)
>
> As I commented earlier, this is a sign your dma-ranges are wrong, most of the
> code below reimplements what is already done by dma-direct (see
> kernel/dma/direct.c). Once properly setup, you should be able to use whatever
> phys address dmam_alloc_coherent() provides and drop g_use_36bit_addrs.
>
> Note that on arm32+LPAE, dma-direct/swiotlb are the default dma_ops, so this
> also applies there.
As I explained in an offline email, the problem is that VCHIQ needs access to two
kinds of "DMA" addresses - those suitable for the 40-bit DMA channel, and the
30-bit addresses that the VPU can use. Since each DT node only has access to a
single set of DMA ranges, I can't see how to use dma-direct to calculate addresses
for everything, but feel free to submit an alternative implementation showing how
it should be done.
Phil
On Mon, 2020-05-04 at 21:46 +0100, Phil Elwell wrote:
> Hi Nicolas,
>
> On 04/05/2020 18:40, Nicolas Saenz Julienne wrote:
> > Hi Phil, Laurent,
> >
> > On Mon, 2020-05-04 at 12:26 +0300, Laurent Pinchart wrote:
> > > From: Phil Elwell <phil@raspberrypi.org>
> > >
> > > Conditional on a new compatible string, change the pagelist encoding
> > > such that the top 24 bits are the pfn, leaving 8 bits for run length
> > > (-1).
> > >
> > > Signed-off-by: Phil Elwell <phil@raspberrypi.org>
> > > Signed-off-by: Jacopo Mondi <jacopo@jmondi.org>
> > > ---
> > > .../interface/vchiq_arm/vchiq_2835_arm.c | 88 ++++++++++++++-----
> > > .../interface/vchiq_arm/vchiq_arm.c | 6 ++
> > > .../interface/vchiq_arm/vchiq_arm.h | 1 +
> > > 3 files changed, 74 insertions(+), 21 deletions(-)
> > >
> > > diff --git
> > > a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> > > b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> > > index 3e422a7eb3f1..ecec84ad4345 100644
> > > --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> > > +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> > > @@ -16,6 +16,8 @@
> > > #include <soc/bcm2835/raspberrypi-firmware.h>
> > > #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
> > > +#define VC_SAFE(x) (g_use_36bit_addrs ? ((u32)(x) | 0xc0000000) :
> > > (u32)(x))
> > > +#define IS_VC_SAFE(x) (g_use_36bit_addrs ? !((x) & ~0x3fffffffull) : 1)
> >
> > As I commented earlier, this is a sign your dma-ranges are wrong, most of
> > the
> > code below reimplements what is already done by dma-direct (see
> > kernel/dma/direct.c). Once properly setup, you should be able to use
> > whatever
> > phys address dmam_alloc_coherent() provides and drop g_use_36bit_addrs.
> >
> > Note that on arm32+LPAE, dma-direct/swiotlb are the default dma_ops, so this
> > also applies there.
>
> As I explained in an offline email, the problem is that VCHIQ needs access to
We discussed this privately, but I wanted to start from scratch, especially as I
hope I won't be the only one reviewing this :).
> two
> kinds of "DMA" addresses - those suitable for the 40-bit DMA channel, and the
> 30-bit addresses that the VPU can use. Since each DT node only has access to a
> single set of DMA ranges, I can't see how to use dma-direct to calculate
> addresses
> for everything, but feel free to submit an alternative implementation showing
> how
> it should be done.
How about this:
- Move vchiq to /soc
- Get a handle to the relevant dma controller device pointer (so 30-bit DMA
controller on old RPis, 40-bit on RPi4)
- Allocate slotmem/pagelist with dma_alloc_coherent(vpu_dev, ...)
- Map pages with dma_map_sg(dma_dev, ...)
I hope I'm not missing anything, but short of implementing it and seeing the
end result, I think you'd be free of any rpi[1-3]/rpi4 distinction in the code.
Regards,
Nicolas
Hi Nicolas,
On Tue, 5 May 2020 at 11:13, Nicolas Saenz Julienne
<nsaenzjulienne@suse.de> wrote:
>
> On Mon, 2020-05-04 at 21:46 +0100, Phil Elwell wrote:
> > Hi Nicolas,
> >
> > On 04/05/2020 18:40, Nicolas Saenz Julienne wrote:
> > > Hi Phil, Laurent,
> > >
> > > On Mon, 2020-05-04 at 12:26 +0300, Laurent Pinchart wrote:
> > > > From: Phil Elwell <phil@raspberrypi.org>
> > > >
> > > > Conditional on a new compatible string, change the pagelist encoding
> > > > such that the top 24 bits are the pfn, leaving 8 bits for run length
> > > > (-1).
> > > >
> > > > Signed-off-by: Phil Elwell <phil@raspberrypi.org>
> > > > Signed-off-by: Jacopo Mondi <jacopo@jmondi.org>
> > > > ---
> > > > .../interface/vchiq_arm/vchiq_2835_arm.c | 88 ++++++++++++++-----
> > > > .../interface/vchiq_arm/vchiq_arm.c | 6 ++
> > > > .../interface/vchiq_arm/vchiq_arm.h | 1 +
> > > > 3 files changed, 74 insertions(+), 21 deletions(-)
> > > >
> > > > diff --git
> > > > a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> > > > b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> > > > index 3e422a7eb3f1..ecec84ad4345 100644
> > > > --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> > > > +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
> > > > @@ -16,6 +16,8 @@
> > > > #include <soc/bcm2835/raspberrypi-firmware.h>
> > > > #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
> > > > +#define VC_SAFE(x) (g_use_36bit_addrs ? ((u32)(x) | 0xc0000000) :
> > > > (u32)(x))
> > > > +#define IS_VC_SAFE(x) (g_use_36bit_addrs ? !((x) & ~0x3fffffffull) : 1)
> > >
> > > As I commented earlier, this is a sign your dma-ranges are wrong, most of
> > > the
> > > code below reimplements what is already done by dma-direct (see
> > > kernel/dma/direct.c). Once properly setup, you should be able to use
> > > whatever
> > > phys address dmam_alloc_coherent() provides and drop g_use_36bit_addrs.
> > >
> > > Note that on arm32+LPAE, dma-direct/swiotlb are the default dma_ops, so this
> > > also applies there.
> >
> > As I explained in an offline email, the problem is that VCHIQ needs access to
>
> We discussed this privately, but I wanted to start from scratch, especially as I
> hope I won't be the only one reviewing this :).
>
> > two
> > kinds of "DMA" addresses - those suitable for the 40-bit DMA channel, and the
> > 30-bit addresses that the VPU can use. Since each DT node only has access to a
> > single set of DMA ranges, I can't see how to use dma-direct to calculate
> > addresses
> > for everything, but feel free to submit an alternative implementation showing
> > how
> > it should be done.
>
> How about this:
> - Move vchiq to /soc
> - Get a handle to the relevant dma controller device pointer (so 30-bit DMA
> controller on old RPis, 40-bit on RPi4)
> - Allocate slotmem/pagelist with dma_alloc_coherent(vpu_dev, ...)
> - Map pages with dma_map_sg(dma_dev, ...)
>
> I hope I'm not missing anything, but short of implementing it and seeing the
> end result, I think you'd be free of any rpi[1-3]/rpi4 distinction in the code.
Thanks for the suggestion - I hadn't considered using a device pointer
for the controller rather than the current (client) device. If that
works then I propose making the 40-bit DMA property optional, such that
dma_dev defaults to vpu_dev.
Phil
Hi Laurent,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on linus/master]
[also build test WARNING on v5.7-rc4]
[cannot apply to linuxtv-media/master anholt/for-next next-20200505]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url: https://github.com/0day-ci/linux/commits/Laurent-Pinchart/Drivers-for-the-BCM283x-CSI-2-CCP2-receiver-and-ISP/20200505-054310
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 9851a0dee7c28514f149f7e4f60ec1b06286cc1b
config: s390-allyesconfig (attached as .config)
compiler: s390-linux-gcc (GCC) 9.3.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day GCC_VERSION=9.3.0 make.cross ARCH=s390
If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c: In function 'vchiq_prepare_bulk_data':
>> drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c:255:15: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
255 | bulk->data = (void *)VC_SAFE(pagelistinfo->dma_addr);
| ^
vim +255 drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
240
241 enum vchiq_status
242 vchiq_prepare_bulk_data(struct vchiq_bulk *bulk, void *offset, int size,
243 int dir)
244 {
245 struct vchiq_pagelist_info *pagelistinfo;
246
247 pagelistinfo = create_pagelist((char __user *)offset, size,
248 (dir == VCHIQ_BULK_RECEIVE)
249 ? PAGELIST_READ
250 : PAGELIST_WRITE);
251
252 if (!pagelistinfo)
253 return VCHIQ_ERROR;
254
> 255 bulk->data = (void *)VC_SAFE(pagelistinfo->dma_addr);
256
257 /*
258 * Store the pagelistinfo address in remote_data,
259 * which isn't used by the slave.
260 */
261 bulk->remote_data = pagelistinfo;
262
263 return VCHIQ_SUCCESS;
264 }
265
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
@@ -16,6 +16,8 @@
#include <soc/bcm2835/raspberrypi-firmware.h>
#define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
+#define VC_SAFE(x) (g_use_36bit_addrs ? ((u32)(x) | 0xc0000000) : (u32)(x))
+#define IS_VC_SAFE(x) (g_use_36bit_addrs ? !((x) & ~0x3fffffffull) : 1)
#include "vchiq_arm.h"
#include "vchiq_connected.h"
@@ -62,6 +64,7 @@ static void __iomem *g_regs;
*/
static unsigned int g_cache_line_size = 32;
static struct dma_pool *g_dma_pool;
+static unsigned int g_use_36bit_addrs = 0;
static unsigned int g_fragments_size;
static char *g_fragments_base;
static char *g_free_fragments;
@@ -104,6 +107,8 @@ int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state *state)
g_cache_line_size = drvdata->cache_line_size;
g_fragments_size = 2 * g_cache_line_size;
+ g_use_36bit_addrs = (dev->dma_pfn_offset == 0);
+
/* Allocate space for the channels in coherent memory */
slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS);
@@ -115,14 +120,21 @@ int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state *state)
return -ENOMEM;
}
+ if (!IS_VC_SAFE(slot_phys)) {
+ dev_err(dev, "allocated DMA memory %pad is not VC-safe\n",
+ &slot_phys);
+ return -ENOMEM;
+ }
+
WARN_ON(((unsigned long)slot_mem & (PAGE_SIZE - 1)) != 0);
+ channelbase = VC_SAFE(slot_phys);
vchiq_slot_zero = vchiq_init_slots(slot_mem, slot_mem_size);
if (!vchiq_slot_zero)
return -EINVAL;
vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] =
- (int)slot_phys + slot_mem_size;
+ channelbase + slot_mem_size;
vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
MAX_FRAGMENTS;
@@ -155,7 +167,6 @@ int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state *state)
}
/* Send the base address of the slots to VideoCore */
- channelbase = slot_phys;
err = rpi_firmware_property(fw, RPI_FIRMWARE_VCHIQ_INIT,
&channelbase, sizeof(channelbase));
if (err || channelbase) {
@@ -241,7 +252,7 @@ vchiq_prepare_bulk_data(struct vchiq_bulk *bulk, void *offset, int size,
if (!pagelistinfo)
return VCHIQ_ERROR;
- bulk->data = (void *)(unsigned long)pagelistinfo->dma_addr;
+ bulk->data = (void *)VC_SAFE(pagelistinfo->dma_addr);
/*
* Store the pagelistinfo address in remote_data,
@@ -475,25 +486,60 @@ create_pagelist(char __user *buf, size_t count, unsigned short type)
/* Combine adjacent blocks for performance */
k = 0;
- for_each_sg(scatterlist, sg, dma_buffers, i) {
- u32 len = sg_dma_len(sg);
- u32 addr = sg_dma_address(sg);
+ if (g_use_36bit_addrs) {
+ for_each_sg(scatterlist, sg, dma_buffers, i) {
+ u32 len = sg_dma_len(sg);
+ u64 addr = sg_dma_address(sg);
+ u32 page_id = (u32)((addr >> 4) & ~0xff);
+ u32 sg_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- /* Note: addrs is the address + page_count - 1
- * The firmware expects blocks after the first to be page-
- * aligned and a multiple of the page size
- */
- WARN_ON(len == 0);
- WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
- WARN_ON(i && (addr & ~PAGE_MASK));
- if (k > 0 &&
- ((addrs[k - 1] & PAGE_MASK) +
- (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT))
- == (addr & PAGE_MASK))
- addrs[k - 1] += ((len + PAGE_SIZE - 1) >> PAGE_SHIFT);
- else
- addrs[k++] = (addr & PAGE_MASK) |
- (((len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 1);
+ /* Note: addrs is the address + page_count - 1
+ * The firmware expects blocks after the first to be page-
+ * aligned and a multiple of the page size
+ */
+ WARN_ON(len == 0);
+ WARN_ON(i &&
+ (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
+ WARN_ON(i && (addr & ~PAGE_MASK));
+ WARN_ON(upper_32_bits(addr) > 0xf);
+ if (k > 0 &&
+ ((addrs[k - 1] & ~0xff) +
+ (((addrs[k - 1] & 0xff) + 1) << 8)
+ == page_id)) {
+ u32 inc_pages = min(sg_pages,
+ 0xff - (addrs[k - 1] & 0xff));
+ addrs[k - 1] += inc_pages;
+ page_id += inc_pages << 8;
+ sg_pages -= inc_pages;
+ }
+ while (sg_pages) {
+ u32 inc_pages = min(sg_pages, 0x100u);
+ addrs[k++] = page_id | (inc_pages - 1);
+ page_id += inc_pages << 8;
+ sg_pages -= inc_pages;
+ }
+ }
+ } else {
+ for_each_sg(scatterlist, sg, dma_buffers, i) {
+ u32 len = sg_dma_len(sg);
+ u32 addr = VC_SAFE(sg_dma_address(sg));
+ u32 new_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+ /* Note: addrs is the address + page_count - 1
+ * The firmware expects blocks after the first to be page-
+ * aligned and a multiple of the page size
+ */
+ WARN_ON(len == 0);
+ WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
+ WARN_ON(i && (addr & ~PAGE_MASK));
+ if (k > 0 &&
+ ((addrs[k - 1] & PAGE_MASK) +
+ (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT))
+ == (addr & PAGE_MASK))
+ addrs[k - 1] += new_pages;
+ else
+ addrs[k++] = (addr & PAGE_MASK) | (new_pages - 1);
+ }
}
/* Partial cache lines (fragments) require special measures */
@@ -117,6 +117,11 @@ static struct vchiq_drvdata bcm2836_drvdata = {
.cache_line_size = 64,
};
+static struct vchiq_drvdata bcm2711_drvdata = {
+ .cache_line_size = 64,
+ .use_36bit_addrs = true,
+};
+
static const char *const ioctl_names[] = {
"CONNECT",
"SHUTDOWN",
@@ -2710,6 +2715,7 @@ void vchiq_platform_conn_state_changed(struct vchiq_state *state,
static const struct of_device_id vchiq_of_match[] = {
{ .compatible = "brcm,bcm2835-vchiq", .data = &bcm2835_drvdata },
{ .compatible = "brcm,bcm2836-vchiq", .data = &bcm2836_drvdata },
+ { .compatible = "brcm,bcm2711-vchiq", .data = &bcm2711_drvdata },
{},
};
MODULE_DEVICE_TABLE(of, vchiq_of_match);
@@ -52,6 +52,7 @@ struct vchiq_arm_state {
struct vchiq_drvdata {
const unsigned int cache_line_size;
+ const bool use_36bit_addrs;
struct rpi_firmware *fw;
};