[v2,2/3] dma-buf: system_heap: Add pagepool support to system heap
Commit Message
Reuse/abuse the pagepool code from the network code to speed
up allocation performance.
This is similar to the ION pagepool usage, but tries to
utilize generic code instead of a custom implementation.
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Liam Mark <lmark@codeaurora.org>
Cc: Chris Goldsworthy <cgoldswo@codeaurora.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Brian Starkey <Brian.Starkey@arm.com>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: Daniel Mentz <danielmentz@google.com>
Cc: Ørjan Eide <orjan.eide@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ezequiel Garcia <ezequiel@collabora.com>
Cc: Simon Ser <contact@emersion.fr>
Cc: James Jones <jajones@nvidia.com>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
v2:
* Fix build issue caused by selecting PAGE_POOL w/o NET
as Reported-by: kernel test robot <lkp@intel.com>
---
drivers/dma-buf/heaps/Kconfig | 2 +
drivers/dma-buf/heaps/system_heap.c | 68 +++++++++++++++++++++++++++--
2 files changed, 66 insertions(+), 4 deletions(-)
Comments
On Fri, Jan 22, 2021 at 7:47 PM John Stultz <john.stultz@linaro.org> wrote:
> +static int system_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot)
> +{
> + void *addr = vmap(pages, num, VM_MAP, pgprot);
> +
> + if (!addr)
> + return -ENOMEM;
> + memset(addr, 0, PAGE_SIZE * num);
> + vunmap(addr);
> + return 0;
> +}
I thought that vmap/vunmap are expensive, and I am wondering if
there's a faster way that avoids vmap.
How about lifting this code from lib/iov_iter.c
static void memzero_page(struct page *page, size_t offset, size_t len)
{
char *addr = kmap_atomic(page);
memset(addr + offset, 0, len);
kunmap_atomic(addr);
}
Or what about lifting that code from the old ion_cma_heap.c
if (PageHighMem(pages)) {
unsigned long nr_clear_pages = nr_pages;
struct page *page = pages;
while (nr_clear_pages > 0) {
void *vaddr = kmap_atomic(page);
memset(vaddr, 0, PAGE_SIZE);
kunmap_atomic(vaddr);
page++;
nr_clear_pages--;
}
} else {
memset(page_address(pages), 0, size);
}
On Wed, Jan 27, 2021 at 12:21 PM Daniel Mentz <danielmentz@google.com> wrote:
>
> On Fri, Jan 22, 2021 at 7:47 PM John Stultz <john.stultz@linaro.org> wrote:
> > +static int system_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot)
> > +{
> > + void *addr = vmap(pages, num, VM_MAP, pgprot);
> > +
> > + if (!addr)
> > + return -ENOMEM;
> > + memset(addr, 0, PAGE_SIZE * num);
> > + vunmap(addr);
> > + return 0;
> > +}
>
> I thought that vmap/vunmap are expensive, and I am wondering if
> there's a faster way that avoids vmap.
>
> How about lifting this code from lib/iov_iter.c
> static void memzero_page(struct page *page, size_t offset, size_t len)
> {
> char *addr = kmap_atomic(page);
> memset(addr + offset, 0, len);
> kunmap_atomic(addr);
> }
>
> Or what about lifting that code from the old ion_cma_heap.c
>
> if (PageHighMem(pages)) {
> unsigned long nr_clear_pages = nr_pages;
> struct page *page = pages;
>
> while (nr_clear_pages > 0) {
> void *vaddr = kmap_atomic(page);
>
> memset(vaddr, 0, PAGE_SIZE);
> kunmap_atomic(vaddr);
> page++;
> nr_clear_pages--;
> }
> } else {
> memset(page_address(pages), 0, size);
> }
Though, this last memset only works since CMA is contiguous, so it
probably needs to always do the kmap_atomic for each page, right?
I'm still a little worried if this is right, as the current
implementation with the vmap comes from the old ion_heap_sglist_zero
logic, which similarly tries to batch the vmaps 32 pages at at time,
but I'll give it a try.
thanks
-john
On Wed, Jan 27, 2021 at 9:10 PM John Stultz <john.stultz@linaro.org> wrote:
>
> On Wed, Jan 27, 2021 at 12:21 PM Daniel Mentz <danielmentz@google.com> wrote:
> >
> > On Fri, Jan 22, 2021 at 7:47 PM John Stultz <john.stultz@linaro.org> wrote:
> > > +static int system_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot)
> > > +{
> > > + void *addr = vmap(pages, num, VM_MAP, pgprot);
> > > +
> > > + if (!addr)
> > > + return -ENOMEM;
> > > + memset(addr, 0, PAGE_SIZE * num);
> > > + vunmap(addr);
> > > + return 0;
> > > +}
> >
> > I thought that vmap/vunmap are expensive, and I am wondering if
> > there's a faster way that avoids vmap.
> >
> > How about lifting this code from lib/iov_iter.c
> > static void memzero_page(struct page *page, size_t offset, size_t len)
> > {
> > char *addr = kmap_atomic(page);
> > memset(addr + offset, 0, len);
> > kunmap_atomic(addr);
> > }
> >
> > Or what about lifting that code from the old ion_cma_heap.c
> >
> > if (PageHighMem(pages)) {
> > unsigned long nr_clear_pages = nr_pages;
> > struct page *page = pages;
> >
> > while (nr_clear_pages > 0) {
> > void *vaddr = kmap_atomic(page);
> >
> > memset(vaddr, 0, PAGE_SIZE);
> > kunmap_atomic(vaddr);
> > page++;
> > nr_clear_pages--;
> > }
> > } else {
> > memset(page_address(pages), 0, size);
> > }
>
> Though, this last memset only works since CMA is contiguous, so it
> probably needs to always do the kmap_atomic for each page, right?
Yeah, but with the system heap page pool, some of these pages might be
64KB or 1MB large. kmap_atomic(page) just maps to page_address(page)
in most cases. I think iterating over all pages individually in this
manner might still be faster than using vmap.
>
> I'm still a little worried if this is right, as the current
> implementation with the vmap comes from the old ion_heap_sglist_zero
> logic, which similarly tries to batch the vmaps 32 pages at at time,
> but I'll give it a try.
@@ -4,6 +4,8 @@ config DMABUF_HEAPS_DEFERRED_FREE
config DMABUF_HEAPS_SYSTEM
bool "DMA-BUF System Heap"
depends on DMABUF_HEAPS
+ select NET
+ select PAGE_POOL
help
Choose this option to enable the system dmabuf heap. The system heap
is backed by pages from the buddy allocator. If in doubt, say Y.
@@ -20,6 +20,7 @@
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <net/page_pool.h>
static struct dma_heap *sys_heap;
@@ -53,6 +54,7 @@ static gfp_t order_flags[] = {HIGH_ORDER_GFP, LOW_ORDER_GFP, LOW_ORDER_GFP};
*/
static const unsigned int orders[] = {8, 4, 0};
#define NUM_ORDERS ARRAY_SIZE(orders)
+struct page_pool *pools[NUM_ORDERS];
static struct sg_table *dup_sg_table(struct sg_table *table)
{
@@ -281,18 +283,59 @@ static void system_heap_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
dma_buf_map_clear(map);
}
+static int system_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot)
+{
+ void *addr = vmap(pages, num, VM_MAP, pgprot);
+
+ if (!addr)
+ return -ENOMEM;
+ memset(addr, 0, PAGE_SIZE * num);
+ vunmap(addr);
+ return 0;
+}
+
+static int system_heap_zero_buffer(struct system_heap_buffer *buffer)
+{
+ struct sg_table *sgt = &buffer->sg_table;
+ struct sg_page_iter piter;
+ struct page *pages[32];
+ int p = 0;
+ int ret = 0;
+
+ for_each_sgtable_page(sgt, &piter, 0) {
+ pages[p++] = sg_page_iter_page(&piter);
+ if (p == ARRAY_SIZE(pages)) {
+ ret = system_heap_clear_pages(pages, p, PAGE_KERNEL);
+ if (ret)
+ return ret;
+ p = 0;
+ }
+ }
+ if (p)
+ ret = system_heap_clear_pages(pages, p, PAGE_KERNEL);
+
+ return ret;
+}
+
static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
{
struct system_heap_buffer *buffer = dmabuf->priv;
struct sg_table *table;
struct scatterlist *sg;
- int i;
+ int i, j;
+
+ /* Zero the buffer pages before adding back to the pool */
+ system_heap_zero_buffer(buffer);
table = &buffer->sg_table;
for_each_sg(table->sgl, sg, table->nents, i) {
struct page *page = sg_page(sg);
- __free_pages(page, compound_order(page));
+ for (j = 0; j < NUM_ORDERS; j++) {
+ if (compound_order(page) == orders[j])
+ break;
+ }
+ page_pool_put_full_page(pools[j], page, false);
}
sg_free_table(table);
kfree(buffer);
@@ -322,8 +365,7 @@ static struct page *alloc_largest_available(unsigned long size,
continue;
if (max_order < orders[i])
continue;
-
- page = alloc_pages(order_flags[i], orders[i]);
+ page = page_pool_alloc_pages(pools[i], order_flags[i]);
if (!page)
continue;
return page;
@@ -428,6 +470,24 @@ static const struct dma_heap_ops system_heap_ops = {
static int system_heap_create(void)
{
struct dma_heap_export_info exp_info;
+ int i;
+
+ for (i = 0; i < NUM_ORDERS; i++) {
+ struct page_pool_params pp;
+
+ memset(&pp, 0, sizeof(pp));
+ pp.order = orders[i];
+ pools[i] = page_pool_create(&pp);
+
+ if (IS_ERR(pools[i])) {
+ int j;
+
+ pr_err("%s: page pool creation failed!\n", __func__);
+ for (j = 0; j < i; j++)
+ page_pool_destroy(pools[j]);
+ return PTR_ERR(pools[i]);
+ }
+ }
exp_info.name = "system";
exp_info.ops = &system_heap_ops;