media: vb2: vmalloc-based allocator user pointer handling

Message ID 1320231122-22518-1-git-send-email-andrzej.p@samsung.com (mailing list archive)
State Changes Requested, archived
Headers

Commit Message

Andrzej Pietrasiewicz Nov. 2, 2011, 10:52 a.m. UTC
  vmalloc-based allocator user pointer handling

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
---
 drivers/media/video/videobuf2-vmalloc.c |   86 ++++++++++++++++++++++++++++++-
 1 files changed, 85 insertions(+), 1 deletions(-)
  

Comments

Laurent Pinchart Nov. 2, 2011, 1:53 p.m. UTC | #1
Hi Andrzej,

Thanks for the patch.

On Wednesday 02 November 2011 11:52:02 Andrzej Pietrasiewicz wrote:
> vmalloc-based allocator user pointer handling
> 
> Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> ---
>  drivers/media/video/videobuf2-vmalloc.c |   86 +++++++++++++++++++++++++++-
>  1 files changed, 85 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/media/video/videobuf2-vmalloc.c
> b/drivers/media/video/videobuf2-vmalloc.c index a3a8842..ee0ee37 100644
> --- a/drivers/media/video/videobuf2-vmalloc.c
> +++ b/drivers/media/video/videobuf2-vmalloc.c
> @@ -12,6 +12,7 @@
> 
>  #include <linux/module.h>
>  #include <linux/mm.h>
> +#include <linux/sched.h>
>  #include <linux/slab.h>
>  #include <linux/vmalloc.h>
> 
> @@ -20,7 +21,10 @@
> 
>  struct vb2_vmalloc_buf {
>  	void				*vaddr;
> +	struct page			**pages;
> +	int				write;
>  	unsigned long			size;
> +	unsigned int			n_pages;
>  	atomic_t			refcount;
>  	struct vb2_vmarea_handler	handler;
>  };
> @@ -66,6 +70,83 @@ static void vb2_vmalloc_put(void *buf_priv)
>  	}
>  }
> 
> +static void *vb2_vmalloc_get_userptr(void *alloc_ctx, unsigned long vaddr,
> +				     unsigned long size, int write)
> +{
> +	struct vb2_vmalloc_buf *buf;
> +
> +	unsigned long first, last;
> +	int n_pages_from_user, offset;

Doesn't the kernel coding style prefer one variable declaration per line ?

> +
> +	buf = kzalloc(sizeof *buf, GFP_KERNEL);
> +	if (!buf)
> +		return NULL;
> +
> +	buf->vaddr = NULL;
> +	buf->write = write;
> +	offset = vaddr & ~PAGE_MASK;
> +	buf->size = size;
> +
> +	first = (vaddr & PAGE_MASK) >> PAGE_SHIFT;
> +	last  = ((vaddr + size - 1) & PAGE_MASK) >> PAGE_SHIFT;
> +	buf->n_pages = last - first + 1;
> +	buf->pages = kzalloc(buf->n_pages * sizeof(struct page *), GFP_KERNEL);
> +	if (!buf->pages)
> +		goto userptr_fail_pages_array_alloc;
> +
> +	down_read(&current->mm->mmap_sem);
> +	n_pages_from_user = get_user_pages(current, current->mm,
> +					     vaddr & PAGE_MASK,
> +					     buf->n_pages,
> +					     write,
> +					     1, /* force */
> +					     buf->pages,
> +					     NULL);
> +	up_read(&current->mm->mmap_sem);

This can cause an AB-BA deadlock, and will be reported by deadlock detection 
if enabled.

The issue is that the mmap() handler is called by the MM core with current-
>mm->mmap_sem held, and then takes the driver's lock before calling 
videobuf2's mmap handler. The VIDIOC_QBUF handler, on the other hand, will 
first take the driver's lock and will then try to take current->mm->mmap_sem 
here.

This can result in a deadlock if both MMAP and USERPTR buffers are used by the 
same driver at the same time.

If we assume that MMAP and USERPTR buffers can't be used on the same queue at 
the same time (VIDIOC_CREATEBUFS doesn't allow that if I'm not mistaken, so we 
should be safe, at least for now), this can be fixed by having a per-queue 
lock in the driver instead of a global device lock. However, that means that 
drivers that want to support USERPTR will not be allowed to delegate lock 
handling to the V4L2 core and video_ioctl2().

> +	if (n_pages_from_user != buf->n_pages)
> +		goto userptr_fail_get_user_pages;
> +
> +	buf->vaddr = vm_map_ram(buf->pages, buf->n_pages, -1, PAGE_KERNEL);

Will this create a second kernel mapping ? What if the user tries to pass 
framebuffer memory that has been mapped uncacheable to userspace ?

> +
> +	if (buf->vaddr) {
> +		buf->vaddr += offset;
> +		return buf;
> +	}

if () statements with a return look like error handling, what about

	if (buf->vaddr == NULL)
		goto userptr_fail_get_user_pages;

	buf->vaddr += offset;
	return buf;

> +
> +userptr_fail_get_user_pages:
> +	printk(KERN_DEBUG "get_user_pages requested/got: %d/%d]\n",
> +	       n_pages_from_user, buf->n_pages);

Do we really need that debug printk ?

> +	while (--n_pages_from_user >= 0)
> +		put_page(buf->pages[n_pages_from_user]);
> +	kfree(buf->pages);
> +
> +userptr_fail_pages_array_alloc:
> +	kfree(buf);
> +
> +	return NULL;
> +}
> +
> +static void vb2_vmalloc_put_userptr(void *buf_priv)
> +{
> +	struct vb2_vmalloc_buf *buf = buf_priv;
> +
> +	int i = buf->n_pages;
> +	int offset = (unsigned long)buf->vaddr & ~PAGE_MASK;
> +
> +	printk(KERN_DEBUG "%s: Releasing userspace buffer of %d pages\n",
> +	       __func__, buf->n_pages);
> +	if (buf->vaddr)
> +		vm_unmap_ram((const void *)((unsigned long)buf->vaddr - offset),
> +			     buf->n_pages);
> +	while (--i >= 0) {

Anything wrong with

for (i = 0; i < buf->n_pages; ++i)

? :-)

You could then make i an unsigned int, which would match buf->n_pages.

> +		if (buf->write)
> +			set_page_dirty_lock(buf->pages[i]);
> +		put_page(buf->pages[i]);
> +	}
> +	kfree(buf->pages);
> +	kfree(buf);
> +}
> +
>  static void *vb2_vmalloc_vaddr(void *buf_priv)
>  {
>  	struct vb2_vmalloc_buf *buf = buf_priv;
  
Guennadi Liakhovetski Nov. 2, 2011, 6:59 p.m. UTC | #2
On Wed, 2 Nov 2011, Laurent Pinchart wrote:

> Hi Andrzej,
> 
> Thanks for the patch.
> 
> On Wednesday 02 November 2011 11:52:02 Andrzej Pietrasiewicz wrote:
> > vmalloc-based allocator user pointer handling
> > 
> > Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
> > Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> > ---
> >  drivers/media/video/videobuf2-vmalloc.c |   86 +++++++++++++++++++++++++++-
> >  1 files changed, 85 insertions(+), 1 deletions(-)
> > 
> > diff --git a/drivers/media/video/videobuf2-vmalloc.c
> > b/drivers/media/video/videobuf2-vmalloc.c index a3a8842..ee0ee37 100644
> > --- a/drivers/media/video/videobuf2-vmalloc.c
> > +++ b/drivers/media/video/videobuf2-vmalloc.c
> > @@ -12,6 +12,7 @@
> > 
> >  #include <linux/module.h>
> >  #include <linux/mm.h>
> > +#include <linux/sched.h>
> >  #include <linux/slab.h>
> >  #include <linux/vmalloc.h>
> > 
> > @@ -20,7 +21,10 @@
> > 
> >  struct vb2_vmalloc_buf {
> >  	void				*vaddr;
> > +	struct page			**pages;
> > +	int				write;
> >  	unsigned long			size;
> > +	unsigned int			n_pages;
> >  	atomic_t			refcount;
> >  	struct vb2_vmarea_handler	handler;
> >  };
> > @@ -66,6 +70,83 @@ static void vb2_vmalloc_put(void *buf_priv)
> >  	}
> >  }
> > 
> > +static void *vb2_vmalloc_get_userptr(void *alloc_ctx, unsigned long vaddr,
> > +				     unsigned long size, int write)
> > +{
> > +	struct vb2_vmalloc_buf *buf;
> > +
> > +	unsigned long first, last;
> > +	int n_pages_from_user, offset;
> 
> Doesn't the kernel coding style prefer one variable declaration per line ?

Wow?... That's something soooo new to me, that I'm (well, almost;-)) 
prepared to eat my hat, if this is stated in CodingStyle or if checkpatch 
complains about it...

> 
> > +
> > +	buf = kzalloc(sizeof *buf, GFP_KERNEL);
> > +	if (!buf)
> > +		return NULL;
> > +
> > +	buf->vaddr = NULL;

Technically, this is not needed, since kzalloc() already allocates zeroed 
memory, but it's up to the author to keep it, if he thinks, that this is 
important semantically.

> > +	buf->write = write;
> > +	offset = vaddr & ~PAGE_MASK;
> > +	buf->size = size;
> > +
> > +	first = (vaddr & PAGE_MASK) >> PAGE_SHIFT;
> > +	last  = ((vaddr + size - 1) & PAGE_MASK) >> PAGE_SHIFT;
> > +	buf->n_pages = last - first + 1;
> > +	buf->pages = kzalloc(buf->n_pages * sizeof(struct page *), GFP_KERNEL);
> > +	if (!buf->pages)
> > +		goto userptr_fail_pages_array_alloc;
> > +
> > +	down_read(&current->mm->mmap_sem);
> > +	n_pages_from_user = get_user_pages(current, current->mm,
> > +					     vaddr & PAGE_MASK,
> > +					     buf->n_pages,
> > +					     write,
> > +					     1, /* force */
> > +					     buf->pages,
> > +					     NULL);
> > +	up_read(&current->mm->mmap_sem);
> 
> This can cause an AB-BA deadlock, and will be reported by deadlock detection 
> if enabled.
> 
> The issue is that the mmap() handler is called by the MM core with current-
> >mm->mmap_sem held, and then takes the driver's lock before calling 
> videobuf2's mmap handler. The VIDIOC_QBUF handler, on the other hand, will 
> first take the driver's lock and will then try to take current->mm->mmap_sem 
> here.
> 
> This can result in a deadlock if both MMAP and USERPTR buffers are used by the 
> same driver at the same time.
> 
> If we assume that MMAP and USERPTR buffers can't be used on the same queue at 
> the same time (VIDIOC_CREATEBUFS doesn't allow that if I'm not mistaken, so we 

I don't think this is checked in the version, waiting to be pulled in my 
tree. And I don't remember a patch for this, but we definitely want one, 
until we have a better solution for this.

> should be safe, at least for now), this can be fixed by having a per-queue 
> lock in the driver instead of a global device lock. However, that means that 
> drivers that want to support USERPTR will not be allowed to delegate lock 
> handling to the V4L2 core and video_ioctl2().
> 
> > +	if (n_pages_from_user != buf->n_pages)
> > +		goto userptr_fail_get_user_pages;
> > +
> > +	buf->vaddr = vm_map_ram(buf->pages, buf->n_pages, -1, PAGE_KERNEL);
> 
> Will this create a second kernel mapping ? What if the user tries to pass 
> framebuffer memory that has been mapped uncacheable to userspace ?
> 
> > +
> > +	if (buf->vaddr) {
> > +		buf->vaddr += offset;
> > +		return buf;
> > +	}
> 
> if () statements with a return look like error handling, what about
> 
> 	if (buf->vaddr == NULL)
> 		goto userptr_fail_get_user_pages;
> 
> 	buf->vaddr += offset;
> 	return buf;
> 
> > +
> > +userptr_fail_get_user_pages:
> > +	printk(KERN_DEBUG "get_user_pages requested/got: %d/%d]\n",
> > +	       n_pages_from_user, buf->n_pages);
> 
> Do we really need that debug printk ?

...and if we _do_ need it, then, I think, pr_debug() is preferred these 
days.

> 
> > +	while (--n_pages_from_user >= 0)
> > +		put_page(buf->pages[n_pages_from_user]);
> > +	kfree(buf->pages);
> > +
> > +userptr_fail_pages_array_alloc:
> > +	kfree(buf);
> > +
> > +	return NULL;
> > +}
> > +
> > +static void vb2_vmalloc_put_userptr(void *buf_priv)
> > +{
> > +	struct vb2_vmalloc_buf *buf = buf_priv;
> > +
> > +	int i = buf->n_pages;
> > +	int offset = (unsigned long)buf->vaddr & ~PAGE_MASK;
> > +
> > +	printk(KERN_DEBUG "%s: Releasing userspace buffer of %d pages\n",
> > +	       __func__, buf->n_pages);
> > +	if (buf->vaddr)
> > +		vm_unmap_ram((const void *)((unsigned long)buf->vaddr - offset),
> > +			     buf->n_pages);
> > +	while (--i >= 0) {
> 
> Anything wrong with
> 
> for (i = 0; i < buf->n_pages; ++i)
> 
> ? :-)
> 
> You could then make i an unsigned int, which would match buf->n_pages.
> 
> > +		if (buf->write)
> > +			set_page_dirty_lock(buf->pages[i]);
> > +		put_page(buf->pages[i]);
> > +	}
> > +	kfree(buf->pages);
> > +	kfree(buf);
> > +}
> > +
> >  static void *vb2_vmalloc_vaddr(void *buf_priv)
> >  {
> >  	struct vb2_vmalloc_buf *buf = buf_priv;
> 
> -- 
> Regards,
> 
> Laurent Pinchart

Thanks
Guennadi
---
Guennadi Liakhovetski, Ph.D.
Freelance Open-Source Software Developer
http://www.open-technology.de/
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
  
Marek Szyprowski Nov. 3, 2011, 7:40 a.m. UTC | #3
Hello,

On Wednesday, November 02, 2011 2:54 PM Laurent Pinchart wrote:
> On Wednesday 02 November 2011 11:52:02 Andrzej Pietrasiewicz wrote:
> > vmalloc-based allocator user pointer handling
> >
> > Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
> > Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> > ---
> >  drivers/media/video/videobuf2-vmalloc.c |   86 +++++++++++++++++++++++++++-
> >  1 files changed, 85 insertions(+), 1 deletions(-)
> >
> > diff --git a/drivers/media/video/videobuf2-vmalloc.c
> > b/drivers/media/video/videobuf2-vmalloc.c index a3a8842..ee0ee37 100644
> > --- a/drivers/media/video/videobuf2-vmalloc.c
> > +++ b/drivers/media/video/videobuf2-vmalloc.c
> > @@ -12,6 +12,7 @@
> >
> >  #include <linux/module.h>
> >  #include <linux/mm.h>
> > +#include <linux/sched.h>
> >  #include <linux/slab.h>
> >  #include <linux/vmalloc.h>
> >
> > @@ -20,7 +21,10 @@
> >
> >  struct vb2_vmalloc_buf {
> >  	void				*vaddr;
> > +	struct page			**pages;
> > +	int				write;
> >  	unsigned long			size;
> > +	unsigned int			n_pages;
> >  	atomic_t			refcount;
> >  	struct vb2_vmarea_handler	handler;
> >  };
> > @@ -66,6 +70,83 @@ static void vb2_vmalloc_put(void *buf_priv)
> >  	}
> >  }
> >
> > +static void *vb2_vmalloc_get_userptr(void *alloc_ctx, unsigned long vaddr,
> > +				     unsigned long size, int write)
> > +{
> > +	struct vb2_vmalloc_buf *buf;
> > +
> > +	unsigned long first, last;
> > +	int n_pages_from_user, offset;
> 
> Doesn't the kernel coding style prefer one variable declaration per line ?
> 
> > +
> > +	buf = kzalloc(sizeof *buf, GFP_KERNEL);
> > +	if (!buf)
> > +		return NULL;
> > +
> > +	buf->vaddr = NULL;
> > +	buf->write = write;
> > +	offset = vaddr & ~PAGE_MASK;
> > +	buf->size = size;
> > +
> > +	first = (vaddr & PAGE_MASK) >> PAGE_SHIFT;
> > +	last  = ((vaddr + size - 1) & PAGE_MASK) >> PAGE_SHIFT;
> > +	buf->n_pages = last - first + 1;
> > +	buf->pages = kzalloc(buf->n_pages * sizeof(struct page *), GFP_KERNEL);
> > +	if (!buf->pages)
> > +		goto userptr_fail_pages_array_alloc;
> > +
> > +	down_read(&current->mm->mmap_sem);
> > +	n_pages_from_user = get_user_pages(current, current->mm,
> > +					     vaddr & PAGE_MASK,
> > +					     buf->n_pages,
> > +					     write,
> > +					     1, /* force */
> > +					     buf->pages,
> > +					     NULL);
> > +	up_read(&current->mm->mmap_sem);
> 
> This can cause an AB-BA deadlock, and will be reported by deadlock detection
> if enabled.
> 
> The issue is that the mmap() handler is called by the MM core with current-
> >mm->mmap_sem held, and then takes the driver's lock before calling
> videobuf2's mmap handler. The VIDIOC_QBUF handler, on the other hand, will
> first take the driver's lock and will then try to take current->mm->mmap_sem
> here.
> 
> This can result in a deadlock if both MMAP and USERPTR buffers are used by the
> same driver at the same time.
> 
> If we assume that MMAP and USERPTR buffers can't be used on the same queue at
> the same time (VIDIOC_CREATEBUFS doesn't allow that if I'm not mistaken, so we
> should be safe, at least for now), this can be fixed by having a per-queue
> lock in the driver instead of a global device lock. However, that means that
> drivers that want to support USERPTR will not be allowed to delegate lock
> handling to the V4L2 core and video_ioctl2().

Thanks for pointing this issue! This problem is already present in the other 
videobuf2 memory allocators as well as the old videobuf and other v4l2 drivers
which implement queue handling by themselves.

The only solution that will not complicate the videobuf2 and allocators code
is to move taking current->mm->mmap_sem lock into videobuf2 core. Before acquiring
this lock, vb2 calls wait_prepare to release device lock and then once mmap_sem is
locked, calls wait_finish to get it again. This way the deadlock is avoided and 
allocators are free to call get_user_pages() without further messing with locks.
The only drawback is the fact that a bit more code will be executed under mmap_sem
lock.

What do you think about such solution?

> > +	if (n_pages_from_user != buf->n_pages)
> > +		goto userptr_fail_get_user_pages;
> > +
> > +	buf->vaddr = vm_map_ram(buf->pages, buf->n_pages, -1, PAGE_KERNEL);
> 
> Will this create a second kernel mapping ?

Yes, it is very similar to vmalloc function which grabs a set of pages and 
creates contiguous virtual kernel mapping for them.

> What if the user tries to pass
> framebuffer memory that has been mapped uncacheable to userspace ?

get_user_pages() fails if it is called for framebuffer memory (VM_PFNMAP type
mappings).

Best regards
  
Andrzej Pietrasiewicz Nov. 3, 2011, 10:40 a.m. UTC | #4
Hello Laurent,

Thank you for quickly responding with a review. As for coding style
remarks I generally agree. However, Guennadi seems to have a different
opinion on one of them.

On Wednesday, November 02, 2011 2:54 PM Laurent Pinchart wrote:

> 
> This can cause an AB-BA deadlock, and will be reported by deadlock
> detection
> if enabled.
> 
Marek has already wrote about this. The same problem relates to other
allocators, AFAIK. He proposed a solution.

Regards,

Andrzej



--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
  
Laurent Pinchart Nov. 8, 2011, 11:32 a.m. UTC | #5
Hi Marek,

On Thursday 03 November 2011 08:40:26 Marek Szyprowski wrote:
> On Wednesday, November 02, 2011 2:54 PM Laurent Pinchart wrote:
> > On Wednesday 02 November 2011 11:52:02 Andrzej Pietrasiewicz wrote:
> > > vmalloc-based allocator user pointer handling

[snip]

> > > @@ -66,6 +70,83 @@ static void vb2_vmalloc_put(void *buf_priv)
> > > 
> > >  	}
> > >  
> > >  }
> > > 
> > > +static void *vb2_vmalloc_get_userptr(void *alloc_ctx, unsigned long
> > > vaddr, +				     unsigned long size, int write)
> > > +{
> > > +	struct vb2_vmalloc_buf *buf;
> > > +
> > > +	unsigned long first, last;
> > > +	int n_pages_from_user, offset;
> > 
> > Doesn't the kernel coding style prefer one variable declaration per line
> > ?
> > 
> > > +
> > > +	buf = kzalloc(sizeof *buf, GFP_KERNEL);
> > > +	if (!buf)
> > > +		return NULL;
> > > +
> > > +	buf->vaddr = NULL;
> > > +	buf->write = write;
> > > +	offset = vaddr & ~PAGE_MASK;
> > > +	buf->size = size;
> > > +
> > > +	first = (vaddr & PAGE_MASK) >> PAGE_SHIFT;
> > > +	last  = ((vaddr + size - 1) & PAGE_MASK) >> PAGE_SHIFT;
> > > +	buf->n_pages = last - first + 1;
> > > +	buf->pages = kzalloc(buf->n_pages * sizeof(struct page *),
> > > GFP_KERNEL); +	if (!buf->pages)
> > > +		goto userptr_fail_pages_array_alloc;
> > > +
> > > +	down_read(&current->mm->mmap_sem);
> > > +	n_pages_from_user = get_user_pages(current, current->mm,
> > > +					     vaddr & PAGE_MASK,
> > > +					     buf->n_pages,
> > > +					     write,
> > > +					     1, /* force */
> > > +					     buf->pages,
> > > +					     NULL);
> > > +	up_read(&current->mm->mmap_sem);
> > 
> > This can cause an AB-BA deadlock, and will be reported by deadlock
> > detection if enabled.
> > 
> > The issue is that the mmap() handler is called by the MM core with
> > current->mm->mmap_sem held, and then takes the driver's lock before
> > calling videobuf2's mmap handler. The VIDIOC_QBUF handler, on the other
> > hand, will first take the driver's lock and will then try to take
> > current->mm->mmap_sem here.
> > 
> > This can result in a deadlock if both MMAP and USERPTR buffers are used
> > by the same driver at the same time.
> > 
> > If we assume that MMAP and USERPTR buffers can't be used on the same
> > queue at the same time (VIDIOC_CREATEBUFS doesn't allow that if I'm not
> > mistaken, so we should be safe, at least for now), this can be fixed by
> > having a per-queue lock in the driver instead of a global device lock.
> > However, that means that drivers that want to support USERPTR will not
> > be allowed to delegate lock handling to the V4L2 core and
> > video_ioctl2().
> 
> Thanks for pointing this issue! This problem is already present in the
> other videobuf2 memory allocators as well as the old videobuf and other
> v4l2 drivers which implement queue handling by themselves.

The problem is present in most (but not all) drivers, yes. That's one more 
reason to fix it in videobuf2 :-)

> The only solution that will not complicate the videobuf2 and allocators
> code is to move taking current->mm->mmap_sem lock into videobuf2 core.
> Before acquiring this lock, vb2 calls wait_prepare to release device lock
> and then once mmap_sem is locked, calls wait_finish to get it again. This
> way the deadlock is avoided and allocators are free to call
> get_user_pages() without further messing with locks. The only drawback is
> the fact that a bit more code will be executed under mmap_sem lock.
> 
> What do you think about such solution?

Won't that create a race condition ? Wouldn't an application for instance be 
able to call VIDIOC_REQBUFS(0) during the time window where the device lock is 
released ?

> > > +	if (n_pages_from_user != buf->n_pages)
> > > +		goto userptr_fail_get_user_pages;
> > > +
> > > +	buf->vaddr = vm_map_ram(buf->pages, buf->n_pages, -1, PAGE_KERNEL);
> > 
> > Will this create a second kernel mapping ?
> 
> Yes, it is very similar to vmalloc function which grabs a set of pages and
> creates contiguous virtual kernel mapping for them.
> 
> > What if the user tries to pass framebuffer memory that has been mapped
> > uncacheable to userspace ?
> 
> get_user_pages() fails if it is called for framebuffer memory (VM_PFNMAP
> type mappings).

Right. Do you think we should handle them, or should we wait for the buffer 
sharing API ?
  
Marek Szyprowski Nov. 8, 2011, 1:57 p.m. UTC | #6
Hello,

On Tuesday, November 08, 2011 12:32 PM Laurent Pinchart wrote:
> On Thursday 03 November 2011 08:40:26 Marek Szyprowski wrote:
> > On Wednesday, November 02, 2011 2:54 PM Laurent Pinchart wrote:
> > > On Wednesday 02 November 2011 11:52:02 Andrzej Pietrasiewicz wrote:
> > > > vmalloc-based allocator user pointer handling
> 
> [snip]
> 
> > > This can cause an AB-BA deadlock, and will be reported by deadlock
> > > detection if enabled.
> > >
> > > The issue is that the mmap() handler is called by the MM core with
> > > current->mm->mmap_sem held, and then takes the driver's lock before
> > > calling videobuf2's mmap handler. The VIDIOC_QBUF handler, on the other
> > > hand, will first take the driver's lock and will then try to take
> > > current->mm->mmap_sem here.
> > >
> > > This can result in a deadlock if both MMAP and USERPTR buffers are used
> > > by the same driver at the same time.
> > >
> > > If we assume that MMAP and USERPTR buffers can't be used on the same
> > > queue at the same time (VIDIOC_CREATEBUFS doesn't allow that if I'm not
> > > mistaken, so we should be safe, at least for now), this can be fixed by
> > > having a per-queue lock in the driver instead of a global device lock.
> > > However, that means that drivers that want to support USERPTR will not
> > > be allowed to delegate lock handling to the V4L2 core and
> > > video_ioctl2().
> >
> > Thanks for pointing this issue! This problem is already present in the
> > other videobuf2 memory allocators as well as the old videobuf and other
> > v4l2 drivers which implement queue handling by themselves.
> 
> The problem is present in most (but not all) drivers, yes. That's one more
> reason to fix it in videobuf2 :-)
>
> > The only solution that will not complicate the videobuf2 and allocators
> > code is to move taking current->mm->mmap_sem lock into videobuf2 core.
> > Before acquiring this lock, vb2 calls wait_prepare to release device lock
> > and then once mmap_sem is locked, calls wait_finish to get it again. This
> > way the deadlock is avoided and allocators are free to call
> > get_user_pages() without further messing with locks. The only drawback is
> > the fact that a bit more code will be executed under mmap_sem lock.
> >
> > What do you think about such solution?
> 
> Won't that create a race condition ? Wouldn't an application for instance be
> able to call VIDIOC_REQBUFS(0) during the time window where the device lock is
> released ?

Hmm... Right... 

The only solution I see now is to move acquiring mmap_sem as early as possible
to make the possible race harmless. The first operation in vb2_qbuf will be then:

if (b->memory == V4L2_MEMORY_USERPTR) {
       call_qop(q, wait_prepare, q);
       down_read(&current->mm->mmap_sem);
       call_qop(q, wait_finish, q);
}

This should solve the race although all userptr buffers will be handled under
mmap_sem lock. Do you have any other idea?
 
> > > > +	if (n_pages_from_user != buf->n_pages)
> > > > +		goto userptr_fail_get_user_pages;
> > > > +
> > > > +	buf->vaddr = vm_map_ram(buf->pages, buf->n_pages, -1, PAGE_KERNEL);
> > >
> > > Will this create a second kernel mapping ?
> >
> > Yes, it is very similar to vmalloc function which grabs a set of pages and
> > creates contiguous virtual kernel mapping for them.
> >
> > > What if the user tries to pass framebuffer memory that has been mapped
> > > uncacheable to userspace ?
> >
> > get_user_pages() fails if it is called for framebuffer memory (VM_PFNMAP
> > type mappings).
> 
> Right. Do you think we should handle them, or should we wait for the buffer
> sharing API ?

I'm not sure that waiting for buffer sharing API makes much sense here. 
First I would like to have vmalloc allocator finished for the typical desktop
centric use cases (well, that's the most common use case for usb cams). Code
for handling VM_PFNMAP buffers can be added later in the separate patches as
it is useful mainly in the embedded world... 

Best regards
  
Laurent Pinchart Nov. 8, 2011, 2:01 p.m. UTC | #7
Hi Marek,

On Tuesday 08 November 2011 14:57:40 Marek Szyprowski wrote:
> On Tuesday, November 08, 2011 12:32 PM Laurent Pinchart wrote:
> > On Thursday 03 November 2011 08:40:26 Marek Szyprowski wrote:
> > > On Wednesday, November 02, 2011 2:54 PM Laurent Pinchart wrote:
> > > > On Wednesday 02 November 2011 11:52:02 Andrzej Pietrasiewicz wrote:
> > > > > vmalloc-based allocator user pointer handling
> > 
> > [snip]
> > 
> > > > This can cause an AB-BA deadlock, and will be reported by deadlock
> > > > detection if enabled.
> > > > 
> > > > The issue is that the mmap() handler is called by the MM core with
> > > > current->mm->mmap_sem held, and then takes the driver's lock before
> > > > calling videobuf2's mmap handler. The VIDIOC_QBUF handler, on the
> > > > other hand, will first take the driver's lock and will then try to
> > > > take current->mm->mmap_sem here.
> > > > 
> > > > This can result in a deadlock if both MMAP and USERPTR buffers are
> > > > used by the same driver at the same time.
> > > > 
> > > > If we assume that MMAP and USERPTR buffers can't be used on the same
> > > > queue at the same time (VIDIOC_CREATEBUFS doesn't allow that if I'm
> > > > not mistaken, so we should be safe, at least for now), this can be
> > > > fixed by having a per-queue lock in the driver instead of a global
> > > > device lock. However, that means that drivers that want to support
> > > > USERPTR will not be allowed to delegate lock handling to the V4L2
> > > > core and
> > > > video_ioctl2().
> > > 
> > > Thanks for pointing this issue! This problem is already present in the
> > > other videobuf2 memory allocators as well as the old videobuf and other
> > > v4l2 drivers which implement queue handling by themselves.
> > 
> > The problem is present in most (but not all) drivers, yes. That's one
> > more reason to fix it in videobuf2 :-)
> > 
> > > The only solution that will not complicate the videobuf2 and allocators
> > > code is to move taking current->mm->mmap_sem lock into videobuf2 core.
> > > Before acquiring this lock, vb2 calls wait_prepare to release device
> > > lock and then once mmap_sem is locked, calls wait_finish to get it
> > > again. This way the deadlock is avoided and allocators are free to
> > > call
> > > get_user_pages() without further messing with locks. The only drawback
> > > is the fact that a bit more code will be executed under mmap_sem lock.
> > > 
> > > What do you think about such solution?
> > 
> > Won't that create a race condition ? Wouldn't an application for instance
> > be able to call VIDIOC_REQBUFS(0) during the time window where the
> > device lock is released ?
> 
> Hmm... Right...
> 
> The only solution I see now is to move acquiring mmap_sem as early as
> possible to make the possible race harmless. The first operation in
> vb2_qbuf will be then:
> 
> if (b->memory == V4L2_MEMORY_USERPTR) {
>        call_qop(q, wait_prepare, q);
>        down_read(&current->mm->mmap_sem);
>        call_qop(q, wait_finish, q);
> }
> 
> This should solve the race although all userptr buffers will be handled
> under mmap_sem lock. Do you have any other idea?

If queues don't mix MMAP and USERPTR buffers (is that something we want to 
allow ?), wouldn't using a per-queue lock instead of a device-wide lock be a 
better way to fix the problem ?

> > > > > +	if (n_pages_from_user != buf->n_pages)
> > > > > +		goto userptr_fail_get_user_pages;
> > > > > +
> > > > > +	buf->vaddr = vm_map_ram(buf->pages, buf->n_pages, -1,
> > > > > PAGE_KERNEL);
> > > > 
> > > > Will this create a second kernel mapping ?
> > > 
> > > Yes, it is very similar to vmalloc function which grabs a set of pages
> > > and creates contiguous virtual kernel mapping for them.
> > > 
> > > > What if the user tries to pass framebuffer memory that has been
> > > > mapped uncacheable to userspace ?
> > > 
> > > get_user_pages() fails if it is called for framebuffer memory
> > > (VM_PFNMAP type mappings).
> > 
> > Right. Do you think we should handle them, or should we wait for the
> > buffer sharing API ?
> 
> I'm not sure that waiting for buffer sharing API makes much sense here.
> First I would like to have vmalloc allocator finished for the typical
> desktop centric use cases (well, that's the most common use case for usb
> cams). Code for handling VM_PFNMAP buffers can be added later in the
> separate patches as it is useful mainly in the embedded world...

Capturing video directly to the frame buffer is a very common use case in the 
embedded world. I agree that we can implement that in a second step.
  
Marek Szyprowski Nov. 8, 2011, 2:29 p.m. UTC | #8
Hello,

On Tuesday, November 08, 2011 3:01 PM Laurent Pinchart wrote:
> On Tuesday 08 November 2011 14:57:40 Marek Szyprowski wrote:
> > On Tuesday, November 08, 2011 12:32 PM Laurent Pinchart wrote:
> > > On Thursday 03 November 2011 08:40:26 Marek Szyprowski wrote:
> > > > On Wednesday, November 02, 2011 2:54 PM Laurent Pinchart wrote:
> > > > > On Wednesday 02 November 2011 11:52:02 Andrzej Pietrasiewicz wrote:
> > > > > > vmalloc-based allocator user pointer handling
> > >
> > > [snip]
> > >
> > > > > This can cause an AB-BA deadlock, and will be reported by deadlock
> > > > > detection if enabled.
> > > > >
> > > > > The issue is that the mmap() handler is called by the MM core with
> > > > > current->mm->mmap_sem held, and then takes the driver's lock before
> > > > > calling videobuf2's mmap handler. The VIDIOC_QBUF handler, on the
> > > > > other hand, will first take the driver's lock and will then try to
> > > > > take current->mm->mmap_sem here.
> > > > >
> > > > > This can result in a deadlock if both MMAP and USERPTR buffers are
> > > > > used by the same driver at the same time.
> > > > >
> > > > > If we assume that MMAP and USERPTR buffers can't be used on the same
> > > > > queue at the same time (VIDIOC_CREATEBUFS doesn't allow that if I'm
> > > > > not mistaken, so we should be safe, at least for now), this can be
> > > > > fixed by having a per-queue lock in the driver instead of a global
> > > > > device lock. However, that means that drivers that want to support
> > > > > USERPTR will not be allowed to delegate lock handling to the V4L2
> > > > > core and
> > > > > video_ioctl2().
> > > >
> > > > Thanks for pointing this issue! This problem is already present in the
> > > > other videobuf2 memory allocators as well as the old videobuf and other
> > > > v4l2 drivers which implement queue handling by themselves.
> > >
> > > The problem is present in most (but not all) drivers, yes. That's one
> > > more reason to fix it in videobuf2 :-)
> > >
> > > > The only solution that will not complicate the videobuf2 and allocators
> > > > code is to move taking current->mm->mmap_sem lock into videobuf2 core.
> > > > Before acquiring this lock, vb2 calls wait_prepare to release device
> > > > lock and then once mmap_sem is locked, calls wait_finish to get it
> > > > again. This way the deadlock is avoided and allocators are free to
> > > > call
> > > > get_user_pages() without further messing with locks. The only drawback
> > > > is the fact that a bit more code will be executed under mmap_sem lock.
> > > >
> > > > What do you think about such solution?
> > >
> > > Won't that create a race condition ? Wouldn't an application for instance
> > > be able to call VIDIOC_REQBUFS(0) during the time window where the
> > > device lock is released ?
> >
> > Hmm... Right...
> >
> > The only solution I see now is to move acquiring mmap_sem as early as
> > possible to make the possible race harmless. The first operation in
> > vb2_qbuf will be then:
> >
> > if (b->memory == V4L2_MEMORY_USERPTR) {
> >        call_qop(q, wait_prepare, q);
> >        down_read(&current->mm->mmap_sem);
> >        call_qop(q, wait_finish, q);
> > }
> >
> > This should solve the race although all userptr buffers will be handled
> > under mmap_sem lock. Do you have any other idea?
> 
> If queues don't mix MMAP and USERPTR buffers (is that something we want to
> allow ?), wouldn't using a per-queue lock instead of a device-wide lock be a
> better way to fix the problem ?

It is not really about allowing mixing MMAP & USERPTR. Even if your 
application use only USRPTR  buffers, a malicious task might open the video
node and call mmap operation what might cause a deadlock in certain rare 
cases.

I'm against adding internal locks to vb2 queue. Avoiding deadlocks will be a 
nightmare when you will try to handle or synchronize more than one queue in a
single call...

Best regards
  
Laurent Pinchart Nov. 8, 2011, 2:43 p.m. UTC | #9
Hi Marek,

On Tuesday 08 November 2011 15:29:00 Marek Szyprowski wrote:
> On Tuesday, November 08, 2011 3:01 PM Laurent Pinchart wrote:
> > On Tuesday 08 November 2011 14:57:40 Marek Szyprowski wrote:
> > > On Tuesday, November 08, 2011 12:32 PM Laurent Pinchart wrote:
> > > > On Thursday 03 November 2011 08:40:26 Marek Szyprowski wrote:
> > > > > On Wednesday, November 02, 2011 2:54 PM Laurent Pinchart wrote:

[snip]

> > > > > > This can cause an AB-BA deadlock, and will be reported by
> > > > > > deadlock detection if enabled.
> > > > > > 
> > > > > > The issue is that the mmap() handler is called by the MM core
> > > > > > with current->mm->mmap_sem held, and then takes the driver's
> > > > > > lock before calling videobuf2's mmap handler. The VIDIOC_QBUF
> > > > > > handler, on the other hand, will first take the driver's lock
> > > > > > and will then try to take current->mm->mmap_sem here.
> > > > > > 
> > > > > > This can result in a deadlock if both MMAP and USERPTR buffers
> > > > > > are used by the same driver at the same time.
> > > > > > 
> > > > > > If we assume that MMAP and USERPTR buffers can't be used on the
> > > > > > same queue at the same time (VIDIOC_CREATEBUFS doesn't allow
> > > > > > that if I'm not mistaken, so we should be safe, at least for
> > > > > > now), this can be fixed by having a per-queue lock in the driver
> > > > > > instead of a global device lock. However, that means that
> > > > > > drivers that want to support USERPTR will not be allowed to
> > > > > > delegate lock handling to the V4L2 core and
> > > > > > video_ioctl2().
> > > > > 
> > > > > Thanks for pointing this issue! This problem is already present in
> > > > > the other videobuf2 memory allocators as well as the old videobuf
> > > > > and other v4l2 drivers which implement queue handling by
> > > > > themselves.
> > > > 
> > > > The problem is present in most (but not all) drivers, yes. That's one
> > > > more reason to fix it in videobuf2 :-)
> > > > 
> > > > > The only solution that will not complicate the videobuf2 and
> > > > > allocators code is to move taking current->mm->mmap_sem lock into
> > > > > videobuf2 core. Before acquiring this lock, vb2 calls wait_prepare
> > > > > to release device lock and then once mmap_sem is locked, calls
> > > > > wait_finish to get it again. This way the deadlock is avoided and
> > > > > allocators are free to call
> > > > > get_user_pages() without further messing with locks. The only
> > > > > drawback is the fact that a bit more code will be executed under
> > > > > mmap_sem lock.
> > > > > 
> > > > > What do you think about such solution?
> > > > 
> > > > Won't that create a race condition ? Wouldn't an application for
> > > > instance be able to call VIDIOC_REQBUFS(0) during the time window
> > > > where the device lock is released ?
> > > 
> > > Hmm... Right...
> > > 
> > > The only solution I see now is to move acquiring mmap_sem as early as
> > > possible to make the possible race harmless. The first operation in
> > > vb2_qbuf will be then:
> > > 
> > > if (b->memory == V4L2_MEMORY_USERPTR) {
> > > 
> > >        call_qop(q, wait_prepare, q);
> > >        down_read(&current->mm->mmap_sem);
> > >        call_qop(q, wait_finish, q);
> > > 
> > > }
> > > 
> > > This should solve the race although all userptr buffers will be handled
> > > under mmap_sem lock. Do you have any other idea?
> > 
> > If queues don't mix MMAP and USERPTR buffers (is that something we want
> > to allow ?), wouldn't using a per-queue lock instead of a device-wide
> > lock be a better way to fix the problem ?
> 
> It is not really about allowing mixing MMAP & USERPTR. Even if your
> application use only USRPTR  buffers, a malicious task might open the video
> node and call mmap operation what might cause a deadlock in certain rare
> cases.

Right :-/

The mmap() call would fail, but it still takes the lock before failing.

Would there be a way to make mmap() fail on queues configured for USERPTR 
without taking the lock ?

> I'm against adding internal locks to vb2 queue. Avoiding deadlocks will be
> a nightmare when you will try to handle or synchronize more than one queue
> in a single call...

I wasn't proposing adding internal locks to vb2 queue, but using per-queue 
locks inside the driver. vb2 would still not handle locking itself.
  
Marek Szyprowski Nov. 8, 2011, 3:14 p.m. UTC | #10
Hello,

On Tuesday, November 08, 2011 3:44 PM Laurent Pinchart wrote:
> On Tuesday 08 November 2011 15:29:00 Marek Szyprowski wrote:
> > On Tuesday, November 08, 2011 3:01 PM Laurent Pinchart wrote:
> > > On Tuesday 08 November 2011 14:57:40 Marek Szyprowski wrote:
> > > > On Tuesday, November 08, 2011 12:32 PM Laurent Pinchart wrote:
> > > > > On Thursday 03 November 2011 08:40:26 Marek Szyprowski wrote:
> > > > > > On Wednesday, November 02, 2011 2:54 PM Laurent Pinchart wrote:
> 
> [snip]
> 
> > > > > > > This can cause an AB-BA deadlock, and will be reported by
> > > > > > > deadlock detection if enabled.
> > > > > > >
> > > > > > > The issue is that the mmap() handler is called by the MM core
> > > > > > > with current->mm->mmap_sem held, and then takes the driver's
> > > > > > > lock before calling videobuf2's mmap handler. The VIDIOC_QBUF
> > > > > > > handler, on the other hand, will first take the driver's lock
> > > > > > > and will then try to take current->mm->mmap_sem here.
> > > > > > >
> > > > > > > This can result in a deadlock if both MMAP and USERPTR buffers
> > > > > > > are used by the same driver at the same time.
> > > > > > >
> > > > > > > If we assume that MMAP and USERPTR buffers can't be used on the
> > > > > > > same queue at the same time (VIDIOC_CREATEBUFS doesn't allow
> > > > > > > that if I'm not mistaken, so we should be safe, at least for
> > > > > > > now), this can be fixed by having a per-queue lock in the driver
> > > > > > > instead of a global device lock. However, that means that
> > > > > > > drivers that want to support USERPTR will not be allowed to
> > > > > > > delegate lock handling to the V4L2 core and
> > > > > > > video_ioctl2().
> > > > > >
> > > > > > Thanks for pointing this issue! This problem is already present in
> > > > > > the other videobuf2 memory allocators as well as the old videobuf
> > > > > > and other v4l2 drivers which implement queue handling by
> > > > > > themselves.
> > > > >
> > > > > The problem is present in most (but not all) drivers, yes. That's one
> > > > > more reason to fix it in videobuf2 :-)
> > > > >
> > > > > > The only solution that will not complicate the videobuf2 and
> > > > > > allocators code is to move taking current->mm->mmap_sem lock into
> > > > > > videobuf2 core. Before acquiring this lock, vb2 calls wait_prepare
> > > > > > to release device lock and then once mmap_sem is locked, calls
> > > > > > wait_finish to get it again. This way the deadlock is avoided and
> > > > > > allocators are free to call
> > > > > > get_user_pages() without further messing with locks. The only
> > > > > > drawback is the fact that a bit more code will be executed under
> > > > > > mmap_sem lock.
> > > > > >
> > > > > > What do you think about such solution?
> > > > >
> > > > > Won't that create a race condition ? Wouldn't an application for
> > > > > instance be able to call VIDIOC_REQBUFS(0) during the time window
> > > > > where the device lock is released ?
> > > >
> > > > Hmm... Right...
> > > >
> > > > The only solution I see now is to move acquiring mmap_sem as early as
> > > > possible to make the possible race harmless. The first operation in
> > > > vb2_qbuf will be then:
> > > >
> > > > if (b->memory == V4L2_MEMORY_USERPTR) {
> > > >
> > > >        call_qop(q, wait_prepare, q);
> > > >        down_read(&current->mm->mmap_sem);
> > > >        call_qop(q, wait_finish, q);
> > > >
> > > > }
> > > >
> > > > This should solve the race although all userptr buffers will be handled
> > > > under mmap_sem lock. Do you have any other idea?
> > >
> > > If queues don't mix MMAP and USERPTR buffers (is that something we want
> > > to allow ?), wouldn't using a per-queue lock instead of a device-wide
> > > lock be a better way to fix the problem ?
> >
> > It is not really about allowing mixing MMAP & USERPTR. Even if your
> > application use only USRPTR  buffers, a malicious task might open the video
> > node and call mmap operation what might cause a deadlock in certain rare
> > cases.
> 
> Right :-/
> 
> The mmap() call would fail, but it still takes the lock before failing.
> 
> Would there be a way to make mmap() fail on queues configured for USERPTR
> without taking the lock ?

The problem is the fact that mmap_sem is taken first by the kernel core before
calling driver's mmap method and you can do nothing about it. You also cannot
release mmap_sem lock in mmap method and take it again to avoid deadlock because
you will exchange one race (ioctl race) into another (mmap vs. other user memory
related functions).

> > I'm against adding internal locks to vb2 queue. Avoiding deadlocks will be
> > a nightmare when you will try to handle or synchronize more than one queue
> > in a single call...
> 
> I wasn't proposing adding internal locks to vb2 queue, but using per-queue
> locks inside the driver. vb2 would still not handle locking itself.

How will it solve the issue? mmap_sem is taken by the kernel core for calling
mmap method, where you need to take your driver/queue lock(s) and preform the
operation. In qbuf you usually take your driver/queue lock(s) first and then 
you would like to take mmap_sem to grab userpages...

Best regards
  
Javier Martin Nov. 17, 2011, 10:41 a.m. UTC | #11
Hi all,
what is the current status of this patch? Do you plan to send an
improved version?

I want to test it against my mem2mem driver I recently submitted
(emma-PrP) and a UVC camera in order to transform YUYV to YUV420.

Provided we ignore the locking  problems you have mentioned is it in a
'working' state?
  
Marek Szyprowski Nov. 21, 2011, 10:51 a.m. UTC | #12
Hello,

On Thursday, November 17, 2011 11:41 AM javier Martin wrote:

> what is the current status of this patch? Do you plan to send an
> improved version?
>
> I want to test it against my mem2mem driver I recently submitted
> (emma-PrP) and a UVC camera in order to transform YUYV to YUV420.
> 
> Provided we ignore the locking  problems you have mentioned is it in a
> 'working' state?

The updated version will be posted soon. However using it for accessing
mmaped buffers from your mem2mem driver (which relies on 
videobuf2-dma-contig allocator) requires some additional work to get
access to direct PFNMAP-type mappings in kernel space. 

Best regards
  

Patch

diff --git a/drivers/media/video/videobuf2-vmalloc.c b/drivers/media/video/videobuf2-vmalloc.c
index a3a8842..ee0ee37 100644
--- a/drivers/media/video/videobuf2-vmalloc.c
+++ b/drivers/media/video/videobuf2-vmalloc.c
@@ -12,6 +12,7 @@ 
 
 #include <linux/module.h>
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
@@ -20,7 +21,10 @@ 
 
 struct vb2_vmalloc_buf {
 	void				*vaddr;
+	struct page			**pages;
+	int				write;
 	unsigned long			size;
+	unsigned int			n_pages;
 	atomic_t			refcount;
 	struct vb2_vmarea_handler	handler;
 };
@@ -66,6 +70,83 @@  static void vb2_vmalloc_put(void *buf_priv)
 	}
 }
 
+static void *vb2_vmalloc_get_userptr(void *alloc_ctx, unsigned long vaddr,
+				     unsigned long size, int write)
+{
+	struct vb2_vmalloc_buf *buf;
+
+	unsigned long first, last;
+	int n_pages_from_user, offset;
+
+	buf = kzalloc(sizeof *buf, GFP_KERNEL);
+	if (!buf)
+		return NULL;
+
+	buf->vaddr = NULL;
+	buf->write = write;
+	offset = vaddr & ~PAGE_MASK;
+	buf->size = size;
+
+	first = (vaddr & PAGE_MASK) >> PAGE_SHIFT;
+	last  = ((vaddr + size - 1) & PAGE_MASK) >> PAGE_SHIFT;
+	buf->n_pages = last - first + 1;
+	buf->pages = kzalloc(buf->n_pages * sizeof(struct page *), GFP_KERNEL);
+	if (!buf->pages)
+		goto userptr_fail_pages_array_alloc;
+
+	down_read(&current->mm->mmap_sem);
+	n_pages_from_user = get_user_pages(current, current->mm,
+					     vaddr & PAGE_MASK,
+					     buf->n_pages,
+					     write,
+					     1, /* force */
+					     buf->pages,
+					     NULL);
+	up_read(&current->mm->mmap_sem);
+	if (n_pages_from_user != buf->n_pages)
+		goto userptr_fail_get_user_pages;
+
+	buf->vaddr = vm_map_ram(buf->pages, buf->n_pages, -1, PAGE_KERNEL);
+
+	if (buf->vaddr) {
+		buf->vaddr += offset;
+		return buf;
+	}
+
+userptr_fail_get_user_pages:
+	printk(KERN_DEBUG "get_user_pages requested/got: %d/%d]\n",
+	       n_pages_from_user, buf->n_pages);
+	while (--n_pages_from_user >= 0)
+		put_page(buf->pages[n_pages_from_user]);
+	kfree(buf->pages);
+
+userptr_fail_pages_array_alloc:
+	kfree(buf);
+
+	return NULL;
+}
+
+static void vb2_vmalloc_put_userptr(void *buf_priv)
+{
+	struct vb2_vmalloc_buf *buf = buf_priv;
+
+	int i = buf->n_pages;
+	int offset = (unsigned long)buf->vaddr & ~PAGE_MASK;
+
+	printk(KERN_DEBUG "%s: Releasing userspace buffer of %d pages\n",
+	       __func__, buf->n_pages);
+	if (buf->vaddr)
+		vm_unmap_ram((const void *)((unsigned long)buf->vaddr - offset),
+			     buf->n_pages);
+	while (--i >= 0) {
+		if (buf->write)
+			set_page_dirty_lock(buf->pages[i]);
+		put_page(buf->pages[i]);
+	}
+	kfree(buf->pages);
+	kfree(buf);
+}
+
 static void *vb2_vmalloc_vaddr(void *buf_priv)
 {
 	struct vb2_vmalloc_buf *buf = buf_priv;
@@ -73,7 +154,8 @@  static void *vb2_vmalloc_vaddr(void *buf_priv)
 	BUG_ON(!buf);
 
 	if (!buf->vaddr) {
-		printk(KERN_ERR "Address of an unallocated plane requested\n");
+		printk(KERN_ERR "Address of an unallocated plane requested "
+		       "or cannot map user pointer\n");
 		return NULL;
 	}
 
@@ -121,6 +203,8 @@  static int vb2_vmalloc_mmap(void *buf_priv, struct vm_area_struct *vma)
 const struct vb2_mem_ops vb2_vmalloc_memops = {
 	.alloc		= vb2_vmalloc_alloc,
 	.put		= vb2_vmalloc_put,
+	.get_userptr	= vb2_vmalloc_get_userptr,
+	.put_userptr	= vb2_vmalloc_put_userptr,
 	.vaddr		= vb2_vmalloc_vaddr,
 	.mmap		= vb2_vmalloc_mmap,
 	.num_users	= vb2_vmalloc_num_users,