[v3,2/2] drm/xe: Use dma-fence array for media GT TLB invalidations in PT code

Message ID 20240823045443.2132118-3-matthew.brost@intel.com (mailing list archive)
State New
Series Replace dma-fence chain with dma-fence array for media GT TLB invalidation

Commit Message

Matthew Brost Aug. 23, 2024, 4:54 a.m. UTC
Using a chain fence is problematic as these cannot be installed in
timeline drm sync objects. Use a dma-fence-array instead at the cost of
an extra failure point.

Also fixup reserve fence count to include media GT invalidation fence.

v2:
 - Fix reserve fence count (Casey Bowman)
v3:
 - Prealloc dma fence array (CI)

Fixes: 40520283e0fd ("drm/xe: Invalidate media_gt TLBs in PT code")
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_pt.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)
  

Comments

Christian König Aug. 23, 2024, 6:40 a.m. UTC | #1
On 23.08.24 at 06:54, Matthew Brost wrote:
> Using a chain fence is problematic as these cannot be installed in
> timeline drm sync objects. Use a dma-fence-array instead at the cost of
> an extra failure point.

Mhm, IIRC we converted chain objects into dma-fence-arrays while 
installing them into a timeline.

Doesn't that work any more?

Regards,
Christian.

Matthew Brost Aug. 23, 2024, 3:38 p.m. UTC | #2
On Fri, Aug 23, 2024 at 08:40:40AM +0200, Christian König wrote:
> On 23.08.24 at 06:54, Matthew Brost wrote:
> > Using a chain fence is problematic as these cannot be installed in
> > timeline drm sync objects. Use a dma-fence-array instead at the cost of
> > an extra failure point.
> 
> Mhm, IIRC we converted chain objects into dma-fence-arrays while installing
> them into a timeline.
> 
> Doesn't that work any more?
> 

Thanks for the quick feedback.

As is, installing a dma-fence-chain into a timeline sync doesn't work.

The 'fence' returned from 'xe_pt_update_ops_run' is installed here [1]
as the 'fence' argument. This blows up here [2] [3]. It does suggest in
[3] to use a dma-fence-array, which is what I'm doing.

The issue with using a dma-fence-array is that it adds another failure
point if dma_fence_array_create is called after collecting multiple
fences from TLB invalidations. Also, we hold a lock in
xe_pt_update_ops_run which is in the reclaim path, so calling
dma_fence_array_create isn't allowed under that lock.

I suppose we could drop that lock and directly wait on the TLB
invalidation fences if dma_fence_array_create fails, but to me it makes
more sense to prealloc the dma-fence-array and populate it later. I saw
your response to my first patch about how this could be problematic;
I'm a little confused by that, so I'm responding there too.
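
Roughly, the pattern I have in mind, as a sketch (using the
dma_fence_array_alloc()/dma_fence_array_arm() helpers from patch 1/2 of
this series; error unwinding omitted):

/*
 * Do every GFP_KERNEL allocation up front, before the reclaim-path
 * lock is taken, so that arming under the lock cannot fail.
 */
struct dma_fence **fences;
struct dma_fence_array *cf;

fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
if (!fences)
	return -ENOMEM;
cf = dma_fence_array_alloc(2);
if (!cf)
	return -ENOMEM;

/* ... later, under the lock, once both invalidation fences exist: */
fences[0] = &ifence->base.base;
fences[1] = &mfence->base.base;
dma_fence_array_arm(cf, 2, fences, vm->composite_fence_ctx,
		    vm->composite_fence_seqno++, false);
fence = &cf->base;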

Matt

[1] https://elixir.bootlin.com/linux/v6.10.6/source/drivers/gpu/drm/xe/xe_sync.c#L233
[2] https://elixir.bootlin.com/linux/v6.10.6/source/drivers/gpu/drm/drm_syncobj.c#L349
[3] https://elixir.bootlin.com/linux/v6.10.6/source/drivers/dma-buf/dma-fence-chain.c#L275

Christian König Aug. 26, 2024, 7:43 a.m. UTC | #3
On 23.08.24 at 17:38, Matthew Brost wrote:
> On Fri, Aug 23, 2024 at 08:40:40AM +0200, Christian König wrote:
>> On 23.08.24 at 06:54, Matthew Brost wrote:
>>> Using a chain fence is problematic as these cannot be installed in
>>> timeline drm sync objects. Use a dma-fence-array instead at the cost of
>>> an extra failure point.
>> Mhm, IIRC we converted chain objects into dma-fence-arrays while installing
>> them into a timeline.
>>
>> Doesn't that work any more?
>>
> Thanks for the quick feedback.
>
> As is, installing a dma-fence-chain into a timeline sync doesn't work.
>
> The 'fence' returned from 'xe_pt_update_ops_run' is installed here [1]
> as the 'fence' argument. This blows up here [2] [3]. It does suggest in
> [3] to use a dma-fence-array, which is what I'm doing.

Ah, that makes it more clear. You are not using some IOCTL to install 
the fences into a timeline but rather want to do this at the end of your 
submission IOCTL, right?

> The issue with using a dma-fence-array is that it adds another failure
> point if dma_fence_array_create is called after collecting multiple
> fences from TLB invalidations. Also, we hold a lock in
> xe_pt_update_ops_run which is in the reclaim path, so calling
> dma_fence_array_create isn't allowed under that lock.

Ok that is a rather good argument for this.

Just two comments on the code:
1. Please rename dma_fence_array_arm() to dma_fence_array_init().
2. Please drop the WARN_ON(!array); a NULL array will result in a NULL
pointer de-reference and crash anyway.
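
(That is, keep the signature the patch already relies on and just
rename it; a sketch, assuming patch 1/2 changes nothing else:)

/* Same semantics as dma_fence_array_arm() in patch 1/2, renamed: */
void dma_fence_array_init(struct dma_fence_array *array,
			  int num_fences, struct dma_fence **fences,
			  u64 context, unsigned int seqno,
			  bool signal_on_any);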

> I suppose we could drop that lock and directly wait on the TLB
> invalidation fences if dma_fence_array_create fails, but to me it makes
> more sense to prealloc the dma-fence-array and populate it later. I saw
> your response to my first patch about how this could be problematic;
> I'm a little confused by that, so I'm responding there too.

Yeah, people came up with the crazy idea of inserting dma_fence_array
objects into other dma_fence_arrays, resulting in overwriting the
kernel stack when you finally free this construct.

In addition to that, Sima pointed out during the initial review of this
code that we should make sure that no cycles can happen with a
dma_fence.

But we now have a warning when somebody tries to add a container to a
dma_fence_array object, so that should probably be fine.
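
(For reference, the guard is along these lines; a sketch, the exact
placement in the dma-fence-array code may differ:)

/* Reject fence containers (arrays/chains) as members, ruling out the
 * nesting and stack-overflow-on-free problem described above. */
for (i = 0; i < num_fences; ++i)
	WARN_ON(dma_fence_is_container(fences[i]));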

Regards,
Christian.

Matthew Brost Aug. 26, 2024, 8:45 a.m. UTC | #4
On Mon, Aug 26, 2024 at 09:43:40AM +0200, Christian König wrote:
> On 23.08.24 at 17:38, Matthew Brost wrote:
> > On Fri, Aug 23, 2024 at 08:40:40AM +0200, Christian König wrote:
> > > On 23.08.24 at 06:54, Matthew Brost wrote:
> > > > Using a chain fence is problematic as these cannot be installed in
> > > > timeline drm sync objects. Use a dma-fence-array instead at the cost of
> > > > an extra failure point.
> > > Mhm, IIRC we converted chain objects into dma-fence-arrays while installing
> > > them into a timeline.
> > > 
> > > Doesn't that work any more?
> > > 
> > Thanks for the quick feedback.
> > 
> > As is, installing a dma-fence-chain into a timeline sync doesn't work.
> > 
> > The 'fence' returned from 'xe_pt_update_ops_run' is installed here [1]
> > as the 'fence' argument. This blows up here [2] [3]. It does suggest in
> > [3] to use a dma-fence-array, which is what I'm doing.
> 
> Ah, that makes it more clear. You are not using some IOCTL to install the
> fences into a timeline but rather want to do this at the end of your
> submission IOCTL, right?
> 

Bind IOCTL, but correct. Submission and bind IOCTLs in Xe are
conceptually the same wrt syncs.

> > The issue with using a dma-fence-array is that it adds another failure
> > point if dma_fence_array_create is called after collecting multiple
> > fences from TLB invalidations. Also, we hold a lock in
> > xe_pt_update_ops_run which is in the reclaim path, so calling
> > dma_fence_array_create isn't allowed under that lock.
> 
> Ok that is a rather good argument for this.
> 
> Just two comments on the code:
> 1. Please rename dma_fence_array_arm() to dma_fence_array_init().
> 2. Please drop the WARN_ON(!array); a NULL array will result in a NULL
> pointer de-reference and crash anyway.
> 

Will do.

> > I suppose we could drop that lock and directly wait on the TLB
> > invalidation fences if dma_fence_array_create fails, but to me it makes
> > more sense to prealloc the dma-fence-array and populate it later. I saw
> > your response to my first patch about how this could be problematic;
> > I'm a little confused by that, so I'm responding there too.
> 
> Yeah, people came up with the crazy idea of inserting dma_fence_array
> objects into other dma_fence_arrays, resulting in overwriting the
> kernel stack when you finally free this construct.
> 
> In addition to that, Sima pointed out during the initial review of this
> code that we should make sure that no cycles can happen with a
> dma_fence.
>

Ah, yes. I could see how that could be an issue.
 
> But we now have a warning when somebody tries to add a container to a
> dma_fence_array object, so that should probably be fine.
>

I see the warning and agree it should protect against this type of
problematic code.

Matt


Patch

diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 6c6714af3d5d..2e35444a85b0 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -3,7 +3,7 @@ 
  * Copyright © 2022 Intel Corporation
  */
 
-#include <linux/dma-fence-chain.h>
+#include <linux/dma-fence-array.h>
 
 #include "xe_pt.h"
 
@@ -1629,9 +1629,11 @@  xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops,
 
 static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma)
 {
+	int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0;
+
 	if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
 		return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv,
-					       xe->info.tile_count);
+					       xe->info.tile_count << shift);
 
 	return 0;
 }
@@ -1818,6 +1820,7 @@  int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
 	struct xe_vm_pgtable_update_ops *pt_update_ops =
 		&vops->pt_update_ops[tile->id];
 	struct xe_vma_op *op;
+	int shift = tile->media_gt ? 1 : 0;
 	int err;
 
 	lockdep_assert_held(&vops->vm->lock);
@@ -1826,7 +1829,7 @@  int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
 	xe_pt_update_ops_init(pt_update_ops);
 
 	err = dma_resv_reserve_fences(xe_vm_resv(vops->vm),
-				      tile_to_xe(tile)->info.tile_count);
+				      tile_to_xe(tile)->info.tile_count << shift);
 	if (err)
 		return err;
 
@@ -1983,7 +1986,8 @@  xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
 		&vops->pt_update_ops[tile->id];
 	struct dma_fence *fence;
 	struct invalidation_fence *ifence = NULL, *mfence = NULL;
-	struct dma_fence_chain *chain_fence = NULL;
+	struct dma_fence **fences = NULL;
+	struct dma_fence_array *cf = NULL;
 	struct xe_range_fence *rfence;
 	struct xe_vma_op *op;
 	int err = 0, i;
@@ -2022,8 +2026,13 @@  xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
 				err = -ENOMEM;
 				goto free_ifence;
 			}
-			chain_fence = dma_fence_chain_alloc();
-			if (!chain_fence) {
+			fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
+			if (!fences) {
+				err = -ENOMEM;
+				goto free_ifence;
+			}
+			cf = dma_fence_array_alloc(2);
+			if (!cf) {
 				err = -ENOMEM;
 				goto free_ifence;
 			}
@@ -2068,9 +2077,13 @@  xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
 			invalidation_fence_init(tile->media_gt, mfence, fence,
 						pt_update_ops->start,
 						pt_update_ops->last, vm->usm.asid);
-			dma_fence_chain_init(chain_fence, &ifence->base.base,
-					     &mfence->base.base, 0);
-			fence = &chain_fence->base;
+			fences[0] = &ifence->base.base;
+			fences[1] = &mfence->base.base;
+			dma_fence_array_arm(cf, 2, fences,
+					    vm->composite_fence_ctx,
+					    vm->composite_fence_seqno++,
+					    false);
+			fence = &cf->base;
 		} else {
 			fence = &ifence->base.base;
 		}
@@ -2108,7 +2121,8 @@  xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
 free_rfence:
 	kfree(rfence);
 free_ifence:
-	dma_fence_chain_free(chain_fence);
+	kfree(cf);
+	kfree(fences);
 	kfree(mfence);
 	kfree(ifence);
 kill_vm_tile1: