[v4,01/14] dma-buf/dma-fence: Add deadline awareness

Message ID 20230218211608.1630586-2-robdclark@gmail.com (mailing list archive)
State Not Applicable
Headers
Series dma-fence: Deadline awareness |

Commit Message

Rob Clark Feb. 18, 2023, 9:15 p.m. UTC
  From: Rob Clark <robdclark@chromium.org>

Add a way to hint to the fence signaler of an upcoming deadline, such as
vblank, which the fence waiter would prefer not to miss.  This is to aid
the fence signaler in making power management decisions, like boosting
frequency as the deadline approaches, and to make it aware of missed
deadlines so that they can be factored into the frequency scaling.

v2: Drop dma_fence::deadline and related logic to filter duplicate
    deadlines, to avoid increasing dma_fence size.  The fence-context
    implementation will need similar logic to track deadlines of all
    the fences on the same timeline.  [ckoenig]
v3: Clarify locking wrt. set_deadline callback

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-fence.c | 20 ++++++++++++++++++++
 include/linux/dma-fence.h   | 20 ++++++++++++++++++++
 2 files changed, 40 insertions(+)
  

Comments

Tvrtko Ursulin Feb. 22, 2023, 10:23 a.m. UTC | #1
On 18/02/2023 21:15, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
> 
> Add a way to hint to the fence signaler of an upcoming deadline, such as
> vblank, which the fence waiter would prefer not to miss.  This is to aid
> the fence signaler in making power management decisions, like boosting
> frequency as the deadline approaches and awareness of missing deadlines
> so that can be factored in to the frequency scaling.
> 
> v2: Drop dma_fence::deadline and related logic to filter duplicate
>      deadlines, to avoid increasing dma_fence size.  The fence-context
>      implementation will need similar logic to track deadlines of all
>      the fences on the same timeline.  [ckoenig]
> v3: Clarify locking wrt. set_deadline callback
> 
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/dma-buf/dma-fence.c | 20 ++++++++++++++++++++
>   include/linux/dma-fence.h   | 20 ++++++++++++++++++++
>   2 files changed, 40 insertions(+)
> 
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 0de0482cd36e..763b32627684 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -912,6 +912,26 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
>   }
>   EXPORT_SYMBOL(dma_fence_wait_any_timeout);
>   
> +
> +/**
> + * dma_fence_set_deadline - set desired fence-wait deadline
> + * @fence:    the fence that is to be waited on
> + * @deadline: the time by which the waiter hopes for the fence to be
> + *            signaled
> + *
> + * Inform the fence signaler of an upcoming deadline, such as vblank, by
> + * which point the waiter would prefer the fence to be signaled by.  This
> + * is intended to give feedback to the fence signaler to aid in power
> + * management decisions, such as boosting GPU frequency if a periodic
> + * vblank deadline is approaching.
> + */
> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
> +{
> +	if (fence->ops->set_deadline && !dma_fence_is_signaled(fence))
> +		fence->ops->set_deadline(fence, deadline);
> +}
> +EXPORT_SYMBOL(dma_fence_set_deadline);
> +
>   /**
>    * dma_fence_describe - Dump fence describtion into seq_file
>    * @fence: the 6fence to describe
> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> index 775cdc0b4f24..d77f6591c453 100644
> --- a/include/linux/dma-fence.h
> +++ b/include/linux/dma-fence.h
> @@ -99,6 +99,7 @@ enum dma_fence_flag_bits {
>   	DMA_FENCE_FLAG_SIGNALED_BIT,
>   	DMA_FENCE_FLAG_TIMESTAMP_BIT,
>   	DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> +	DMA_FENCE_FLAG_HAS_DEADLINE_BIT,

Would this bit be better left out of the core implementation, given that, 
with this approach, the component which implements dma-fence has to track 
the actual deadline anyway?

Also taking a step back - are we all okay with starting to expand the 
relatively simple core synchronisation primitive with side channel data 
like this? What would be the criteria for what side channel data would 
be acceptable? Taking note the thing lives outside drivers/gpu/.

Regards,

Tvrtko

>   	DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
>   };
>   
> @@ -257,6 +258,23 @@ struct dma_fence_ops {
>   	 */
>   	void (*timeline_value_str)(struct dma_fence *fence,
>   				   char *str, int size);
> +
> +	/**
> +	 * @set_deadline:
> +	 *
> +	 * Callback to allow a fence waiter to inform the fence signaler of
> +	 * an upcoming deadline, such as vblank, by which point the waiter
> +	 * would prefer the fence to be signaled by.  This is intended to
> +	 * give feedback to the fence signaler to aid in power management
> +	 * decisions, such as boosting GPU frequency.
> +	 *
> +	 * This is called without &dma_fence.lock held, it can be called
> +	 * multiple times and from any context.  Locking is up to the callee
> +	 * if it has some state to manage.
> +	 *
> +	 * This callback is optional.
> +	 */
> +	void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
>   };
>   
>   void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
> @@ -583,6 +601,8 @@ static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
>   	return ret < 0 ? ret : 0;
>   }
>   
> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline);
> +
>   struct dma_fence *dma_fence_get_stub(void);
>   struct dma_fence *dma_fence_allocate_private_stub(void);
>   u64 dma_fence_context_alloc(unsigned num);
  
Luben Tuikov Feb. 22, 2023, 11:01 a.m. UTC | #2
On 2023-02-18 16:15, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
> 
> Add a way to hint to the fence signaler of an upcoming deadline, such as
> vblank, which the fence waiter would prefer not to miss.  This is to aid
> the fence signaler in making power management decisions, like boosting
> frequency as the deadline approaches and awareness of missing deadlines
> so that can be factored in to the frequency scaling.
> 
> v2: Drop dma_fence::deadline and related logic to filter duplicate
>     deadlines, to avoid increasing dma_fence size.  The fence-context
>     implementation will need similar logic to track deadlines of all
>     the fences on the same timeline.  [ckoenig]
> v3: Clarify locking wrt. set_deadline callback
> 
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/dma-buf/dma-fence.c | 20 ++++++++++++++++++++
>  include/linux/dma-fence.h   | 20 ++++++++++++++++++++
>  2 files changed, 40 insertions(+)
> 
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 0de0482cd36e..763b32627684 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -912,6 +912,26 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
>  }
>  EXPORT_SYMBOL(dma_fence_wait_any_timeout);
>  
> +
> +/**

The added empty line above creates a problem for scripts/checkpatch.pl, and
there are a few others here and there. It'd be a good idea to run this series
through checkpatch.pl, if only for information.

I wasn't able to apply patch 13 to drm-misc-next or any other branch known
to me, and I didn't see base tree information in the cover letter. I skipped
that patch, and the series compiled okay without it.
  
Christian König Feb. 22, 2023, 3:28 p.m. UTC | #3
Am 22.02.23 um 11:23 schrieb Tvrtko Ursulin:
>
> On 18/02/2023 21:15, Rob Clark wrote:
>> From: Rob Clark <robdclark@chromium.org>
>>
>> Add a way to hint to the fence signaler of an upcoming deadline, such as
>> vblank, which the fence waiter would prefer not to miss.  This is to aid
>> the fence signaler in making power management decisions, like boosting
>> frequency as the deadline approaches and awareness of missing deadlines
>> so that can be factored in to the frequency scaling.
>>
>> v2: Drop dma_fence::deadline and related logic to filter duplicate
>>      deadlines, to avoid increasing dma_fence size.  The fence-context
>>      implementation will need similar logic to track deadlines of all
>>      the fences on the same timeline.  [ckoenig]
>> v3: Clarify locking wrt. set_deadline callback
>>
>> Signed-off-by: Rob Clark <robdclark@chromium.org>
>> Reviewed-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/dma-buf/dma-fence.c | 20 ++++++++++++++++++++
>>   include/linux/dma-fence.h   | 20 ++++++++++++++++++++
>>   2 files changed, 40 insertions(+)
>>
>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
>> index 0de0482cd36e..763b32627684 100644
>> --- a/drivers/dma-buf/dma-fence.c
>> +++ b/drivers/dma-buf/dma-fence.c
>> @@ -912,6 +912,26 @@ dma_fence_wait_any_timeout(struct dma_fence 
>> **fences, uint32_t count,
>>   }
>>   EXPORT_SYMBOL(dma_fence_wait_any_timeout);
>>   +
>> +/**
>> + * dma_fence_set_deadline - set desired fence-wait deadline
>> + * @fence:    the fence that is to be waited on
>> + * @deadline: the time by which the waiter hopes for the fence to be
>> + *            signaled
>> + *
>> + * Inform the fence signaler of an upcoming deadline, such as 
>> vblank, by
>> + * which point the waiter would prefer the fence to be signaled by.  
>> This
>> + * is intended to give feedback to the fence signaler to aid in power
>> + * management decisions, such as boosting GPU frequency if a periodic
>> + * vblank deadline is approaching.
>> + */
>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
>> +{
>> +    if (fence->ops->set_deadline && !dma_fence_is_signaled(fence))
>> +        fence->ops->set_deadline(fence, deadline);
>> +}
>> +EXPORT_SYMBOL(dma_fence_set_deadline);
>> +
>>   /**
>>    * dma_fence_describe - Dump fence describtion into seq_file
>>    * @fence: the 6fence to describe
>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
>> index 775cdc0b4f24..d77f6591c453 100644
>> --- a/include/linux/dma-fence.h
>> +++ b/include/linux/dma-fence.h
>> @@ -99,6 +99,7 @@ enum dma_fence_flag_bits {
>>       DMA_FENCE_FLAG_SIGNALED_BIT,
>>       DMA_FENCE_FLAG_TIMESTAMP_BIT,
>>       DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
>> +    DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
>
> Would this bit be better left out from core implementation, given how 
> the approach is the component which implements dma-fence has to track 
> the actual deadline and all?
>
> Also taking a step back - are we all okay with starting to expand the 
> relatively simple core synchronisation primitive with side channel 
> data like this? What would be the criteria for what side channel data 
> would be acceptable? Taking note the thing lives outside drivers/gpu/.

I had similar concerns and it took me a moment as well to understand the 
background of why this is necessary. I essentially don't see any other 
approach we could take.

Yes, this is GPU/CRTC specific, but we somehow need a common interface 
for communicating it between drivers and that's the dma_fence object as 
far as I can see.

Regards,
Christian.

>
> Regards,
>
> Tvrtko
>
>>       DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
>>   };
>>   @@ -257,6 +258,23 @@ struct dma_fence_ops {
>>        */
>>       void (*timeline_value_str)(struct dma_fence *fence,
>>                      char *str, int size);
>> +
>> +    /**
>> +     * @set_deadline:
>> +     *
>> +     * Callback to allow a fence waiter to inform the fence signaler of
>> +     * an upcoming deadline, such as vblank, by which point the waiter
>> +     * would prefer the fence to be signaled by.  This is intended to
>> +     * give feedback to the fence signaler to aid in power management
>> +     * decisions, such as boosting GPU frequency.
>> +     *
>> +     * This is called without &dma_fence.lock held, it can be called
>> +     * multiple times and from any context.  Locking is up to the 
>> callee
>> +     * if it has some state to manage.
>> +     *
>> +     * This callback is optional.
>> +     */
>> +    void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
>>   };
>>     void dma_fence_init(struct dma_fence *fence, const struct 
>> dma_fence_ops *ops,
>> @@ -583,6 +601,8 @@ static inline signed long dma_fence_wait(struct 
>> dma_fence *fence, bool intr)
>>       return ret < 0 ? ret : 0;
>>   }
>>   +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t 
>> deadline);
>> +
>>   struct dma_fence *dma_fence_get_stub(void);
>>   struct dma_fence *dma_fence_allocate_private_stub(void);
>>   u64 dma_fence_context_alloc(unsigned num);
  
Tvrtko Ursulin Feb. 22, 2023, 5:04 p.m. UTC | #4
On 22/02/2023 15:28, Christian König wrote:
> Am 22.02.23 um 11:23 schrieb Tvrtko Ursulin:
>>
>> On 18/02/2023 21:15, Rob Clark wrote:
>>> From: Rob Clark <robdclark@chromium.org>
>>>
>>> Add a way to hint to the fence signaler of an upcoming deadline, such as
>>> vblank, which the fence waiter would prefer not to miss.  This is to aid
>>> the fence signaler in making power management decisions, like boosting
>>> frequency as the deadline approaches and awareness of missing deadlines
>>> so that can be factored in to the frequency scaling.
>>>
>>> v2: Drop dma_fence::deadline and related logic to filter duplicate
>>>      deadlines, to avoid increasing dma_fence size.  The fence-context
>>>      implementation will need similar logic to track deadlines of all
>>>      the fences on the same timeline.  [ckoenig]
>>> v3: Clarify locking wrt. set_deadline callback
>>>
>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/dma-buf/dma-fence.c | 20 ++++++++++++++++++++
>>>   include/linux/dma-fence.h   | 20 ++++++++++++++++++++
>>>   2 files changed, 40 insertions(+)
>>>
>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
>>> index 0de0482cd36e..763b32627684 100644
>>> --- a/drivers/dma-buf/dma-fence.c
>>> +++ b/drivers/dma-buf/dma-fence.c
>>> @@ -912,6 +912,26 @@ dma_fence_wait_any_timeout(struct dma_fence 
>>> **fences, uint32_t count,
>>>   }
>>>   EXPORT_SYMBOL(dma_fence_wait_any_timeout);
>>>   +
>>> +/**
>>> + * dma_fence_set_deadline - set desired fence-wait deadline
>>> + * @fence:    the fence that is to be waited on
>>> + * @deadline: the time by which the waiter hopes for the fence to be
>>> + *            signaled
>>> + *
>>> + * Inform the fence signaler of an upcoming deadline, such as 
>>> vblank, by
>>> + * which point the waiter would prefer the fence to be signaled by. 
>>> This
>>> + * is intended to give feedback to the fence signaler to aid in power
>>> + * management decisions, such as boosting GPU frequency if a periodic
>>> + * vblank deadline is approaching.
>>> + */
>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
>>> +{
>>> +    if (fence->ops->set_deadline && !dma_fence_is_signaled(fence))
>>> +        fence->ops->set_deadline(fence, deadline);
>>> +}
>>> +EXPORT_SYMBOL(dma_fence_set_deadline);
>>> +
>>>   /**
>>>    * dma_fence_describe - Dump fence describtion into seq_file
>>>    * @fence: the 6fence to describe
>>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
>>> index 775cdc0b4f24..d77f6591c453 100644
>>> --- a/include/linux/dma-fence.h
>>> +++ b/include/linux/dma-fence.h
>>> @@ -99,6 +99,7 @@ enum dma_fence_flag_bits {
>>>       DMA_FENCE_FLAG_SIGNALED_BIT,
>>>       DMA_FENCE_FLAG_TIMESTAMP_BIT,
>>>       DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
>>> +    DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
>>
>> Would this bit be better left out from core implementation, given how 
>> the approach is the component which implements dma-fence has to track 
>> the actual deadline and all?
>>
>> Also taking a step back - are we all okay with starting to expand the 
>> relatively simple core synchronisation primitive with side channel 
>> data like this? What would be the criteria for what side channel data 
>> would be acceptable? Taking note the thing lives outside drivers/gpu/.
> 
> I had similar concerns and it took me a moment as well to understand the 
> background why this is necessary. I essentially don't see much other 
> approach we could do.
> 
> Yes, this is GPU/CRTC specific, but we somehow need a common interface 
> for communicating it between drivers and that's the dma_fence object as 
> far as I can see.

Yeah I also don't see any other easy options. Just wanted to raise this 
as something which probably needs some wider acks.

Also what about the "low level" part of my question about the reason, or 
benefits, of defining the deadline bit in the common layer?

Regards,

Tvrtko
  
Rob Clark Feb. 22, 2023, 5:16 p.m. UTC | #5
On Wed, Feb 22, 2023 at 9:05 AM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>
>
> On 22/02/2023 15:28, Christian König wrote:
> > Am 22.02.23 um 11:23 schrieb Tvrtko Ursulin:
> >>
> >> On 18/02/2023 21:15, Rob Clark wrote:
> >>> From: Rob Clark <robdclark@chromium.org>
> >>>
> >>> Add a way to hint to the fence signaler of an upcoming deadline, such as
> >>> vblank, which the fence waiter would prefer not to miss.  This is to aid
> >>> the fence signaler in making power management decisions, like boosting
> >>> frequency as the deadline approaches and awareness of missing deadlines
> >>> so that can be factored in to the frequency scaling.
> >>>
> >>> v2: Drop dma_fence::deadline and related logic to filter duplicate
> >>>      deadlines, to avoid increasing dma_fence size.  The fence-context
> >>>      implementation will need similar logic to track deadlines of all
> >>>      the fences on the same timeline.  [ckoenig]
> >>> v3: Clarify locking wrt. set_deadline callback
> >>>
> >>> Signed-off-by: Rob Clark <robdclark@chromium.org>
> >>> Reviewed-by: Christian König <christian.koenig@amd.com>
> >>> ---
> >>>   drivers/dma-buf/dma-fence.c | 20 ++++++++++++++++++++
> >>>   include/linux/dma-fence.h   | 20 ++++++++++++++++++++
> >>>   2 files changed, 40 insertions(+)
> >>>
> >>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> >>> index 0de0482cd36e..763b32627684 100644
> >>> --- a/drivers/dma-buf/dma-fence.c
> >>> +++ b/drivers/dma-buf/dma-fence.c
> >>> @@ -912,6 +912,26 @@ dma_fence_wait_any_timeout(struct dma_fence
> >>> **fences, uint32_t count,
> >>>   }
> >>>   EXPORT_SYMBOL(dma_fence_wait_any_timeout);
> >>>   +
> >>> +/**
> >>> + * dma_fence_set_deadline - set desired fence-wait deadline
> >>> + * @fence:    the fence that is to be waited on
> >>> + * @deadline: the time by which the waiter hopes for the fence to be
> >>> + *            signaled
> >>> + *
> >>> + * Inform the fence signaler of an upcoming deadline, such as
> >>> vblank, by
> >>> + * which point the waiter would prefer the fence to be signaled by.
> >>> This
> >>> + * is intended to give feedback to the fence signaler to aid in power
> >>> + * management decisions, such as boosting GPU frequency if a periodic
> >>> + * vblank deadline is approaching.
> >>> + */
> >>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
> >>> +{
> >>> +    if (fence->ops->set_deadline && !dma_fence_is_signaled(fence))
> >>> +        fence->ops->set_deadline(fence, deadline);
> >>> +}
> >>> +EXPORT_SYMBOL(dma_fence_set_deadline);
> >>> +
> >>>   /**
> >>>    * dma_fence_describe - Dump fence describtion into seq_file
> >>>    * @fence: the 6fence to describe
> >>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> >>> index 775cdc0b4f24..d77f6591c453 100644
> >>> --- a/include/linux/dma-fence.h
> >>> +++ b/include/linux/dma-fence.h
> >>> @@ -99,6 +99,7 @@ enum dma_fence_flag_bits {
> >>>       DMA_FENCE_FLAG_SIGNALED_BIT,
> >>>       DMA_FENCE_FLAG_TIMESTAMP_BIT,
> >>>       DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> >>> +    DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> >>
> >> Would this bit be better left out from core implementation, given how
> >> the approach is the component which implements dma-fence has to track
> >> the actual deadline and all?
> >>
> >> Also taking a step back - are we all okay with starting to expand the
> >> relatively simple core synchronisation primitive with side channel
> >> data like this? What would be the criteria for what side channel data
> >> would be acceptable? Taking note the thing lives outside drivers/gpu/.
> >
> > I had similar concerns and it took me a moment as well to understand the
> > background why this is necessary. I essentially don't see much other
> > approach we could do.
> >
> > Yes, this is GPU/CRTC specific, but we somehow need a common interface
> > for communicating it between drivers and that's the dma_fence object as
> > far as I can see.
>
> Yeah I also don't see any other easy options. Just wanted to raise this
> as something which probably needs some wider acks.
>
> Also what about the "low level" part of my question about the reason, or
> benefits, of defining the deadline bit in the common layer?

We could leave DMA_FENCE_FLAG_HAS_DEADLINE_BIT out, but OTOH managing
a bitmask that is partially defined in the core enum and partially in
the backend driver has its own drawbacks, and it isn't like we are
running out of bits.. :shrug:

BR,
-R

> Regards,
>
> Tvrtko
  
Tvrtko Ursulin Feb. 22, 2023, 5:33 p.m. UTC | #6
On 22/02/2023 17:16, Rob Clark wrote:
> On Wed, Feb 22, 2023 at 9:05 AM Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>>
>>
>> On 22/02/2023 15:28, Christian König wrote:
>>> Am 22.02.23 um 11:23 schrieb Tvrtko Ursulin:
>>>>
>>>> On 18/02/2023 21:15, Rob Clark wrote:
>>>>> From: Rob Clark <robdclark@chromium.org>
>>>>>
>>>>> Add a way to hint to the fence signaler of an upcoming deadline, such as
>>>>> vblank, which the fence waiter would prefer not to miss.  This is to aid
>>>>> the fence signaler in making power management decisions, like boosting
>>>>> frequency as the deadline approaches and awareness of missing deadlines
>>>>> so that can be factored in to the frequency scaling.
>>>>>
>>>>> v2: Drop dma_fence::deadline and related logic to filter duplicate
>>>>>       deadlines, to avoid increasing dma_fence size.  The fence-context
>>>>>       implementation will need similar logic to track deadlines of all
>>>>>       the fences on the same timeline.  [ckoenig]
>>>>> v3: Clarify locking wrt. set_deadline callback
>>>>>
>>>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>> ---
>>>>>    drivers/dma-buf/dma-fence.c | 20 ++++++++++++++++++++
>>>>>    include/linux/dma-fence.h   | 20 ++++++++++++++++++++
>>>>>    2 files changed, 40 insertions(+)
>>>>>
>>>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
>>>>> index 0de0482cd36e..763b32627684 100644
>>>>> --- a/drivers/dma-buf/dma-fence.c
>>>>> +++ b/drivers/dma-buf/dma-fence.c
>>>>> @@ -912,6 +912,26 @@ dma_fence_wait_any_timeout(struct dma_fence
>>>>> **fences, uint32_t count,
>>>>>    }
>>>>>    EXPORT_SYMBOL(dma_fence_wait_any_timeout);
>>>>>    +
>>>>> +/**
>>>>> + * dma_fence_set_deadline - set desired fence-wait deadline
>>>>> + * @fence:    the fence that is to be waited on
>>>>> + * @deadline: the time by which the waiter hopes for the fence to be
>>>>> + *            signaled
>>>>> + *
>>>>> + * Inform the fence signaler of an upcoming deadline, such as
>>>>> vblank, by
>>>>> + * which point the waiter would prefer the fence to be signaled by.
>>>>> This
>>>>> + * is intended to give feedback to the fence signaler to aid in power
>>>>> + * management decisions, such as boosting GPU frequency if a periodic
>>>>> + * vblank deadline is approaching.
>>>>> + */
>>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
>>>>> +{
>>>>> +    if (fence->ops->set_deadline && !dma_fence_is_signaled(fence))
>>>>> +        fence->ops->set_deadline(fence, deadline);
>>>>> +}
>>>>> +EXPORT_SYMBOL(dma_fence_set_deadline);
>>>>> +
>>>>>    /**
>>>>>     * dma_fence_describe - Dump fence describtion into seq_file
>>>>>     * @fence: the 6fence to describe
>>>>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
>>>>> index 775cdc0b4f24..d77f6591c453 100644
>>>>> --- a/include/linux/dma-fence.h
>>>>> +++ b/include/linux/dma-fence.h
>>>>> @@ -99,6 +99,7 @@ enum dma_fence_flag_bits {
>>>>>        DMA_FENCE_FLAG_SIGNALED_BIT,
>>>>>        DMA_FENCE_FLAG_TIMESTAMP_BIT,
>>>>>        DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
>>>>> +    DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
>>>>
>>>> Would this bit be better left out from core implementation, given how
>>>> the approach is the component which implements dma-fence has to track
>>>> the actual deadline and all?
>>>>
>>>> Also taking a step back - are we all okay with starting to expand the
>>>> relatively simple core synchronisation primitive with side channel
>>>> data like this? What would be the criteria for what side channel data
>>>> would be acceptable? Taking note the thing lives outside drivers/gpu/.
>>>
>>> I had similar concerns and it took me a moment as well to understand the
>>> background why this is necessary. I essentially don't see much other
>>> approach we could do.
>>>
>>> Yes, this is GPU/CRTC specific, but we somehow need a common interface
>>> for communicating it between drivers and that's the dma_fence object as
>>> far as I can see.
>>
>> Yeah I also don't see any other easy options. Just wanted to raise this
>> as something which probably needs some wider acks.
>>
>> Also what about the "low level" part of my question about the reason, or
>> benefits, of defining the deadline bit in the common layer?
> 
> We could leave DMA_FENCE_FLAG_HAS_DEADLINE_BIT out, but OTOH managing
> a bitmask that is partially defined in core enum and partially in
> backend-driver has it's own drawbacks, and it isn't like we are
> running out of bits.. :shrug:

There is DMA_FENCE_FLAG_USER_BITS onwards which implementations could 
use to store their stuff?

And if we skip forward to "drm/scheduler: Add fence deadline support", 
that's the only place the bit is used, right? Would it simply work to treat 
drm_sched_fence->deadline == 0 as the bit not being set? Or do you see a need 
to interoperate with other fence implementations via that bit somehow?

Regards,

Tvrtko
  
Rob Clark Feb. 22, 2023, 6:57 p.m. UTC | #7
On Wed, Feb 22, 2023 at 9:33 AM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>
>
> On 22/02/2023 17:16, Rob Clark wrote:
> > On Wed, Feb 22, 2023 at 9:05 AM Tvrtko Ursulin
> > <tvrtko.ursulin@linux.intel.com> wrote:
> >>
> >>
> >> On 22/02/2023 15:28, Christian König wrote:
> >>> Am 22.02.23 um 11:23 schrieb Tvrtko Ursulin:
> >>>>
> >>>> On 18/02/2023 21:15, Rob Clark wrote:
> >>>>> From: Rob Clark <robdclark@chromium.org>
> >>>>>
> >>>>> Add a way to hint to the fence signaler of an upcoming deadline, such as
> >>>>> vblank, which the fence waiter would prefer not to miss.  This is to aid
> >>>>> the fence signaler in making power management decisions, like boosting
> >>>>> frequency as the deadline approaches and awareness of missing deadlines
> >>>>> so that can be factored in to the frequency scaling.
> >>>>>
> >>>>> v2: Drop dma_fence::deadline and related logic to filter duplicate
> >>>>>       deadlines, to avoid increasing dma_fence size.  The fence-context
> >>>>>       implementation will need similar logic to track deadlines of all
> >>>>>       the fences on the same timeline.  [ckoenig]
> >>>>> v3: Clarify locking wrt. set_deadline callback
> >>>>>
> >>>>> Signed-off-by: Rob Clark <robdclark@chromium.org>
> >>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
> >>>>> ---
> >>>>>    drivers/dma-buf/dma-fence.c | 20 ++++++++++++++++++++
> >>>>>    include/linux/dma-fence.h   | 20 ++++++++++++++++++++
> >>>>>    2 files changed, 40 insertions(+)
> >>>>>
> >>>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> >>>>> index 0de0482cd36e..763b32627684 100644
> >>>>> --- a/drivers/dma-buf/dma-fence.c
> >>>>> +++ b/drivers/dma-buf/dma-fence.c
> >>>>> @@ -912,6 +912,26 @@ dma_fence_wait_any_timeout(struct dma_fence
> >>>>> **fences, uint32_t count,
> >>>>>    }
> >>>>>    EXPORT_SYMBOL(dma_fence_wait_any_timeout);
> >>>>>    +
> >>>>> +/**
> >>>>> + * dma_fence_set_deadline - set desired fence-wait deadline
> >>>>> + * @fence:    the fence that is to be waited on
> >>>>> + * @deadline: the time by which the waiter hopes for the fence to be
> >>>>> + *            signaled
> >>>>> + *
> >>>>> + * Inform the fence signaler of an upcoming deadline, such as
> >>>>> vblank, by
> >>>>> + * which point the waiter would prefer the fence to be signaled by.
> >>>>> This
> >>>>> + * is intended to give feedback to the fence signaler to aid in power
> >>>>> + * management decisions, such as boosting GPU frequency if a periodic
> >>>>> + * vblank deadline is approaching.
> >>>>> + */
> >>>>> +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
> >>>>> +{
> >>>>> +    if (fence->ops->set_deadline && !dma_fence_is_signaled(fence))
> >>>>> +        fence->ops->set_deadline(fence, deadline);
> >>>>> +}
> >>>>> +EXPORT_SYMBOL(dma_fence_set_deadline);
> >>>>> +
> >>>>>    /**
> >>>>>     * dma_fence_describe - Dump fence describtion into seq_file
> >>>>>     * @fence: the 6fence to describe
> >>>>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> >>>>> index 775cdc0b4f24..d77f6591c453 100644
> >>>>> --- a/include/linux/dma-fence.h
> >>>>> +++ b/include/linux/dma-fence.h
> >>>>> @@ -99,6 +99,7 @@ enum dma_fence_flag_bits {
> >>>>>        DMA_FENCE_FLAG_SIGNALED_BIT,
> >>>>>        DMA_FENCE_FLAG_TIMESTAMP_BIT,
> >>>>>        DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> >>>>> +    DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
> >>>>
> >>>> Would this bit be better left out from core implementation, given how
> >>>> the approach is the component which implements dma-fence has to track
> >>>> the actual deadline and all?
> >>>>
> >>>> Also taking a step back - are we all okay with starting to expand the
> >>>> relatively simple core synchronisation primitive with side channel
> >>>> data like this? What would be the criteria for what side channel data
> >>>> would be acceptable? Taking note the thing lives outside drivers/gpu/.
> >>>
> >>> I had similar concerns and it took me a moment as well to understand the
> >>> background why this is necessary. I essentially don't see much other
> >>> approach we could do.
> >>>
> >>> Yes, this is GPU/CRTC specific, but we somehow need a common interface
> >>> for communicating it between drivers and that's the dma_fence object as
> >>> far as I can see.
> >>
> >> Yeah I also don't see any other easy options. Just wanted to raise this
> >> as something which probably needs some wider acks.
> >>
> >> Also what about the "low level" part of my question about the reason, or
> >> benefits, of defining the deadline bit in the common layer?
> >
> > We could leave DMA_FENCE_FLAG_HAS_DEADLINE_BIT out, but OTOH managing
> > a bitmask that is partially defined in core enum and partially in
> > backend-driver has it's own drawbacks, and it isn't like we are
> > running out of bits.. :shrug:
>
> There is DMA_FENCE_FLAG_USER_BITS onwards which implementations could
> use to store their stuff?
>
> And if we skip forward to "drm/scheduler: Add fence deadline support"
> that's the only place bit is used, right? Would it simply work to look
> at drm_sched_fence->deadline == 0 as bit not set? Or you see a need to
> interoperate with other fence implementations via that bit somehow?

Currently drm/scheduler is the only one using it.  I ended up dropping
use of it in msm since the deadline is stored in the fence-context
instead.  But I think it is better to try to avoid assuming that zero
means not-set.

It could be moved to drm/sched.. I guess there are too few
implementations at this point to say whether it is something useful to
other drivers or not.

BR,
-R

> Regards,
>
> Tvrtko
  

Patch

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 0de0482cd36e..763b32627684 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -912,6 +912,26 @@  dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
 }
 EXPORT_SYMBOL(dma_fence_wait_any_timeout);
 
+
+/**
+ * dma_fence_set_deadline - set desired fence-wait deadline
+ * @fence:    the fence that is to be waited on
+ * @deadline: the time by which the waiter hopes for the fence to be
+ *            signaled
+ *
+ * Inform the fence signaler of an upcoming deadline, such as vblank, by
+ * which point the waiter would prefer the fence to be signaled by.  This
+ * is intended to give feedback to the fence signaler to aid in power
+ * management decisions, such as boosting GPU frequency if a periodic
+ * vblank deadline is approaching.
+ */
+void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
+{
+	if (fence->ops->set_deadline && !dma_fence_is_signaled(fence))
+		fence->ops->set_deadline(fence, deadline);
+}
+EXPORT_SYMBOL(dma_fence_set_deadline);
+
 /**
  * dma_fence_describe - Dump fence describtion into seq_file
  * @fence: the 6fence to describe
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 775cdc0b4f24..d77f6591c453 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -99,6 +99,7 @@  enum dma_fence_flag_bits {
 	DMA_FENCE_FLAG_SIGNALED_BIT,
 	DMA_FENCE_FLAG_TIMESTAMP_BIT,
 	DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+	DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
 	DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
 };
 
@@ -257,6 +258,23 @@  struct dma_fence_ops {
 	 */
 	void (*timeline_value_str)(struct dma_fence *fence,
 				   char *str, int size);
+
+	/**
+	 * @set_deadline:
+	 *
+	 * Callback to allow a fence waiter to inform the fence signaler of
+	 * an upcoming deadline, such as vblank, by which point the waiter
+	 * would prefer the fence to be signaled by.  This is intended to
+	 * give feedback to the fence signaler to aid in power management
+	 * decisions, such as boosting GPU frequency.
+	 *
+	 * This is called without &dma_fence.lock held, it can be called
+	 * multiple times and from any context.  Locking is up to the callee
+	 * if it has some state to manage.
+	 *
+	 * This callback is optional.
+	 */
+	void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
 };
 
 void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
@@ -583,6 +601,8 @@  static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
 	return ret < 0 ? ret : 0;
 }
 
+void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline);
+
 struct dma_fence *dma_fence_get_stub(void);
 struct dma_fence *dma_fence_allocate_private_stub(void);
 u64 dma_fence_context_alloc(unsigned num);