Web lists-archives.com

Re: [Intel-gfx] [PATCH] [v2] drm/i915/pmu: avoid -Wmaybe-uninitialized warning




On Tue, Mar 13, 2018 at 6:46 PM, Tvrtko Ursulin
<tvrtko.ursulin@xxxxxxxxxxxxxxx> wrote:
>
> On 13/03/2018 16:19, Arnd Bergmann wrote:
>>
>> The conditional spinlock confuses gcc into thinking the 'flags' value
>> might contain uninitialized data:
>>
>> drivers/gpu/drm/i915/i915_pmu.c: In function '__i915_pmu_event_read':
>> arch/x86/include/asm/paravirt_types.h:573:3: error: 'flags' may be used
>> uninitialized in this function [-Werror=maybe-uninitialized]
>
>
> Hm, how does paravirt_types.h comes into the picture?

spin_unlock_irqrestore() calls arch_local_irq_restore()

>> The code is correct, but it's easy to see how the compiler gets confused
>> here. This avoids the problem by pulling the lock outside of the function
>> into its only caller.
>
>
> Is it specific gcc version, specific options, or specific kernel config that
> this happens?

Not gcc version specific (same result with gcc-4.9 through 8, didn't test
earlier versions that are currently broken).

> Strange that it hasn't been seen so far.

It seems to be a relatively rare 'randconfig' combination. Looking at
the preprocessed sources, I find:

static u64 get_rc6(struct drm_i915_private *i915, bool locked)
{

 unsigned long flags;
 u64 val;

 if (intel_runtime_pm_get_if_in_use(i915)) {
  val = __get_rc6(i915);
  intel_runtime_pm_put(i915);
  if (!locked)
   do { do { ({ unsigned long __dummy; typeof(flags) __dummy2;
(void)(&__dummy == &__dummy2); 1; }); do { do { do { ({ unsigned long
__dummy; typeof(flags) __dummy2; (void)(&__dummy == &__dummy2); 1; });
flags = arch_local_irq_save(); } while (0); trace_hardirqs_off(); }
while (0); do { __asm__ __volatile__("": : :"memory"); do { (void)0;
(void)(spinlock_check(&i915->pmu.lock)); } while (0); } while (0); }
while (0); } while (0); } while (0);

  if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
   i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
   i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
  } else {
   val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
  }
  if (!locked)
   spin_unlock_irqrestore(&i915->pmu.lock, flags);
 } else {
  struct pci_dev *pdev = i915->drm.pdev;
  struct device *kdev = &pdev->dev;
  unsigned long flags2;
# 455 "/git/arm-soc/drivers/gpu/drm/i915/i915_pmu.c"
  if (!locked)
   do { do { ({ unsigned long __dummy; typeof(flags) __dummy2;
(void)(&__dummy == &__dummy2); 1; }); do { do { do { ({ unsigned long
__dummy; typeof(flags) __dummy2; (void)(&__dummy == &__dummy2); 1; });
flags = arch_local_irq_save(); } while (0); trace_hardirqs_off(); }
while (0); do { __asm__ __volatile__("": : :"memory"); do { (void)0;
(void)(spinlock_check(&i915->pmu.lock)); } while (0); } while (0); }
while (0); } while (0); } while (0);

  do { do { ({ unsigned long __dummy; typeof(flags2) __dummy2;
(void)(&__dummy == &__dummy2); 1; }); do { do { do { ({ unsigned long
__dummy; typeof(flags2) __dummy2; (void)(&__dummy == &__dummy2); 1;
}); flags2 = arch_local_irq_save(); } while (0); trace_hardirqs_off();
} while (0); do { __asm__ __volatile__("": : :"memory"); do { (void)0;
(void)(spinlock_check(&kdev->power.lock)); } while (0); } while (0); }
while (0); } while (0); } while (0);

  if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
   i915->pmu.suspended_jiffies_last =
      kdev->power.suspended_jiffies;

  val = kdev->power.suspended_jiffies -
        i915->pmu.suspended_jiffies_last;
  val += jiffies - kdev->power.accounting_timestamp;

  spin_unlock_irqrestore(&kdev->power.lock, flags2);

  val = jiffies_to_nsecs(val);
  val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
  i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;

  if (!locked)
   spin_unlock_irqrestore(&i915->pmu.lock, flags);
 }
  return val;
}

so it seems that the spin_lock_irqsave() is completely inlined through
a macro while the unlock is not, and the lock contains a memory barrier
(among other things) that might tell the compiler that the state of the
'locked' flag could changed underneath it.

It could also be the problem that arch_local_irq_restore() uses
__builtin_expect() in  PVOP_TEST_NULL(op) when
CONFIG_PARAVIRT_DEBUG is enabled, see

static inline __attribute__((unused))
__attribute__((no_instrument_function))
__attribute__((no_instrument_function)) void
arch_local_irq_restore(unsigned long f)
{
 ({ unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;; do {
if (__builtin_expect(!!(pv_irq_ops.restore_fl.func == ((void *)0)),
0)) do { do { asm volatile("1:\t" ".byte 0x0f, 0x0b" "\n"
".pushsection __bug_table,\"aw\"\n" "2:\t" ".long " "1b" "\t#
bug_entry::bug_addr\n" "\t" ".long " "%c0" "\t# bug_entry::file\n"
"\t.word %c1" "\t# bug_entry::line\n" "\t.word %c2" "\t#
bug_entry::flags\n" "\t.org 2b+%c3\n" ".popsection" : : "i"
("/git/arm-soc/arch/x86/include/asm/paravirt.h"), "i" (783), "i" (0),
"i" (sizeof(struct bug_entry))); } while (0); do { ; asm volatile("");
__builtin_unreachable(); } while (0); } while (0); } while (0); asm
volatile("" "771:\n\t" "999:\n\t" ".pushsection
.discard.retpoline_safe\n\t" " " ".long" " " " 999b\n\t"
".popsection\n\t" "call *%c[paravirt_opptr];" "\n" "772:\n"
".pushsection .parainstructions,\"a\"\n" " " ".balign 4" " " "\n" " "
".long" " " " 771b\n" "  .byte " "%c[paravirt_typenum]" "\n" "  .byte
772b-771b\n" "  .short " "%c[paravirt_clobber]" "\n" ".popsection\n"
"" : "=a" (__eax), "=d" (__edx), "+r" (current_stack_pointer) :
[paravirt_typenum] "i" ((__builtin_offsetof(struct
paravirt_patch_template, pv_irq_ops.restore_fl.func) / sizeof(void
*))), [paravirt_opptr] "i" (&(pv_irq_ops.restore_fl.func)),
[paravirt_clobber] "i" (((1 << 0) | (1 << 2))), "a" ((unsigned
long)(f)) : "memory", "cc" ); });
}

this seems to frequently confuse gcc, and turning off that NULL check
avoids the warning as well.

If you want to analyze it further, see https://pastebin.com/T2yLRqU5
for the .config file, but I'm pretty sure this is a known problem with gcc
that happens to be very hard to fix.

       Arnd