• Namhyung Kim's avatar
    perf/x86: Fix out of range data · dec8ced8
    Namhyung Kim authored
    On x86 each struct cpu_hw_events maintains a table for counter assignment but
    it missed to update one for the deleted event in x86_pmu_del().  This
    can make perf_clear_dirty_counters() reset used counter if it's called
    before event scheduling or enabling.  Then it would return out of range
    data which doesn't make sense.
    
    The following code can reproduce the problem.
    
      $ cat repro.c
      #include <pthread.h>
      #include <stdio.h>
      #include <stdlib.h>
      #include <unistd.h>
      #include <linux/perf_event.h>
      #include <sys/ioctl.h>
      #include <sys/mman.h>
      #include <sys/syscall.h>
    
      struct perf_event_attr attr = {
      	.type = PERF_TYPE_HARDWARE,
      	.config = PERF_COUNT_HW_CPU_CYCLES,
      	.disabled = 1,
      };
    
      void *worker(void *arg)
      {
      	int cpu = (long)arg;
      	int fd1 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
      	int fd2 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
      	void *p;
    
      	do {
      		ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
      		p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0);
      		ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
    
      		ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
      		munmap(p, 4096);
      		ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
      	} while (1);
    
      	return NULL;
      }
    
      int main(void)
      {
      	int i;
      	int n = sysconf(_SC_NPROCESSORS_ONLN);
      	pthread_t *th = calloc(n, sizeof(*th));
    
      	for (i = 0; i < n; i++)
      		pthread_create(&th[i], NULL, worker, (void *)(long)i);
      	for (i = 0; i < n; i++)
      		pthread_join(th[i], NULL);
    
      	free(th);
      	return 0;
      }
    
    And you can see the out of range data using perf stat like this.
    Probably it'd be easier to see on a large machine.
    
      $ gcc -o repro repro.c -pthread
      $ ./repro &
      $ sudo perf stat -A -I 1000 2>&1 | awk '{ if (length($3) > 15) print }'
           1.001028462 CPU6   196,719,295,683,763      cycles                           # 194290.996 GHz                       (71.54%)
           1.001028462 CPU3   396,077,485,787,730      branch-misses                    # 15804359784.80% of all branches      (71.07%)
           1.001028462 CPU17  197,608,350,727,877      branch-misses                    # 14594186554.56% of all branches      (71.22%)
           2.020064073 CPU4   198,372,472,612,140      cycles                           # 194681.113 GHz                       (70.95%)
           2.020064073 CPU6   199,419,277,896,696      cycles                           # 195720.007 GHz                       (70.57%)
           2.020064073 CPU20  198,147,174,025,639      cycles                           # 194474.654 GHz                       (71.03%)
           2.020064073 CPU20  198,421,240,580,145      stalled-cycles-frontend          #  100.14% frontend cycles idle        (70.93%)
           3.037443155 CPU4   197,382,689,923,416      cycles                           # 194043.065 GHz                       (71.30%)
           3.037443155 CPU20  196,324,797,879,414      cycles                           # 193003.773 GHz                       (71.69%)
           3.037443155 CPU5   197,679,956,608,205      stalled-cycles-backend           # 1315606428.66% backend cycles idle   (71.19%)
           3.037443155 CPU5   198,571,860,474,851      instructions                     # 13215422.58  insn per cycle
    
    It should move the contents in the cpuc->assign as well.
    
    Fixes: 5471eea5 ("perf/x86: Reset the dirty counter to prevent the leak for an RDPMC task")
    Signed-off-by: default avatarNamhyung Kim <namhyung@kernel.org>
    Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
    Reviewed-by: default avatarKan Liang <kan.liang@linux.intel.com>
    Cc: stable@vger.kernel.org
    Link: https://lore.kernel.org/r/20240306061003.1894224-1-namhyung@kernel.org
    dec8ced8
core.c 73.5 KB