The Meltdown patch for the Linux kernel makes use of the relatively new PCID (Process-Context Identifiers) CPU feature to limit its performance cost. I still sometimes use my old laptop, which contains a Core 2 Duo Penryn CPU (T7250) that does not support PCID, so I did a quick UnixBench run to see what kind of difference the absence of PCID would make. At the end of this article, I have a bonus “benchmark” for an alternative way to mitigate Meltdown: disabling the CPU’s caches. All my tests were performed on Debian Jessie (currently oldstable) using kernel version 3.16.0-5-amd64.

First of all, here are another person’s results for a CPU that supports PCID. And since that’s in Japanese, here’s the important bit:

Test	Before	After	Change (positive is better)
System Call Overhead	5391.9	4009.7	-25.63%

Now, my tests on the Penryn CPU:

Test	Before	After	Change (positive is better)
Dhrystone 2 using register variables	3360.4	3414.1	+1.60%
Double-Precision Whetstone	724.1	724	-0.01%
Execl Throughput	1351.7	1222.9	-9.53%
File Copy 1024 bufsize 2000 maxblocks	1582	1244	-21.37%
File Copy 256 bufsize 500 maxblocks	1255.9	922.1	-26.58%
File Copy 4096 bufsize 8000 maxblocks	1982.4	1810.6	-8.67%
Pipe Throughput	1672.8	765.4	-54.24%
Pipe-based Context Switching	1108.3	671	-39.46%
Process Creation	1150	1025.3	-10.84%
Shell Scripts (1 concurrent)	1995.7	1909	-4.34%
Shell Scripts (8 concurrent)	1831.8	1743.3	-4.83%
System Call Overhead	1705.6	544.9	-68.05%
System Benchmarks Index Score	1535.8	1160.9	-24.41%

And the raw data in case you are interested:

Before updating:

Test	Score	Unit	Time	Iters.	Baseline	Index
Dhrystone 2 using register variables	`39215974.0`	`lps`	`10.0 s`	`7`	`116700.0`	`3360.4`
Double-Precision Whetstone	`3982.6`	`MWIPS`	`9.9 s`	`7`	`55.0`	`724.1`
Execl Throughput	`5812.4`	`lps`	`29.2 s`	`2`	`43.0`	`1351.7`
File Copy 1024 bufsize 2000 maxblocks	`626453.0`	`KBps`	`30.0 s`	`2`	`3960.0`	`1582.0`
File Copy 256 bufsize 500 maxblocks	`207854.8`	`KBps`	`30.0 s`	`2`	`1655.0`	`1255.9`
File Copy 4096 bufsize 8000 maxblocks	`1149781.6`	`KBps`	`30.0 s`	`2`	`5800.0`	`1982.4`
Pipe Throughput	`2080979.1`	`lps`	`10.0 s`	`7`	`12440.0`	`1672.8`
Pipe-based Context Switching	`443337.7`	`lps`	`10.0 s`	`7`	`4000.0`	`1108.3`
Process Creation	`14490.3`	`lps`	`30.0 s`	`2`	`126.0`	`1150.0`
Shell Scripts (1 concurrent)	`8461.7`	`lpm`	`60.0 s`	`2`	`42.4`	`1995.7`
Shell Scripts (8 concurrent)	`1099.1`	`lpm`	`60.1 s`	`2`	`6.0`	`1831.8`
System Call Overhead	`2558469.9`	`lps`	`10.0 s`	`7`	`15000.0`	`1705.6`
System Benchmarks Index Score:						`1535.8`

After updating:

Test	Score	Unit	Time	Iters.	Baseline	Index
Dhrystone 2 using register variables	`39842314.8`	`lps`	`10.0 s`	`7`	`116700.0`	`3414.1`
Double-Precision Whetstone	`3982.0`	`MWIPS`	`9.8 s`	`7`	`55.0`	`724.0`
Execl Throughput	`5258.5`	`lps`	`30.0 s`	`2`	`43.0`	`1222.9`
File Copy 1024 bufsize 2000 maxblocks	`492638.1`	`KBps`	`30.0 s`	`2`	`3960.0`	`1244.0`
File Copy 256 bufsize 500 maxblocks	`152610.9`	`KBps`	`30.0 s`	`2`	`1655.0`	`922.1`
File Copy 4096 bufsize 8000 maxblocks	`1050156.7`	`KBps`	`30.0 s`	`2`	`5800.0`	`1810.6`
Pipe Throughput	`952188.4`	`lps`	`10.0 s`	`7`	`12440.0`	`765.4`
Pipe-based Context Switching	`268401.0`	`lps`	`10.0 s`	`7`	`4000.0`	`671.0`
Process Creation	`12918.3`	`lps`	`30.0 s`	`2`	`126.0`	`1025.3`
Shell Scripts (1 concurrent)	`8094.2`	`lpm`	`60.0 s`	`2`	`42.4`	`1909.0`
Shell Scripts (8 concurrent)	`1046.0`	`lpm`	`60.1 s`	`2`	`6.0`	`1743.3`
System Call Overhead	`817288.1`	`lps`	`10.0 s`	`7`	`15000.0`	`544.9`
System Benchmarks Index Score:						`1160.9`

Now, Mitigating Meltdown by switching off CPU caches:

You wouldn’t even want to run UnixBench without CPU caches. Here’s a “simpler” benchmark that tells you why:

# time perl -e 'for (1..1000000) {}'

real 0m0.056s
user 0m0.052s
sys 0m0.000s
# insmod disable_cache.ko
# time perl -e 'for (1..1000000) {}' 

real 0m44.689s
user 0m40.044s
sys 0m0.520s
# rmmod disable_cache

In other words, with the caches disabled the system is some 800 times slower, so you would not want to work on it, let alone benchmark it. (Don’t try this in a GUI session.)

Nonetheless, here’s some code to disable the CPU caches. (Modified from https://www.linuxquestions.org/questions/linux-kernel-70/disabling-cpu-caches-936077/)

#include <linux/init.h>
#include <linux/module.h>
#include <linux/smp.h>

/* Declare the license of this module as "Dual BSD/GPL". */
MODULE_LICENSE("Dual BSD/GPL");

/*
 * Per-CPU callback: set bit 30 of CR0 (the CD, "cache disable", flag) on the
 * CPU this function is running on, then execute WBINVD to write back and
 * invalidate that CPU's caches.
 *
 * @p: unused; present only to match the on_each_cpu() callback signature.
 */
void _disable_cache(void *p) {
 printk(KERN_ALERT "Disabling L1 and L2 caches on processor %d.\n", smp_processor_id());
 /*
  * volatile + "memory": writing CR0 and flushing the caches must not be
  * removed or reordered against surrounding memory accesses.
  */
 __asm__ __volatile__(".intel_syntax noprefix\n\t"
 "mov rax,cr0\n\t"
 "or rax,(1 << 30)\n\t"
 "mov cr0,rax\n\t"
 "wbinvd\n\t"
 /*
  * Switch back to the compiler's default dialect. "prefix" (not
  * "noprefix") so that symbols which happen to be named like a
  * register are not parsed as registers in the rest of the file.
  */
 ".att_syntax prefix\n\t"
 : : : "rax", "cc", "memory" );
}
/*
 * Per-CPU callback: clear bit 30 of CR0 (the CD, "cache disable", flag) on
 * the CPU this function is running on, then execute WBINVD.
 *
 * @p: unused; present only to match the on_each_cpu() callback signature.
 */
void _enable_cache(void *p) {
 printk(KERN_ALERT "Enabling L1 and L2 caches on processor %d.\n", smp_processor_id());
 /*
  * volatile + "memory": writing CR0 and flushing the caches must not be
  * removed or reordered against surrounding memory accesses.
  */
 __asm__ __volatile__(".intel_syntax noprefix\n\t"
 "mov rax,cr0\n\t"
 "and rax,~(1 << 30)\n\t"
 "mov cr0,rax\n\t"
 "wbinvd\n\t"
 /*
  * Switch back to the compiler's default dialect. "prefix" (not
  * "noprefix") so that symbols which happen to be named like a
  * register are not parsed as registers in the rest of the file.
  */
 ".att_syntax prefix\n\t"
 : : : "rax", "cc", "memory" );
}

static int disable_cache_init(void)
{
 on_each_cpu(_disable_cache, NULL, 1);
 return 0;
}
static void disable_cache_exit(void)
{
 on_each_cpu(_enable_cache, NULL, 1);
}

/* Register the functions to run when the module is loaded and unloaded. */
module_init(disable_cache_init);
module_exit(disable_cache_exit);

Makefile:

# Build disable_cache.o as a loadable kernel module.
obj-m += disable_cache.o

# Hand the build over to the build tree of the running kernel. $(MAKE) is used
# instead of a literal "make" so that command-line flags and the jobserver are
# passed on to the sub-make.
all:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules

Note that the recipe line in the Makefile must be indented with a tab, not spaces. CR0 can only be accessed from Ring 0, which is why a kernel module is needed.

Here’s some example code that only reads the cache-disable bit (bit 30) of the CR0 register on each CPU:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/smp.h>

/* Declare the license of this module as "Dual BSD/GPL". */
MODULE_LICENSE("Dual BSD/GPL");

/*
 * Per-CPU callback: read CR0 on the CPU this function is running on and log
 * the value of bit 30 (the CD, "cache disable", flag) as 0 or 1.
 *
 * @p: unused; present only to match the on_each_cpu() callback signature.
 */
void cache_status(void *p) {
 long int cr0_30 = 0;
 /*
  * volatile: CR0 can change between calls, so the read must not be
  * cached, merged or dropped by the compiler. The asm itself isolates
  * bit 30 and shifts it down, leaving 0 or 1 in cr0_30.
  */
 __asm__ __volatile__(".intel_syntax noprefix\n\t"
 "mov %0, cr0\n\t"
 "and %0, (1 << 30)\n\t"
 "shr %0, 30\n\t"
 /* Restore the compiler's default dialect, with register prefixes. */
 ".att_syntax prefix\n\t"
 : "=r" (cr0_30) : : "cc");
 /*
  * cr0_30 is already 0 or 1, so it is printed as-is. The former
  * `cr0_30&(1<<30)>>30` parsed as `cr0_30 & 1` because >> binds tighter
  * than &, and gave the right answer only by accident.
  */
 printk(KERN_INFO "Processor %d: %ld\n", smp_processor_id(), cr0_30);
}

static int cache_status_init(void) {
 on_each_cpu(cache_status, NULL, 1);
 return 0;
}
static void cache_status_exit(void) {
 on_each_cpu(cache_status, NULL, 1);
}

/* Register the functions to run when the module is loaded and unloaded. */
module_init(cache_status_init);
module_exit(cache_status_exit);

And the corresponding Makefile:

# Build cache_status.o as a loadable kernel module.
obj-m += cache_status.o

# Hand the build over to the build tree of the running kernel. $(MAKE) is used
# instead of a literal "make" so that command-line flags and the jobserver are
# passed on to the sub-make.
all:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules

Meltdown / Spectre Kernel Patch Benchmarks on Older Systems

Now, Mitigating Meltdown by switching off CPU caches:

Leave a Reply Cancel reply