Код был немного модифицирован под старую версию pari/gp:
Код:
/* Benchmark setup.
   nth  - worker count, read from the GP default `nbthreads`.
   stop - precprime(10^8), i.e. the largest prime <= 10^8; used below both as
          the exclusive loop bound and as the second CRT modulus.
   t0   - wall-clock start (getwalltime(); the final print divides the
          difference by 1000.0 and labels it "s", so the unit is milliseconds).
   n    - global accumulator for the total count.
   NOTE(review): nth, stop and f are declared through inline(), presumably so
   that the parfor worker threads can see them on this old GP release -
   confirm against the 2.9 documentation. */
inline(nth=default(nbthreads)); inline(stop=precprime(10^8)); t0=getwalltime(); n=0;
/* f(i,n,s): for a = i, i+n, i+2n, ... while a <= s-1, build with chinese() the
   integer that is 1 mod 9699690 (= 2*3*5*7*11*13*17*19) and a mod s, and count
   how many of those integers pass ispseudoprime(). Returns that count (y).
   The parameter n here is the stride and shadows the global counter n.
   The local x is declared but never used.
   NOTE(review): forstep presumably scopes its own loop variable, which would
   make the `a` listed in my() redundant - confirm. */
inline(f(i,n,s)={my(x,y=0,a); forstep(a=i,s-1,n, if(ispseudoprime(lift(chinese(Mod(1,9699690),Mod(a,s)))), y++); ); y});
/* Run f once per worker index th = 1..nth with stride nth, so that the nth
   interleaved progressions together cover every a in 1..stop-1 exactly once.
   Each result r is added to the global n. */
parfor(th=1,nth, f(th,nth,stop), r,n+=r);
/* Report: count of pseudoprimes found / number of candidates tested,
   worker count, and elapsed wall-clock time in seconds. */
print("n=",n,"/",stop-1,", nth=",nth,", time: ",(getwalltime()-t0)/1000.0,"s");
Смотрим результаты:
Код:
/usr/local/bin/gp-2.9
GP/PARI CALCULATOR Version 2.9.3 (released)
amd64 running freebsd (x86-64/GMP-6.1.2 kernel) 64-bit version
compiled: Jul 31 2022, FreeBSD clang version 5.0.1 (tags/RELEASE_501/final 320880) (based on LLVM 5.0.1)
threading engine: pthread
(readline v7.0 enabled, extended help enabled)
Copyright (C) 2000-2017 The PARI Group
PARI/GP is free software, covered by the GNU General Public License, and comes WITHOUT ANY WARRANTY WHATSOEVER.
Type ? for help, \q to quit.
Type ?15 for how to get moral (and possibly technical) support.
parisize = 8000000, primelimit = 500000, nbthreads = 16
? inline(nth=default(nbthreads)); inline(stop=precprime(10^8)); t0=getwalltime(); n=0;
? inline(f(i,n,s)={my(x,y=0,a); forstep(a=i,s-1,n, if(ispseudoprime(lift(chinese(Mod(1,9699690),Mod(a,s)))), y++); ); y});
? parfor(th=1,nth, f(th,nth,stop), r,n+=r);
? print("n=",n,"/",stop-1,", nth=",nth,", time: ",(getwalltime()-t0)/1000.0,"s");
n=17468130/99999988, nth=16, time: 50.163000000000000000000000000000000000s
?
?
? inline(nth=default(nbthreads)); inline(stop=precprime(10^8)); t0=getwalltime(); n=0;
? inline(f(i,n,s)={my(x,y=0,a); forstep(a=i,s-1,n, if(ispseudoprime(lift(chinese(Mod(1,9699690),Mod(a,s)))), y++); ); y});
? parfor(th=1,nth, f(th,nth,stop), r,n+=r);
? print("n=",n,"/",stop-1,", nth=",nth,", time: ",(getwalltime()-t0)/1000.0,"s");
n=17468130/99999988, nth=16, time: 41.539000000000000000000000000000000000s
?
?
? inline(nth=default(nbthreads)); inline(stop=precprime(10^8)); t0=getwalltime(); n=0;
? inline(f(i,n,s)={my(x,y=0,a); forstep(a=i,s-1,n, if(ispseudoprime(lift(chinese(Mod(1,9699690),Mod(a,s)))), y++); ); y});
? parfor(th=1,nth, f(th,nth,stop), r,n+=r);
? print("n=",n,"/",stop-1,", nth=",nth,", time: ",(getwalltime()-t0)/1000.0,"s");
n=17468130/99999988, nth=16, time: 41.257000000000000000000000000000000000s
?
? default(nbthreads,1);
? inline(nth=default(nbthreads)); inline(stop=precprime(10^8)); t0=getwalltime(); n=0;
? inline(f(i,n,s)={my(x,y=0,a); forstep(a=i,s-1,n, if(ispseudoprime(lift(chinese(Mod(1,9699690),Mod(a,s)))), y++); ); y});
? parfor(th=1,nth, f(th,nth,stop), r,n+=r);
? print("n=",n,"/",stop-1,", nth=",nth,", time: ",(getwalltime()-t0)/1000.0,"s");
n=17468130/99999988, nth=1, time: 425.36600000000000000000000000000000000s
?
? inline(nth=default(nbthreads)); inline(stop=precprime(10^8)); t0=getwalltime(); n=0;
? inline(f(i,n,s)={my(x,y=0,a); forstep(a=i,s-1,n, if(ispseudoprime(lift(chinese(Mod(1,9699690),Mod(a,s)))), y++); ); y});
? parfor(th=1,nth, f(th,nth,stop), r,n+=r);
? print("n=",n,"/",stop-1,", nth=",nth,", time: ",(getwalltime()-t0)/1000.0,"s");
n=17468130/99999988, nth=1, time: 424.12100000000000000000000000000000000s
?
? inline(nth=default(nbthreads)); inline(stop=precprime(10^8)); t0=getwalltime(); n=0;
? inline(f(i,n,s)={my(x,y=0,a); forstep(a=i,s-1,n, if(ispseudoprime(lift(chinese(Mod(1,9699690),Mod(a,s)))), y++); ); y});
? parfor(th=1,nth, f(th,nth,stop), r,n+=r);
? print("n=",n,"/",stop-1,", nth=",nth,", time: ",(getwalltime()-t0)/1000.0,"s");
n=17468130/99999988, nth=1, time: 423.78400000000000000000000000000000000s
?
? default(nbthreads,16);
? inline(nth=default(nbthreads)); inline(stop=precprime(10^8)); t0=getwalltime(); n=0;
? inline(f(i,n,s)={my(x,y=0,a); forstep(a=i,s-1,n, if(ispseudoprime(lift(chinese(Mod(1,9699690),Mod(a,s)))), y++); ); y});
? parfor(th=1,nth, f(th,nth,stop), r,n+=r);
? print("n=",n,"/",stop-1,", nth=",nth,", time: ",(getwalltime()-t0)/1000.0,"s");
n=17468130/99999988, nth=16, time: 39.669000000000000000000000000000000000s
?
Загрузка по тредам (столбец THR) для предпоследнего и последнего тестов:
Код:
last pid: 40624; load averages: 1.03, 1.13, 1.31 up 41+19:02:09 17:31:58
94 processes: 2 running, 92 sleeping
CPU: 6.4% user, 0.0% nice, 0.2% system, 0.0% interrupt, 93.3% idle
Mem: 100M Active, 9000M Inact, 9635M Wired, 1278M Buf, 5160M Free
ARC: 956M Total, 416M MFU, 231M MRU, 1354K Anon, 54M Header, 255M Other
352M Compressed, 556M Uncompressed, 1.58:1 Ratio
Swap: 74G Total, 74G Free
PID USERNAME THR PRI NICE SIZE RES STATE C TIME WCPU COMMAND
39898 root 1 103 0 62996K 39964K CPU12 12 48:13 97.76% gp-2.9
last pid: 40809; load averages: 3.98, 1.67, 1.43 up 41+19:07:29 17:37:18
94 processes: 1 running, 93 sleeping
CPU: 100% user, 0.0% nice, 0.0% system, 0.0% interrupt, 0.0% idle
Mem: 162M Active, 9000M Inact, 9636M Wired, 1278M Buf, 5098M Free
ARC: 953M Total, 418M MFU, 222M MRU, 3152K Anon, 54M Header, 255M Other
347M Compressed, 547M Uncompressed, 1.58:1 Ratio
Swap: 74G Total, 74G Free
PID USERNAME THR PRI NICE SIZE RES STATE C TIME WCPU COMMAND
39898 root 17 20 0 184M 101M uwait 7 57:02 1606.37% gp-2.9
40264 root 1 20 0 8164K 4256K CPU12 12 0:01 0.09% top
Важно понимать, что этот код исполнялся на старом железе 2012 года, в отличие от теста на Windows.
Машина двухпроцессорная, процессоры Xeon, AVX2 в процессорах отсутствует, гипертрединг (Hyper-Threading) включён.
Т. е. 4 ядра × 2 процессора × 2 (гипертрединг) = 16 потоков.