Commit 0ab8dbef authored by Russ Cox's avatar Russ Cox

annotations

TBR=r
OCL=32896
CL=32896
parent d1ee5d6e
...@@ -18,18 +18,21 @@ Aug 4 2009 ...@@ -18,18 +18,21 @@ Aug 4 2009
# hand-written optimization of integer division # hand-written optimization of integer division
# use int32->float conversion # use int32->float conversion
fasta -n 25000000 fasta -n 25000000
# probably I/O library inefficiencies
gcc -O2 fasta.c 5.99u 0.00s 6.00r gcc -O2 fasta.c 5.99u 0.00s 6.00r
gccgo -O2 fasta.go 8.82u 0.02s 8.85r gccgo -O2 fasta.go 8.82u 0.02s 8.85r
gc fasta 10.70u 0.00s 10.77r gc fasta 10.70u 0.00s 10.77r
gc_B fasta 10.09u 0.03s 10.12r gc_B fasta 10.09u 0.03s 10.12r
reverse-complement < output-of-fasta-25000000 reverse-complement < output-of-fasta-25000000
# we don't know - memory cache behavior?
gcc -O2 reverse-complement.c 2.04u 0.94s 10.54r gcc -O2 reverse-complement.c 2.04u 0.94s 10.54r
gccgo -O2 reverse-complement.go 6.54u 0.63s 7.17r gccgo -O2 reverse-complement.go 6.54u 0.63s 7.17r
gc reverse-complement 6.55u 0.70s 7.26r gc reverse-complement 6.55u 0.70s 7.26r
gc_B reverse-complement 6.32u 0.70s 7.10r gc_B reverse-complement 6.32u 0.70s 7.10r
nbody 50000000 nbody 50000000
# math.Sqrt needs to be in assembly; inlining is probably the other 50%
gcc -O2 nbody.c 21.61u 0.01s 24.80r gcc -O2 nbody.c 21.61u 0.01s 24.80r
gccgo -O2 nbody.go 118.55u 0.02s 120.32r gccgo -O2 nbody.go 118.55u 0.02s 120.32r
gc nbody 100.84u 0.00s 100.85r gc nbody 100.84u 0.00s 100.85r
...@@ -40,6 +43,7 @@ hacked Sqrt in assembler ...@@ -40,6 +43,7 @@ hacked Sqrt in assembler
] ]
binary-tree 15 # too slow to use 20 binary-tree 15 # too slow to use 20
# memory allocation and garbage collection
gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r
gccgo -O2 binary-tree.go 1.69u 0.46s 2.15r gccgo -O2 binary-tree.go 1.69u 0.46s 2.15r
gccgo -O2 binary-tree-freelist.go 8.48u 0.00s 8.48r gccgo -O2 binary-tree-freelist.go 8.48u 0.00s 8.48r
...@@ -49,12 +53,15 @@ binary-tree 15 # too slow to use 20 ...@@ -49,12 +53,15 @@ binary-tree 15 # too slow to use 20
August 5, 2009 August 5, 2009
fannkuch 12 fannkuch 12
# bounds checking is half the difference
# rest might be registerization
gcc -O2 fannkuch.c 60.09u 0.01s 60.32r gcc -O2 fannkuch.c 60.09u 0.01s 60.32r
gccgo -O2 fannkuch.go 64.89u 0.00s 64.92r gccgo -O2 fannkuch.go 64.89u 0.00s 64.92r
gc fannkuch 124.59u 0.00s 124.67r gc fannkuch 124.59u 0.00s 124.67r
gc_B fannkuch 91.14u 0.00s 91.16r gc_B fannkuch 91.14u 0.00s 91.16r
regex-dna 100000 regex-dna 100000
# regexp code is slow on trivial regexp
gcc -O2 regex-dna.c -lpcre 0.92u 0.00s 0.99r gcc -O2 regex-dna.c -lpcre 0.92u 0.00s 0.99r
gc regexp-dna 26.94u 0.18s 28.75r gc regexp-dna 26.94u 0.18s 28.75r
gc_B regexp-dna 26.51u 0.09s 26.75r gc_B regexp-dna 26.51u 0.09s 26.75r
...@@ -70,24 +77,28 @@ spectral-norm 5500 ...@@ -70,24 +77,28 @@ spectral-norm 5500
August 6, 2009 August 6, 2009
k-nucleotide 5000000 k-nucleotide 5000000
# string maps are slower than glib string maps
gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 k-nucleotide.c: 10.72u 0.01s 10.74r gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 k-nucleotide.c: 10.72u 0.01s 10.74r
gccgo -O2 k-nucleotide.go 21.64u 0.83s 22.78r gccgo -O2 k-nucleotide.go 21.64u 0.83s 22.78r
gc k-nucleotide 16.08u 0.06s 16.50r gc k-nucleotide 16.08u 0.06s 16.50r
gc_B k-nucleotide 17.32u 0.02s 17.37r gc_B k-nucleotide 17.32u 0.02s 17.37r
mandelbrot 5500 mandelbrot 5500
# floating point code generator should use more registers
gcc -O2 mandelbrot.c 56.13u 0.02s 56.17r gcc -O2 mandelbrot.c 56.13u 0.02s 56.17r
gccgo -O2 mandelbrot.go 57.49u 0.01s 57.51r gccgo -O2 mandelbrot.go 57.49u 0.01s 57.51r
gc mandelbrot 74.32u 0.00s 74.35r gc mandelbrot 74.32u 0.00s 74.35r
gc_B mandelbrot 74.28u 0.01s 74.31r gc_B mandelbrot 74.28u 0.01s 74.31r
meteor 16000 meteor 16000
# we don't know
gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r
gccgo -O2 meteor-contest.go 0.12u 0.00s 0.14r gccgo -O2 meteor-contest.go 0.12u 0.00s 0.14r
gc meteor-contest 0.24u 0.00s 0.26r gc meteor-contest 0.24u 0.00s 0.26r
gc_B meteor-contest 0.23u 0.00s 0.24r gc_B meteor-contest 0.23u 0.00s 0.24r
pidigits 10000 pidigits 10000
# bignum is slower than gmp
gcc -O2 pidigits.c -lgmp 2.60u 0.00s 2.62r gcc -O2 pidigits.c -lgmp 2.60u 0.00s 2.62r
gc pidigits 77.69u 0.14s 78.18r gc pidigits 77.69u 0.14s 78.18r
gc_B pidigits 74.26u 0.18s 75.41r gc_B pidigits 74.26u 0.18s 75.41r
...@@ -98,6 +109,7 @@ August 7 2009 ...@@ -98,6 +109,7 @@ August 7 2009
# New gc does better division by powers of 2. Significant improvements: # New gc does better division by powers of 2. Significant improvements:
spectral-norm 5500 spectral-norm 5500
# floating point code generator should use more registers; possibly inline evalA
gcc -O2 spectral-norm.c -lm 11.50u 0.00s 11.50r gcc -O2 spectral-norm.c -lm 11.50u 0.00s 11.50r
gccgo -O2 spectral-norm.go 12.02u 0.00s 12.02r gccgo -O2 spectral-norm.go 12.02u 0.00s 12.02r
gc spectral-norm 23.98u 0.00s 24.00r # new time is 0.48 times old time, 52% faster gc spectral-norm 23.98u 0.00s 24.00r # new time is 0.48 times old time, 52% faster
...@@ -105,12 +117,14 @@ spectral-norm 5500 ...@@ -105,12 +117,14 @@ spectral-norm 5500
gc spectral-norm-parallel 24.04u 0.00s 6.26r # /2 put back. note: 4x faster (on r70, idle) gc spectral-norm-parallel 24.04u 0.00s 6.26r # /2 put back. note: 4x faster (on r70, idle)
k-nucleotide 1000000 k-nucleotide 1000000
# string maps are slower than glib string maps
gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.82u 0.04s 10.87r gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.82u 0.04s 10.87r
gccgo -O2 k-nucleotide.go 22.73u 0.89s 23.63r gccgo -O2 k-nucleotide.go 22.73u 0.89s 23.63r
gc k-nucleotide 15.97u 0.03s 16.04r gc k-nucleotide 15.97u 0.03s 16.04r
gc_B k-nucleotide 15.86u 0.06s 15.93r # 8.5% faster, but probably due to weird cache effeccts in previous version gc_B k-nucleotide 15.86u 0.06s 15.93r # 8.5% faster, but probably due to weird cache effeccts in previous version
pidigits 10000 pidigits 10000
# bignum is slower than gmp
gcc -O2 pidigits.c -lgmp 2.58u 0.00s 2.58r gcc -O2 pidigits.c -lgmp 2.58u 0.00s 2.58r
gc pidigits 71.24u 0.04s 71.28r # 8.5% faster gc pidigits 71.24u 0.04s 71.28r # 8.5% faster
gc_B pidigits 71.25u 0.03s 71.29r # 4% faster gc_B pidigits 71.25u 0.03s 71.29r # 4% faster
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment