Commit 417683c3 authored by Rob Pike's avatar Rob Pike

clean up the code a bit

start a log of progress

R=rsc
DELTA=222  (185 added, 17 deleted, 20 changed)
OCL=32701
CL=32718
parent 9155bb33
...@@ -49,7 +49,7 @@ var out *bufio.Writer ...@@ -49,7 +49,7 @@ var out *bufio.Writer
var n = flag.Int("n", 1000, "length of result") var n = flag.Int("n", 1000, "length of result")
const WIDTH = 60 const WIDTH = 60 // Fold lines after WIDTH bytes
func min(a, b int) int { func min(a, b int) int {
if a < b { if a < b {
...@@ -65,6 +65,7 @@ type AminoAcid struct { ...@@ -65,6 +65,7 @@ type AminoAcid struct {
var lastrandom uint32 = 42 var lastrandom uint32 = 42
// Random number between 0.0 and 1.0
func myrandom() float { func myrandom() float {
const ( const (
IM = 139968; IM = 139968;
...@@ -77,24 +78,22 @@ func myrandom() float { ...@@ -77,24 +78,22 @@ func myrandom() float {
} }
func AccumulateProbabilities(genelist []AminoAcid) { func AccumulateProbabilities(genelist []AminoAcid) {
cp := 0.0; for i := 1; i < len(genelist); i++ {
for i := 0; i < len(genelist); i++ { genelist[i].p += genelist[i-1].p;
cp += genelist[i].p;
genelist[i].p = cp;
} }
} }
/* This function prints the characters of the string s. When it */ // RepeatFasta prints the characters of the byte slice s. When it
/* reaches the end of the string, it goes back to the beginning */ // reaches the end of the slice, it goes back to the beginning.
/* It stops when the total number of characters printed is count. */ // It stops after generating count characters.
/* Between each WIDTH consecutive characters it prints a newline */ // After each WIDTH characters it prints a newline.
/* This function assumes that WIDTH <= strlen (s) + 1 */ // It assumes that WIDTH <= len(s) + 1.
func RepeatFasta(s []byte, count int) { func RepeatFasta(s []byte, count int) {
pos := 0; pos := 0;
s2 := make([]byte, len(s) + WIDTH); s2 := make([]byte, len(s) + WIDTH);
bytes.Copy(s2, s); bytes.Copy(s2, s);
bytes.Copy(s2[len(s):len(s2)], s); bytes.Copy(s2[len(s):len(s2)], s);
for { for count > 0 {
line := min(WIDTH, count); line := min(WIDTH, count);
out.Write(s2[pos:pos+line]); out.Write(s2[pos:pos+line]);
out.WriteByte('\n'); out.WriteByte('\n');
...@@ -103,43 +102,31 @@ func RepeatFasta(s []byte, count int) { ...@@ -103,43 +102,31 @@ func RepeatFasta(s []byte, count int) {
pos -= len(s); pos -= len(s);
} }
count -= line; count -= line;
if count <= 0 {
break
}
} }
} }
/* This function takes a pointer to the first element of an array */ // Each element of genelist is a struct with a character and
/* Each element of the array is a struct with a character and */ // a floating point number p between 0 and 1.
/* a float number p between 0 and 1. */ // RandomFasta generates a random float r and
/* The function generates a random float number r and */ // finds the first element such that p >= r.
/* finds the first array element such that p >= r. */ // This is a weighted random selection.
/* This is a weighted random selection. */ // RandomFasta then prints the character of the array element.
/* The function then prints the character of the array element. */ // This sequence is repeated count times.
/* This is done count times. */ // Between each WIDTH consecutive characters, the function prints a newline.
/* Between each WIDTH consecutive characters, the function prints a newline */
func RandomFasta(genelist []AminoAcid, count int) { func RandomFasta(genelist []AminoAcid, count int) {
buf := make([]byte, WIDTH + 1); buf := make([]byte, WIDTH + 1);
for { for count > 0 {
line := min(WIDTH, count); line := min(WIDTH, count);
pos := 0; for pos := 0; pos < line; pos++ {
for {
r := myrandom(); r := myrandom();
var i int; var i int;
for i = 0; genelist[i].p < r; i++ { for i = 0; genelist[i].p < r; i++ {
} }
buf[pos] = genelist[i].c; buf[pos] = genelist[i].c;
pos++;
if pos >= line {
break
}
} }
buf[line] = '\n'; buf[line] = '\n';
out.Write(buf[0:line + 1]); out.Write(buf[0:line + 1]);
count -= line; count -= line;
if count <= 0 {
break
}
} }
} }
...@@ -177,7 +164,7 @@ func main() { ...@@ -177,7 +164,7 @@ func main() {
AccumulateProbabilities(iub); AccumulateProbabilities(iub);
AccumulateProbabilities(homosapiens); AccumulateProbabilities(homosapiens);
alu := strings.Bytes("" alu := strings.Bytes(
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
...@@ -188,11 +175,8 @@ func main() { ...@@ -188,11 +175,8 @@ func main() {
out.WriteString(">ONE Homo sapiens alu\n"); out.WriteString(">ONE Homo sapiens alu\n");
RepeatFasta(alu, 2 * *n); RepeatFasta(alu, 2 * *n);
out.Flush();
out.WriteString(">TWO IUB ambiguity codes\n"); out.WriteString(">TWO IUB ambiguity codes\n");
RandomFasta(iub, 3 * *n); RandomFasta(iub, 3 * *n);
out.Flush();
out.WriteString(">THREE Homo sapiens frequency\n"); out.WriteString(">THREE Homo sapiens frequency\n");
RandomFasta(homosapiens, 5 * *n); RandomFasta(homosapiens, 5 * *n);
out.Flush();
} }
>ONE Homo sapiens alu
GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA
TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT
AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG
GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG
CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT
GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA
GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA
TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG
AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA
GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT
AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC
AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG
GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC
CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG
AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT
TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA
TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT
GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG
TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT
CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG
CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG
TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA
CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG
AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG
GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC
TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA
TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA
GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT
GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC
ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT
TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC
CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG
CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG
GGCGACAGAGCGAGACTCCG
>TWO IUB ambiguity codes
cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg
tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa
NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt
cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga
gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa
HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca
tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt
tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt
acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct
tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt
gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa
accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt
RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt
tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag
cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg
ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat
actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg
YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa
KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata
aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa
aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg
gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc
tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK
tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt
ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg
ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa
BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt
aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc
tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc
cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac
aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga
tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga
aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD
gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg
ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV
taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa
ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat
gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg
gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa
tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt
tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt
taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca
cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag
aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt
cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt
ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW
attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag
ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa
attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc
tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta
>THREE Homo sapiens frequency
aacacttcaccaggtatcgtgaaggctcaagattacccagagaacctttgcaatataaga
atatgtatgcagcattaccctaagtaattatattctttttctgactcaaagtgacaagcc
ctagtgtatattaaatcggtatatttgggaaattcctcaaactatcctaatcaggtagcc
atgaaagtgatcaaaaaagttcgtacttataccatacatgaattctggccaagtaaaaaa
tagattgcgcaaaattcgtaccttaagtctctcgccaagatattaggatcctattactca
tatcgtgtttttctttattgccgccatccccggagtatctcacccatccttctcttaaag
gcctaatattacctatgcaaataaacatatattgttgaaaattgagaacctgatcgtgat
tcttatgtgtaccatatgtatagtaatcacgcgactatatagtgctttagtatcgcccgt
gggtgagtgaatattctgggctagcgtgagatagtttcttgtcctaatatttttcagatc
gaatagcttctatttttgtgtttattgacatatgtcgaaactccttactcagtgaaagtc
atgaccagatccacgaacaatcttcggaatcagtctcgttttacggcggaatcttgagtc
taacttatatcccgtcgcttactttctaacaccccttatgtatttttaaaattacgttta
ttcgaacgtacttggcggaagcgttattttttgaagtaagttacattgggcagactcttg
acattttcgatacgactttctttcatccatcacaggactcgttcgtattgatatcagaag
ctcgtgatgattagttgtcttctttaccaatactttgaggcctattctgcgaaatttttg
ttgccctgcgaacttcacataccaaggaacacctcgcaacatgccttcatatccatcgtt
cattgtaattcttacacaatgaatcctaagtaattacatccctgcgtaaaagatggtagg
ggcactgaggatatattaccaagcatttagttatgagtaatcagcaatgtttcttgtatt
aagttctctaaaatagttacatcgtaatgttatctcgggttccgcgaataaacgagatag
attcattatatatggccctaagcaaaaacctcctcgtattctgttggtaattagaatcac
acaatacgggttgagatattaattatttgtagtacgaagagatataaaaagatgaacaat
tactcaagtcaagatgtatacgggatttataataaaaatcgggtagagatctgctttgca
attcagacgtgccactaaatcgtaatatgtcgcgttacatcagaaagggtaactattatt
aattaataaagggcttaatcactacatattagatcttatccgatagtcttatctattcgt
tgtatttttaagcggttctaattcagtcattatatcagtgctccgagttctttattattg
ttttaaggatgacaaaatgcctcttgttataacgctgggagaagcagactaagagtcgga
gcagttggtagaatgaggctgcaaaagacggtctcgacgaatggacagactttactaaac
caatgaaagacagaagtagagcaaagtctgaagtggtatcagcttaattatgacaaccct
taatacttccctttcgccgaatactggcgtggaaaggttttaaaagtcgaagtagttaga
ggcatctctcgctcataaataggtagactactcgcaatccaatgtgactatgtaatactg
ggaacatcagtccgcgatgcagcgtgtttatcaaccgtccccactcgcctggggagacat
gagaccacccccgtggggattattagtccgcagtaatcgactcttgacaatccttttcga
ttatgtcatagcaatttacgacagttcagcgaagtgactactcggcgaaatggtattact
aaagcattcgaacccacatgaatgtgattcttggcaatttctaatccactaaagcttttc
cgttgaatctggttgtagatatttatataagttcactaattaagatcacggtagtatatt
gatagtgatgtctttgcaagaggttggccgaggaatttacggattctctattgatacaat
ttgtctggcttataactcttaaggctgaaccaggcgtttttagacgacttgatcagctgt
tagaatggtttggactccctctttcatgtcagtaacatttcagccgttattgttacgata
tgcttgaacaatattgatctaccacacacccatagtatattttataggtcatgctgttac
ctacgagcatggtattccacttcccattcaatgagtattcaacatcactagcctcagaga
tgatgacccacctctaataacgtcacgttgcggccatgtgaaacctgaacttgagtagac
gatatcaagcgctttaaattgcatataacatttgagggtaaagctaagcggatgctttat
ataatcaatactcaataataagatttgattgcattttagagttatgacacgacatagttc
actaacgagttactattcccagatctagactgaagtactgatcgagacgatccttacgtc
gatgatcgttagttatcgacttaggtcgggtctctagcggtattggtacttaaccggaca
ctatactaataacccatgatcaaagcataacagaatacagacgataatttcgccaacata
tatgtacagaccccaagcatgagaagctcattgaaagctatcattgaagtcccgctcaca
atgtgtcttttccagacggtttaactggttcccgggagtcctggagtttcgacttacata
aatggaaacaatgtattttgctaatttatctatagcgtcatttggaccaatacagaatat
tatgttgcctagtaatccactataacccgcaagtgctgatagaaaatttttagacgattt
ataaatgccccaagtatccctcccgtgaatcctccgttatactaattagtattcgttcat
acgtataccgcgcatatatgaacatttggcgataaggcgcgtgaattgttacgtgacaga
gatagcagtttcttgtgatatggttaacagacgtacatgaagggaaactttatatctata
gtgatgcttccgtagaaataccgccactggtctgccaatgatgaagtatgtagctttagg
tttgtactatgaggctttcgtttgtttgcagagtataacagttgcgagtgaaaaaccgac
gaatttatactaatacgctttcactattggctacaaaatagggaagagtttcaatcatga
gagggagtatatggatgctttgtagctaaaggtagaacgtatgtatatgctgccgttcat
tcttgaaagatacataagcgataagttacgacaattataagcaacatccctaccttcgta
acgatttcactgttactgcgcttgaaatacactatggggctattggcggagagaagcaga
tcgcgccgagcatatacgagacctataatgttgatgatagagaaggcgtctgaattgata
catcgaagtacactttctttcgtagtatctctcgtcctctttctatctccggacacaaga
attaagttatatatatagagtcttaccaatcatgttgaatcctgattctcagagttcttt
ggcgggccttgtgatgactgagaaacaatgcaatattgctccaaatttcctaagcaaatt
ctcggttatgttatgttatcagcaaagcgttacgttatgttatttaaatctggaatgacg
gagcgaagttcttatgtcggtgtgggaataattcttttgaagacagcactccttaaataa
tatcgctccgtgtttgtatttatcgaatgggtctgtaaccttgcacaagcaaatcggtgg
tgtatatatcggataacaattaatacgatgttcatagtgacagtatactgatcgagtcct
ctaaagtcaattacctcacttaacaatctcattgatgttgtgtcattcccggtatcgccc
gtagtatgtgctctgattgaccgagtgtgaaccaaggaacatctactaatgcctttgtta
ggtaagatctctctgaattccttcgtgccaacttaaaacattatcaaaatttcttctact
tggattaactacttttacgagcatggcaaattcccctgtggaagacggttcattattatc
ggaaaccttatagaaattgcgtgttgactgaaattagatttttattgtaagagttgcatc
tttgcgattcctctggtctagcttccaatgaacagtcctcccttctattcgacatcgggt
ccttcgtacatgtctttgcgatgtaataattaggttcggagtgtggccttaatgggtgca
actaggaatacaacgcaaatttgctgacatgatagcaaatcggtatgccggcaccaaaac
gtgctccttgcttagcttgtgaatgagactcagtagttaaataaatccatatctgcaatc
gattccacaggtattgtccactatctttgaactactctaagagatacaagcttagctgag
accgaggtgtatatgactacgctgatatctgtaaggtaccaatgcaggcaaagtatgcga
gaagctaataccggctgtttccagctttataagattaaaatttggctgtcctggcggcct
cagaattgttctatcgtaatcagttggttcattaattagctaagtacgaggtacaactta
tctgtcccagaacagctccacaagtttttttacagccgaaacccctgtgtgaatcttaat
atccaagcgcgttatctgattagagtttacaactcagtattttatcagtacgttttgttt
ccaacattacccggtatgacaaaatgacgccacgtgtcgaataatggtctgaccaatgta
ggaagtgaaaagataaatat
All tests on r45
Aug 3 2009
First version of fasta. Translation of fasta.c, fetched from
http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gpp&id=4
fasta -n 25000000
[gcc -O2 fasta.c 5.98u 0.00s 6.01r]
gccgo -O2 8.82u 0.02s 8.85r
6g 13.50u 0.02s 13.53r
6g -B 12.99u 0.02s 13.02r
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment