Commit 77595e46 authored by Ian Lance Taylor's avatar Ian Lance Taylor

net: if a DNS lookup times out, forget that it is in flight

Before this CL, if the system resolver does a very slow DNS
lookup for a particular host, all subsequent requests for that
host will hang waiting for that lookup to complete.  That is
more or less expected when Dial is called with no deadline.
When Dial has a deadline, though, we can accumulate a large
number of goroutines waiting for that slow DNS lookup.  Try to
avoid this problem by restarting the DNS lookup when it is
redone after a deadline is passed.

This CL also avoids creating an extra goroutine purely to
handle the deadline.

No test because we would have to simulate a slow DNS lookup
followed by a fast DNS lookup.

Fixes #8602.

LGTM=bradfitz
R=bradfitz, mikioh.mikioh
CC=golang-codereviews, r, rsc
https://golang.org/cl/154610044
parent 78082dfa
...@@ -40,10 +40,16 @@ func lookupIPMerge(host string) (addrs []IP, err error) { ...@@ -40,10 +40,16 @@ func lookupIPMerge(host string) (addrs []IP, err error) {
addrsi, err, shared := lookupGroup.Do(host, func() (interface{}, error) { addrsi, err, shared := lookupGroup.Do(host, func() (interface{}, error) {
return lookupIP(host) return lookupIP(host)
}) })
return lookupIPReturn(addrsi, err, shared)
}
// lookupIPReturn turns the return values from singleflight.Do into
// the return values from LookupIP.
func lookupIPReturn(addrsi interface{}, err error, shared bool) ([]IP, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
addrs = addrsi.([]IP) addrs := addrsi.([]IP)
if shared { if shared {
clone := make([]IP, len(addrs)) clone := make([]IP, len(addrs))
copy(clone, addrs) copy(clone, addrs)
...@@ -52,41 +58,40 @@ func lookupIPMerge(host string) (addrs []IP, err error) { ...@@ -52,41 +58,40 @@ func lookupIPMerge(host string) (addrs []IP, err error) {
return addrs, nil return addrs, nil
} }
// lookupIPDeadline looks up a hostname with a deadline.
func lookupIPDeadline(host string, deadline time.Time) (addrs []IP, err error) { func lookupIPDeadline(host string, deadline time.Time) (addrs []IP, err error) {
if deadline.IsZero() { if deadline.IsZero() {
return lookupIPMerge(host) return lookupIPMerge(host)
} }
// TODO(bradfitz): consider pushing the deadline down into the // We could push the deadline down into the name resolution
// name resolution functions. But that involves fixing it for // functions. However, the most commonly used implementation
// the native Go resolver, cgo, Windows, etc. // calls getaddrinfo, which has no timeout.
//
// In the meantime, just use a goroutine. Most users affected
// by http://golang.org/issue/2631 are due to TCP connections
// to unresponsive hosts, not DNS.
timeout := deadline.Sub(time.Now()) timeout := deadline.Sub(time.Now())
if timeout <= 0 { if timeout <= 0 {
err = errTimeout return nil, errTimeout
return
} }
t := time.NewTimer(timeout) t := time.NewTimer(timeout)
defer t.Stop() defer t.Stop()
type res struct {
addrs []IP ch := lookupGroup.DoChan(host, func() (interface{}, error) {
err error return lookupIP(host)
} })
resc := make(chan res, 1)
go func() {
a, err := lookupIPMerge(host)
resc <- res{a, err}
}()
select { select {
case <-t.C: case <-t.C:
err = errTimeout // The DNS lookup timed out for some reason. Force
case r := <-resc: // future requests to start the DNS lookup again
addrs, err = r.addrs, r.err // rather than waiting for the current lookup to
// complete. See issue 8602.
lookupGroup.Forget(host)
return nil, errTimeout
case r := <-ch:
return lookupIPReturn(r.v, r.err, r.shared)
} }
return
} }
// LookupPort looks up the port for the given network and service. // LookupPort looks up the port for the given network and service.
......
...@@ -8,10 +8,18 @@ import "sync" ...@@ -8,10 +8,18 @@ import "sync"
// call is an in-flight or completed singleflight.Do call // call is an in-flight or completed singleflight.Do call
type call struct { type call struct {
wg sync.WaitGroup wg sync.WaitGroup
val interface{}
err error // These fields are written once before the WaitGroup is done
dups int // and are only read after the WaitGroup is done.
val interface{}
err error
// These fields are read and written with the singleflight
// mutex held before the WaitGroup is done, and are read but
// not written after the WaitGroup is done.
dups int
chans []chan<- singleflightResult
} }
// singleflight represents a class of work and forms a namespace in // singleflight represents a class of work and forms a namespace in
...@@ -21,6 +29,14 @@ type singleflight struct { ...@@ -21,6 +29,14 @@ type singleflight struct {
m map[string]*call // lazily initialized m map[string]*call // lazily initialized
} }
// singleflightResult holds the results of Do, so they can be passed
// on a channel.
type singleflightResult struct {
v interface{}
err error
shared bool
}
// Do executes and returns the results of the given function, making // Do executes and returns the results of the given function, making
// sure that only one execution is in-flight for a given key at a // sure that only one execution is in-flight for a given key at a
// time. If a duplicate comes in, the duplicate caller waits for the // time. If a duplicate comes in, the duplicate caller waits for the
...@@ -42,12 +58,52 @@ func (g *singleflight) Do(key string, fn func() (interface{}, error)) (v interfa ...@@ -42,12 +58,52 @@ func (g *singleflight) Do(key string, fn func() (interface{}, error)) (v interfa
g.m[key] = c g.m[key] = c
g.mu.Unlock() g.mu.Unlock()
g.doCall(c, key, fn)
return c.val, c.err, c.dups > 0
}
// DoChan is like Do but returns a channel that will receive the
// results when they are ready.
func (g *singleflight) DoChan(key string, fn func() (interface{}, error)) <-chan singleflightResult {
ch := make(chan singleflightResult, 1)
g.mu.Lock()
if g.m == nil {
g.m = make(map[string]*call)
}
if c, ok := g.m[key]; ok {
c.dups++
c.chans = append(c.chans, ch)
g.mu.Unlock()
return ch
}
c := &call{chans: []chan<- singleflightResult{ch}}
c.wg.Add(1)
g.m[key] = c
g.mu.Unlock()
go g.doCall(c, key, fn)
return ch
}
// doCall handles the single call for a key.
func (g *singleflight) doCall(c *call, key string, fn func() (interface{}, error)) {
c.val, c.err = fn() c.val, c.err = fn()
c.wg.Done() c.wg.Done()
g.mu.Lock() g.mu.Lock()
delete(g.m, key) delete(g.m, key)
for _, ch := range c.chans {
ch <- singleflightResult{c.val, c.err, c.dups > 0}
}
g.mu.Unlock() g.mu.Unlock()
}
return c.val, c.err, c.dups > 0 // Forget tells the singleflight to forget about a key. Future calls
// to Do for this key will call the function rather than waiting for
// an earlier call to complete.
func (g *singleflight) Forget(key string) {
g.mu.Lock()
delete(g.m, key)
g.mu.Unlock()
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment