Use new listener which implements enhanced tcp features (#6289)

This package provides a customizable TCP net.Listener with various
performance-related options:

 * SO_REUSEPORT. This option allows linear scaling server performance
   on multi-CPU servers.
   See https://www.nginx.com/blog/socket-sharding-nginx-release-1-9-1/ for details.
 * TCP_DEFER_ACCEPT. This option expects the server to read from the accepted
   connection before writing to it.
 * TCP_FASTOPEN. See https://lwn.net/Articles/508865/ for details.
master
Harshavardhana 6 years ago committed by kannappanr
parent 5a4a57700b
commit 50a817e3d3
  1. 30
      cmd/http/listen_nix.go
  2. 24
      cmd/http/listen_others.go
  3. 3
      cmd/http/listener.go
  4. 32
      cmd/http/listener_test.go
  5. 21
      vendor/github.com/valyala/tcplisten/LICENSE
  6. 21
      vendor/github.com/valyala/tcplisten/README.md
  7. 23
      vendor/github.com/valyala/tcplisten/socket.go
  8. 5
      vendor/github.com/valyala/tcplisten/socket_darwin.go
  9. 21
      vendor/github.com/valyala/tcplisten/socket_other.go
  10. 162
      vendor/github.com/valyala/tcplisten/tcplisten.go
  11. 24
      vendor/github.com/valyala/tcplisten/tcplisten_bsd.go
  12. 59
      vendor/github.com/valyala/tcplisten/tcplisten_linux.go
  13. 6
      vendor/vendor.json

@ -0,0 +1,30 @@
// +build linux darwin dragonfly freebsd netbsd openbsd rumprun
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package http
import "github.com/valyala/tcplisten"
var (
	// cfg is the shared tcplisten configuration used for every listener
	// created on Unix-like platforms. It requests SO_REUSEPORT,
	// TCP_DEFER_ACCEPT and TCP_FASTOPEN.
	cfg = &tcplisten.Config{
		ReusePort:   true,
		DeferAccept: true,
		FastOpen:    true,
	}

	// listen is the Unix listener constructor with special TCP options;
	// it is the NewListener method bound to cfg.
	listen = cfg.NewListener
)

@ -0,0 +1,24 @@
// +build windows plan9
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package http
import "net"
// listen is the Windows and plan9 specific listener constructor.
// It is the standard library's net.Listen, so none of the special TCP
// options are applied on these platforms.
var listen = net.Listen

@ -300,6 +300,7 @@ func newHTTPListener(serverAddrs []string,
updateBytesWrittenFunc func(int)) (listener *httpListener, err error) { updateBytesWrittenFunc func(int)) (listener *httpListener, err error) {
var tcpListeners []*net.TCPListener var tcpListeners []*net.TCPListener
// Close all opened listeners on error // Close all opened listeners on error
defer func() { defer func() {
if err == nil { if err == nil {
@ -314,7 +315,7 @@ func newHTTPListener(serverAddrs []string,
for _, serverAddr := range serverAddrs { for _, serverAddr := range serverAddrs {
var l net.Listener var l net.Listener
if l, err = net.Listen("tcp", serverAddr); err != nil { if l, err = listen("tcp4", serverAddr); err != nil {
return nil, err return nil, err
} }

@ -197,11 +197,23 @@ func TestIsHTTPMethod(t *testing.T) {
func TestNewHTTPListener(t *testing.T) { func TestNewHTTPListener(t *testing.T) {
errMsg := ": no such host" errMsg := ": no such host"
remoteAddrErrMsg := "listen tcp 93.184.216.34:65432: bind: cannot assign requested address" remoteAddrErrMsgIP := "cannot bind to \"93.184.216.34:65432\": cannot assign requested address"
if runtime.GOOS == "windows" { if runtime.GOOS == "windows" {
remoteAddrErrMsg = "listen tcp 93.184.216.34:65432: bind: The requested address is not valid in its context." remoteAddrErrMsgIP = "listen tcp 93.184.216.34:65432: bind: The requested address is not valid in its context."
} else if runtime.GOOS == "darwin" { }
remoteAddrErrMsg = "listen tcp 93.184.216.34:65432: bind: can't assign requested address" remoteAddrErrMsgHost := "cannot bind to \"example.org:65432\": cannot assign requested address"
if runtime.GOOS == "windows" {
remoteAddrErrMsgHost = "listen tcp 93.184.216.34:65432: bind: The requested address is not valid in its context."
}
remoteMissingErr := "address unknown-host: missing port in address"
if runtime.GOOS == "windows" {
remoteMissingErr = "listen tcp: address unknown-host: missing port in address"
}
remoteUnknownErr := "lookup unknown-host" + errMsg
if runtime.GOOS == "windows" {
remoteUnknownErr = "listen tcp: lookup unknown-host" + errMsg
} }
tlsConfig := getTLSConfig(t) tlsConfig := getTLSConfig(t)
@ -217,12 +229,12 @@ func TestNewHTTPListener(t *testing.T) {
errorLogFunc func(context.Context, error) errorLogFunc func(context.Context, error)
expectedErr error expectedErr error
}{ }{
{[]string{"93.184.216.34:65432"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, errors.New(remoteAddrErrMsg)}, {[]string{"93.184.216.34:65432"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, errors.New(remoteAddrErrMsgIP)},
{[]string{"example.org:65432"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, errors.New(remoteAddrErrMsg)}, {[]string{"example.org:65432"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, errors.New(remoteAddrErrMsgHost)},
{[]string{"unknown-host"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, errors.New("listen tcp: address unknown-host: missing port in address")}, {[]string{"unknown-host"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, errors.New(remoteMissingErr)},
{[]string{"unknown-host:65432"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, errors.New("listen tcp: lookup unknown-host" + errMsg)}, {[]string{"unknown-host:65432"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, errors.New(remoteUnknownErr)},
{[]string{"localhost:65432", "93.184.216.34:65432"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, errors.New(remoteAddrErrMsg)}, {[]string{"localhost:65432", "93.184.216.34:65432"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, errors.New(remoteAddrErrMsgIP)},
{[]string{"localhost:65432", "unknown-host:65432"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, errors.New("listen tcp: lookup unknown-host" + errMsg)}, {[]string{"localhost:65432", "unknown-host:65432"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, errors.New(remoteUnknownErr)},
{[]string{"localhost:0"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, nil}, {[]string{"localhost:0"}, nil, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, nil},
{[]string{"localhost:0"}, tlsConfig, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, nil}, {[]string{"localhost:0"}, tlsConfig, time.Duration(0), time.Duration(0), time.Duration(0), nil, nil, nil, nil},
} }

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2016 Aliaksandr Valialkin
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,21 @@
[![Build Status](https://travis-ci.org/valyala/tcplisten.svg)](https://travis-ci.org/valyala/tcplisten)
[![GoDoc](https://godoc.org/github.com/valyala/tcplisten?status.svg)](http://godoc.org/github.com/valyala/tcplisten)
[![Go Report](https://goreportcard.com/badge/github.com/valyala/tcplisten)](https://goreportcard.com/report/github.com/valyala/tcplisten)
Package tcplisten provides a customizable TCP net.Listener with various
performance-related options:
* SO_REUSEPORT. This option allows linear scaling server performance
on multi-CPU servers.
See https://www.nginx.com/blog/socket-sharding-nginx-release-1-9-1/ for details.
* TCP_DEFER_ACCEPT. This option expects the server to read from the accepted
connection before writing to it.
* TCP_FASTOPEN. See https://lwn.net/Articles/508865/ for details.
[Documentation](https://godoc.org/github.com/valyala/tcplisten).
The package is derived from [go_reuseport](https://github.com/kavu/go_reuseport).

@ -0,0 +1,23 @@
package tcplisten
import (
"fmt"
"syscall"
)
// newSocketCloexecOld creates a socket with the given domain, type and
// protocol, marks it close-on-exec and switches it to non-blocking mode
// using separate calls after the socket has been created.
//
// It returns the new descriptor, or -1 and a descriptive error. When
// switching to non-blocking mode fails, the descriptor is closed before
// returning.
func newSocketCloexecOld(domain, typ, proto int) (int, error) {
	// Creation and the close-on-exec marking both happen while the fork
	// lock is held for reading; the lock is released before anything else.
	sock, sockErr := func() (int, error) {
		syscall.ForkLock.RLock()
		defer syscall.ForkLock.RUnlock()

		s, e := syscall.Socket(domain, typ, proto)
		if e != nil {
			return s, e
		}
		syscall.CloseOnExec(s)
		return s, nil
	}()
	if sockErr != nil {
		return -1, fmt.Errorf("cannot create listening socket: %s", sockErr)
	}

	if nbErr := syscall.SetNonblock(sock, true); nbErr != nil {
		syscall.Close(sock)
		return -1, fmt.Errorf("cannot make non-blocked listening socket: %s", nbErr)
	}
	return sock, nil
}

@ -0,0 +1,5 @@
// +build darwin
package tcplisten
// newSocketCloexec is the socket constructor used on darwin. It is simply
// newSocketCloexecOld, which sets close-on-exec and non-blocking mode with
// separate calls after the socket is created.
// NOTE(review): presumably the SOCK_NONBLOCK/SOCK_CLOEXEC socket-type flags
// used by the non-darwin variant are unavailable on darwin — confirm.
var newSocketCloexec = newSocketCloexecOld

@ -0,0 +1,21 @@
// +build !darwin
package tcplisten
import (
"fmt"
"syscall"
)
// newSocketCloexec creates a non-blocking, close-on-exec socket in a single
// socket call by OR-ing SOCK_NONBLOCK and SOCK_CLOEXEC into the socket type.
//
// If that call is rejected with EPROTONOSUPPORT or EINVAL, it falls back to
// newSocketCloexecOld, which applies both settings with separate calls.
// Any other failure yields -1 and a descriptive error.
func newSocketCloexec(domain, typ, proto int) (int, error) {
	flags := typ | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC
	sock, err := syscall.Socket(domain, flags, proto)
	switch err {
	case nil:
		return sock, nil
	case syscall.EPROTONOSUPPORT, syscall.EINVAL:
		// The combined flags were not accepted; use the multi-step path.
		return newSocketCloexecOld(domain, typ, proto)
	}
	return -1, fmt.Errorf("cannot create listening unblocked socket: %s", err)
}

@ -0,0 +1,162 @@
// +build linux darwin dragonfly freebsd netbsd openbsd rumprun
// Package tcplisten provides a customizable TCP net.Listener with various
// performance-related options:
//
// - SO_REUSEPORT. This option allows linear scaling server performance
// on multi-CPU servers.
// See https://www.nginx.com/blog/socket-sharding-nginx-release-1-9-1/ for details.
//
// - TCP_DEFER_ACCEPT. This option expects the server to read from the accepted
// connection before writing to it.
//
// - TCP_FASTOPEN. See https://lwn.net/Articles/508865/ for details.
//
// The package is derived from https://github.com/kavu/go_reuseport .
package tcplisten
import (
"errors"
"fmt"
"net"
"os"
"syscall"
)
// Config provides options to enable on the returned listener.
//
// SO_REUSEADDR and TCP_NODELAY are always set on the listening socket by
// fdSetup regardless of these fields; the fields below only control the
// optional features.
type Config struct {
// ReusePort enables SO_REUSEPORT.
ReusePort bool
// DeferAccept enables TCP_DEFER_ACCEPT.
// The option is applied through the platform-specific enableDeferAccept
// helper.
DeferAccept bool
// FastOpen enables TCP_FASTOPEN.
// The option is applied through the platform-specific enableFastOpen
// helper.
FastOpen bool
// Backlog is the maximum number of pending TCP connections the listener
// may queue before passing them to Accept.
// See man 2 listen for details.
//
// By default system-level backlog value is used: any value <= 0 makes
// fdSetup ask the platform-specific soMaxConn helper for the backlog.
Backlog int
}
// NewListener returns a TCP listener bound to addr with the options set in
// the Config applied to its socket.
//
// It may be called any number of times; every call creates a distinct
// listener using the same config.
//
// Only the "tcp4" and "tcp6" networks are supported; any other network, an
// unresolvable address, or a failure while creating or configuring the
// socket results in a nil listener and a non-nil error.
func (cfg *Config) NewListener(network, addr string) (net.Listener, error) {
	sockAddr, family, err := getSockaddr(network, addr)
	if err != nil {
		return nil, err
	}

	sock, err := newSocketCloexec(family, syscall.SOCK_STREAM, syscall.IPPROTO_TCP)
	if err != nil {
		return nil, err
	}

	// Set options, bind and listen; the raw descriptor is still ours to
	// close if any of those steps fails.
	if setupErr := cfg.fdSetup(sock, sockAddr, addr); setupErr != nil {
		syscall.Close(sock)
		return nil, setupErr
	}

	// Wrap the descriptor in an *os.File so the net package can build a
	// listener from it. The file is closed in every case once
	// net.FileListener has returned.
	label := fmt.Sprintf("reuseport.%d.%s.%s", os.Getpid(), network, addr)
	f := os.NewFile(uintptr(sock), label)
	listener, lnErr := net.FileListener(f)
	closeErr := f.Close()

	switch {
	case lnErr != nil:
		// The listener could not be created; the close result is ignored.
		return nil, lnErr
	case closeErr != nil:
		listener.Close()
		return nil, closeErr
	}
	return listener, nil
}
// fdSetup configures the socket fd, binds it to sa and puts it into the
// listening state.
//
// Steps, in order: enable SO_REUSEADDR, enable TCP_NODELAY, apply the
// optional features selected in cfg (SO_REUSEPORT, TCP_DEFER_ACCEPT,
// TCP_FASTOPEN), bind, then listen with cfg.Backlog or, when that is not
// positive, the value reported by soMaxConn. addr is used only in error
// messages. The descriptor is never closed here; that is left to the caller.
func (cfg *Config) fdSetup(fd int, sa syscall.Sockaddr, addr string) error {
	if e := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_REUSEADDR, 1); e != nil {
		return fmt.Errorf("cannot enable SO_REUSEADDR: %s", e)
	}

	// This should disable Nagle's algorithm in all accepted sockets by default.
	// Users may enable it with net.TCPConn.SetNoDelay(false).
	if e := syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_NODELAY, 1); e != nil {
		return fmt.Errorf("cannot disable Nagle's algorithm: %s", e)
	}

	if cfg.ReusePort {
		if e := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, soReusePort, 1); e != nil {
			return fmt.Errorf("cannot enable SO_REUSEPORT: %s", e)
		}
	}

	// The remaining optional features are applied by platform-specific
	// helpers whose errors are passed through unchanged.
	optional := []struct {
		wanted bool
		enable func(int) error
	}{
		{cfg.DeferAccept, enableDeferAccept},
		{cfg.FastOpen, enableFastOpen},
	}
	for _, opt := range optional {
		if !opt.wanted {
			continue
		}
		if e := opt.enable(fd); e != nil {
			return e
		}
	}

	if e := syscall.Bind(fd, sa); e != nil {
		return fmt.Errorf("cannot bind to %q: %s", addr, e)
	}

	queueLen := cfg.Backlog
	if queueLen <= 0 {
		sysMax, e := soMaxConn()
		if e != nil {
			return fmt.Errorf("cannot determine backlog to pass to listen(2): %s", e)
		}
		queueLen = sysMax
	}

	if e := syscall.Listen(fd, queueLen); e != nil {
		return fmt.Errorf("cannot listen on %q: %s", addr, e)
	}
	return nil
}
// getSockaddr resolves addr on the given network and converts the result
// into the raw socket address and address family needed to create and bind
// a socket.
//
// network must be "tcp4" or "tcp6". For "tcp4" it returns a
// *syscall.SockaddrInet4 with syscall.AF_INET; for "tcp6" it returns a
// *syscall.SockaddrInet6 with syscall.AF_INET6, translating a zone name to
// the index of the interface with that name. On any failure it returns a
// nil address, -1 as the family and the error.
func getSockaddr(network, addr string) (sa syscall.Sockaddr, soType int, err error) {
	supported := network == "tcp4" || network == "tcp6"
	if !supported {
		return nil, -1, errors.New("only tcp4 and tcp6 network is supported")
	}

	resolved, err := net.ResolveTCPAddr(network, addr)
	if err != nil {
		return nil, -1, err
	}

	switch network {
	case "tcp4":
		v4 := &syscall.SockaddrInet4{Port: resolved.Port}
		// To4 yields nil when no IP was resolved (e.g. ":8080"); copy then
		// leaves the address all zeros.
		copy(v4.Addr[:], resolved.IP.To4())
		return v4, syscall.AF_INET, nil
	case "tcp6":
		v6 := &syscall.SockaddrInet6{Port: resolved.Port}
		copy(v6.Addr[:], resolved.IP.To16())
		if zone := resolved.Zone; zone != "" {
			iface, lookupErr := net.InterfaceByName(zone)
			if lookupErr != nil {
				return nil, -1, lookupErr
			}
			v6.ZoneId = uint32(iface.Index)
		}
		return v6, syscall.AF_INET6, nil
	default:
		return nil, -1, errors.New("Unknown network type " + network)
	}
}

@ -0,0 +1,24 @@
// +build darwin dragonfly freebsd netbsd openbsd rumprun
package tcplisten
import (
"syscall"
)
// soReusePort is the socket-level option number used by fdSetup to enable
// SO_REUSEPORT; on these platforms it is taken directly from the syscall
// package.
const soReusePort = syscall.SO_REUSEPORT
// enableDeferAccept is a placeholder on these platforms: it does nothing
// with fd and always returns nil, so requesting DeferAccept has no effect
// here.
func enableDeferAccept(fd int) error {
// TODO: implement SO_ACCEPTFILTER:dataready here
return nil
}
// enableFastOpen is a placeholder on these platforms: it does nothing with
// fd and always returns nil, so requesting FastOpen has no effect here.
func enableFastOpen(fd int) error {
// TODO: implement TCP_FASTOPEN when it will be ready
return nil
}
// soMaxConn returns the backlog to pass to listen(2) when the Config does
// not specify one. On these platforms it always returns the compile-time
// constant syscall.SOMAXCONN and a nil error.
func soMaxConn() (int, error) {
// TODO: properly implement it
return syscall.SOMAXCONN, nil
}

@ -0,0 +1,59 @@
// +build linux
package tcplisten
import (
"fmt"
"io/ioutil"
"os"
"strconv"
"strings"
"syscall"
)
// Socket option numbers used on Linux, hard-coded as numeric literals.
// NOTE(review): presumably hard-coded because the syscall package does not
// export these names on Linux — confirm.
const (
// soReusePort is the SOL_SOCKET option number used to enable SO_REUSEPORT.
soReusePort = 0x0F
// tcpFastOpen is the SOL_TCP option number used to enable TCP_FASTOPEN.
tcpFastOpen = 0x17
)
// enableDeferAccept sets the TCP_DEFER_ACCEPT option to 1 on the socket fd.
// It returns nil on success and a descriptive error when setsockopt fails.
func enableDeferAccept(fd int) error {
	err := syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_DEFER_ACCEPT, 1)
	if err == nil {
		return nil
	}
	return fmt.Errorf("cannot enable TCP_DEFER_ACCEPT: %s", err)
}
// enableFastOpen sets the TCP_FASTOPEN option on the socket fd, passing
// fastOpenQlen as the option value. It returns nil on success and a
// descriptive error, including the queue length used, when setsockopt fails.
func enableFastOpen(fd int) error {
	err := syscall.SetsockoptInt(fd, syscall.SOL_TCP, tcpFastOpen, fastOpenQlen)
	if err == nil {
		return nil
	}
	return fmt.Errorf("cannot enable TCP_FASTOPEN(qlen=%d): %s", fastOpenQlen, err)
}

// fastOpenQlen is the value passed for the TCP_FASTOPEN socket option.
// NOTE(review): presumably the maximum number of pending fast-open
// requests — confirm against tcp(7).
const fastOpenQlen = 16 * 1024
// soMaxConn returns the backlog to pass to listen(2) when the Config does
// not specify one, read from soMaxConnFilePath.
//
// If the file does not exist, syscall.SOMAXCONN is returned instead. Any
// other read error, an unparsable value, or a non-positive value yields -1
// and an error. Values above 65535 are clamped to 65535.
func soMaxConn() (int, error) {
	data, err := ioutil.ReadFile(soMaxConnFilePath)
	if err != nil {
		// This error may trigger on travis build. Just use SOMAXCONN
		if os.IsNotExist(err) {
			return syscall.SOMAXCONN, nil
		}
		return -1, err
	}

	s := strings.TrimSpace(string(data))
	n, err := strconv.Atoi(s)
	if err != nil {
		return -1, fmt.Errorf("cannot parse somaxconn %q read from %s: %s", s, soMaxConnFilePath, err)
	}
	// Reported separately from parse failures: err is nil here, and
	// formatting a nil error with %s would print "%!s(<nil>)".
	if n <= 0 {
		return -1, fmt.Errorf("cannot parse somaxconn %q read from %s: value must be positive", s, soMaxConnFilePath)
	}

	// Linux stores the backlog in a uint16.
	// Truncate number to avoid wrapping.
	// See https://github.com/golang/go/issues/5030 .
	const maxBacklog = 1<<16 - 1
	if n > maxBacklog {
		n = maxBacklog
	}
	return n, nil
}

// soMaxConnFilePath is the procfs file from which soMaxConn reads the
// system-wide backlog limit.
const soMaxConnFilePath = "/proc/sys/net/core/somaxconn"

@ -894,6 +894,12 @@
"revision": "ded73eae5db7e7a0ef6f55aace87a2873c5d2b74", "revision": "ded73eae5db7e7a0ef6f55aace87a2873c5d2b74",
"revisionTime": "2017-01-07T13:32:03Z" "revisionTime": "2017-01-07T13:32:03Z"
}, },
{
"checksumSHA1": "R/Mpe0uUp5HeqWrdrH5qE2qZuE8=",
"path": "github.com/valyala/tcplisten",
"revision": "ceec8f93295a060cdb565ec25e4ccf17941dbd55",
"revisionTime": "2016-11-14T21:01:44Z"
},
{ {
"checksumSHA1": "6ksZHYhLc3yOzTbcWKb3bDENhD4=", "checksumSHA1": "6ksZHYhLc3yOzTbcWKb3bDENhD4=",
"path": "github.com/xwb1989/sqlparser", "path": "github.com/xwb1989/sqlparser",

Loading…
Cancel
Save