Use new listener which implements enhanced tcp features (#6289)
This package provides a customizable TCP net.Listener with various performance-related options: * SO_REUSEPORT. This option allows linear scaling of server performance on multi-CPU servers. See https://www.nginx.com/blog/socket-sharding-nginx-release-1-9-1/ for details. * TCP_DEFER_ACCEPT. This option expects the server to read from accepted connections before writing to them. * TCP_FASTOPEN. See https://lwn.net/Articles/508865/ for details. master
parent
5a4a57700b
commit
50a817e3d3
@ -0,0 +1,30 @@ |
|||||||
|
// +build linux darwin dragonfly freebsd netbsd openbsd rumprun
|
||||||
|
|
||||||
|
/* |
||||||
|
* Minio Cloud Storage, (C) 2018 Minio, Inc. |
||||||
|
* |
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
* you may not use this file except in compliance with the License. |
||||||
|
* You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
package http |
||||||
|
|
||||||
|
import "github.com/valyala/tcplisten" |
||||||
|
|
||||||
|
var cfg = &tcplisten.Config{ |
||||||
|
ReusePort: true, |
||||||
|
DeferAccept: true, |
||||||
|
FastOpen: true, |
||||||
|
} |
||||||
|
|
||||||
|
// Unix listener with special TCP options.
|
||||||
|
var listen = cfg.NewListener |
@ -0,0 +1,24 @@ |
|||||||
|
// +build windows plan9
|
||||||
|
|
||||||
|
/* |
||||||
|
* Minio Cloud Storage, (C) 2018 Minio, Inc. |
||||||
|
* |
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
* you may not use this file except in compliance with the License. |
||||||
|
* You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
package http |
||||||
|
|
||||||
|
import "net" |
||||||
|
|
||||||
|
// listen is the listener constructor used on Windows and Plan 9; it is the
// standard library's net.Listen without any extra socket options.
var listen func(network, address string) (net.Listener, error) = net.Listen
@ -0,0 +1,21 @@ |
|||||||
|
The MIT License (MIT) |
||||||
|
|
||||||
|
Copyright (c) 2016 Aliaksandr Valialkin |
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy |
||||||
|
of this software and associated documentation files (the "Software"), to deal |
||||||
|
in the Software without restriction, including without limitation the rights |
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||||
|
copies of the Software, and to permit persons to whom the Software is |
||||||
|
furnished to do so, subject to the following conditions: |
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all |
||||||
|
copies or substantial portions of the Software. |
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||||
|
SOFTWARE. |
@ -0,0 +1,21 @@ |
|||||||
|
[![Build Status](https://travis-ci.org/valyala/tcplisten.svg)](https://travis-ci.org/valyala/tcplisten) |
||||||
|
[![GoDoc](https://godoc.org/github.com/valyala/tcplisten?status.svg)](http://godoc.org/github.com/valyala/tcplisten) |
||||||
|
[![Go Report](https://goreportcard.com/badge/github.com/valyala/tcplisten)](https://goreportcard.com/report/github.com/valyala/tcplisten) |
||||||
|
|
||||||
|
|
||||||
|
Package tcplisten provides customizable TCP net.Listener with various |
||||||
|
performance-related options: |
||||||
|
|
||||||
|
* SO_REUSEPORT. This option allows linear scaling server performance |
||||||
|
on multi-CPU servers. |
||||||
|
See https://www.nginx.com/blog/socket-sharding-nginx-release-1-9-1/ for details. |
||||||
|
|
||||||
|
* TCP_DEFER_ACCEPT. This option expects the server to read from accepted
  connections before writing to them.
||||||
|
|
||||||
|
* TCP_FASTOPEN. See https://lwn.net/Articles/508865/ for details. |
||||||
|
|
||||||
|
|
||||||
|
[Documentation](https://godoc.org/github.com/valyala/tcplisten). |
||||||
|
|
||||||
|
The package is derived from [go_reuseport](https://github.com/kavu/go_reuseport). |
@ -0,0 +1,23 @@ |
|||||||
|
package tcplisten |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"syscall" |
||||||
|
) |
||||||
|
|
||||||
|
// newSocketCloexecOld creates a non-blocking, close-on-exec socket without
// using the SOCK_NONBLOCK and SOCK_CLOEXEC socket type flags.
//
// The socket is created and marked close-on-exec while syscall.ForkLock is
// held for reading; it is switched to non-blocking mode afterwards. On any
// failure the returned descriptor is -1 and the socket, if it was created,
// is closed.
func newSocketCloexecOld(domain, typ, proto int) (int, error) {
	sock, err := func() (int, error) {
		syscall.ForkLock.RLock()
		defer syscall.ForkLock.RUnlock()

		s, sockErr := syscall.Socket(domain, typ, proto)
		if sockErr != nil {
			return -1, sockErr
		}
		syscall.CloseOnExec(s)
		return s, nil
	}()
	if err != nil {
		return -1, fmt.Errorf("cannot create listening socket: %s", err)
	}

	if nbErr := syscall.SetNonblock(sock, true); nbErr != nil {
		// Best-effort cleanup; the SetNonblock error is the one reported.
		syscall.Close(sock)
		return -1, fmt.Errorf("cannot make non-blocked listening socket: %s", nbErr)
	}
	return sock, nil
}
@ -0,0 +1,5 @@ |
|||||||
|
// +build darwin
|
||||||
|
|
||||||
|
package tcplisten |
||||||
|
|
||||||
|
var newSocketCloexec = newSocketCloexecOld |
@ -0,0 +1,21 @@ |
|||||||
|
// +build !darwin
|
||||||
|
|
||||||
|
package tcplisten |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"syscall" |
||||||
|
) |
||||||
|
|
||||||
|
func newSocketCloexec(domain, typ, proto int) (int, error) { |
||||||
|
fd, err := syscall.Socket(domain, typ|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, proto) |
||||||
|
if err == nil { |
||||||
|
return fd, nil |
||||||
|
} |
||||||
|
|
||||||
|
if err == syscall.EPROTONOSUPPORT || err == syscall.EINVAL { |
||||||
|
return newSocketCloexecOld(domain, typ, proto) |
||||||
|
} |
||||||
|
|
||||||
|
return -1, fmt.Errorf("cannot create listening unblocked socket: %s", err) |
||||||
|
} |
@ -0,0 +1,162 @@ |
|||||||
|
// +build linux darwin dragonfly freebsd netbsd openbsd rumprun
|
||||||
|
|
||||||
|
// Package tcplisten provides customizable TCP net.Listener with various
|
||||||
|
// performance-related options:
|
||||||
|
//
|
||||||
|
// - SO_REUSEPORT. This option allows linear scaling server performance
|
||||||
|
// on multi-CPU servers.
|
||||||
|
// See https://www.nginx.com/blog/socket-sharding-nginx-release-1-9-1/ for details.
|
||||||
|
//
|
||||||
|
// - TCP_DEFER_ACCEPT. This option expects the server to read from accepted
//   connections before writing to them.
|
||||||
|
//
|
||||||
|
// - TCP_FASTOPEN. See https://lwn.net/Articles/508865/ for details.
|
||||||
|
//
|
||||||
|
// The package is derived from https://github.com/kavu/go_reuseport .
|
||||||
|
package tcplisten |
||||||
|
|
||||||
|
import (
	"errors"
	"fmt"
	"net"
	"os"
	"strconv"
	"syscall"
)
||||||
|
|
||||||
|
// Config describes which socket options NewListener sets on the listeners
// it creates.
type Config struct {
	// ReusePort requests SO_REUSEPORT on the listening socket.
	ReusePort bool

	// DeferAccept requests TCP_DEFER_ACCEPT on the listening socket.
	DeferAccept bool

	// FastOpen requests TCP_FASTOPEN on the listening socket.
	FastOpen bool

	// Backlog caps the number of pending TCP connections the listener may
	// queue before they are handed to Accept; see listen(2) for details.
	//
	// A value of zero or less selects the system-level backlog.
	Backlog int
}
||||||
|
|
||||||
|
// NewListener returns TCP listener with options set in the Config.
|
||||||
|
//
|
||||||
|
// The function may be called many times for creating distinct listeners
|
||||||
|
// with the given config.
|
||||||
|
//
|
||||||
|
// Only tcp4 and tcp6 networks are supported.
|
||||||
|
func (cfg *Config) NewListener(network, addr string) (net.Listener, error) { |
||||||
|
sa, soType, err := getSockaddr(network, addr) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
fd, err := newSocketCloexec(soType, syscall.SOCK_STREAM, syscall.IPPROTO_TCP) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
if err = cfg.fdSetup(fd, sa, addr); err != nil { |
||||||
|
syscall.Close(fd) |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
name := fmt.Sprintf("reuseport.%d.%s.%s", os.Getpid(), network, addr) |
||||||
|
file := os.NewFile(uintptr(fd), name) |
||||||
|
ln, err := net.FileListener(file) |
||||||
|
if err != nil { |
||||||
|
file.Close() |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
if err = file.Close(); err != nil { |
||||||
|
ln.Close() |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
return ln, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (cfg *Config) fdSetup(fd int, sa syscall.Sockaddr, addr string) error { |
||||||
|
var err error |
||||||
|
|
||||||
|
if err = syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_REUSEADDR, 1); err != nil { |
||||||
|
return fmt.Errorf("cannot enable SO_REUSEADDR: %s", err) |
||||||
|
} |
||||||
|
|
||||||
|
// This should disable Nagle's algorithm in all accepted sockets by default.
|
||||||
|
// Users may enable it with net.TCPConn.SetNoDelay(false).
|
||||||
|
if err = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_NODELAY, 1); err != nil { |
||||||
|
return fmt.Errorf("cannot disable Nagle's algorithm: %s", err) |
||||||
|
} |
||||||
|
|
||||||
|
if cfg.ReusePort { |
||||||
|
if err = syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, soReusePort, 1); err != nil { |
||||||
|
return fmt.Errorf("cannot enable SO_REUSEPORT: %s", err) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if cfg.DeferAccept { |
||||||
|
if err = enableDeferAccept(fd); err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if cfg.FastOpen { |
||||||
|
if err = enableFastOpen(fd); err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if err = syscall.Bind(fd, sa); err != nil { |
||||||
|
return fmt.Errorf("cannot bind to %q: %s", addr, err) |
||||||
|
} |
||||||
|
|
||||||
|
backlog := cfg.Backlog |
||||||
|
if backlog <= 0 { |
||||||
|
if backlog, err = soMaxConn(); err != nil { |
||||||
|
return fmt.Errorf("cannot determine backlog to pass to listen(2): %s", err) |
||||||
|
} |
||||||
|
} |
||||||
|
if err = syscall.Listen(fd, backlog); err != nil { |
||||||
|
return fmt.Errorf("cannot listen on %q: %s", addr, err) |
||||||
|
} |
||||||
|
|
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// getSockaddr resolves addr on the given network into a socket address that
// can be passed to syscall.Bind, together with the matching address family
// (syscall.AF_INET or syscall.AF_INET6).
//
// Only the "tcp4" and "tcp6" networks are accepted; any other network yields
// an error. On error the returned address is nil and the family is -1.
//
// For tcp6 addresses carrying a zone, the zone is first looked up as an
// interface name. If no such interface exists and the zone is a decimal
// number, it is used directly as the interface index.
func getSockaddr(network, addr string) (sa syscall.Sockaddr, soType int, err error) {
	if network != "tcp4" && network != "tcp6" {
		return nil, -1, errors.New("only tcp4 and tcp6 network is supported")
	}

	tcpAddr, err := net.ResolveTCPAddr(network, addr)
	if err != nil {
		return nil, -1, err
	}

	if network == "tcp4" {
		var sa4 syscall.SockaddrInet4
		sa4.Port = tcpAddr.Port
		// A nil IP (e.g. addr ":8080") copies nothing and leaves the
		// all-zero wildcard address in place.
		copy(sa4.Addr[:], tcpAddr.IP.To4())
		return &sa4, syscall.AF_INET, nil
	}

	// network == "tcp6": guaranteed by the check at the top.
	var sa6 syscall.SockaddrInet6
	sa6.Port = tcpAddr.Port
	copy(sa6.Addr[:], tcpAddr.IP.To16())
	if zone := tcpAddr.Zone; zone != "" {
		if ifi, ifErr := net.InterfaceByName(zone); ifErr == nil {
			sa6.ZoneId = uint32(ifi.Index)
		} else {
			// Not a known interface name: accept a numeric interface index,
			// otherwise report the original lookup failure.
			index, numErr := strconv.ParseUint(zone, 10, 32)
			if numErr != nil {
				return nil, -1, ifErr
			}
			sa6.ZoneId = uint32(index)
		}
	}
	return &sa6, syscall.AF_INET6, nil
}
@ -0,0 +1,24 @@ |
|||||||
|
// +build darwin dragonfly freebsd netbsd openbsd rumprun
|
||||||
|
|
||||||
|
package tcplisten |
||||||
|
|
||||||
|
import ( |
||||||
|
"syscall" |
||||||
|
) |
||||||
|
|
||||||
|
const soReusePort = syscall.SO_REUSEPORT |
||||||
|
|
||||||
|
// enableDeferAccept is a placeholder on these platforms: it ignores fd and
// always reports success.
func enableDeferAccept(fd int) error {
	// TODO: implement this via the SO_ACCEPTFILTER "dataready" filter.
	return nil
}
||||||
|
|
||||||
|
// enableFastOpen is a placeholder on these platforms: it ignores fd and
// always reports success.
func enableFastOpen(fd int) error {
	// TODO: implement TCP_FASTOPEN once it is ready.
	return nil
}
||||||
|
|
||||||
|
// soMaxConn returns the backlog to pass to listen(2). On these platforms it
// is always the compile-time constant syscall.SOMAXCONN and the error is
// always nil.
func soMaxConn() (int, error) {
	// TODO: implement this properly instead of returning the constant.
	const backlog = syscall.SOMAXCONN
	return backlog, nil
}
@ -0,0 +1,59 @@ |
|||||||
|
// +build linux
|
||||||
|
|
||||||
|
package tcplisten |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"io/ioutil" |
||||||
|
"os" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
"syscall" |
||||||
|
) |
||||||
|
|
||||||
|
// soReusePort is the socket option number passed to setsockopt to enable
// SO_REUSEPORT.
//
// NOTE(review): the value is hard-coded rather than taken from the syscall
// package; confirm it is correct for every Linux architecture this is built on.
const soReusePort = 0x0F

// tcpFastOpen is the socket option number passed to setsockopt to enable
// TCP_FASTOPEN.
const tcpFastOpen = 0x17
||||||
|
|
||||||
|
// enableDeferAccept sets the TCP_DEFER_ACCEPT option to 1 on the socket fd.
// A setsockopt failure is returned with a descriptive prefix.
func enableDeferAccept(fd int) error {
	err := syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_DEFER_ACCEPT, 1)
	if err == nil {
		return nil
	}
	return fmt.Errorf("cannot enable TCP_DEFER_ACCEPT: %s", err)
}
||||||
|
|
||||||
|
func enableFastOpen(fd int) error { |
||||||
|
if err := syscall.SetsockoptInt(fd, syscall.SOL_TCP, tcpFastOpen, fastOpenQlen); err != nil { |
||||||
|
return fmt.Errorf("cannot enable TCP_FASTOPEN(qlen=%d): %s", fastOpenQlen, err) |
||||||
|
} |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// fastOpenQlen is the value passed for the TCP_FASTOPEN socket option
// (reported as "qlen" in error messages): 16384.
const fastOpenQlen = 16 << 10
||||||
|
|
||||||
|
// soMaxConn returns the backlog to pass to listen(2), as configured in
// soMaxConnFilePath.
//
// If the file does not exist, syscall.SOMAXCONN is returned instead. Any
// other read error, an unparsable value or a non-positive value results in
// -1 and a non-nil error. Values above 65535 are clamped to 65535.
func soMaxConn() (int, error) {
	data, err := ioutil.ReadFile(soMaxConnFilePath)
	if err != nil {
		// The file may be missing in restricted environments (e.g. CI
		// builds); fall back to the compile-time default.
		if os.IsNotExist(err) {
			return syscall.SOMAXCONN, nil
		}
		return -1, err
	}

	s := strings.TrimSpace(string(data))
	n, err := strconv.Atoi(s)
	if err != nil {
		return -1, fmt.Errorf("cannot parse somaxconn %q read from %s: %s", s, soMaxConnFilePath, err)
	}
	if n <= 0 {
		// Reported separately: there is no parse error to print here.
		return -1, fmt.Errorf("cannot parse somaxconn %q read from %s: value must be positive", s, soMaxConnFilePath)
	}

	// Linux stores the backlog in a uint16.
	// Truncate number to avoid wrapping.
	// See https://github.com/golang/go/issues/5030 .
	const maxBacklog = 1<<16 - 1
	if n > maxBacklog {
		n = maxBacklog
	}
	return n, nil
}

// soMaxConnFilePath is the file from which soMaxConn reads the system-wide
// listen backlog limit.
const soMaxConnFilePath = "/proc/sys/net/core/somaxconn"
Loading…
Reference in new issue