Use new listener which implements enhanced tcp features (#6289)
This package provide customizable TCP net.Listener with various performance-related options: * SO_REUSEPORT. This option allows linear scaling server performance on multi-CPU servers. See https://www.nginx.com/blog/socket-sharding-nginx-release-1-9-1/ for details. * TCP_DEFER_ACCEPT. This option expects the server reads from the accepted connection before writing to them. * TCP_FASTOPEN. See https://lwn.net/Articles/508865/ for details.master
parent
5a4a57700b
commit
50a817e3d3
@ -0,0 +1,30 @@ |
||||
// +build linux darwin dragonfly freebsd netbsd openbsd rumprun
|
||||
|
||||
/* |
||||
* Minio Cloud Storage, (C) 2018 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package http |
||||
|
||||
import "github.com/valyala/tcplisten" |
||||
|
||||
var cfg = &tcplisten.Config{ |
||||
ReusePort: true, |
||||
DeferAccept: true, |
||||
FastOpen: true, |
||||
} |
||||
|
||||
// Unix listener with special TCP options.
|
||||
var listen = cfg.NewListener |
@ -0,0 +1,24 @@ |
||||
// +build windows plan9
|
||||
|
||||
/* |
||||
* Minio Cloud Storage, (C) 2018 Minio, Inc. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package http |
||||
|
||||
import "net" |
||||
|
||||
// listen creates listeners on Windows and Plan 9, where it is simply the
// standard library's net.Listen with no extra socket options applied.
var listen = net.Listen
@ -0,0 +1,21 @@ |
||||
The MIT License (MIT) |
||||
|
||||
Copyright (c) 2016 Aliaksandr Valialkin |
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
of this software and associated documentation files (the "Software"), to deal |
||||
in the Software without restriction, including without limitation the rights |
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
copies of the Software, and to permit persons to whom the Software is |
||||
furnished to do so, subject to the following conditions: |
||||
|
||||
The above copyright notice and this permission notice shall be included in all |
||||
copies or substantial portions of the Software. |
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
SOFTWARE. |
@ -0,0 +1,21 @@ |
||||
[![Build Status](https://travis-ci.org/valyala/tcplisten.svg)](https://travis-ci.org/valyala/tcplisten) |
||||
[![GoDoc](https://godoc.org/github.com/valyala/tcplisten?status.svg)](http://godoc.org/github.com/valyala/tcplisten) |
||||
[![Go Report](https://goreportcard.com/badge/github.com/valyala/tcplisten)](https://goreportcard.com/report/github.com/valyala/tcplisten) |
||||
|
||||
|
||||
Package tcplisten provides customizable TCP net.Listener with various |
||||
performance-related options: |
||||
|
||||
* SO_REUSEPORT. This option allows linear scaling server performance |
||||
on multi-CPU servers. |
||||
See https://www.nginx.com/blog/socket-sharding-nginx-release-1-9-1/ for details. |
||||
|
||||
* TCP_DEFER_ACCEPT. This option expects the server to read from each accepted |
||||
connection before writing to it. |
||||
|
||||
* TCP_FASTOPEN. See https://lwn.net/Articles/508865/ for details. |
||||
|
||||
|
||||
[Documentation](https://godoc.org/github.com/valyala/tcplisten). |
||||
|
||||
The package is derived from [go_reuseport](https://github.com/kavu/go_reuseport). |
@ -0,0 +1,23 @@ |
||||
package tcplisten |
||||
|
||||
import ( |
||||
"fmt" |
||||
"syscall" |
||||
) |
||||
|
||||
// newSocketCloexecOld creates a socket and then marks it close-on-exec and
// non-blocking with separate calls, for systems where the flags cannot be
// passed to socket(2) directly.
//
// It returns the new descriptor, or -1 and an error. If switching to
// non-blocking mode fails, the descriptor is closed before returning.
func newSocketCloexecOld(domain, typ, proto int) (int, error) {
	sock, sockErr := func() (int, error) {
		// Creation and the close-on-exec call happen under the fork read
		// lock, as the syscall.ForkLock documentation prescribes for
		// descriptors created without an atomic close-on-exec flag.
		syscall.ForkLock.RLock()
		defer syscall.ForkLock.RUnlock()

		s, e := syscall.Socket(domain, typ, proto)
		if e != nil {
			return -1, e
		}
		syscall.CloseOnExec(s)
		return s, nil
	}()
	if sockErr != nil {
		return -1, fmt.Errorf("cannot create listening socket: %s", sockErr)
	}

	if nbErr := syscall.SetNonblock(sock, true); nbErr != nil {
		syscall.Close(sock)
		return -1, fmt.Errorf("cannot make non-blocked listening socket: %s", nbErr)
	}
	return sock, nil
}
@ -0,0 +1,5 @@ |
||||
// +build darwin
|
||||
|
||||
package tcplisten |
||||
|
||||
var newSocketCloexec = newSocketCloexecOld |
@ -0,0 +1,21 @@ |
||||
// +build !darwin
|
||||
|
||||
package tcplisten |
||||
|
||||
import ( |
||||
"fmt" |
||||
"syscall" |
||||
) |
||||
|
||||
func newSocketCloexec(domain, typ, proto int) (int, error) { |
||||
fd, err := syscall.Socket(domain, typ|syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC, proto) |
||||
if err == nil { |
||||
return fd, nil |
||||
} |
||||
|
||||
if err == syscall.EPROTONOSUPPORT || err == syscall.EINVAL { |
||||
return newSocketCloexecOld(domain, typ, proto) |
||||
} |
||||
|
||||
return -1, fmt.Errorf("cannot create listening unblocked socket: %s", err) |
||||
} |
@ -0,0 +1,162 @@ |
||||
// +build linux darwin dragonfly freebsd netbsd openbsd rumprun
|
||||
|
||||
// Package tcplisten provides customizable TCP net.Listener with various
|
||||
// performance-related options:
|
||||
//
|
||||
// - SO_REUSEPORT. This option allows linear scaling server performance
|
||||
// on multi-CPU servers.
|
||||
// See https://www.nginx.com/blog/socket-sharding-nginx-release-1-9-1/ for details.
|
||||
//
|
||||
// - TCP_DEFER_ACCEPT. This option expects the server to read from each accepted
|
||||
// connection before writing to it.
|
||||
//
|
||||
// - TCP_FASTOPEN. See https://lwn.net/Articles/508865/ for details.
|
||||
//
|
||||
// The package is derived from https://github.com/kavu/go_reuseport .
|
||||
package tcplisten |
||||
|
||||
import ( |
||||
"errors" |
||||
"fmt" |
||||
"net" |
||||
"os" |
||||
"syscall" |
||||
) |
||||
|
||||
// Config selects the socket options applied to listeners created by
// NewListener.
type Config struct {
	// ReusePort turns on SO_REUSEPORT for the listening socket.
	ReusePort bool

	// DeferAccept turns on TCP_DEFER_ACCEPT for the listening socket.
	DeferAccept bool

	// FastOpen turns on TCP_FASTOPEN for the listening socket.
	FastOpen bool

	// Backlog caps the number of pending TCP connections the listener may
	// queue before they are handed to Accept. See man 2 listen for details.
	//
	// A value of zero or less selects the system-level backlog.
	Backlog int
}
||||
|
||||
// NewListener returns TCP listener with options set in the Config.
|
||||
//
|
||||
// The function may be called many times for creating distinct listeners
|
||||
// with the given config.
|
||||
//
|
||||
// Only tcp4 and tcp6 networks are supported.
|
||||
func (cfg *Config) NewListener(network, addr string) (net.Listener, error) { |
||||
sa, soType, err := getSockaddr(network, addr) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
fd, err := newSocketCloexec(soType, syscall.SOCK_STREAM, syscall.IPPROTO_TCP) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
if err = cfg.fdSetup(fd, sa, addr); err != nil { |
||||
syscall.Close(fd) |
||||
return nil, err |
||||
} |
||||
|
||||
name := fmt.Sprintf("reuseport.%d.%s.%s", os.Getpid(), network, addr) |
||||
file := os.NewFile(uintptr(fd), name) |
||||
ln, err := net.FileListener(file) |
||||
if err != nil { |
||||
file.Close() |
||||
return nil, err |
||||
} |
||||
|
||||
if err = file.Close(); err != nil { |
||||
ln.Close() |
||||
return nil, err |
||||
} |
||||
|
||||
return ln, nil |
||||
} |
||||
|
||||
func (cfg *Config) fdSetup(fd int, sa syscall.Sockaddr, addr string) error { |
||||
var err error |
||||
|
||||
if err = syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_REUSEADDR, 1); err != nil { |
||||
return fmt.Errorf("cannot enable SO_REUSEADDR: %s", err) |
||||
} |
||||
|
||||
// This should disable Nagle's algorithm in all accepted sockets by default.
|
||||
// Users may enable it with net.TCPConn.SetNoDelay(false).
|
||||
if err = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_NODELAY, 1); err != nil { |
||||
return fmt.Errorf("cannot disable Nagle's algorithm: %s", err) |
||||
} |
||||
|
||||
if cfg.ReusePort { |
||||
if err = syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, soReusePort, 1); err != nil { |
||||
return fmt.Errorf("cannot enable SO_REUSEPORT: %s", err) |
||||
} |
||||
} |
||||
|
||||
if cfg.DeferAccept { |
||||
if err = enableDeferAccept(fd); err != nil { |
||||
return err |
||||
} |
||||
} |
||||
|
||||
if cfg.FastOpen { |
||||
if err = enableFastOpen(fd); err != nil { |
||||
return err |
||||
} |
||||
} |
||||
|
||||
if err = syscall.Bind(fd, sa); err != nil { |
||||
return fmt.Errorf("cannot bind to %q: %s", addr, err) |
||||
} |
||||
|
||||
backlog := cfg.Backlog |
||||
if backlog <= 0 { |
||||
if backlog, err = soMaxConn(); err != nil { |
||||
return fmt.Errorf("cannot determine backlog to pass to listen(2): %s", err) |
||||
} |
||||
} |
||||
if err = syscall.Listen(fd, backlog); err != nil { |
||||
return fmt.Errorf("cannot listen on %q: %s", addr, err) |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// getSockaddr resolves addr for the given network and converts the result
// into the raw socket address and address family used by the socket syscalls.
//
// network must be "tcp4" or "tcp6"; anything else is rejected before addr is
// resolved. For a tcp6 address with a zone, the zone is looked up as an
// interface name and the interface index becomes the sockaddr's ZoneId.
//
// On failure the returned sockaddr is nil and soType is -1.
func getSockaddr(network, addr string) (sa syscall.Sockaddr, soType int, err error) {
	if network != "tcp4" && network != "tcp6" {
		return nil, -1, errors.New("only tcp4 and tcp6 network is supported")
	}

	resolved, err := net.ResolveTCPAddr(network, addr)
	if err != nil {
		return nil, -1, err
	}

	if network == "tcp4" {
		v4 := &syscall.SockaddrInet4{Port: resolved.Port}
		copy(v4.Addr[:], resolved.IP.To4())
		return v4, syscall.AF_INET, nil
	}

	if network == "tcp6" {
		v6 := &syscall.SockaddrInet6{Port: resolved.Port}
		copy(v6.Addr[:], resolved.IP.To16())
		if zone := resolved.Zone; zone != "" {
			iface, err := net.InterfaceByName(zone)
			if err != nil {
				return nil, -1, err
			}
			v6.ZoneId = uint32(iface.Index)
		}
		return v6, syscall.AF_INET6, nil
	}

	// Not reachable given the network check at the top; kept as a safety net.
	return nil, -1, errors.New("Unknown network type " + network)
}
@ -0,0 +1,24 @@ |
||||
// +build darwin dragonfly freebsd netbsd openbsd rumprun
|
||||
|
||||
package tcplisten |
||||
|
||||
import ( |
||||
"syscall" |
||||
) |
||||
|
||||
const soReusePort = syscall.SO_REUSEPORT |
||||
|
||||
// enableDeferAccept is a placeholder on these platforms: it leaves fd
// untouched and always reports success.
func enableDeferAccept(fd int) error {
	// TODO: implement SO_ACCEPTFILTER:dataready here
	return nil
}
||||
|
||||
// enableFastOpen is a placeholder on these platforms: it leaves fd untouched
// and always reports success.
func enableFastOpen(fd int) error {
	// TODO: implement TCP_FASTOPEN when it will be ready
	return nil
}
||||
|
||||
// soMaxConn reports the backlog to pass to listen(2). On these platforms it
// is always the compile-time syscall.SOMAXCONN constant and never fails.
func soMaxConn() (int, error) {
	// TODO: properly implement it
	const backlog = syscall.SOMAXCONN
	return backlog, nil
}
@ -0,0 +1,59 @@ |
||||
// +build linux
|
||||
|
||||
package tcplisten |
||||
|
||||
import ( |
||||
"fmt" |
||||
"io/ioutil" |
||||
"os" |
||||
"strconv" |
||||
"strings" |
||||
"syscall" |
||||
) |
||||
|
||||
// Socket option numbers spelled out as literals rather than taken from
// package syscall.
const (
	// soReusePort is the option number used to enable SO_REUSEPORT.
	soReusePort = 0x0F

	// tcpFastOpen is the option number used to enable TCP_FASTOPEN.
	tcpFastOpen = 0x17
)
||||
|
||||
// enableDeferAccept sets TCP_DEFER_ACCEPT to 1 on the socket fd and wraps any
// setsockopt failure in a descriptive error.
func enableDeferAccept(fd int) error {
	err := syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_DEFER_ACCEPT, 1)
	if err == nil {
		return nil
	}
	return fmt.Errorf("cannot enable TCP_DEFER_ACCEPT: %s", err)
}
||||
|
||||
func enableFastOpen(fd int) error { |
||||
if err := syscall.SetsockoptInt(fd, syscall.SOL_TCP, tcpFastOpen, fastOpenQlen); err != nil { |
||||
return fmt.Errorf("cannot enable TCP_FASTOPEN(qlen=%d): %s", fastOpenQlen, err) |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
const fastOpenQlen = 16 * 1024 |
||||
|
||||
// soMaxConn reports the backlog to pass to listen(2), read from
// soMaxConnFilePath.
//
// If the file does not exist, syscall.SOMAXCONN is returned instead. Any
// other read failure, unparsable content, or a non-positive value yields -1
// and an error. Values above 65535 are clamped to 65535.
func soMaxConn() (int, error) {
	data, err := ioutil.ReadFile(soMaxConnFilePath)
	if err != nil {
		// This error may trigger on travis build. Just use SOMAXCONN
		if os.IsNotExist(err) {
			return syscall.SOMAXCONN, nil
		}
		return -1, err
	}

	s := strings.TrimSpace(string(data))
	n, err := strconv.Atoi(s)
	if err != nil {
		return -1, fmt.Errorf("cannot parse somaxconn %q read from %s: %s", s, soMaxConnFilePath, err)
	}
	// A value that parses but is not positive has no parse error to report,
	// so it gets its own message instead of formatting a nil error.
	if n <= 0 {
		return -1, fmt.Errorf("cannot parse somaxconn %q read from %s: value must be positive", s, soMaxConnFilePath)
	}

	// Linux stores the backlog in a uint16.
	// Truncate number to avoid wrapping.
	// See https://github.com/golang/go/issues/5030 .
	const maxBacklog = 1<<16 - 1
	if n > maxBacklog {
		n = maxBacklog
	}
	return n, nil
}

// soMaxConnFilePath is the procfs file that soMaxConn reads the system
// backlog limit from.
const soMaxConnFilePath = "/proc/sys/net/core/somaxconn"
Loading…
Reference in new issue