Initial commit: Go 1.23 release state

This commit is contained in:
Vorapol Rinsatitnon
2024-09-21 23:49:08 +10:00
commit 17cd57a668
13231 changed files with 3114330 additions and 0 deletions

View File

@@ -0,0 +1,121 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import (
"internal/syscall/unix"
"sync"
"syscall"
)
var isKernelVersionGE53 = sync.OnceValue(func() bool {
major, minor := unix.KernelVersion()
// copy_file_range(2) is broken in various ways on kernels older than 5.3,
// see https://go.dev/issue/42400 and
// https://man7.org/linux/man-pages/man2/copy_file_range.2.html#VERSIONS
return major > 5 || (major == 5 && minor >= 3)
})
const maxCopyFileRangeRound = 1 << 30
// CopyFileRange copies at most remain bytes of data from src to dst, using
// the copy_file_range system call. dst and src must refer to regular files.
func CopyFileRange(dst, src *FD, remain int64) (written int64, handled bool, err error) {
if !isKernelVersionGE53() {
return 0, false, nil
}
for remain > 0 {
max := remain
if max > maxCopyFileRangeRound {
max = maxCopyFileRangeRound
}
n, err := copyFileRange(dst, src, int(max))
switch err {
case syscall.ENOSYS:
// copy_file_range(2) was introduced in Linux 4.5.
// Go supports Linux >= 2.6.33, so the system call
// may not be present.
//
// If we see ENOSYS, we have certainly not transferred
// any data, so we can tell the caller that we
// couldn't handle the transfer and let them fall
// back to more generic code.
return 0, false, nil
case syscall.EXDEV, syscall.EINVAL, syscall.EIO, syscall.EOPNOTSUPP, syscall.EPERM:
// Prior to Linux 5.3, it was not possible to
// copy_file_range across file systems. Similarly to
// the ENOSYS case above, if we see EXDEV, we have
// not transferred any data, and we can let the caller
// fall back to generic code.
//
// As for EINVAL, that is what we see if, for example,
// dst or src refer to a pipe rather than a regular
// file. This is another case where no data has been
// transferred, so we consider it unhandled.
//
// If src and dst are on CIFS, we can see EIO.
// See issue #42334.
//
// If the file is on NFS, we can see EOPNOTSUPP.
// See issue #40731.
//
// If the process is running inside a Docker container,
// we might see EPERM instead of ENOSYS. See issue
// #40893. Since EPERM might also be a legitimate error,
// don't mark copy_file_range(2) as unsupported.
return 0, false, nil
case nil:
if n == 0 {
// If we did not read any bytes at all,
// then this file may be in a file system
// where copy_file_range silently fails.
// https://lore.kernel.org/linux-fsdevel/20210126233840.GG4626@dread.disaster.area/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0
if written == 0 {
return 0, false, nil
}
// Otherwise src is at EOF, which means
// we are done.
return written, true, nil
}
remain -= n
written += n
default:
return written, true, err
}
}
return written, true, nil
}
// copyFileRange performs one round of copy_file_range(2).
func copyFileRange(dst, src *FD, max int) (written int64, err error) {
// The signature of copy_file_range(2) is:
//
// ssize_t copy_file_range(int fd_in, loff_t *off_in,
// int fd_out, loff_t *off_out,
// size_t len, unsigned int flags);
//
// Note that in the call to unix.CopyFileRange below, we use nil
// values for off_in and off_out. For the system call, this means
// "use and update the file offsets". That is why we must acquire
// locks for both file descriptors (and why this whole machinery is
// in the internal/poll package to begin with).
if err := dst.writeLock(); err != nil {
return 0, err
}
defer dst.writeUnlock()
if err := src.readLock(); err != nil {
return 0, err
}
defer src.readUnlock()
var n int
for {
n, err = unix.CopyFileRange(src.Sysfd, nil, dst.Sysfd, nil, max, 0)
if err != syscall.EINTR {
break
}
}
return int64(n), err
}

View File

@@ -0,0 +1,33 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix || wasip1
package poll
import "syscall"
// Do the interface allocations only once for common
// Errno values.
var (
errEAGAIN error = syscall.EAGAIN
errEINVAL error = syscall.EINVAL
errENOENT error = syscall.ENOENT
)
// errnoErr returns common boxed Errno values, to prevent
// allocations at runtime.
func errnoErr(e syscall.Errno) error {
switch e {
case 0:
return nil
case syscall.EAGAIN:
return errEAGAIN
case syscall.EINVAL:
return errEINVAL
case syscall.ENOENT:
return errENOENT
}
return e
}

View File

@@ -0,0 +1,31 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build windows
package poll
import "syscall"
// Do the interface allocations only once for common
// Errno values.
var (
errERROR_IO_PENDING error = syscall.Errno(syscall.ERROR_IO_PENDING)
)
// errnoErr returns common boxed Errno values, to prevent
// allocations at runtime.
func errnoErr(e syscall.Errno) error {
switch e {
case 0:
return nil
case syscall.ERROR_IO_PENDING:
return errERROR_IO_PENDING
}
// TODO: add more here, after collecting data on the common
// error values see on Windows. (perhaps when running
// all.bat?)
return e
}

View File

@@ -0,0 +1,31 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll_test
import (
"errors"
"internal/poll"
"os"
"syscall"
)
func badStateFile() (*os.File, error) {
if os.Getuid() != 0 {
return nil, errors.New("must be root")
}
// Using OpenFile for a device file is an easy way to make a
// file attached to the runtime-integrated network poller and
// configured in halfway.
return os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
}
func isBadStateFileError(err error) (string, bool) {
switch err {
case poll.ErrNotPollable, syscall.EBADFD:
return "", true
default:
return "not pollable or file in bad state error", false
}
}

View File

@@ -0,0 +1,21 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !linux
package poll_test
import (
"errors"
"os"
"runtime"
)
func badStateFile() (*os.File, error) {
return nil, errors.New("not supported on " + runtime.GOOS)
}
func isBadStateFileError(err error) (string, bool) {
return "", false
}

View File

@@ -0,0 +1,51 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll_test
import (
"fmt"
"io/fs"
"net"
"os"
"testing"
"time"
)
func TestReadError(t *testing.T) {
t.Run("ErrNotPollable", func(t *testing.T) {
f, err := badStateFile()
if err != nil {
t.Skip(err)
}
defer f.Close()
// Give scheduler a chance to have two separated
// goroutines: an event poller and an event waiter.
time.Sleep(100 * time.Millisecond)
var b [1]byte
_, err = f.Read(b[:])
if perr := parseReadError(err, isBadStateFileError); perr != nil {
t.Fatal(perr)
}
})
}
func parseReadError(nestedErr error, verify func(error) (string, bool)) error {
err := nestedErr
if nerr, ok := err.(*net.OpError); ok {
err = nerr.Err
}
if nerr, ok := err.(*fs.PathError); ok {
err = nerr.Err
}
if nerr, ok := err.(*os.SyscallError); ok {
err = nerr.Err
}
if s, ok := verify(err); !ok {
return fmt.Errorf("got %v; want %s", nestedErr, s)
}
return nil
}

View File

@@ -0,0 +1,22 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Export guts for testing on linux.
// Since testing imports os and os imports internal/poll,
// the internal/poll tests can not be in package poll.
package poll
var (
GetPipe = getPipe
PutPipe = putPipe
NewPipe = newPipe
DestroyPipe = destroyPipe
)
func GetPipeFds(p *SplicePipe) (int, int) {
return p.rfd, p.wfd
}
type SplicePipe = splicePipe

View File

@@ -0,0 +1,15 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix || windows
// Export guts for testing on posix.
// Since testing imports os and os imports internal/poll,
// the internal/poll tests can not be in package poll.
package poll
func (fd *FD) EOFError(n int, err error) error {
return fd.eofError(n, err)
}

View File

@@ -0,0 +1,35 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Export guts for testing.
// Since testing imports os and os imports internal/poll,
// the internal/poll tests can not be in package poll.
package poll
var Consume = consume
type XFDMutex struct {
fdMutex
}
func (mu *XFDMutex) Incref() bool {
return mu.incref()
}
func (mu *XFDMutex) IncrefAndClose() bool {
return mu.increfAndClose()
}
func (mu *XFDMutex) Decref() bool {
return mu.decref()
}
func (mu *XFDMutex) RWLock(read bool) bool {
return mu.rwlock(read)
}
func (mu *XFDMutex) RWUnlock(read bool) bool {
return mu.rwunlock(read)
}

View File

@@ -0,0 +1,17 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Export guts for testing on windows.
// Since testing imports os and os imports internal/poll,
// the internal/poll tests can not be in package poll.
package poll
var (
LogInitFD = &logInitFD
)
func (fd *FD) IsPartOfNetpoll() bool {
return fd.pd.runtimeCtx != 0
}

94
src/internal/poll/fd.go Normal file
View File

@@ -0,0 +1,94 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package poll supports non-blocking I/O on file descriptors with polling.
// This supports I/O operations that block only a goroutine, not a thread.
// This is used by the net and os packages.
// It uses a poller built into the runtime, with support from the
// runtime scheduler.
package poll
import (
"errors"
)
// errNetClosing is the type of the variable ErrNetClosing.
// This is used to implement the net.Error interface.
type errNetClosing struct{}
// Error returns the error message for ErrNetClosing.
// Keep this string consistent because of issue #4373:
// since historically programs have not been able to detect
// this error, they look for the string.
func (e errNetClosing) Error() string { return "use of closed network connection" }
func (e errNetClosing) Timeout() bool { return false }
func (e errNetClosing) Temporary() bool { return false }
// ErrNetClosing is returned when a network descriptor is used after
// it has been closed.
var ErrNetClosing = errNetClosing{}
// ErrFileClosing is returned when a file descriptor is used after it
// has been closed.
var ErrFileClosing = errors.New("use of closed file")
// ErrNoDeadline is returned when a request is made to set a deadline
// on a file type that does not use the poller.
var ErrNoDeadline = errors.New("file type does not support deadline")
// Return the appropriate closing error based on isFile.
func errClosing(isFile bool) error {
if isFile {
return ErrFileClosing
}
return ErrNetClosing
}
// ErrDeadlineExceeded is returned for an expired deadline.
// This is exported by the os package as os.ErrDeadlineExceeded.
var ErrDeadlineExceeded error = &DeadlineExceededError{}
// DeadlineExceededError is returned for an expired deadline.
type DeadlineExceededError struct{}
// Implement the net.Error interface.
// The string is "i/o timeout" because that is what was returned
// by earlier Go versions. Changing it may break programs that
// match on error strings.
func (e *DeadlineExceededError) Error() string { return "i/o timeout" }
func (e *DeadlineExceededError) Timeout() bool { return true }
func (e *DeadlineExceededError) Temporary() bool { return true }
// ErrNotPollable is returned when the file or socket is not suitable
// for event notification.
var ErrNotPollable = errors.New("not pollable")
// consume removes data from a slice of byte slices, for writev.
func consume(v *[][]byte, n int64) {
for len(*v) > 0 {
ln0 := int64(len((*v)[0]))
if ln0 > n {
(*v)[0] = (*v)[0][n:]
return
}
n -= ln0
(*v)[0] = nil
*v = (*v)[1:]
}
}
// TestHookDidWritev is a hook for testing writev.
var TestHookDidWritev = func(wrote int) {}
// String is an internal string definition for methods/functions
// that is not intended for use outside the standard libraries.
//
// Other packages in std that import internal/poll and have some
// exported APIs (now we've got some in net.rawConn) which are only used
// internally and are not intended to be used outside the standard libraries,
// Therefore, we make those APIs use internal types like poll.FD or poll.String
// in their function signatures to disable the usability of these APIs from
// external codebase.
type String string

View File

@@ -0,0 +1,32 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import (
"errors"
"internal/syscall/unix"
"syscall"
)
// Fsync invokes SYS_FCNTL with SYS_FULLFSYNC because
// on OS X, SYS_FSYNC doesn't fully flush contents to disk.
// See Issue #26650 as well as the man page for fsync on OS X.
func (fd *FD) Fsync() error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return ignoringEINTR(func() error {
_, err := unix.Fcntl(fd.Sysfd, syscall.F_FULLFSYNC, 0)
// There are scenarios such as SMB mounts where fcntl will fail
// with ENOTSUP. In those cases fallback to fsync.
// See #64215
if err != nil && errors.Is(err, syscall.ENOTSUP) {
err = syscall.Fsync(fd.Sysfd)
}
return err
})
}

View File

@@ -0,0 +1,20 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris || wasip1
package poll
import "syscall"
// Fsync wraps syscall.Fsync.
func (fd *FD) Fsync() error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return ignoringEINTR(func() error {
return syscall.Fsync(fd.Sysfd)
})
}

View File

@@ -0,0 +1,16 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import "syscall"
// Fsync wraps syscall.Fsync.
func (fd *FD) Fsync() error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return syscall.Fsync(fd.Sysfd)
}

View File

@@ -0,0 +1,92 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import (
"internal/itoa"
"runtime"
"sync"
"syscall"
)
// asyncIO implements asynchronous cancelable I/O.
// An asyncIO represents a single asynchronous Read or Write
// operation. The result is returned on the result channel.
// The undergoing I/O system call can either complete or be
// interrupted by a note.
type asyncIO struct {
res chan result
// mu guards the pid field.
mu sync.Mutex
// pid holds the process id of
// the process running the IO operation.
pid int
}
// result is the return value of a Read or Write operation.
type result struct {
n int
err error
}
// newAsyncIO returns a new asyncIO that performs an I/O
// operation by calling fn, which must do one and only one
// interruptible system call.
func newAsyncIO(fn func([]byte) (int, error), b []byte) *asyncIO {
aio := &asyncIO{
res: make(chan result, 0),
}
aio.mu.Lock()
go func() {
// Lock the current goroutine to its process
// and store the pid in io so that Cancel can
// interrupt it. We ignore the "hangup" signal,
// so the signal does not take down the entire
// Go runtime.
runtime.LockOSThread()
runtime_ignoreHangup()
aio.pid = syscall.Getpid()
aio.mu.Unlock()
n, err := fn(b)
aio.mu.Lock()
aio.pid = -1
runtime_unignoreHangup()
aio.mu.Unlock()
aio.res <- result{n, err}
}()
return aio
}
// Cancel interrupts the I/O operation, causing
// the Wait function to return.
func (aio *asyncIO) Cancel() {
aio.mu.Lock()
defer aio.mu.Unlock()
if aio.pid == -1 {
return
}
f, e := syscall.Open("/proc/"+itoa.Itoa(aio.pid)+"/note", syscall.O_WRONLY)
if e != nil {
return
}
syscall.Write(f, []byte("hangup"))
syscall.Close(f)
}
// Wait for the I/O operation to complete.
func (aio *asyncIO) Wait() (int, error) {
res := <-aio.res
return res.n, res.err
}
// The following functions, provided by the runtime, are used to
// ignore and unignore the "hangup" signal received by the process.
func runtime_ignoreHangup()
func runtime_unignoreHangup()

View File

@@ -0,0 +1,252 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import "sync/atomic"
// fdMutex is a specialized synchronization primitive that manages
// lifetime of an fd and serializes access to Read, Write and Close
// methods on FD.
type fdMutex struct {
state uint64
rsema uint32
wsema uint32
}
// fdMutex.state is organized as follows:
// 1 bit - whether FD is closed, if set all subsequent lock operations will fail.
// 1 bit - lock for read operations.
// 1 bit - lock for write operations.
// 20 bits - total number of references (read+write+misc).
// 20 bits - number of outstanding read waiters.
// 20 bits - number of outstanding write waiters.
const (
mutexClosed = 1 << 0
mutexRLock = 1 << 1
mutexWLock = 1 << 2
mutexRef = 1 << 3
mutexRefMask = (1<<20 - 1) << 3
mutexRWait = 1 << 23
mutexRMask = (1<<20 - 1) << 23
mutexWWait = 1 << 43
mutexWMask = (1<<20 - 1) << 43
)
const overflowMsg = "too many concurrent operations on a single file or socket (max 1048575)"
// Read operations must do rwlock(true)/rwunlock(true).
//
// Write operations must do rwlock(false)/rwunlock(false).
//
// Misc operations must do incref/decref.
// Misc operations include functions like setsockopt and setDeadline.
// They need to use incref/decref to ensure that they operate on the
// correct fd in presence of a concurrent close call (otherwise fd can
// be closed under their feet).
//
// Close operations must do increfAndClose/decref.
// incref adds a reference to mu.
// It reports whether mu is available for reading or writing.
func (mu *fdMutex) incref() bool {
for {
old := atomic.LoadUint64(&mu.state)
if old&mutexClosed != 0 {
return false
}
new := old + mutexRef
if new&mutexRefMask == 0 {
panic(overflowMsg)
}
if atomic.CompareAndSwapUint64(&mu.state, old, new) {
return true
}
}
}
// increfAndClose sets the state of mu to closed.
// It returns false if the file was already closed.
func (mu *fdMutex) increfAndClose() bool {
for {
old := atomic.LoadUint64(&mu.state)
if old&mutexClosed != 0 {
return false
}
// Mark as closed and acquire a reference.
new := (old | mutexClosed) + mutexRef
if new&mutexRefMask == 0 {
panic(overflowMsg)
}
// Remove all read and write waiters.
new &^= mutexRMask | mutexWMask
if atomic.CompareAndSwapUint64(&mu.state, old, new) {
// Wake all read and write waiters,
// they will observe closed flag after wakeup.
for old&mutexRMask != 0 {
old -= mutexRWait
runtime_Semrelease(&mu.rsema)
}
for old&mutexWMask != 0 {
old -= mutexWWait
runtime_Semrelease(&mu.wsema)
}
return true
}
}
}
// decref removes a reference from mu.
// It reports whether there is no remaining reference.
func (mu *fdMutex) decref() bool {
for {
old := atomic.LoadUint64(&mu.state)
if old&mutexRefMask == 0 {
panic("inconsistent poll.fdMutex")
}
new := old - mutexRef
if atomic.CompareAndSwapUint64(&mu.state, old, new) {
return new&(mutexClosed|mutexRefMask) == mutexClosed
}
}
}
// lock adds a reference to mu and locks mu.
// It reports whether mu is available for reading or writing.
func (mu *fdMutex) rwlock(read bool) bool {
var mutexBit, mutexWait, mutexMask uint64
var mutexSema *uint32
if read {
mutexBit = mutexRLock
mutexWait = mutexRWait
mutexMask = mutexRMask
mutexSema = &mu.rsema
} else {
mutexBit = mutexWLock
mutexWait = mutexWWait
mutexMask = mutexWMask
mutexSema = &mu.wsema
}
for {
old := atomic.LoadUint64(&mu.state)
if old&mutexClosed != 0 {
return false
}
var new uint64
if old&mutexBit == 0 {
// Lock is free, acquire it.
new = (old | mutexBit) + mutexRef
if new&mutexRefMask == 0 {
panic(overflowMsg)
}
} else {
// Wait for lock.
new = old + mutexWait
if new&mutexMask == 0 {
panic(overflowMsg)
}
}
if atomic.CompareAndSwapUint64(&mu.state, old, new) {
if old&mutexBit == 0 {
return true
}
runtime_Semacquire(mutexSema)
// The signaller has subtracted mutexWait.
}
}
}
// unlock removes a reference from mu and unlocks mu.
// It reports whether there is no remaining reference.
func (mu *fdMutex) rwunlock(read bool) bool {
var mutexBit, mutexWait, mutexMask uint64
var mutexSema *uint32
if read {
mutexBit = mutexRLock
mutexWait = mutexRWait
mutexMask = mutexRMask
mutexSema = &mu.rsema
} else {
mutexBit = mutexWLock
mutexWait = mutexWWait
mutexMask = mutexWMask
mutexSema = &mu.wsema
}
for {
old := atomic.LoadUint64(&mu.state)
if old&mutexBit == 0 || old&mutexRefMask == 0 {
panic("inconsistent poll.fdMutex")
}
// Drop lock, drop reference and wake read waiter if present.
new := (old &^ mutexBit) - mutexRef
if old&mutexMask != 0 {
new -= mutexWait
}
if atomic.CompareAndSwapUint64(&mu.state, old, new) {
if old&mutexMask != 0 {
runtime_Semrelease(mutexSema)
}
return new&(mutexClosed|mutexRefMask) == mutexClosed
}
}
}
// Implemented in runtime package.
func runtime_Semacquire(sema *uint32)
func runtime_Semrelease(sema *uint32)
// incref adds a reference to fd.
// It returns an error when fd cannot be used.
func (fd *FD) incref() error {
if !fd.fdmu.incref() {
return errClosing(fd.isFile)
}
return nil
}
// decref removes a reference from fd.
// It also closes fd when the state of fd is set to closed and there
// is no remaining reference.
func (fd *FD) decref() error {
if fd.fdmu.decref() {
return fd.destroy()
}
return nil
}
// readLock adds a reference to fd and locks fd for reading.
// It returns an error when fd cannot be used for reading.
func (fd *FD) readLock() error {
if !fd.fdmu.rwlock(true) {
return errClosing(fd.isFile)
}
return nil
}
// readUnlock removes a reference from fd and unlocks fd for reading.
// It also closes fd when the state of fd is set to closed and there
// is no remaining reference.
func (fd *FD) readUnlock() {
if fd.fdmu.rwunlock(true) {
fd.destroy()
}
}
// writeLock adds a reference to fd and locks fd for writing.
// It returns an error when fd cannot be used for writing.
func (fd *FD) writeLock() error {
if !fd.fdmu.rwlock(false) {
return errClosing(fd.isFile)
}
return nil
}
// writeUnlock removes a reference from fd and unlocks fd for writing.
// It also closes fd when the state of fd is set to closed and there
// is no remaining reference.
func (fd *FD) writeUnlock() {
if fd.fdmu.rwunlock(false) {
fd.destroy()
}
}

View File

@@ -0,0 +1,222 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll_test
import (
. "internal/poll"
"math/rand"
"runtime"
"strings"
"testing"
"time"
)
func TestMutexLock(t *testing.T) {
var mu XFDMutex
if !mu.Incref() {
t.Fatal("broken")
}
if mu.Decref() {
t.Fatal("broken")
}
if !mu.RWLock(true) {
t.Fatal("broken")
}
if mu.RWUnlock(true) {
t.Fatal("broken")
}
if !mu.RWLock(false) {
t.Fatal("broken")
}
if mu.RWUnlock(false) {
t.Fatal("broken")
}
}
func TestMutexClose(t *testing.T) {
var mu XFDMutex
if !mu.IncrefAndClose() {
t.Fatal("broken")
}
if mu.Incref() {
t.Fatal("broken")
}
if mu.RWLock(true) {
t.Fatal("broken")
}
if mu.RWLock(false) {
t.Fatal("broken")
}
if mu.IncrefAndClose() {
t.Fatal("broken")
}
}
func TestMutexCloseUnblock(t *testing.T) {
c := make(chan bool, 4)
var mu XFDMutex
mu.RWLock(true)
for i := 0; i < 4; i++ {
go func() {
if mu.RWLock(true) {
t.Error("broken")
return
}
c <- true
}()
}
// Concurrent goroutines must not be able to read lock the mutex.
time.Sleep(time.Millisecond)
select {
case <-c:
t.Fatal("broken")
default:
}
mu.IncrefAndClose() // Must unblock the readers.
for i := 0; i < 4; i++ {
select {
case <-c:
case <-time.After(10 * time.Second):
t.Fatal("broken")
}
}
if mu.Decref() {
t.Fatal("broken")
}
if !mu.RWUnlock(true) {
t.Fatal("broken")
}
}
func TestMutexPanic(t *testing.T) {
ensurePanics := func(f func()) {
defer func() {
if recover() == nil {
t.Fatal("does not panic")
}
}()
f()
}
var mu XFDMutex
ensurePanics(func() { mu.Decref() })
ensurePanics(func() { mu.RWUnlock(true) })
ensurePanics(func() { mu.RWUnlock(false) })
ensurePanics(func() { mu.Incref(); mu.Decref(); mu.Decref() })
ensurePanics(func() { mu.RWLock(true); mu.RWUnlock(true); mu.RWUnlock(true) })
ensurePanics(func() { mu.RWLock(false); mu.RWUnlock(false); mu.RWUnlock(false) })
// ensure that it's still not broken
mu.Incref()
mu.Decref()
mu.RWLock(true)
mu.RWUnlock(true)
mu.RWLock(false)
mu.RWUnlock(false)
}
func TestMutexOverflowPanic(t *testing.T) {
defer func() {
r := recover()
if r == nil {
t.Fatal("did not panic")
}
msg, ok := r.(string)
if !ok {
t.Fatalf("unexpected panic type %T", r)
}
if !strings.Contains(msg, "too many") || strings.Contains(msg, "inconsistent") {
t.Fatalf("wrong panic message %q", msg)
}
}()
var mu1 XFDMutex
for i := 0; i < 1<<21; i++ {
mu1.Incref()
}
}
func TestMutexStress(t *testing.T) {
P := 8
N := int(1e6)
if testing.Short() {
P = 4
N = 1e4
}
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P))
done := make(chan bool, P)
var mu XFDMutex
var readState [2]uint64
var writeState [2]uint64
for p := 0; p < P; p++ {
go func() {
defer func() {
done <- !t.Failed()
}()
r := rand.New(rand.NewSource(rand.Int63()))
for i := 0; i < N; i++ {
switch r.Intn(3) {
case 0:
if !mu.Incref() {
t.Error("broken")
return
}
if mu.Decref() {
t.Error("broken")
return
}
case 1:
if !mu.RWLock(true) {
t.Error("broken")
return
}
// Ensure that it provides mutual exclusion for readers.
if readState[0] != readState[1] {
t.Error("broken")
return
}
readState[0]++
readState[1]++
if mu.RWUnlock(true) {
t.Error("broken")
return
}
case 2:
if !mu.RWLock(false) {
t.Error("broken")
return
}
// Ensure that it provides mutual exclusion for writers.
if writeState[0] != writeState[1] {
t.Error("broken")
return
}
writeState[0]++
writeState[1]++
if mu.RWUnlock(false) {
t.Error("broken")
return
}
}
}
}()
}
for p := 0; p < P; p++ {
if !<-done {
t.FailNow()
}
}
if !mu.IncrefAndClose() {
t.Fatal("broken")
}
if !mu.Decref() {
t.Fatal("broken")
}
}

View File

@@ -0,0 +1,39 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import (
"syscall"
_ "unsafe" // for go:linkname
)
// OpenDir returns a pointer to a DIR structure suitable for
// ReadDir. In case of an error, the name of the failed
// syscall is returned along with a syscall.Errno.
func (fd *FD) OpenDir() (uintptr, string, error) {
// fdopendir(3) takes control of the file descriptor,
// so use a dup.
fd2, call, err := fd.Dup()
if err != nil {
return 0, call, err
}
var dir uintptr
for {
dir, err = fdopendir(fd2)
if err != syscall.EINTR {
break
}
}
if err != nil {
syscall.Close(fd2)
return 0, "fdopendir", err
}
return dir, "", nil
}
// Implemented in syscall/syscall_darwin.go.
//
//go:linkname fdopendir syscall.fdopendir
func fdopendir(fd int) (dir uintptr, err error)

View File

@@ -0,0 +1,245 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import (
"errors"
"internal/stringslite"
"io"
"sync"
"syscall"
"time"
)
type FD struct {
// Lock sysfd and serialize access to Read and Write methods.
fdmu fdMutex
Destroy func()
// deadlines
rmu sync.Mutex
wmu sync.Mutex
raio *asyncIO
waio *asyncIO
rtimer *time.Timer
wtimer *time.Timer
rtimedout bool // set true when read deadline has been reached
wtimedout bool // set true when write deadline has been reached
// Whether this is a normal file.
// On Plan 9 we do not use this package for ordinary files,
// so this is always false, but the field is present because
// shared code in fd_mutex.go checks it.
isFile bool
}
// We need this to close out a file descriptor when it is unlocked,
// but the real implementation has to live in the net package because
// it uses os.File's.
func (fd *FD) destroy() error {
if fd.Destroy != nil {
fd.Destroy()
}
return nil
}
// Close handles the locking for closing an FD. The real operation
// is in the net package.
func (fd *FD) Close() error {
if !fd.fdmu.increfAndClose() {
return errClosing(fd.isFile)
}
return nil
}
// Read implements io.Reader.
func (fd *FD) Read(fn func([]byte) (int, error), b []byte) (int, error) {
if err := fd.readLock(); err != nil {
return 0, err
}
defer fd.readUnlock()
if len(b) == 0 {
return 0, nil
}
fd.rmu.Lock()
if fd.rtimedout {
fd.rmu.Unlock()
return 0, ErrDeadlineExceeded
}
fd.raio = newAsyncIO(fn, b)
fd.rmu.Unlock()
n, err := fd.raio.Wait()
fd.raio = nil
if isHangup(err) {
err = io.EOF
}
if isInterrupted(err) {
err = ErrDeadlineExceeded
}
return n, err
}
// Write implements io.Writer.
func (fd *FD) Write(fn func([]byte) (int, error), b []byte) (int, error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
fd.wmu.Lock()
if fd.wtimedout {
fd.wmu.Unlock()
return 0, ErrDeadlineExceeded
}
fd.waio = newAsyncIO(fn, b)
fd.wmu.Unlock()
n, err := fd.waio.Wait()
fd.waio = nil
if isInterrupted(err) {
err = ErrDeadlineExceeded
}
return n, err
}
// SetDeadline sets the read and write deadlines associated with fd.
func (fd *FD) SetDeadline(t time.Time) error {
return setDeadlineImpl(fd, t, 'r'+'w')
}
// SetReadDeadline sets the read deadline associated with fd.
func (fd *FD) SetReadDeadline(t time.Time) error {
return setDeadlineImpl(fd, t, 'r')
}
// SetWriteDeadline sets the write deadline associated with fd.
func (fd *FD) SetWriteDeadline(t time.Time) error {
return setDeadlineImpl(fd, t, 'w')
}
func setDeadlineImpl(fd *FD, t time.Time, mode int) error {
d := t.Sub(time.Now())
if mode == 'r' || mode == 'r'+'w' {
fd.rmu.Lock()
defer fd.rmu.Unlock()
if fd.rtimer != nil {
fd.rtimer.Stop()
fd.rtimer = nil
}
fd.rtimedout = false
}
if mode == 'w' || mode == 'r'+'w' {
fd.wmu.Lock()
defer fd.wmu.Unlock()
if fd.wtimer != nil {
fd.wtimer.Stop()
fd.wtimer = nil
}
fd.wtimedout = false
}
if !t.IsZero() && d > 0 {
// Interrupt I/O operation once timer has expired
if mode == 'r' || mode == 'r'+'w' {
var timer *time.Timer
timer = time.AfterFunc(d, func() {
fd.rmu.Lock()
defer fd.rmu.Unlock()
if fd.rtimer != timer {
// deadline was changed
return
}
fd.rtimedout = true
if fd.raio != nil {
fd.raio.Cancel()
}
})
fd.rtimer = timer
}
if mode == 'w' || mode == 'r'+'w' {
var timer *time.Timer
timer = time.AfterFunc(d, func() {
fd.wmu.Lock()
defer fd.wmu.Unlock()
if fd.wtimer != timer {
// deadline was changed
return
}
fd.wtimedout = true
if fd.waio != nil {
fd.waio.Cancel()
}
})
fd.wtimer = timer
}
}
if !t.IsZero() && d <= 0 {
// Interrupt current I/O operation
if mode == 'r' || mode == 'r'+'w' {
fd.rtimedout = true
if fd.raio != nil {
fd.raio.Cancel()
}
}
if mode == 'w' || mode == 'r'+'w' {
fd.wtimedout = true
if fd.waio != nil {
fd.waio.Cancel()
}
}
}
return nil
}
// On Plan 9 only, expose the locking for the net code.
// ReadLock wraps FD.readLock.
func (fd *FD) ReadLock() error {
return fd.readLock()
}
// ReadUnlock wraps FD.readUnlock.
func (fd *FD) ReadUnlock() {
fd.readUnlock()
}
func isHangup(err error) bool {
return err != nil && stringslite.HasSuffix(err.Error(), "Hangup")
}
func isInterrupted(err error) bool {
return err != nil && stringslite.HasSuffix(err.Error(), "interrupted")
}
// IsPollDescriptor reports whether fd is the descriptor being used by the poller.
// This is only used for testing.
func IsPollDescriptor(fd uintptr) bool {
return false
}
// RawControl invokes the user-defined function f for a non-IO
// operation.
func (fd *FD) RawControl(f func(uintptr)) error {
return errors.New("not implemented")
}
// RawRead invokes the user-defined function f for a read operation.
func (fd *FD) RawRead(f func(uintptr) bool) error {
return errors.New("not implemented")
}
// RawWrite invokes the user-defined function f for a write operation.
func (fd *FD) RawWrite(f func(uintptr) bool) error {
return errors.New("not implemented")
}
func DupCloseOnExec(fd int) (int, string, error) {
nfd, err := syscall.Dup(int(fd), -1)
if err != nil {
return 0, "dup", err
}
// Plan9 has no syscall.CloseOnExec but
// its forkAndExecInChild closes all fds
// not related to the fork+exec.
return nfd, "", nil
}

View File

@@ -0,0 +1,99 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build js && wasm
package poll
import (
"syscall"
"time"
)
type pollDesc struct {
fd *FD
closing bool
}
func (pd *pollDesc) init(fd *FD) error { pd.fd = fd; return nil }
func (pd *pollDesc) close() {}
func (pd *pollDesc) evict() {
pd.closing = true
if pd.fd != nil {
syscall.StopIO(pd.fd.Sysfd)
}
}
func (pd *pollDesc) prepare(mode int, isFile bool) error {
if pd.closing {
return errClosing(isFile)
}
return nil
}
func (pd *pollDesc) prepareRead(isFile bool) error { return pd.prepare('r', isFile) }
func (pd *pollDesc) prepareWrite(isFile bool) error { return pd.prepare('w', isFile) }
func (pd *pollDesc) wait(mode int, isFile bool) error {
if pd.closing {
return errClosing(isFile)
}
if isFile { // TODO(neelance): js/wasm: Use callbacks from JS to block until the read/write finished.
return nil
}
return ErrDeadlineExceeded
}
func (pd *pollDesc) waitRead(isFile bool) error { return pd.wait('r', isFile) }
func (pd *pollDesc) waitWrite(isFile bool) error { return pd.wait('w', isFile) }
func (pd *pollDesc) waitCanceled(mode int) {}
func (pd *pollDesc) pollable() bool { return true }
// SetDeadline sets the read and write deadlines associated with fd.
func (fd *FD) SetDeadline(t time.Time) error {
return setDeadlineImpl(fd, t, 'r'+'w')
}
// SetReadDeadline sets the read deadline associated with fd.
func (fd *FD) SetReadDeadline(t time.Time) error {
return setDeadlineImpl(fd, t, 'r')
}
// SetWriteDeadline sets the write deadline associated with fd.
func (fd *FD) SetWriteDeadline(t time.Time) error {
return setDeadlineImpl(fd, t, 'w')
}
func setDeadlineImpl(fd *FD, t time.Time, mode int) error {
d := t.UnixNano()
if t.IsZero() {
d = 0
}
if err := fd.incref(); err != nil {
return err
}
switch mode {
case 'r':
syscall.SetReadDeadline(fd.Sysfd, d)
case 'w':
syscall.SetWriteDeadline(fd.Sysfd, d)
case 'r' + 'w':
syscall.SetReadDeadline(fd.Sysfd, d)
syscall.SetWriteDeadline(fd.Sysfd, d)
}
fd.decref()
return nil
}
// IsPollDescriptor reports whether fd is the descriptor being used by the poller.
// This is only used for testing.
func IsPollDescriptor(fd uintptr) bool {
return false
}

View File

@@ -0,0 +1,179 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix || windows || wasip1
package poll
import (
"errors"
"sync"
"syscall"
"time"
_ "unsafe" // for go:linkname
)
// runtimeNano returns the current value of the runtime clock in nanoseconds.
//
//go:linkname runtimeNano runtime.nanotime
func runtimeNano() int64
func runtime_pollServerInit()
func runtime_pollOpen(fd uintptr) (uintptr, int)
func runtime_pollClose(ctx uintptr)
func runtime_pollWait(ctx uintptr, mode int) int
func runtime_pollWaitCanceled(ctx uintptr, mode int)
func runtime_pollReset(ctx uintptr, mode int) int
func runtime_pollSetDeadline(ctx uintptr, d int64, mode int)
func runtime_pollUnblock(ctx uintptr)
func runtime_isPollServerDescriptor(fd uintptr) bool
type pollDesc struct {
runtimeCtx uintptr
}
var serverInit sync.Once
func (pd *pollDesc) init(fd *FD) error {
serverInit.Do(runtime_pollServerInit)
ctx, errno := runtime_pollOpen(uintptr(fd.Sysfd))
if errno != 0 {
return errnoErr(syscall.Errno(errno))
}
pd.runtimeCtx = ctx
return nil
}
func (pd *pollDesc) close() {
if pd.runtimeCtx == 0 {
return
}
runtime_pollClose(pd.runtimeCtx)
pd.runtimeCtx = 0
}
// Evict evicts fd from the pending list, unblocking any I/O running on fd.
func (pd *pollDesc) evict() {
if pd.runtimeCtx == 0 {
return
}
runtime_pollUnblock(pd.runtimeCtx)
}
func (pd *pollDesc) prepare(mode int, isFile bool) error {
if pd.runtimeCtx == 0 {
return nil
}
res := runtime_pollReset(pd.runtimeCtx, mode)
return convertErr(res, isFile)
}
func (pd *pollDesc) prepareRead(isFile bool) error {
return pd.prepare('r', isFile)
}
func (pd *pollDesc) prepareWrite(isFile bool) error {
return pd.prepare('w', isFile)
}
func (pd *pollDesc) wait(mode int, isFile bool) error {
if pd.runtimeCtx == 0 {
return errors.New("waiting for unsupported file type")
}
res := runtime_pollWait(pd.runtimeCtx, mode)
return convertErr(res, isFile)
}
func (pd *pollDesc) waitRead(isFile bool) error {
return pd.wait('r', isFile)
}
func (pd *pollDesc) waitWrite(isFile bool) error {
return pd.wait('w', isFile)
}
func (pd *pollDesc) waitCanceled(mode int) {
if pd.runtimeCtx == 0 {
return
}
runtime_pollWaitCanceled(pd.runtimeCtx, mode)
}
func (pd *pollDesc) pollable() bool {
return pd.runtimeCtx != 0
}
// Error values returned by runtime_pollReset and runtime_pollWait.
// These must match the values in runtime/netpoll.go.
const (
pollNoError = 0
pollErrClosing = 1
pollErrTimeout = 2
pollErrNotPollable = 3
)
func convertErr(res int, isFile bool) error {
switch res {
case pollNoError:
return nil
case pollErrClosing:
return errClosing(isFile)
case pollErrTimeout:
return ErrDeadlineExceeded
case pollErrNotPollable:
return ErrNotPollable
}
println("unreachable: ", res)
panic("unreachable")
}
// SetDeadline sets the read and write deadlines associated with fd.
func (fd *FD) SetDeadline(t time.Time) error {
return setDeadlineImpl(fd, t, 'r'+'w')
}
// SetReadDeadline sets the read deadline associated with fd.
func (fd *FD) SetReadDeadline(t time.Time) error {
return setDeadlineImpl(fd, t, 'r')
}
// SetWriteDeadline sets the write deadline associated with fd.
func (fd *FD) SetWriteDeadline(t time.Time) error {
return setDeadlineImpl(fd, t, 'w')
}
func setDeadlineImpl(fd *FD, t time.Time, mode int) error {
var d int64
if !t.IsZero() {
d = int64(time.Until(t))
if d == 0 {
d = -1 // don't confuse deadline right now with no deadline
}
}
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
if fd.pd.runtimeCtx == 0 {
return ErrNoDeadline
}
runtime_pollSetDeadline(fd.pd.runtimeCtx, d, mode)
return nil
}
// IsPollDescriptor reports whether fd is the descriptor being used by the poller.
// This is only used for testing.
//
// IsPollDescriptor should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/opencontainers/runc
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname IsPollDescriptor
func IsPollDescriptor(fd uintptr) bool {
return runtime_isPollServerDescriptor(fd)
}

View File

@@ -0,0 +1,79 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix || (js && wasm) || wasip1 || windows
package poll
import (
"io"
"syscall"
)
// eofError returns io.EOF when fd is available for reading end of
// file.
func (fd *FD) eofError(n int, err error) error {
if n == 0 && err == nil && fd.ZeroReadIsEOF {
return io.EOF
}
return err
}
// Shutdown wraps syscall.Shutdown.
func (fd *FD) Shutdown(how int) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return syscall.Shutdown(fd.Sysfd, how)
}
// Fchown wraps syscall.Fchown.
func (fd *FD) Fchown(uid, gid int) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return ignoringEINTR(func() error {
return syscall.Fchown(fd.Sysfd, uid, gid)
})
}
// Ftruncate wraps syscall.Ftruncate.
func (fd *FD) Ftruncate(size int64) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return ignoringEINTR(func() error {
return syscall.Ftruncate(fd.Sysfd, size)
})
}
// RawControl invokes the user-defined function f for a non-IO
// operation.
func (fd *FD) RawControl(f func(uintptr)) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
f(uintptr(fd.Sysfd))
return nil
}
// ignoringEINTR makes a function call and repeats it if it returns
// an EINTR error. This appears to be required even though we install all
// signal handlers with SA_RESTART: see #22838, #38033, #38836, #40846.
// Also #20400 and #36644 are issues in which a signal handler is
// installed without setting SA_RESTART. None of these are the common case,
// but there are enough of them that it seems that we can't avoid
// an EINTR loop.
func ignoringEINTR(fn func() error) error {
for {
err := fn()
if err != syscall.EINTR {
return err
}
}
}

View File

@@ -0,0 +1,43 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix || windows
package poll_test
import (
. "internal/poll"
"io"
"testing"
)
var eofErrorTests = []struct {
n int
err error
fd *FD
expected error
}{
{100, nil, &FD{ZeroReadIsEOF: true}, nil},
{100, io.EOF, &FD{ZeroReadIsEOF: true}, io.EOF},
{100, ErrNetClosing, &FD{ZeroReadIsEOF: true}, ErrNetClosing},
{0, nil, &FD{ZeroReadIsEOF: true}, io.EOF},
{0, io.EOF, &FD{ZeroReadIsEOF: true}, io.EOF},
{0, ErrNetClosing, &FD{ZeroReadIsEOF: true}, ErrNetClosing},
{100, nil, &FD{ZeroReadIsEOF: false}, nil},
{100, io.EOF, &FD{ZeroReadIsEOF: false}, io.EOF},
{100, ErrNetClosing, &FD{ZeroReadIsEOF: false}, ErrNetClosing},
{0, nil, &FD{ZeroReadIsEOF: false}, nil},
{0, io.EOF, &FD{ZeroReadIsEOF: false}, io.EOF},
{0, ErrNetClosing, &FD{ZeroReadIsEOF: false}, ErrNetClosing},
}
func TestEOFError(t *testing.T) {
for _, tt := range eofErrorTests {
actual := tt.fd.EOFError(tt.n, tt.err)
if actual != tt.expected {
t.Errorf("eofError(%v, %v, %v): expected %v, actual %v", tt.n, tt.err, tt.fd.ZeroReadIsEOF, tt.expected, actual)
}
}
}

View File

@@ -0,0 +1,750 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix || (js && wasm) || wasip1
package poll
import (
"internal/itoa"
"internal/syscall/unix"
"io"
"sync/atomic"
"syscall"
)
// FD is a file descriptor. The net and os packages use this type as a
// field of a larger type representing a network connection or OS file.
type FD struct {
// Lock sysfd and serialize access to Read and Write methods.
fdmu fdMutex
// System file descriptor. Immutable until Close.
Sysfd int
// Platform dependent state of the file descriptor.
SysFile
// I/O poller.
pd pollDesc
// Semaphore signaled when file is closed.
csema uint32
// Non-zero if this file has been set to blocking mode.
isBlocking uint32
// Whether this is a streaming descriptor, as opposed to a
// packet-based descriptor like a UDP socket. Immutable.
IsStream bool
// Whether a zero byte read indicates EOF. This is false for a
// message based socket connection.
ZeroReadIsEOF bool
// Whether this is a file rather than a network socket.
isFile bool
}
// Init initializes the FD. The Sysfd field should already be set.
// This can be called multiple times on a single FD.
// The net argument is a network name from the net package (e.g., "tcp"),
// or "file".
// Set pollable to true if fd should be managed by runtime netpoll.
func (fd *FD) Init(net string, pollable bool) error {
fd.SysFile.init()
// We don't actually care about the various network types.
if net == "file" {
fd.isFile = true
}
if !pollable {
fd.isBlocking = 1
return nil
}
err := fd.pd.init(fd)
if err != nil {
// If we could not initialize the runtime poller,
// assume we are using blocking mode.
fd.isBlocking = 1
}
return err
}
// Destroy closes the file descriptor. This is called when there are
// no remaining references.
func (fd *FD) destroy() error {
// Poller may want to unregister fd in readiness notification mechanism,
// so this must be executed before CloseFunc.
fd.pd.close()
err := fd.SysFile.destroy(fd.Sysfd)
fd.Sysfd = -1
runtime_Semrelease(&fd.csema)
return err
}
// Close closes the FD. The underlying file descriptor is closed by the
// destroy method when there are no remaining references.
func (fd *FD) Close() error {
if !fd.fdmu.increfAndClose() {
return errClosing(fd.isFile)
}
// Unblock any I/O. Once it all unblocks and returns,
// so that it cannot be referring to fd.sysfd anymore,
// the final decref will close fd.sysfd. This should happen
// fairly quickly, since all the I/O is non-blocking, and any
// attempts to block in the pollDesc will return errClosing(fd.isFile).
fd.pd.evict()
// The call to decref will call destroy if there are no other
// references.
err := fd.decref()
// Wait until the descriptor is closed. If this was the only
// reference, it is already closed. Only wait if the file has
// not been set to blocking mode, as otherwise any current I/O
// may be blocking, and that would block the Close.
// No need for an atomic read of isBlocking, increfAndClose means
// we have exclusive access to fd.
if fd.isBlocking == 0 {
runtime_Semacquire(&fd.csema)
}
return err
}
// SetBlocking puts the file into blocking mode.
func (fd *FD) SetBlocking() error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
// Atomic store so that concurrent calls to SetBlocking
// do not cause a race condition. isBlocking only ever goes
// from 0 to 1 so there is no real race here.
atomic.StoreUint32(&fd.isBlocking, 1)
return syscall.SetNonblock(fd.Sysfd, false)
}
// Darwin and FreeBSD can't read or write 2GB+ files at a time,
// even on 64-bit systems.
// The same is true of socket implementations on many systems.
// See golang.org/issue/7812 and golang.org/issue/16266.
// Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned.
const maxRW = 1 << 30
// Read implements io.Reader.
func (fd *FD) Read(p []byte) (int, error) {
if err := fd.readLock(); err != nil {
return 0, err
}
defer fd.readUnlock()
if len(p) == 0 {
// If the caller wanted a zero byte read, return immediately
// without trying (but after acquiring the readLock).
// Otherwise syscall.Read returns 0, nil which looks like
// io.EOF.
// TODO(bradfitz): make it wait for readability? (Issue 15735)
return 0, nil
}
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, err
}
if fd.IsStream && len(p) > maxRW {
p = p[:maxRW]
}
for {
n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p)
if err != nil {
n = 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, err
}
}
// Pread wraps the pread system call.
func (fd *FD) Pread(p []byte, off int64) (int, error) {
// Call incref, not readLock, because since pread specifies the
// offset it is independent from other reads.
// Similarly, using the poller doesn't make sense for pread.
if err := fd.incref(); err != nil {
return 0, err
}
if fd.IsStream && len(p) > maxRW {
p = p[:maxRW]
}
var (
n int
err error
)
for {
n, err = syscall.Pread(fd.Sysfd, p, off)
if err != syscall.EINTR {
break
}
}
if err != nil {
n = 0
}
fd.decref()
err = fd.eofError(n, err)
return n, err
}
// ReadFrom wraps the recvfrom network call.
func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) {
if err := fd.readLock(); err != nil {
return 0, nil, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, nil, err
}
for {
n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0)
if err != nil {
if err == syscall.EINTR {
continue
}
n = 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, sa, err
}
}
// ReadFromInet4 wraps the recvfrom network call for IPv4.
func (fd *FD) ReadFromInet4(p []byte, from *syscall.SockaddrInet4) (int, error) {
if err := fd.readLock(); err != nil {
return 0, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, err
}
for {
n, err := unix.RecvfromInet4(fd.Sysfd, p, 0, from)
if err != nil {
if err == syscall.EINTR {
continue
}
n = 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, err
}
}
// ReadFromInet6 wraps the recvfrom network call for IPv6.
func (fd *FD) ReadFromInet6(p []byte, from *syscall.SockaddrInet6) (int, error) {
if err := fd.readLock(); err != nil {
return 0, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, err
}
for {
n, err := unix.RecvfromInet6(fd.Sysfd, p, 0, from)
if err != nil {
if err == syscall.EINTR {
continue
}
n = 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, err
}
}
// ReadMsg wraps the recvmsg network call.
func (fd *FD) ReadMsg(p []byte, oob []byte, flags int) (int, int, int, syscall.Sockaddr, error) {
if err := fd.readLock(); err != nil {
return 0, 0, 0, nil, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, 0, 0, nil, err
}
for {
n, oobn, sysflags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, flags)
if err != nil {
if err == syscall.EINTR {
continue
}
// TODO(dfc) should n and oobn be set to 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, oobn, sysflags, sa, err
}
}
// ReadMsgInet4 is ReadMsg, but specialized for syscall.SockaddrInet4.
func (fd *FD) ReadMsgInet4(p []byte, oob []byte, flags int, sa4 *syscall.SockaddrInet4) (int, int, int, error) {
if err := fd.readLock(); err != nil {
return 0, 0, 0, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, 0, 0, err
}
for {
n, oobn, sysflags, err := unix.RecvmsgInet4(fd.Sysfd, p, oob, flags, sa4)
if err != nil {
if err == syscall.EINTR {
continue
}
// TODO(dfc) should n and oobn be set to 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, oobn, sysflags, err
}
}
// ReadMsgInet6 is ReadMsg, but specialized for syscall.SockaddrInet6.
func (fd *FD) ReadMsgInet6(p []byte, oob []byte, flags int, sa6 *syscall.SockaddrInet6) (int, int, int, error) {
if err := fd.readLock(); err != nil {
return 0, 0, 0, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, 0, 0, err
}
for {
n, oobn, sysflags, err := unix.RecvmsgInet6(fd.Sysfd, p, oob, flags, sa6)
if err != nil {
if err == syscall.EINTR {
continue
}
// TODO(dfc) should n and oobn be set to 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, oobn, sysflags, err
}
}
// Write implements io.Writer.
func (fd *FD) Write(p []byte) (int, error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, err
}
var nn int
for {
max := len(p)
if fd.IsStream && max-nn > maxRW {
max = nn + maxRW
}
n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max])
if n > 0 {
if n > max-nn {
// This can reportedly happen when using
// some VPN software. Issue #61060.
// If we don't check this we will panic
// with slice bounds out of range.
// Use a more informative panic.
panic("invalid return from write: got " + itoa.Itoa(n) + " from a write of " + itoa.Itoa(max-nn))
}
nn += n
}
if nn == len(p) {
return nn, err
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return nn, err
}
if n == 0 {
return nn, io.ErrUnexpectedEOF
}
}
}
// Pwrite wraps the pwrite system call.
func (fd *FD) Pwrite(p []byte, off int64) (int, error) {
// Call incref, not writeLock, because since pwrite specifies the
// offset it is independent from other writes.
// Similarly, using the poller doesn't make sense for pwrite.
if err := fd.incref(); err != nil {
return 0, err
}
defer fd.decref()
var nn int
for {
max := len(p)
if fd.IsStream && max-nn > maxRW {
max = nn + maxRW
}
n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn))
if err == syscall.EINTR {
continue
}
if n > 0 {
nn += n
}
if nn == len(p) {
return nn, err
}
if err != nil {
return nn, err
}
if n == 0 {
return nn, io.ErrUnexpectedEOF
}
}
}
// WriteToInet4 wraps the sendto network call for IPv4 addresses.
func (fd *FD) WriteToInet4(p []byte, sa *syscall.SockaddrInet4) (int, error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, err
}
for {
err := unix.SendtoInet4(fd.Sysfd, p, 0, sa)
if err == syscall.EINTR {
continue
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return 0, err
}
return len(p), nil
}
}
// WriteToInet6 wraps the sendto network call for IPv6 addresses.
func (fd *FD) WriteToInet6(p []byte, sa *syscall.SockaddrInet6) (int, error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, err
}
for {
err := unix.SendtoInet6(fd.Sysfd, p, 0, sa)
if err == syscall.EINTR {
continue
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return 0, err
}
return len(p), nil
}
}
// WriteTo wraps the sendto network call.
func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, err
}
for {
err := syscall.Sendto(fd.Sysfd, p, 0, sa)
if err == syscall.EINTR {
continue
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return 0, err
}
return len(p), nil
}
}
// WriteMsg wraps the sendmsg network call.
func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) {
if err := fd.writeLock(); err != nil {
return 0, 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, 0, err
}
for {
n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0)
if err == syscall.EINTR {
continue
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return n, 0, err
}
return n, len(oob), err
}
}
// WriteMsgInet4 is WriteMsg specialized for syscall.SockaddrInet4.
func (fd *FD) WriteMsgInet4(p []byte, oob []byte, sa *syscall.SockaddrInet4) (int, int, error) {
if err := fd.writeLock(); err != nil {
return 0, 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, 0, err
}
for {
n, err := unix.SendmsgNInet4(fd.Sysfd, p, oob, sa, 0)
if err == syscall.EINTR {
continue
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return n, 0, err
}
return n, len(oob), err
}
}
// WriteMsgInet6 is WriteMsg specialized for syscall.SockaddrInet6.
func (fd *FD) WriteMsgInet6(p []byte, oob []byte, sa *syscall.SockaddrInet6) (int, int, error) {
if err := fd.writeLock(); err != nil {
return 0, 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, 0, err
}
for {
n, err := unix.SendmsgNInet6(fd.Sysfd, p, oob, sa, 0)
if err == syscall.EINTR {
continue
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return n, 0, err
}
return n, len(oob), err
}
}
// Accept wraps the accept network call.
func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) {
if err := fd.readLock(); err != nil {
return -1, nil, "", err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return -1, nil, "", err
}
for {
s, rsa, errcall, err := accept(fd.Sysfd)
if err == nil {
return s, rsa, "", err
}
switch err {
case syscall.EINTR:
continue
case syscall.EAGAIN:
if fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
case syscall.ECONNABORTED:
// This means that a socket on the listen
// queue was closed before we Accept()ed it;
// it's a silly error, so try again.
continue
}
return -1, nil, errcall, err
}
}
// Fchmod wraps syscall.Fchmod.
func (fd *FD) Fchmod(mode uint32) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return ignoringEINTR(func() error {
return syscall.Fchmod(fd.Sysfd, mode)
})
}
// Fstat wraps syscall.Fstat
func (fd *FD) Fstat(s *syscall.Stat_t) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return ignoringEINTR(func() error {
return syscall.Fstat(fd.Sysfd, s)
})
}
// dupCloexecUnsupported indicates whether F_DUPFD_CLOEXEC is supported by the kernel.
var dupCloexecUnsupported atomic.Bool
// DupCloseOnExec dups fd and marks it close-on-exec.
func DupCloseOnExec(fd int) (int, string, error) {
if syscall.F_DUPFD_CLOEXEC != 0 && !dupCloexecUnsupported.Load() {
r0, err := unix.Fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0)
if err == nil {
return r0, "", nil
}
switch err {
case syscall.EINVAL, syscall.ENOSYS:
// Old kernel, or js/wasm (which returns
// ENOSYS). Fall back to the portable way from
// now on.
dupCloexecUnsupported.Store(true)
default:
return -1, "fcntl", err
}
}
return dupCloseOnExecOld(fd)
}
// Dup duplicates the file descriptor.
func (fd *FD) Dup() (int, string, error) {
if err := fd.incref(); err != nil {
return -1, "", err
}
defer fd.decref()
return DupCloseOnExec(fd.Sysfd)
}
// On Unix variants only, expose the IO event for the net code.
// WaitWrite waits until data can be written to fd.
func (fd *FD) WaitWrite() error {
return fd.pd.waitWrite(fd.isFile)
}
// WriteOnce is for testing only. It makes a single write call.
func (fd *FD) WriteOnce(p []byte) (int, error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
return ignoringEINTRIO(syscall.Write, fd.Sysfd, p)
}
// RawRead invokes the user-defined function f for a read operation.
func (fd *FD) RawRead(f func(uintptr) bool) error {
if err := fd.readLock(); err != nil {
return err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return err
}
for {
if f(uintptr(fd.Sysfd)) {
return nil
}
if err := fd.pd.waitRead(fd.isFile); err != nil {
return err
}
}
}
// RawWrite invokes the user-defined function f for a write operation.
func (fd *FD) RawWrite(f func(uintptr) bool) error {
if err := fd.writeLock(); err != nil {
return err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return err
}
for {
if f(uintptr(fd.Sysfd)) {
return nil
}
if err := fd.pd.waitWrite(fd.isFile); err != nil {
return err
}
}
}
// ignoringEINTRIO is like ignoringEINTR, but just for IO calls.
func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) {
for {
n, err := fn(fd, p)
if err != syscall.EINTR {
return n, err
}
}
}

View File

@@ -0,0 +1,79 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix || (js && wasm)
package poll
import "syscall"
type SysFile struct {
// Writev cache.
iovecs *[]syscall.Iovec
}
func (s *SysFile) init() {}
func (s *SysFile) destroy(fd int) error {
// We don't use ignoringEINTR here because POSIX does not define
// whether the descriptor is closed if close returns EINTR.
// If the descriptor is indeed closed, using a loop would race
// with some other goroutine opening a new descriptor.
// (The Linux kernel guarantees that it is closed on an EINTR error.)
return CloseFunc(fd)
}
// dupCloseOnExecOld is the traditional way to dup an fd and
// set its O_CLOEXEC bit, using two system calls.
func dupCloseOnExecOld(fd int) (int, string, error) {
syscall.ForkLock.RLock()
defer syscall.ForkLock.RUnlock()
newfd, err := syscall.Dup(fd)
if err != nil {
return -1, "dup", err
}
syscall.CloseOnExec(newfd)
return newfd, "", nil
}
// Fchdir wraps syscall.Fchdir.
func (fd *FD) Fchdir() error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return syscall.Fchdir(fd.Sysfd)
}
// ReadDirent wraps syscall.ReadDirent.
// We treat this like an ordinary system call rather than a call
// that tries to fill the buffer.
func (fd *FD) ReadDirent(buf []byte) (int, error) {
if err := fd.incref(); err != nil {
return 0, err
}
defer fd.decref()
for {
n, err := ignoringEINTRIO(syscall.ReadDirent, fd.Sysfd, buf)
if err != nil {
n = 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
// Do not call eofError; caller does not expect to see io.EOF.
return n, err
}
}
// Seek wraps syscall.Seek.
func (fd *FD) Seek(offset int64, whence int) (int64, error) {
if err := fd.incref(); err != nil {
return 0, err
}
defer fd.decref()
return syscall.Seek(fd.Sysfd, offset, whence)
}

View File

@@ -0,0 +1,236 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import (
"internal/byteorder"
"sync/atomic"
"syscall"
"unsafe"
)
type SysFile struct {
// RefCountPtr is a pointer to the reference count of Sysfd.
//
// WASI preview 1 lacks a dup(2) system call. When the os and net packages
// need to share a file/socket, instead of duplicating the underlying file
// descriptor, we instead provide a way to copy FD instances and manage the
// underlying file descriptor with reference counting.
RefCountPtr *int32
// RefCount is the reference count of Sysfd. When a copy of an FD is made,
// it points to the reference count of the original FD instance.
RefCount int32
// Cache for the file type, lazily initialized when Seek is called.
Filetype uint32
// If the file represents a directory, this field contains the current
// readdir position. It is reset to zero if the program calls Seek(0, 0).
Dircookie uint64
// Absolute path of the file, as returned by syscall.PathOpen;
// this is used by Fchdir to emulate setting the current directory
// to an open file descriptor.
Path string
// TODO(achille): it could be meaningful to move isFile from FD to a method
// on this struct type, and expose it as `IsFile() bool` which derives the
// result from the Filetype field. We would need to ensure that Filetype is
// always set instead of being lazily initialized.
}
func (s *SysFile) init() {
if s.RefCountPtr == nil {
s.RefCount = 1
s.RefCountPtr = &s.RefCount
}
}
func (s *SysFile) ref() SysFile {
atomic.AddInt32(s.RefCountPtr, +1)
return SysFile{RefCountPtr: s.RefCountPtr}
}
func (s *SysFile) destroy(fd int) error {
if s.RefCountPtr != nil && atomic.AddInt32(s.RefCountPtr, -1) > 0 {
return nil
}
// We don't use ignoringEINTR here because POSIX does not define
// whether the descriptor is closed if close returns EINTR.
// If the descriptor is indeed closed, using a loop would race
// with some other goroutine opening a new descriptor.
// (The Linux kernel guarantees that it is closed on an EINTR error.)
return CloseFunc(fd)
}
// Copy creates a copy of the FD.
//
// The FD instance points to the same underlying file descriptor. The file
// descriptor isn't closed until all FD instances that refer to it have been
// closed/destroyed.
func (fd *FD) Copy() FD {
return FD{
Sysfd: fd.Sysfd,
SysFile: fd.SysFile.ref(),
IsStream: fd.IsStream,
ZeroReadIsEOF: fd.ZeroReadIsEOF,
isBlocking: fd.isBlocking,
isFile: fd.isFile,
}
}
// dupCloseOnExecOld always errors on wasip1 because there is no mechanism to
// duplicate file descriptors.
func dupCloseOnExecOld(fd int) (int, string, error) {
return -1, "dup", syscall.ENOSYS
}
// Fchdir wraps syscall.Fchdir.
func (fd *FD) Fchdir() error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return syscall.Chdir(fd.Path)
}
// ReadDir wraps syscall.ReadDir.
// We treat this like an ordinary system call rather than a call
// that tries to fill the buffer.
func (fd *FD) ReadDir(buf []byte, cookie syscall.Dircookie) (int, error) {
if err := fd.incref(); err != nil {
return 0, err
}
defer fd.decref()
for {
n, err := syscall.ReadDir(fd.Sysfd, buf, cookie)
if err != nil {
n = 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
// Do not call eofError; caller does not expect to see io.EOF.
return n, err
}
}
func (fd *FD) ReadDirent(buf []byte) (int, error) {
n, err := fd.ReadDir(buf, fd.Dircookie)
if err != nil {
return 0, err
}
if n <= 0 {
return n, nil // EOF
}
// We assume that the caller of ReadDirent will consume the entire buffer
// up to the last full entry, so we scan through the buffer looking for the
// value of the last next cookie.
b := buf[:n]
for len(b) > 0 {
next, ok := direntNext(b)
if !ok {
break
}
size, ok := direntReclen(b)
if !ok {
break
}
if size > uint64(len(b)) {
break
}
fd.Dircookie = syscall.Dircookie(next)
b = b[size:]
}
// Trim a potentially incomplete trailing entry; this is necessary because
// the code in src/os/dir_unix.go does not deal well with partial values in
// calls to direntReclen, etc... and ends up causing an early EOF before all
// directory entries were consumed. ReadDirent is called with a large enough
// buffer (8 KiB) that at least one entry should always fit, tho this seems
// a bit brittle but cannot be addressed without a large change of the
// algorithm in the os.(*File).readdir method.
return n - len(b), nil
}
// Seek wraps syscall.Seek.
func (fd *FD) Seek(offset int64, whence int) (int64, error) {
if err := fd.incref(); err != nil {
return 0, err
}
defer fd.decref()
// syscall.Filetype is a uint8 but we store it as a uint32 in SysFile in
// order to use atomic load/store on the field, which is why we have to
// perform this type conversion.
fileType := syscall.Filetype(atomic.LoadUint32(&fd.Filetype))
if fileType == syscall.FILETYPE_UNKNOWN {
var stat syscall.Stat_t
if err := fd.Fstat(&stat); err != nil {
return 0, err
}
fileType = stat.Filetype
atomic.StoreUint32(&fd.Filetype, uint32(fileType))
}
if fileType == syscall.FILETYPE_DIRECTORY {
// If the file descriptor is opened on a directory, we reset the readdir
// cookie when seeking back to the beginning to allow reusing the file
// descriptor to scan the directory again.
if offset == 0 && whence == 0 {
fd.Dircookie = 0
return 0, nil
} else {
return 0, syscall.EINVAL
}
}
return syscall.Seek(fd.Sysfd, offset, whence)
}
// https://github.com/WebAssembly/WASI/blob/main/legacy/preview1/docs.md#-dirent-record
const sizeOfDirent = 24
func direntReclen(buf []byte) (uint64, bool) {
namelen, ok := direntNamlen(buf)
return sizeOfDirent + namelen, ok
}
func direntNamlen(buf []byte) (uint64, bool) {
return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Namlen), unsafe.Sizeof(syscall.Dirent{}.Namlen))
}
func direntNext(buf []byte) (uint64, bool) {
return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Next), unsafe.Sizeof(syscall.Dirent{}.Next))
}
// readInt returns the size-bytes unsigned integer in native byte order at offset off.
func readInt(b []byte, off, size uintptr) (u uint64, ok bool) {
if len(b) < int(off+size) {
return 0, false
}
return readIntLE(b[off:], size), true
}
func readIntLE(b []byte, size uintptr) uint64 {
switch size {
case 1:
return uint64(b[0])
case 2:
return uint64(byteorder.LeUint16(b))
case 4:
return uint64(byteorder.LeUint32(b))
case 8:
return uint64(byteorder.LeUint64(b))
default:
panic("internal/poll: readInt with unsupported size")
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,187 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll_test
import (
"errors"
"fmt"
"internal/poll"
"internal/syscall/windows"
"os"
"sync"
"syscall"
"testing"
"unsafe"
)
type loggedFD struct {
Net string
FD *poll.FD
Err error
}
var (
logMu sync.Mutex
loggedFDs map[syscall.Handle]*loggedFD
)
func logFD(net string, fd *poll.FD, err error) {
logMu.Lock()
defer logMu.Unlock()
loggedFDs[fd.Sysfd] = &loggedFD{
Net: net,
FD: fd,
Err: err,
}
}
func init() {
loggedFDs = make(map[syscall.Handle]*loggedFD)
*poll.LogInitFD = logFD
poll.InitWSA()
}
func findLoggedFD(h syscall.Handle) (lfd *loggedFD, found bool) {
logMu.Lock()
defer logMu.Unlock()
lfd, found = loggedFDs[h]
return lfd, found
}
// checkFileIsNotPartOfNetpoll verifies that f is not managed by netpoll.
// It returns error, if check fails.
func checkFileIsNotPartOfNetpoll(f *os.File) error {
lfd, found := findLoggedFD(syscall.Handle(f.Fd()))
if !found {
return fmt.Errorf("%v fd=%v: is not found in the log", f.Name(), f.Fd())
}
if lfd.FD.IsPartOfNetpoll() {
return fmt.Errorf("%v fd=%v: is part of netpoll, but should not be (logged: net=%v err=%v)", f.Name(), f.Fd(), lfd.Net, lfd.Err)
}
return nil
}
func TestFileFdsAreInitialised(t *testing.T) {
exe, err := os.Executable()
if err != nil {
t.Fatal(err)
}
f, err := os.Open(exe)
if err != nil {
t.Fatal(err)
}
defer f.Close()
err = checkFileIsNotPartOfNetpoll(f)
if err != nil {
t.Fatal(err)
}
}
func TestSerialFdsAreInitialised(t *testing.T) {
for _, name := range []string{"COM1", "COM2", "COM3", "COM4"} {
t.Run(name, func(t *testing.T) {
h, err := syscall.CreateFile(syscall.StringToUTF16Ptr(name),
syscall.GENERIC_READ|syscall.GENERIC_WRITE,
0,
nil,
syscall.OPEN_EXISTING,
syscall.FILE_ATTRIBUTE_NORMAL|syscall.FILE_FLAG_OVERLAPPED,
0)
if err != nil {
if errno, ok := err.(syscall.Errno); ok {
switch errno {
case syscall.ERROR_FILE_NOT_FOUND,
syscall.ERROR_ACCESS_DENIED:
t.Log("Skipping: ", err)
return
}
}
t.Fatal(err)
}
f := os.NewFile(uintptr(h), name)
defer f.Close()
err = checkFileIsNotPartOfNetpoll(f)
if err != nil {
t.Fatal(err)
}
})
}
}
func TestWSASocketConflict(t *testing.T) {
s, err := windows.WSASocket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_TCP, nil, 0, windows.WSA_FLAG_OVERLAPPED)
if err != nil {
t.Fatal(err)
}
fd := poll.FD{Sysfd: s, IsStream: true, ZeroReadIsEOF: true}
_, err = fd.Init("tcp", true)
if err != nil {
syscall.CloseHandle(s)
t.Fatal(err)
}
defer fd.Close()
const SIO_TCP_INFO = syscall.IOC_INOUT | syscall.IOC_VENDOR | 39
inbuf := uint32(0)
var outbuf _TCP_INFO_v0
cbbr := uint32(0)
var ov syscall.Overlapped
// Create an event so that we can efficiently wait for completion
// of a requested overlapped I/O operation.
ov.HEvent, _ = windows.CreateEvent(nil, 0, 0, nil)
if ov.HEvent == 0 {
t.Fatalf("could not create the event!")
}
defer syscall.CloseHandle(ov.HEvent)
if err = fd.WSAIoctl(
SIO_TCP_INFO,
(*byte)(unsafe.Pointer(&inbuf)),
uint32(unsafe.Sizeof(inbuf)),
(*byte)(unsafe.Pointer(&outbuf)),
uint32(unsafe.Sizeof(outbuf)),
&cbbr,
&ov,
0,
); err != nil && !errors.Is(err, syscall.ERROR_IO_PENDING) {
t.Fatalf("could not perform the WSAIoctl: %v", err)
}
if err != nil && errors.Is(err, syscall.ERROR_IO_PENDING) {
// It is possible that the overlapped I/O operation completed
// immediately so there is no need to wait for it to complete.
if res, err := syscall.WaitForSingleObject(ov.HEvent, syscall.INFINITE); res != 0 {
t.Fatalf("waiting for the completion of the overlapped IO failed: %v", err)
}
}
}
type _TCP_INFO_v0 struct {
State uint32
Mss uint32
ConnectionTimeMs uint64
TimestampsEnabled bool
RttUs uint32
MinRttUs uint32
BytesInFlight uint32
Cwnd uint32
SndWnd uint32
RcvWnd uint32
RcvBuf uint32
BytesOut uint64
BytesIn uint64
BytesReordered uint32
BytesRetrans uint32
FastRetrans uint32
DupAcksIn uint32
TimeoutEpisodes uint32
SynRetrans uint8
}

View File

@@ -0,0 +1,15 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build aix || darwin || (openbsd && !mips64) || solaris
package poll
import (
"syscall"
_ "unsafe" // for go:linkname
)
//go:linkname writev syscall.writev
func writev(fd int, iovecs []syscall.Iovec) (uintptr, error)

View File

@@ -0,0 +1,29 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build dragonfly || freebsd || linux || netbsd || (openbsd && mips64)
package poll
import (
"syscall"
"unsafe"
)
func writev(fd int, iovecs []syscall.Iovec) (uintptr, error) {
var (
r uintptr
e syscall.Errno
)
for {
r, _, e = syscall.Syscall(syscall.SYS_WRITEV, uintptr(fd), uintptr(unsafe.Pointer(&iovecs[0])), uintptr(len(iovecs)))
if e != syscall.EINTR {
break
}
}
if e != 0 {
return r, e
}
return r, nil
}

View File

@@ -0,0 +1,42 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
// Expose fdMutex for use by the os package on Plan 9.
// On Plan 9 we don't want to use async I/O for file operations,
// but we still want the locking semantics that fdMutex provides.
// FDMutex is an exported fdMutex, only for Plan 9.
type FDMutex struct {
fdmu fdMutex
}
func (fdmu *FDMutex) Incref() bool {
return fdmu.fdmu.incref()
}
func (fdmu *FDMutex) Decref() bool {
return fdmu.fdmu.decref()
}
func (fdmu *FDMutex) IncrefAndClose() bool {
return fdmu.fdmu.increfAndClose()
}
func (fdmu *FDMutex) ReadLock() bool {
return fdmu.fdmu.rwlock(true)
}
func (fdmu *FDMutex) ReadUnlock() bool {
return fdmu.fdmu.rwunlock(true)
}
func (fdmu *FDMutex) WriteLock() bool {
return fdmu.fdmu.rwlock(false)
}
func (fdmu *FDMutex) WriteUnlock() bool {
return fdmu.fdmu.rwunlock(false)
}

View File

@@ -0,0 +1,12 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build dragonfly || freebsd || linux || netbsd || openbsd || solaris
package poll
import "syscall"
// Accept4Func is used to hook the accept4 call.
var Accept4Func func(int, int) (int, syscall.Sockaddr, error) = syscall.Accept4

View File

@@ -0,0 +1,15 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix || (js && wasm) || wasip1
package poll
import "syscall"
// CloseFunc is used to hook the close call.
var CloseFunc func(int) error = syscall.Close
// AcceptFunc is used to hook the accept call.
var AcceptFunc func(int) (int, syscall.Sockaddr, error) = syscall.Accept

View File

@@ -0,0 +1,16 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import "syscall"
// CloseFunc is used to hook the close call.
var CloseFunc func(syscall.Handle) error = syscall.Closesocket
// AcceptFunc is used to hook the accept call.
var AcceptFunc func(syscall.Handle, syscall.Handle, *byte, uint32, uint32, uint32, *uint32, *syscall.Overlapped) error = syscall.AcceptEx
// ConnectExFunc is used to hook the ConnectEx call.
var ConnectExFunc func(syscall.Handle, syscall.Sockaddr, *byte, uint32, *uint32, *syscall.Overlapped) error = syscall.ConnectEx

View File

@@ -0,0 +1,14 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import (
"syscall"
"unsafe"
)
func newIovecWithBase(base *byte) syscall.Iovec {
return syscall.Iovec{Base: (*int8)(unsafe.Pointer(base))}
}

View File

@@ -0,0 +1,13 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd
package poll
import "syscall"
func newIovecWithBase(base *byte) syscall.Iovec {
return syscall.Iovec{Base: base}
}

View File

@@ -0,0 +1,61 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll_test
import (
"os"
"runtime"
"sync"
"testing"
"time"
)
func TestRead(t *testing.T) {
t.Run("SpecialFile", func(t *testing.T) {
var wg sync.WaitGroup
for _, p := range specialFiles() {
for i := 0; i < 4; i++ {
wg.Add(1)
go func(p string) {
defer wg.Done()
for i := 0; i < 100; i++ {
if _, err := os.ReadFile(p); err != nil {
t.Error(err)
return
}
time.Sleep(time.Nanosecond)
}
}(p)
}
}
wg.Wait()
})
}
func specialFiles() []string {
var ps []string
switch runtime.GOOS {
case "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd":
ps = []string{
"/dev/null",
}
case "linux":
ps = []string{
"/dev/null",
"/proc/stat",
"/sys/devices/system/cpu/online",
}
}
nps := ps[:0]
for _, p := range ps {
f, err := os.Open(p)
if err != nil {
continue
}
f.Close()
nps = append(nps, p)
}
return nps
}

View File

@@ -0,0 +1,7 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
var TestHookDidSendFile = func(dstFD *FD, src int, written int64, err error, handled bool) {}

View File

@@ -0,0 +1,59 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build darwin || dragonfly || freebsd
package poll
import "syscall"
// maxSendfileSize is the largest chunk size we ask the kernel to copy
// at a time.
const maxSendfileSize int = 4 << 20
// SendFile wraps the sendfile system call.
func SendFile(dstFD *FD, src int, pos, remain int64) (written int64, err error, handled bool) {
defer func() {
TestHookDidSendFile(dstFD, src, written, err, handled)
}()
if err := dstFD.writeLock(); err != nil {
return 0, err, false
}
defer dstFD.writeUnlock()
if err := dstFD.pd.prepareWrite(dstFD.isFile); err != nil {
return 0, err, false
}
dst := dstFD.Sysfd
for remain > 0 {
n := maxSendfileSize
if int64(n) > remain {
n = int(remain)
}
pos1 := pos
n, err = syscall.Sendfile(dst, src, &pos1, n)
if n > 0 {
pos += int64(n)
written += int64(n)
remain -= int64(n)
}
if err == syscall.EINTR {
continue
}
// This includes syscall.ENOSYS (no kernel
// support) and syscall.EINVAL (fd types which
// don't implement sendfile), and other errors.
// We should end the loop when there is no error
// returned from sendfile(2) or it is not a retryable error.
if err != syscall.EAGAIN {
break
}
if err = dstFD.pd.waitWrite(dstFD.isFile); err != nil {
break
}
}
handled = written != 0 || (err != syscall.ENOSYS && err != syscall.EINVAL)
return
}

View File

@@ -0,0 +1,55 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import "syscall"
// maxSendfileSize is the largest chunk size we ask the kernel to copy
// at a time.
const maxSendfileSize int = 4 << 20
// SendFile wraps the sendfile system call.
func SendFile(dstFD *FD, src int, remain int64) (written int64, err error, handled bool) {
defer func() {
TestHookDidSendFile(dstFD, src, written, err, handled)
}()
if err := dstFD.writeLock(); err != nil {
return 0, err, false
}
defer dstFD.writeUnlock()
if err := dstFD.pd.prepareWrite(dstFD.isFile); err != nil {
return 0, err, false
}
dst := dstFD.Sysfd
for remain > 0 {
n := maxSendfileSize
if int64(n) > remain {
n = int(remain)
}
n, err = syscall.Sendfile(dst, src, nil, n)
if n > 0 {
written += int64(n)
remain -= int64(n)
continue
} else if err != syscall.EAGAIN && err != syscall.EINTR {
// This includes syscall.ENOSYS (no kernel
// support) and syscall.EINVAL (fd types which
// don't implement sendfile), and other errors.
// We should end the loop when there is no error
// returned from sendfile(2) or it is not a retryable error.
break
}
if err == syscall.EINTR {
continue
}
if err = dstFD.pd.waitWrite(dstFD.isFile); err != nil {
break
}
}
handled = written != 0 || (err != syscall.ENOSYS && err != syscall.EINVAL)
return
}

View File

@@ -0,0 +1,66 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import "syscall"
// Not strictly needed, but very helpful for debugging, see issue #10221.
//
//go:cgo_import_dynamic _ _ "libsendfile.so"
//go:cgo_import_dynamic _ _ "libsocket.so"
// maxSendfileSize is the largest chunk size we ask the kernel to copy
// at a time.
const maxSendfileSize int = 4 << 20
// SendFile wraps the sendfile system call.
func SendFile(dstFD *FD, src int, pos, remain int64) (written int64, err error, handled bool) {
defer func() {
TestHookDidSendFile(dstFD, src, written, err, handled)
}()
if err := dstFD.writeLock(); err != nil {
return 0, err, false
}
defer dstFD.writeUnlock()
if err := dstFD.pd.prepareWrite(dstFD.isFile); err != nil {
return 0, err, false
}
dst := dstFD.Sysfd
for remain > 0 {
n := maxSendfileSize
if int64(n) > remain {
n = int(remain)
}
pos1 := pos
n, err = syscall.Sendfile(dst, src, &pos1, n)
if err == syscall.EAGAIN || err == syscall.EINTR {
// partial write may have occurred
n = int(pos1 - pos)
}
if n > 0 {
pos += int64(n)
written += int64(n)
remain -= int64(n)
continue
} else if err != syscall.EAGAIN && err != syscall.EINTR {
// This includes syscall.ENOSYS (no kernel
// support) and syscall.EINVAL (fd types which
// don't implement sendfile), and other errors.
// We should end the loop when there is no error
// returned from sendfile(2) or it is not a retryable error.
break
}
if err == syscall.EINTR {
continue
}
if err = dstFD.pd.waitWrite(dstFD.isFile); err != nil {
break
}
}
handled = written != 0 || (err != syscall.ENOSYS && err != syscall.EINVAL)
return
}

View File

@@ -0,0 +1,87 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import (
"io"
"syscall"
)
// SendFile wraps the TransmitFile call.
func SendFile(fd *FD, src syscall.Handle, n int64) (written int64, err error) {
defer func() {
TestHookDidSendFile(fd, 0, written, err, written > 0)
}()
if fd.kind == kindPipe {
// TransmitFile does not work with pipes
return 0, syscall.ESPIPE
}
if ft, _ := syscall.GetFileType(src); ft == syscall.FILE_TYPE_PIPE {
return 0, syscall.ESPIPE
}
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
o := &fd.wop
o.handle = src
// TODO(brainman): skip calling syscall.Seek if OS allows it
curpos, err := syscall.Seek(o.handle, 0, io.SeekCurrent)
if err != nil {
return 0, err
}
if n <= 0 { // We don't know the size of the file so infer it.
// Find the number of bytes offset from curpos until the end of the file.
n, err = syscall.Seek(o.handle, -curpos, io.SeekEnd)
if err != nil {
return
}
// Now seek back to the original position.
if _, err = syscall.Seek(o.handle, curpos, io.SeekStart); err != nil {
return
}
}
// TransmitFile can be invoked in one call with at most
// 2,147,483,646 bytes: the maximum value for a 32-bit integer minus 1.
// See https://docs.microsoft.com/en-us/windows/win32/api/mswsock/nf-mswsock-transmitfile
const maxChunkSizePerCall = int64(0x7fffffff - 1)
for n > 0 {
chunkSize := maxChunkSizePerCall
if chunkSize > n {
chunkSize = n
}
o.qty = uint32(chunkSize)
o.o.Offset = uint32(curpos)
o.o.OffsetHigh = uint32(curpos >> 32)
nw, err := execIO(o, func(o *operation) error {
return syscall.TransmitFile(o.fd.Sysfd, o.handle, o.qty, 0, &o.o, nil, syscall.TF_WRITE_BEHIND)
})
if err != nil {
return written, err
}
curpos += int64(nw)
// Some versions of Windows (Windows 10 1803) do not set
// file position after TransmitFile completes.
// So just use Seek to set file position.
if _, err = syscall.Seek(o.handle, curpos, io.SeekStart); err != nil {
return written, err
}
n -= int64(nw)
written += int64(nw)
}
return
}

View File

@@ -0,0 +1,22 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements accept for platforms that provide a fast path for
// setting SetNonblock and CloseOnExec.
//go:build dragonfly || freebsd || (linux && !arm) || netbsd || openbsd
package poll
import "syscall"
// Wrapper around the accept system call that marks the returned file
// descriptor as nonblocking and close-on-exec.
func accept(s int) (int, syscall.Sockaddr, string, error) {
ns, sa, err := Accept4Func(s, syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC)
if err != nil {
return -1, nil, "accept4", err
}
return ns, sa, "", nil
}

View File

@@ -0,0 +1,51 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements accept for platforms that provide a fast path for
// setting SetNonblock and CloseOnExec, but don't necessarily have accept4.
// This is the code we used for accept in Go 1.17 and earlier.
// On Linux the accept4 system call was introduced in 2.6.28 kernel,
// and our minimum requirement is 2.6.32, so we simplified the function.
// Unfortunately, on ARM accept4 wasn't added until 2.6.36, so for ARM
// only we continue using the older code.
//go:build linux && arm
package poll
import "syscall"
// Wrapper around the accept system call that marks the returned file
// descriptor as nonblocking and close-on-exec.
func accept(s int) (int, syscall.Sockaddr, string, error) {
ns, sa, err := Accept4Func(s, syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC)
switch err {
case nil:
return ns, sa, "", nil
default: // errors other than the ones listed
return -1, sa, "accept4", err
case syscall.ENOSYS: // syscall missing
case syscall.EINVAL: // some Linux use this instead of ENOSYS
case syscall.EACCES: // some Linux use this instead of ENOSYS
case syscall.EFAULT: // some Linux use this instead of ENOSYS
}
// See ../syscall/exec_unix.go for description of ForkLock.
// It is probably okay to hold the lock across syscall.Accept
// because we have put fd.sysfd into non-blocking mode.
// However, a call to the File method will put it back into
// blocking mode. We can't take that risk, so no use of ForkLock here.
ns, sa, err = AcceptFunc(s)
if err == nil {
syscall.CloseOnExec(ns)
}
if err != nil {
return -1, nil, "accept", err
}
if err = syscall.SetNonblock(ns, true); err != nil {
CloseFunc(ns)
return -1, nil, "setnonblock", err
}
return ns, sa, "", nil
}

View File

@@ -0,0 +1,47 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements accept for platforms that provide a fast path for
// setting SetNonblock and CloseOnExec, but don't necessarily have accept4.
// The accept4(3c) function was added to Oracle Solaris in the Solaris 11.4.0
// release. Thus, on releases prior to 11.4, we fall back to the combination
// of accept(3c) and fcntl(2).
package poll
import (
"internal/syscall/unix"
"syscall"
)
// Wrapper around the accept system call that marks the returned file
// descriptor as nonblocking and close-on-exec.
func accept(s int) (int, syscall.Sockaddr, string, error) {
// Perform a cheap test and try the fast path first.
if unix.SupportAccept4() {
ns, sa, err := Accept4Func(s, syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC)
if err != nil {
return -1, nil, "accept4", err
}
return ns, sa, "", nil
}
// See ../syscall/exec_unix.go for description of ForkLock.
// It is probably okay to hold the lock across syscall.Accept
// because we have put fd.sysfd into non-blocking mode.
// However, a call to the File method will put it back into
// blocking mode. We can't take that risk, so no use of ForkLock here.
ns, sa, err := AcceptFunc(s)
if err == nil {
syscall.CloseOnExec(ns)
}
if err != nil {
return -1, nil, "accept", err
}
if err = syscall.SetNonblock(ns, true); err != nil {
CloseFunc(ns)
return -1, nil, "setnonblock", err
}
return ns, sa, "", nil
}

View File

@@ -0,0 +1,45 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix || windows
package poll
import "syscall"
// SetsockoptInt wraps the setsockopt network call with an int argument.
func (fd *FD) SetsockoptInt(level, name, arg int) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return syscall.SetsockoptInt(fd.Sysfd, level, name, arg)
}
// SetsockoptInet4Addr wraps the setsockopt network call with an IPv4 address.
func (fd *FD) SetsockoptInet4Addr(level, name int, arg [4]byte) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return syscall.SetsockoptInet4Addr(fd.Sysfd, level, name, arg)
}
// SetsockoptLinger wraps the setsockopt network call with a Linger argument.
func (fd *FD) SetsockoptLinger(level, name int, l *syscall.Linger) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return syscall.SetsockoptLinger(fd.Sysfd, level, name, l)
}
// GetsockoptInt wraps the getsockopt network call with an int argument.
func (fd *FD) GetsockoptInt(level, name int) (int, error) {
if err := fd.incref(); err != nil {
return -1, err
}
defer fd.decref()
return syscall.GetsockoptInt(fd.Sysfd, level, name)
}

View File

@@ -0,0 +1,16 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import "syscall"
// SetsockoptIPMreqn wraps the setsockopt network call with an IPMreqn argument.
func (fd *FD) SetsockoptIPMreqn(level, name int, mreq *syscall.IPMreqn) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return syscall.SetsockoptIPMreqn(fd.Sysfd, level, name, mreq)
}

View File

@@ -0,0 +1,18 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix
package poll
import "syscall"
// SetsockoptByte wraps the setsockopt network call with a byte argument.
func (fd *FD) SetsockoptByte(level, name int, arg byte) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return syscall.SetsockoptByte(fd.Sysfd, level, name, arg)
}

View File

@@ -0,0 +1,16 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import "syscall"
// WSAIoctl wraps the WSAIoctl network call.
func (fd *FD) WSAIoctl(iocc uint32, inbuf *byte, cbif uint32, outbuf *byte, cbob uint32, cbbr *uint32, overlapped *syscall.Overlapped, completionRoutine uintptr) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return syscall.WSAIoctl(fd.Sysfd, iocc, inbuf, cbif, outbuf, cbob, cbbr, overlapped, completionRoutine)
}

View File

@@ -0,0 +1,27 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix || windows
package poll
import "syscall"
// SetsockoptIPMreq wraps the setsockopt network call with an IPMreq argument.
func (fd *FD) SetsockoptIPMreq(level, name int, mreq *syscall.IPMreq) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return syscall.SetsockoptIPMreq(fd.Sysfd, level, name, mreq)
}
// SetsockoptIPv6Mreq wraps the setsockopt network call with an IPv6Mreq argument.
func (fd *FD) SetsockoptIPv6Mreq(level, name int, mreq *syscall.IPv6Mreq) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return syscall.SetsockoptIPv6Mreq(fd.Sysfd, level, name, mreq)
}

View File

@@ -0,0 +1,245 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll
import (
"internal/syscall/unix"
"runtime"
"sync"
"syscall"
"unsafe"
)
const (
// spliceNonblock doesn't make the splice itself necessarily nonblocking
// (because the actual file descriptors that are spliced from/to may block
// unless they have the O_NONBLOCK flag set), but it makes the splice pipe
// operations nonblocking.
spliceNonblock = 0x2
// maxSpliceSize is the maximum amount of data Splice asks
// the kernel to move in a single call to splice(2).
// We use 1MB as Splice writes data through a pipe, and 1MB is the default maximum pipe buffer size,
// which is determined by /proc/sys/fs/pipe-max-size.
maxSpliceSize = 1 << 20
)
// Splice transfers at most remain bytes of data from src to dst, using the
// splice system call to minimize copies of data from and to userspace.
//
// Splice gets a pipe buffer from the pool or creates a new one if needed, to serve as a buffer for the data transfer.
// src and dst must both be stream-oriented sockets.
func Splice(dst, src *FD, remain int64) (written int64, handled bool, err error) {
p, err := getPipe()
if err != nil {
return 0, false, err
}
defer putPipe(p)
var inPipe, n int
for err == nil && remain > 0 {
max := maxSpliceSize
if int64(max) > remain {
max = int(remain)
}
inPipe, err = spliceDrain(p.wfd, src, max)
// The operation is considered handled if splice returns no
// error, or an error other than EINVAL. An EINVAL means the
// kernel does not support splice for the socket type of src.
// The failed syscall does not consume any data so it is safe
// to fall back to a generic copy.
//
// spliceDrain should never return EAGAIN, so if err != nil,
// Splice cannot continue.
//
// If inPipe == 0 && err == nil, src is at EOF, and the
// transfer is complete.
handled = handled || (err != syscall.EINVAL)
if err != nil || inPipe == 0 {
break
}
p.data += inPipe
n, err = splicePump(dst, p.rfd, inPipe)
if n > 0 {
written += int64(n)
remain -= int64(n)
p.data -= n
}
}
if err != nil {
return written, handled, err
}
return written, true, nil
}
// spliceDrain moves data from a socket to a pipe.
//
// Invariant: when entering spliceDrain, the pipe is empty. It is either in its
// initial state, or splicePump has emptied it previously.
//
// Given this, spliceDrain can reasonably assume that the pipe is ready for
// writing, so if splice returns EAGAIN, it must be because the socket is not
// ready for reading.
//
// If spliceDrain returns (0, nil), src is at EOF.
func spliceDrain(pipefd int, sock *FD, max int) (int, error) {
if err := sock.readLock(); err != nil {
return 0, err
}
defer sock.readUnlock()
if err := sock.pd.prepareRead(sock.isFile); err != nil {
return 0, err
}
for {
// In theory calling splice(2) with SPLICE_F_NONBLOCK could end up an infinite loop here,
// because it could return EAGAIN ceaselessly when the write end of the pipe is full,
// but this shouldn't be a concern here, since the pipe buffer must be sufficient for
// this data transmission on the basis of the workflow in Splice.
n, err := splice(pipefd, sock.Sysfd, max, spliceNonblock)
if err == syscall.EINTR {
continue
}
if err != syscall.EAGAIN {
return n, err
}
if sock.pd.pollable() {
if err := sock.pd.waitRead(sock.isFile); err != nil {
return n, err
}
}
}
}
// splicePump moves all the buffered data from a pipe to a socket.
//
// Invariant: when entering splicePump, there are exactly inPipe
// bytes of data in the pipe, from a previous call to spliceDrain.
//
// By analogy to the condition from spliceDrain, splicePump
// only needs to poll the socket for readiness, if splice returns
// EAGAIN.
//
// If splicePump cannot move all the data in a single call to
// splice(2), it loops over the buffered data until it has written
// all of it to the socket. This behavior is similar to the Write
// step of an io.Copy in userspace.
func splicePump(sock *FD, pipefd int, inPipe int) (int, error) {
if err := sock.writeLock(); err != nil {
return 0, err
}
defer sock.writeUnlock()
if err := sock.pd.prepareWrite(sock.isFile); err != nil {
return 0, err
}
written := 0
for inPipe > 0 {
// In theory calling splice(2) with SPLICE_F_NONBLOCK could end up an infinite loop here,
// because it could return EAGAIN ceaselessly when the read end of the pipe is empty,
// but this shouldn't be a concern here, since the pipe buffer must contain inPipe size of
// data on the basis of the workflow in Splice.
n, err := splice(sock.Sysfd, pipefd, inPipe, spliceNonblock)
if err == syscall.EINTR {
continue
}
// Here, the condition n == 0 && err == nil should never be
// observed, since Splice controls the write side of the pipe.
if n > 0 {
inPipe -= n
written += n
continue
}
if err != syscall.EAGAIN {
return written, err
}
if sock.pd.pollable() {
if err := sock.pd.waitWrite(sock.isFile); err != nil {
return written, err
}
}
}
return written, nil
}
// splice wraps the splice system call. Since the current implementation
// only uses splice on sockets and pipes, the offset arguments are unused.
// splice returns int instead of int64, because callers never ask it to
// move more data in a single call than can fit in an int32.
func splice(out int, in int, max int, flags int) (int, error) {
n, err := syscall.Splice(in, nil, out, nil, max, flags)
return int(n), err
}
type splicePipeFields struct {
rfd int
wfd int
data int
}
type splicePipe struct {
splicePipeFields
// We want to use a finalizer, so ensure that the size is
// large enough to not use the tiny allocator.
_ [24 - unsafe.Sizeof(splicePipeFields{})%24]byte
}
// splicePipePool caches pipes to avoid high-frequency construction and destruction of pipe buffers.
// The garbage collector will free all pipes in the sync.Pool periodically, thus we need to set up
// a finalizer for each pipe to close its file descriptors before the actual GC.
var splicePipePool = sync.Pool{New: newPoolPipe}
func newPoolPipe() any {
// Discard the error which occurred during the creation of pipe buffer,
// redirecting the data transmission to the conventional way utilizing read() + write() as a fallback.
p := newPipe()
if p == nil {
return nil
}
runtime.SetFinalizer(p, destroyPipe)
return p
}
// getPipe tries to acquire a pipe buffer from the pool or create a new one with newPipe() if it gets nil from the cache.
func getPipe() (*splicePipe, error) {
v := splicePipePool.Get()
if v == nil {
return nil, syscall.EINVAL
}
return v.(*splicePipe), nil
}
func putPipe(p *splicePipe) {
// If there is still data left in the pipe,
// then close and discard it instead of putting it back into the pool.
if p.data != 0 {
runtime.SetFinalizer(p, nil)
destroyPipe(p)
return
}
splicePipePool.Put(p)
}
// newPipe sets up a pipe for a splice operation.
func newPipe() *splicePipe {
var fds [2]int
if err := syscall.Pipe2(fds[:], syscall.O_CLOEXEC|syscall.O_NONBLOCK); err != nil {
return nil
}
// Splice will loop writing maxSpliceSize bytes from the source to the pipe,
// and then write those bytes from the pipe to the destination.
// Set the pipe buffer size to maxSpliceSize to optimize that.
// Ignore errors here, as a smaller buffer size will work,
// although it will require more system calls.
unix.Fcntl(fds[0], syscall.F_SETPIPE_SZ, maxSpliceSize)
return &splicePipe{splicePipeFields: splicePipeFields{rfd: fds[0], wfd: fds[1]}}
}
// destroyPipe destroys a pipe.
func destroyPipe(p *splicePipe) {
CloseFunc(p.rfd)
CloseFunc(p.wfd)
}

View File

@@ -0,0 +1,136 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll_test
import (
"internal/poll"
"runtime"
"sync"
"sync/atomic"
"testing"
"time"
)
var closeHook atomic.Value // func(fd int)
func init() {
closeFunc := poll.CloseFunc
poll.CloseFunc = func(fd int) (err error) {
if v := closeHook.Load(); v != nil {
if hook := v.(func(int)); hook != nil {
hook(fd)
}
}
return closeFunc(fd)
}
}
func TestSplicePipePool(t *testing.T) {
const N = 64
var (
p *poll.SplicePipe
ps []*poll.SplicePipe
allFDs []int
pendingFDs sync.Map // fd → struct{}{}
err error
)
closeHook.Store(func(fd int) { pendingFDs.Delete(fd) })
t.Cleanup(func() { closeHook.Store((func(int))(nil)) })
for i := 0; i < N; i++ {
p, err = poll.GetPipe()
if err != nil {
t.Skipf("failed to create pipe due to error(%v), skip this test", err)
}
_, pwfd := poll.GetPipeFds(p)
allFDs = append(allFDs, pwfd)
pendingFDs.Store(pwfd, struct{}{})
ps = append(ps, p)
}
for _, p = range ps {
poll.PutPipe(p)
}
ps = nil
p = nil
// Exploit the timeout of "go test" as a timer for the subsequent verification.
timeout := 5 * time.Minute
if deadline, ok := t.Deadline(); ok {
timeout = deadline.Sub(time.Now())
timeout -= timeout / 10 // Leave 10% headroom for cleanup.
}
expiredTime := time.NewTimer(timeout)
defer expiredTime.Stop()
// Trigger garbage collection repeatedly, waiting for all pipes in sync.Pool
// to either be deallocated and closed, or to time out.
for {
runtime.GC()
time.Sleep(10 * time.Millisecond)
// Detect whether all pipes are closed properly.
var leakedFDs []int
pendingFDs.Range(func(k, v any) bool {
leakedFDs = append(leakedFDs, k.(int))
return true
})
if len(leakedFDs) == 0 {
break
}
select {
case <-expiredTime.C:
t.Logf("all descriptors: %v", allFDs)
t.Fatalf("leaked descriptors: %v", leakedFDs)
default:
}
}
}
func BenchmarkSplicePipe(b *testing.B) {
b.Run("SplicePipeWithPool", func(b *testing.B) {
for i := 0; i < b.N; i++ {
p, err := poll.GetPipe()
if err != nil {
continue
}
poll.PutPipe(p)
}
})
b.Run("SplicePipeWithoutPool", func(b *testing.B) {
for i := 0; i < b.N; i++ {
p := poll.NewPipe()
if p == nil {
b.Skip("newPipe returned nil")
}
poll.DestroyPipe(p)
}
})
}
func BenchmarkSplicePipePoolParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
p, err := poll.GetPipe()
if err != nil {
continue
}
poll.PutPipe(p)
}
})
}
func BenchmarkSplicePipeNativeParallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
p := poll.NewPipe()
if p == nil {
b.Skip("newPipe returned nil")
}
poll.DestroyPipe(p)
}
})
}

View File

@@ -0,0 +1,36 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements accept for platforms that do not provide a fast path for
// setting SetNonblock and CloseOnExec.
//go:build aix || darwin || (js && wasm) || wasip1
package poll
import (
"syscall"
)
// Wrapper around the accept system call that marks the returned file
// descriptor as nonblocking and close-on-exec.
func accept(s int) (int, syscall.Sockaddr, string, error) {
// See ../syscall/exec_unix.go for description of ForkLock.
// It is probably okay to hold the lock across syscall.Accept
// because we have put fd.sysfd into non-blocking mode.
// However, a call to the File method will put it back into
// blocking mode. We can't take that risk, so no use of ForkLock here.
ns, sa, err := AcceptFunc(s)
if err == nil {
syscall.CloseOnExec(ns)
}
if err != nil {
return -1, nil, "accept", err
}
if err = syscall.SetNonblock(ns, true); err != nil {
CloseFunc(ns)
return -1, nil, "setnonblock", err
}
return ns, sa, "", nil
}

View File

@@ -0,0 +1,90 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix
package poll
import (
"io"
"runtime"
"syscall"
)
// Writev wraps the writev system call.
func (fd *FD) Writev(v *[][]byte) (int64, error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, err
}
var iovecs []syscall.Iovec
if fd.iovecs != nil {
iovecs = *fd.iovecs
}
// TODO: read from sysconf(_SC_IOV_MAX)? The Linux default is
// 1024 and this seems conservative enough for now. Darwin's
// UIO_MAXIOV also seems to be 1024.
maxVec := 1024
if runtime.GOOS == "aix" || runtime.GOOS == "solaris" {
// IOV_MAX is set to XOPEN_IOV_MAX on AIX and Solaris.
maxVec = 16
}
var n int64
var err error
for len(*v) > 0 {
iovecs = iovecs[:0]
for _, chunk := range *v {
if len(chunk) == 0 {
continue
}
iovecs = append(iovecs, newIovecWithBase(&chunk[0]))
if fd.IsStream && len(chunk) > 1<<30 {
iovecs[len(iovecs)-1].SetLen(1 << 30)
break // continue chunk on next writev
}
iovecs[len(iovecs)-1].SetLen(len(chunk))
if len(iovecs) == maxVec {
break
}
}
if len(iovecs) == 0 {
break
}
if fd.iovecs == nil {
fd.iovecs = new([]syscall.Iovec)
}
*fd.iovecs = iovecs // cache
var wrote uintptr
wrote, err = writev(fd.Sysfd, iovecs)
if wrote == ^uintptr(0) {
wrote = 0
}
TestHookDidWritev(int(wrote))
n += int64(wrote)
consume(v, int64(wrote))
clear(iovecs)
if err != nil {
if err == syscall.EINTR {
continue
}
if err == syscall.EAGAIN {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
break
}
if n == 0 {
err = io.ErrUnexpectedEOF
break
}
}
return n, err
}

View File

@@ -0,0 +1,62 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package poll_test
import (
"internal/poll"
"reflect"
"testing"
)
func TestConsume(t *testing.T) {
tests := []struct {
in [][]byte
consume int64
want [][]byte
}{
{
in: [][]byte{[]byte("foo"), []byte("bar")},
consume: 0,
want: [][]byte{[]byte("foo"), []byte("bar")},
},
{
in: [][]byte{[]byte("foo"), []byte("bar")},
consume: 2,
want: [][]byte{[]byte("o"), []byte("bar")},
},
{
in: [][]byte{[]byte("foo"), []byte("bar")},
consume: 3,
want: [][]byte{[]byte("bar")},
},
{
in: [][]byte{[]byte("foo"), []byte("bar")},
consume: 4,
want: [][]byte{[]byte("ar")},
},
{
in: [][]byte{nil, nil, nil, []byte("bar")},
consume: 1,
want: [][]byte{[]byte("ar")},
},
{
in: [][]byte{nil, nil, nil, []byte("foo")},
consume: 0,
want: [][]byte{[]byte("foo")},
},
{
in: [][]byte{nil, nil, nil},
consume: 0,
want: [][]byte{},
},
}
for i, tt := range tests {
in := tt.in
poll.Consume(&in, tt.consume)
if !reflect.DeepEqual(in, tt.want) {
t.Errorf("%d. after consume(%d) = %+v, want %+v", i, tt.consume, in, tt.want)
}
}
}