Initial commit: Go 1.23 release state

2024-09-21 23:49:08 +10:00
commit 17cd57a668
13231 changed files with 3114330 additions and 0 deletions
--- a/src/internal/poll/copy_file_range_linux.go
+++ b/src/internal/poll/copy_file_range_linux.go
@@ -0,0 +1,121 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"internal/syscall/unix"
+	"sync"
+	"syscall"
+)
+
+var isKernelVersionGE53 = sync.OnceValue(func() bool {
+	major, minor := unix.KernelVersion()
+	// copy_file_range(2) is broken in various ways on kernels older than 5.3,
+	// see https://go.dev/issue/42400 and
+	// https://man7.org/linux/man-pages/man2/copy_file_range.2.html#VERSIONS
+	return major > 5 || (major == 5 && minor >= 3)
+})
+
+const maxCopyFileRangeRound = 1 << 30
+
+// CopyFileRange copies at most remain bytes of data from src to dst, using
+// the copy_file_range system call. dst and src must refer to regular files.
+func CopyFileRange(dst, src *FD, remain int64) (written int64, handled bool, err error) {
+	if !isKernelVersionGE53() {
+		return 0, false, nil
+	}
+
+	for remain > 0 {
+		max := remain
+		if max > maxCopyFileRangeRound {
+			max = maxCopyFileRangeRound
+		}
+		n, err := copyFileRange(dst, src, int(max))
+		switch err {
+		case syscall.ENOSYS:
+			// copy_file_range(2) was introduced in Linux 4.5.
+			// Go supports Linux >= 2.6.33, so the system call
+			// may not be present.
+			//
+			// If we see ENOSYS, we have certainly not transferred
+			// any data, so we can tell the caller that we
+			// couldn't handle the transfer and let them fall
+			// back to more generic code.
+			return 0, false, nil
+		case syscall.EXDEV, syscall.EINVAL, syscall.EIO, syscall.EOPNOTSUPP, syscall.EPERM:
+			// Prior to Linux 5.3, it was not possible to
+			// copy_file_range across file systems. Similarly to
+			// the ENOSYS case above, if we see EXDEV, we have
+			// not transferred any data, and we can let the caller
+			// fall back to generic code.
+			//
+			// As for EINVAL, that is what we see if, for example,
+			// dst or src refer to a pipe rather than a regular
+			// file. This is another case where no data has been
+			// transferred, so we consider it unhandled.
+			//
+			// If src and dst are on CIFS, we can see EIO.
+			// See issue #42334.
+			//
+			// If the file is on NFS, we can see EOPNOTSUPP.
+			// See issue #40731.
+			//
+			// If the process is running inside a Docker container,
+			// we might see EPERM instead of ENOSYS. See issue
+			// #40893. Since EPERM might also be a legitimate error,
+			// don't mark copy_file_range(2) as unsupported.
+			return 0, false, nil
+		case nil:
+			if n == 0 {
+				// If we did not read any bytes at all,
+				// then this file may be in a file system
+				// where copy_file_range silently fails.
+				// https://lore.kernel.org/linux-fsdevel/20210126233840.GG4626@dread.disaster.area/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0
+				if written == 0 {
+					return 0, false, nil
+				}
+				// Otherwise src is at EOF, which means
+				// we are done.
+				return written, true, nil
+			}
+			remain -= n
+			written += n
+		default:
+			return written, true, err
+		}
+	}
+	return written, true, nil
+}
+
+// copyFileRange performs one round of copy_file_range(2).
+func copyFileRange(dst, src *FD, max int) (written int64, err error) {
+	// The signature of copy_file_range(2) is:
+	//
+	// ssize_t copy_file_range(int fd_in, loff_t *off_in,
+	//                         int fd_out, loff_t *off_out,
+	//                         size_t len, unsigned int flags);
+	//
+	// Note that in the call to unix.CopyFileRange below, we use nil
+	// values for off_in and off_out. For the system call, this means
+	// "use and update the file offsets". That is why we must acquire
+	// locks for both file descriptors (and why this whole machinery is
+	// in the internal/poll package to begin with).
+	if err := dst.writeLock(); err != nil {
+		return 0, err
+	}
+	defer dst.writeUnlock()
+	if err := src.readLock(); err != nil {
+		return 0, err
+	}
+	defer src.readUnlock()
+	var n int
+	for {
+		n, err = unix.CopyFileRange(src.Sysfd, nil, dst.Sysfd, nil, max, 0)
+		if err != syscall.EINTR {
+			break
+		}
+	}
+	return int64(n), err
+}
--- a/src/internal/poll/errno_unix.go
+++ b/src/internal/poll/errno_unix.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix || wasip1
+
+package poll
+
+import "syscall"
+
+// Do the interface allocations only once for common
+// Errno values.
+var (
+	errEAGAIN error = syscall.EAGAIN
+	errEINVAL error = syscall.EINVAL
+	errENOENT error = syscall.ENOENT
+)
+
+// errnoErr returns common boxed Errno values, to prevent
+// allocations at runtime.
+func errnoErr(e syscall.Errno) error {
+	switch e {
+	case 0:
+		return nil
+	case syscall.EAGAIN:
+		return errEAGAIN
+	case syscall.EINVAL:
+		return errEINVAL
+	case syscall.ENOENT:
+		return errENOENT
+	}
+	return e
+}
--- a/src/internal/poll/errno_windows.go
+++ b/src/internal/poll/errno_windows.go
@@ -0,0 +1,31 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build windows
+
+package poll
+
+import "syscall"
+
+// Do the interface allocations only once for common
+// Errno values.
+
+var (
+	errERROR_IO_PENDING error = syscall.Errno(syscall.ERROR_IO_PENDING)
+)
+
+// errnoErr returns common boxed Errno values, to prevent
+// allocations at runtime.
+func errnoErr(e syscall.Errno) error {
+	switch e {
+	case 0:
+		return nil
+	case syscall.ERROR_IO_PENDING:
+		return errERROR_IO_PENDING
+	}
+	// TODO: add more here, after collecting data on the common
+	// error values see on Windows. (perhaps when running
+	// all.bat?)
+	return e
+}
--- a/src/internal/poll/error_linux_test.go
+++ b/src/internal/poll/error_linux_test.go
@@ -0,0 +1,31 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	"errors"
+	"internal/poll"
+	"os"
+	"syscall"
+)
+
+func badStateFile() (*os.File, error) {
+	if os.Getuid() != 0 {
+		return nil, errors.New("must be root")
+	}
+	// Using OpenFile for a device file is an easy way to make a
+	// file attached to the runtime-integrated network poller and
+	// configured in halfway.
+	return os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
+}
+
+func isBadStateFileError(err error) (string, bool) {
+	switch err {
+	case poll.ErrNotPollable, syscall.EBADFD:
+		return "", true
+	default:
+		return "not pollable or file in bad state error", false
+	}
+}
--- a/src/internal/poll/error_stub_test.go
+++ b/src/internal/poll/error_stub_test.go
@@ -0,0 +1,21 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !linux
+
+package poll_test
+
+import (
+	"errors"
+	"os"
+	"runtime"
+)
+
+func badStateFile() (*os.File, error) {
+	return nil, errors.New("not supported on " + runtime.GOOS)
+}
+
+func isBadStateFileError(err error) (string, bool) {
+	return "", false
+}
--- a/src/internal/poll/error_test.go
+++ b/src/internal/poll/error_test.go
@@ -0,0 +1,51 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	"fmt"
+	"io/fs"
+	"net"
+	"os"
+	"testing"
+	"time"
+)
+
+func TestReadError(t *testing.T) {
+	t.Run("ErrNotPollable", func(t *testing.T) {
+		f, err := badStateFile()
+		if err != nil {
+			t.Skip(err)
+		}
+		defer f.Close()
+
+		// Give scheduler a chance to have two separated
+		// goroutines: an event poller and an event waiter.
+		time.Sleep(100 * time.Millisecond)
+
+		var b [1]byte
+		_, err = f.Read(b[:])
+		if perr := parseReadError(err, isBadStateFileError); perr != nil {
+			t.Fatal(perr)
+		}
+	})
+}
+
+func parseReadError(nestedErr error, verify func(error) (string, bool)) error {
+	err := nestedErr
+	if nerr, ok := err.(*net.OpError); ok {
+		err = nerr.Err
+	}
+	if nerr, ok := err.(*fs.PathError); ok {
+		err = nerr.Err
+	}
+	if nerr, ok := err.(*os.SyscallError); ok {
+		err = nerr.Err
+	}
+	if s, ok := verify(err); !ok {
+		return fmt.Errorf("got %v; want %s", nestedErr, s)
+	}
+	return nil
+}
--- a/src/internal/poll/export_linux_test.go
+++ b/src/internal/poll/export_linux_test.go
@@ -0,0 +1,22 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Export guts for testing on linux.
+// Since testing imports os and os imports internal/poll,
+// the internal/poll tests can not be in package poll.
+
+package poll
+
+var (
+	GetPipe     = getPipe
+	PutPipe     = putPipe
+	NewPipe     = newPipe
+	DestroyPipe = destroyPipe
+)
+
+func GetPipeFds(p *SplicePipe) (int, int) {
+	return p.rfd, p.wfd
+}
+
+type SplicePipe = splicePipe
--- a/src/internal/poll/export_posix_test.go
+++ b/src/internal/poll/export_posix_test.go
@@ -0,0 +1,15 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix || windows
+
+// Export guts for testing on posix.
+// Since testing imports os and os imports internal/poll,
+// the internal/poll tests can not be in package poll.
+
+package poll
+
+func (fd *FD) EOFError(n int, err error) error {
+	return fd.eofError(n, err)
+}
--- a/src/internal/poll/export_test.go
+++ b/src/internal/poll/export_test.go
@@ -0,0 +1,35 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Export guts for testing.
+// Since testing imports os and os imports internal/poll,
+// the internal/poll tests can not be in package poll.
+
+package poll
+
+var Consume = consume
+
+type XFDMutex struct {
+	fdMutex
+}
+
+func (mu *XFDMutex) Incref() bool {
+	return mu.incref()
+}
+
+func (mu *XFDMutex) IncrefAndClose() bool {
+	return mu.increfAndClose()
+}
+
+func (mu *XFDMutex) Decref() bool {
+	return mu.decref()
+}
+
+func (mu *XFDMutex) RWLock(read bool) bool {
+	return mu.rwlock(read)
+}
+
+func (mu *XFDMutex) RWUnlock(read bool) bool {
+	return mu.rwunlock(read)
+}
--- a/src/internal/poll/export_windows_test.go
+++ b/src/internal/poll/export_windows_test.go
@@ -0,0 +1,17 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Export guts for testing on windows.
+// Since testing imports os and os imports internal/poll,
+// the internal/poll tests can not be in package poll.
+
+package poll
+
+var (
+	LogInitFD = &logInitFD
+)
+
+func (fd *FD) IsPartOfNetpoll() bool {
+	return fd.pd.runtimeCtx != 0
+}
--- a/src/internal/poll/fd.go
+++ b/src/internal/poll/fd.go
@@ -0,0 +1,94 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package poll supports non-blocking I/O on file descriptors with polling.
+// This supports I/O operations that block only a goroutine, not a thread.
+// This is used by the net and os packages.
+// It uses a poller built into the runtime, with support from the
+// runtime scheduler.
+package poll
+
+import (
+	"errors"
+)
+
+// errNetClosing is the type of the variable ErrNetClosing.
+// This is used to implement the net.Error interface.
+type errNetClosing struct{}
+
+// Error returns the error message for ErrNetClosing.
+// Keep this string consistent because of issue #4373:
+// since historically programs have not been able to detect
+// this error, they look for the string.
+func (e errNetClosing) Error() string { return "use of closed network connection" }
+
+func (e errNetClosing) Timeout() bool   { return false }
+func (e errNetClosing) Temporary() bool { return false }
+
+// ErrNetClosing is returned when a network descriptor is used after
+// it has been closed.
+var ErrNetClosing = errNetClosing{}
+
+// ErrFileClosing is returned when a file descriptor is used after it
+// has been closed.
+var ErrFileClosing = errors.New("use of closed file")
+
+// ErrNoDeadline is returned when a request is made to set a deadline
+// on a file type that does not use the poller.
+var ErrNoDeadline = errors.New("file type does not support deadline")
+
+// Return the appropriate closing error based on isFile.
+func errClosing(isFile bool) error {
+	if isFile {
+		return ErrFileClosing
+	}
+	return ErrNetClosing
+}
+
+// ErrDeadlineExceeded is returned for an expired deadline.
+// This is exported by the os package as os.ErrDeadlineExceeded.
+var ErrDeadlineExceeded error = &DeadlineExceededError{}
+
+// DeadlineExceededError is returned for an expired deadline.
+type DeadlineExceededError struct{}
+
+// Implement the net.Error interface.
+// The string is "i/o timeout" because that is what was returned
+// by earlier Go versions. Changing it may break programs that
+// match on error strings.
+func (e *DeadlineExceededError) Error() string   { return "i/o timeout" }
+func (e *DeadlineExceededError) Timeout() bool   { return true }
+func (e *DeadlineExceededError) Temporary() bool { return true }
+
+// ErrNotPollable is returned when the file or socket is not suitable
+// for event notification.
+var ErrNotPollable = errors.New("not pollable")
+
+// consume removes data from a slice of byte slices, for writev.
+func consume(v *[][]byte, n int64) {
+	for len(*v) > 0 {
+		ln0 := int64(len((*v)[0]))
+		if ln0 > n {
+			(*v)[0] = (*v)[0][n:]
+			return
+		}
+		n -= ln0
+		(*v)[0] = nil
+		*v = (*v)[1:]
+	}
+}
+
+// TestHookDidWritev is a hook for testing writev.
+var TestHookDidWritev = func(wrote int) {}
+
+// String is an internal string definition for methods/functions
+// that is not intended for use outside the standard libraries.
+//
+// Other packages in std that import internal/poll and have some
+// exported APIs (now we've got some in net.rawConn) which are only used
+// internally and are not intended to be used outside the standard libraries,
+// Therefore, we make those APIs use internal types like poll.FD or poll.String
+// in their function signatures to disable the usability of these APIs from
+// external codebase.
+type String string
--- a/src/internal/poll/fd_fsync_darwin.go
+++ b/src/internal/poll/fd_fsync_darwin.go
@@ -0,0 +1,32 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"errors"
+	"internal/syscall/unix"
+	"syscall"
+)
+
+// Fsync invokes SYS_FCNTL with SYS_FULLFSYNC because
+// on OS X, SYS_FSYNC doesn't fully flush contents to disk.
+// See Issue #26650 as well as the man page for fsync on OS X.
+func (fd *FD) Fsync() error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return ignoringEINTR(func() error {
+		_, err := unix.Fcntl(fd.Sysfd, syscall.F_FULLFSYNC, 0)
+
+		// There are scenarios such as SMB mounts where fcntl will fail
+		// with ENOTSUP. In those cases fallback to fsync.
+		// See #64215
+		if err != nil && errors.Is(err, syscall.ENOTSUP) {
+			err = syscall.Fsync(fd.Sysfd)
+		}
+		return err
+	})
+}
--- a/src/internal/poll/fd_fsync_posix.go
+++ b/src/internal/poll/fd_fsync_posix.go
@@ -0,0 +1,20 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris || wasip1
+
+package poll
+
+import "syscall"
+
+// Fsync wraps syscall.Fsync.
+func (fd *FD) Fsync() error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return ignoringEINTR(func() error {
+		return syscall.Fsync(fd.Sysfd)
+	})
+}
--- a/src/internal/poll/fd_fsync_windows.go
+++ b/src/internal/poll/fd_fsync_windows.go
@@ -0,0 +1,16 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "syscall"
+
+// Fsync wraps syscall.Fsync.
+func (fd *FD) Fsync() error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.Fsync(fd.Sysfd)
+}
--- a/src/internal/poll/fd_io_plan9.go
+++ b/src/internal/poll/fd_io_plan9.go
@@ -0,0 +1,92 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"internal/itoa"
+	"runtime"
+	"sync"
+	"syscall"
+)
+
+// asyncIO implements asynchronous cancelable I/O.
+// An asyncIO represents a single asynchronous Read or Write
+// operation. The result is returned on the result channel.
+// The undergoing I/O system call can either complete or be
+// interrupted by a note.
+type asyncIO struct {
+	res chan result
+
+	// mu guards the pid field.
+	mu sync.Mutex
+
+	// pid holds the process id of
+	// the process running the IO operation.
+	pid int
+}
+
+// result is the return value of a Read or Write operation.
+type result struct {
+	n   int
+	err error
+}
+
+// newAsyncIO returns a new asyncIO that performs an I/O
+// operation by calling fn, which must do one and only one
+// interruptible system call.
+func newAsyncIO(fn func([]byte) (int, error), b []byte) *asyncIO {
+	aio := &asyncIO{
+		res: make(chan result, 0),
+	}
+	aio.mu.Lock()
+	go func() {
+		// Lock the current goroutine to its process
+		// and store the pid in io so that Cancel can
+		// interrupt it. We ignore the "hangup" signal,
+		// so the signal does not take down the entire
+		// Go runtime.
+		runtime.LockOSThread()
+		runtime_ignoreHangup()
+		aio.pid = syscall.Getpid()
+		aio.mu.Unlock()
+
+		n, err := fn(b)
+
+		aio.mu.Lock()
+		aio.pid = -1
+		runtime_unignoreHangup()
+		aio.mu.Unlock()
+
+		aio.res <- result{n, err}
+	}()
+	return aio
+}
+
+// Cancel interrupts the I/O operation, causing
+// the Wait function to return.
+func (aio *asyncIO) Cancel() {
+	aio.mu.Lock()
+	defer aio.mu.Unlock()
+	if aio.pid == -1 {
+		return
+	}
+	f, e := syscall.Open("/proc/"+itoa.Itoa(aio.pid)+"/note", syscall.O_WRONLY)
+	if e != nil {
+		return
+	}
+	syscall.Write(f, []byte("hangup"))
+	syscall.Close(f)
+}
+
+// Wait for the I/O operation to complete.
+func (aio *asyncIO) Wait() (int, error) {
+	res := <-aio.res
+	return res.n, res.err
+}
+
+// The following functions, provided by the runtime, are used to
+// ignore and unignore the "hangup" signal received by the process.
+func runtime_ignoreHangup()
+func runtime_unignoreHangup()
--- a/src/internal/poll/fd_mutex.go
+++ b/src/internal/poll/fd_mutex.go
@@ -0,0 +1,252 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "sync/atomic"
+
+// fdMutex is a specialized synchronization primitive that manages
+// lifetime of an fd and serializes access to Read, Write and Close
+// methods on FD.
+type fdMutex struct {
+	state uint64
+	rsema uint32
+	wsema uint32
+}
+
+// fdMutex.state is organized as follows:
+// 1 bit - whether FD is closed, if set all subsequent lock operations will fail.
+// 1 bit - lock for read operations.
+// 1 bit - lock for write operations.
+// 20 bits - total number of references (read+write+misc).
+// 20 bits - number of outstanding read waiters.
+// 20 bits - number of outstanding write waiters.
+const (
+	mutexClosed  = 1 << 0
+	mutexRLock   = 1 << 1
+	mutexWLock   = 1 << 2
+	mutexRef     = 1 << 3
+	mutexRefMask = (1<<20 - 1) << 3
+	mutexRWait   = 1 << 23
+	mutexRMask   = (1<<20 - 1) << 23
+	mutexWWait   = 1 << 43
+	mutexWMask   = (1<<20 - 1) << 43
+)
+
+const overflowMsg = "too many concurrent operations on a single file or socket (max 1048575)"
+
+// Read operations must do rwlock(true)/rwunlock(true).
+//
+// Write operations must do rwlock(false)/rwunlock(false).
+//
+// Misc operations must do incref/decref.
+// Misc operations include functions like setsockopt and setDeadline.
+// They need to use incref/decref to ensure that they operate on the
+// correct fd in presence of a concurrent close call (otherwise fd can
+// be closed under their feet).
+//
+// Close operations must do increfAndClose/decref.
+
+// incref adds a reference to mu.
+// It reports whether mu is available for reading or writing.
+func (mu *fdMutex) incref() bool {
+	for {
+		old := atomic.LoadUint64(&mu.state)
+		if old&mutexClosed != 0 {
+			return false
+		}
+		new := old + mutexRef
+		if new&mutexRefMask == 0 {
+			panic(overflowMsg)
+		}
+		if atomic.CompareAndSwapUint64(&mu.state, old, new) {
+			return true
+		}
+	}
+}
+
+// increfAndClose sets the state of mu to closed.
+// It returns false if the file was already closed.
+func (mu *fdMutex) increfAndClose() bool {
+	for {
+		old := atomic.LoadUint64(&mu.state)
+		if old&mutexClosed != 0 {
+			return false
+		}
+		// Mark as closed and acquire a reference.
+		new := (old | mutexClosed) + mutexRef
+		if new&mutexRefMask == 0 {
+			panic(overflowMsg)
+		}
+		// Remove all read and write waiters.
+		new &^= mutexRMask | mutexWMask
+		if atomic.CompareAndSwapUint64(&mu.state, old, new) {
+			// Wake all read and write waiters,
+			// they will observe closed flag after wakeup.
+			for old&mutexRMask != 0 {
+				old -= mutexRWait
+				runtime_Semrelease(&mu.rsema)
+			}
+			for old&mutexWMask != 0 {
+				old -= mutexWWait
+				runtime_Semrelease(&mu.wsema)
+			}
+			return true
+		}
+	}
+}
+
+// decref removes a reference from mu.
+// It reports whether there is no remaining reference.
+func (mu *fdMutex) decref() bool {
+	for {
+		old := atomic.LoadUint64(&mu.state)
+		if old&mutexRefMask == 0 {
+			panic("inconsistent poll.fdMutex")
+		}
+		new := old - mutexRef
+		if atomic.CompareAndSwapUint64(&mu.state, old, new) {
+			return new&(mutexClosed|mutexRefMask) == mutexClosed
+		}
+	}
+}
+
+// lock adds a reference to mu and locks mu.
+// It reports whether mu is available for reading or writing.
+func (mu *fdMutex) rwlock(read bool) bool {
+	var mutexBit, mutexWait, mutexMask uint64
+	var mutexSema *uint32
+	if read {
+		mutexBit = mutexRLock
+		mutexWait = mutexRWait
+		mutexMask = mutexRMask
+		mutexSema = &mu.rsema
+	} else {
+		mutexBit = mutexWLock
+		mutexWait = mutexWWait
+		mutexMask = mutexWMask
+		mutexSema = &mu.wsema
+	}
+	for {
+		old := atomic.LoadUint64(&mu.state)
+		if old&mutexClosed != 0 {
+			return false
+		}
+		var new uint64
+		if old&mutexBit == 0 {
+			// Lock is free, acquire it.
+			new = (old | mutexBit) + mutexRef
+			if new&mutexRefMask == 0 {
+				panic(overflowMsg)
+			}
+		} else {
+			// Wait for lock.
+			new = old + mutexWait
+			if new&mutexMask == 0 {
+				panic(overflowMsg)
+			}
+		}
+		if atomic.CompareAndSwapUint64(&mu.state, old, new) {
+			if old&mutexBit == 0 {
+				return true
+			}
+			runtime_Semacquire(mutexSema)
+			// The signaller has subtracted mutexWait.
+		}
+	}
+}
+
+// unlock removes a reference from mu and unlocks mu.
+// It reports whether there is no remaining reference.
+func (mu *fdMutex) rwunlock(read bool) bool {
+	var mutexBit, mutexWait, mutexMask uint64
+	var mutexSema *uint32
+	if read {
+		mutexBit = mutexRLock
+		mutexWait = mutexRWait
+		mutexMask = mutexRMask
+		mutexSema = &mu.rsema
+	} else {
+		mutexBit = mutexWLock
+		mutexWait = mutexWWait
+		mutexMask = mutexWMask
+		mutexSema = &mu.wsema
+	}
+	for {
+		old := atomic.LoadUint64(&mu.state)
+		if old&mutexBit == 0 || old&mutexRefMask == 0 {
+			panic("inconsistent poll.fdMutex")
+		}
+		// Drop lock, drop reference and wake read waiter if present.
+		new := (old &^ mutexBit) - mutexRef
+		if old&mutexMask != 0 {
+			new -= mutexWait
+		}
+		if atomic.CompareAndSwapUint64(&mu.state, old, new) {
+			if old&mutexMask != 0 {
+				runtime_Semrelease(mutexSema)
+			}
+			return new&(mutexClosed|mutexRefMask) == mutexClosed
+		}
+	}
+}
+
+// Implemented in runtime package.
+func runtime_Semacquire(sema *uint32)
+func runtime_Semrelease(sema *uint32)
+
+// incref adds a reference to fd.
+// It returns an error when fd cannot be used.
+func (fd *FD) incref() error {
+	if !fd.fdmu.incref() {
+		return errClosing(fd.isFile)
+	}
+	return nil
+}
+
+// decref removes a reference from fd.
+// It also closes fd when the state of fd is set to closed and there
+// is no remaining reference.
+func (fd *FD) decref() error {
+	if fd.fdmu.decref() {
+		return fd.destroy()
+	}
+	return nil
+}
+
+// readLock adds a reference to fd and locks fd for reading.
+// It returns an error when fd cannot be used for reading.
+func (fd *FD) readLock() error {
+	if !fd.fdmu.rwlock(true) {
+		return errClosing(fd.isFile)
+	}
+	return nil
+}
+
+// readUnlock removes a reference from fd and unlocks fd for reading.
+// It also closes fd when the state of fd is set to closed and there
+// is no remaining reference.
+func (fd *FD) readUnlock() {
+	if fd.fdmu.rwunlock(true) {
+		fd.destroy()
+	}
+}
+
+// writeLock adds a reference to fd and locks fd for writing.
+// It returns an error when fd cannot be used for writing.
+func (fd *FD) writeLock() error {
+	if !fd.fdmu.rwlock(false) {
+		return errClosing(fd.isFile)
+	}
+	return nil
+}
+
+// writeUnlock removes a reference from fd and unlocks fd for writing.
+// It also closes fd when the state of fd is set to closed and there
+// is no remaining reference.
+func (fd *FD) writeUnlock() {
+	if fd.fdmu.rwunlock(false) {
+		fd.destroy()
+	}
+}
--- a/src/internal/poll/fd_mutex_test.go
+++ b/src/internal/poll/fd_mutex_test.go
@@ -0,0 +1,222 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	. "internal/poll"
+	"math/rand"
+	"runtime"
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestMutexLock(t *testing.T) {
+	var mu XFDMutex
+
+	if !mu.Incref() {
+		t.Fatal("broken")
+	}
+	if mu.Decref() {
+		t.Fatal("broken")
+	}
+
+	if !mu.RWLock(true) {
+		t.Fatal("broken")
+	}
+	if mu.RWUnlock(true) {
+		t.Fatal("broken")
+	}
+
+	if !mu.RWLock(false) {
+		t.Fatal("broken")
+	}
+	if mu.RWUnlock(false) {
+		t.Fatal("broken")
+	}
+}
+
+func TestMutexClose(t *testing.T) {
+	var mu XFDMutex
+	if !mu.IncrefAndClose() {
+		t.Fatal("broken")
+	}
+
+	if mu.Incref() {
+		t.Fatal("broken")
+	}
+	if mu.RWLock(true) {
+		t.Fatal("broken")
+	}
+	if mu.RWLock(false) {
+		t.Fatal("broken")
+	}
+	if mu.IncrefAndClose() {
+		t.Fatal("broken")
+	}
+}
+
+func TestMutexCloseUnblock(t *testing.T) {
+	c := make(chan bool, 4)
+	var mu XFDMutex
+	mu.RWLock(true)
+	for i := 0; i < 4; i++ {
+		go func() {
+			if mu.RWLock(true) {
+				t.Error("broken")
+				return
+			}
+			c <- true
+		}()
+	}
+	// Concurrent goroutines must not be able to read lock the mutex.
+	time.Sleep(time.Millisecond)
+	select {
+	case <-c:
+		t.Fatal("broken")
+	default:
+	}
+	mu.IncrefAndClose() // Must unblock the readers.
+	for i := 0; i < 4; i++ {
+		select {
+		case <-c:
+		case <-time.After(10 * time.Second):
+			t.Fatal("broken")
+		}
+	}
+	if mu.Decref() {
+		t.Fatal("broken")
+	}
+	if !mu.RWUnlock(true) {
+		t.Fatal("broken")
+	}
+}
+
+func TestMutexPanic(t *testing.T) {
+	ensurePanics := func(f func()) {
+		defer func() {
+			if recover() == nil {
+				t.Fatal("does not panic")
+			}
+		}()
+		f()
+	}
+
+	var mu XFDMutex
+	ensurePanics(func() { mu.Decref() })
+	ensurePanics(func() { mu.RWUnlock(true) })
+	ensurePanics(func() { mu.RWUnlock(false) })
+
+	ensurePanics(func() { mu.Incref(); mu.Decref(); mu.Decref() })
+	ensurePanics(func() { mu.RWLock(true); mu.RWUnlock(true); mu.RWUnlock(true) })
+	ensurePanics(func() { mu.RWLock(false); mu.RWUnlock(false); mu.RWUnlock(false) })
+
+	// ensure that it's still not broken
+	mu.Incref()
+	mu.Decref()
+	mu.RWLock(true)
+	mu.RWUnlock(true)
+	mu.RWLock(false)
+	mu.RWUnlock(false)
+}
+
+func TestMutexOverflowPanic(t *testing.T) {
+	defer func() {
+		r := recover()
+		if r == nil {
+			t.Fatal("did not panic")
+		}
+		msg, ok := r.(string)
+		if !ok {
+			t.Fatalf("unexpected panic type %T", r)
+		}
+		if !strings.Contains(msg, "too many") || strings.Contains(msg, "inconsistent") {
+			t.Fatalf("wrong panic message %q", msg)
+		}
+	}()
+
+	var mu1 XFDMutex
+	for i := 0; i < 1<<21; i++ {
+		mu1.Incref()
+	}
+}
+
+func TestMutexStress(t *testing.T) {
+	P := 8
+	N := int(1e6)
+	if testing.Short() {
+		P = 4
+		N = 1e4
+	}
+	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P))
+	done := make(chan bool, P)
+	var mu XFDMutex
+	var readState [2]uint64
+	var writeState [2]uint64
+	for p := 0; p < P; p++ {
+		go func() {
+			defer func() {
+				done <- !t.Failed()
+			}()
+			r := rand.New(rand.NewSource(rand.Int63()))
+			for i := 0; i < N; i++ {
+				switch r.Intn(3) {
+				case 0:
+					if !mu.Incref() {
+						t.Error("broken")
+						return
+					}
+					if mu.Decref() {
+						t.Error("broken")
+						return
+					}
+				case 1:
+					if !mu.RWLock(true) {
+						t.Error("broken")
+						return
+					}
+					// Ensure that it provides mutual exclusion for readers.
+					if readState[0] != readState[1] {
+						t.Error("broken")
+						return
+					}
+					readState[0]++
+					readState[1]++
+					if mu.RWUnlock(true) {
+						t.Error("broken")
+						return
+					}
+				case 2:
+					if !mu.RWLock(false) {
+						t.Error("broken")
+						return
+					}
+					// Ensure that it provides mutual exclusion for writers.
+					if writeState[0] != writeState[1] {
+						t.Error("broken")
+						return
+					}
+					writeState[0]++
+					writeState[1]++
+					if mu.RWUnlock(false) {
+						t.Error("broken")
+						return
+					}
+				}
+			}
+		}()
+	}
+	for p := 0; p < P; p++ {
+		if !<-done {
+			t.FailNow()
+		}
+	}
+	if !mu.IncrefAndClose() {
+		t.Fatal("broken")
+	}
+	if !mu.Decref() {
+		t.Fatal("broken")
+	}
+}
--- a/src/internal/poll/fd_opendir_darwin.go
+++ b/src/internal/poll/fd_opendir_darwin.go
@@ -0,0 +1,39 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"syscall"
+	_ "unsafe" // for go:linkname
+)
+
+// OpenDir returns a pointer to a DIR structure suitable for
+// ReadDir. In case of an error, the name of the failed
+// syscall is returned along with a syscall.Errno.
+func (fd *FD) OpenDir() (uintptr, string, error) {
+	// fdopendir(3) takes control of the file descriptor,
+	// so use a dup.
+	fd2, call, err := fd.Dup()
+	if err != nil {
+		return 0, call, err
+	}
+	var dir uintptr
+	for {
+		dir, err = fdopendir(fd2)
+		if err != syscall.EINTR {
+			break
+		}
+	}
+	if err != nil {
+		syscall.Close(fd2)
+		return 0, "fdopendir", err
+	}
+	return dir, "", nil
+}
+
+// Implemented in syscall/syscall_darwin.go.
+//
+//go:linkname fdopendir syscall.fdopendir
+func fdopendir(fd int) (dir uintptr, err error)
--- a/src/internal/poll/fd_plan9.go
+++ b/src/internal/poll/fd_plan9.go
@@ -0,0 +1,245 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"errors"
+	"internal/stringslite"
+	"io"
+	"sync"
+	"syscall"
+	"time"
+)
+
+type FD struct {
+	// Lock sysfd and serialize access to Read and Write methods.
+	fdmu fdMutex
+
+	Destroy func()
+
+	// deadlines
+	rmu       sync.Mutex
+	wmu       sync.Mutex
+	raio      *asyncIO
+	waio      *asyncIO
+	rtimer    *time.Timer
+	wtimer    *time.Timer
+	rtimedout bool // set true when read deadline has been reached
+	wtimedout bool // set true when write deadline has been reached
+
+	// Whether this is a normal file.
+	// On Plan 9 we do not use this package for ordinary files,
+	// so this is always false, but the field is present because
+	// shared code in fd_mutex.go checks it.
+	isFile bool
+}
+
+// We need this to close out a file descriptor when it is unlocked,
+// but the real implementation has to live in the net package because
+// it uses os.File's.
+func (fd *FD) destroy() error {
+	if fd.Destroy != nil {
+		fd.Destroy()
+	}
+	return nil
+}
+
+// Close handles the locking for closing an FD. The real operation
+// is in the net package.
+func (fd *FD) Close() error {
+	if !fd.fdmu.increfAndClose() {
+		return errClosing(fd.isFile)
+	}
+	return nil
+}
+
+// Read implements io.Reader.
+func (fd *FD) Read(fn func([]byte) (int, error), b []byte) (int, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, err
+	}
+	defer fd.readUnlock()
+	if len(b) == 0 {
+		return 0, nil
+	}
+	fd.rmu.Lock()
+	if fd.rtimedout {
+		fd.rmu.Unlock()
+		return 0, ErrDeadlineExceeded
+	}
+	fd.raio = newAsyncIO(fn, b)
+	fd.rmu.Unlock()
+	n, err := fd.raio.Wait()
+	fd.raio = nil
+	if isHangup(err) {
+		err = io.EOF
+	}
+	if isInterrupted(err) {
+		err = ErrDeadlineExceeded
+	}
+	return n, err
+}
+
+// Write implements io.Writer.
+func (fd *FD) Write(fn func([]byte) (int, error), b []byte) (int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+	fd.wmu.Lock()
+	if fd.wtimedout {
+		fd.wmu.Unlock()
+		return 0, ErrDeadlineExceeded
+	}
+	fd.waio = newAsyncIO(fn, b)
+	fd.wmu.Unlock()
+	n, err := fd.waio.Wait()
+	fd.waio = nil
+	if isInterrupted(err) {
+		err = ErrDeadlineExceeded
+	}
+	return n, err
+}
+
+// SetDeadline sets the read and write deadlines associated with fd.
+func (fd *FD) SetDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'r'+'w')
+}
+
+// SetReadDeadline sets the read deadline associated with fd.
+func (fd *FD) SetReadDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'r')
+}
+
+// SetWriteDeadline sets the write deadline associated with fd.
+func (fd *FD) SetWriteDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'w')
+}
+
+func setDeadlineImpl(fd *FD, t time.Time, mode int) error {
+	d := t.Sub(time.Now())
+	if mode == 'r' || mode == 'r'+'w' {
+		fd.rmu.Lock()
+		defer fd.rmu.Unlock()
+		if fd.rtimer != nil {
+			fd.rtimer.Stop()
+			fd.rtimer = nil
+		}
+		fd.rtimedout = false
+	}
+	if mode == 'w' || mode == 'r'+'w' {
+		fd.wmu.Lock()
+		defer fd.wmu.Unlock()
+		if fd.wtimer != nil {
+			fd.wtimer.Stop()
+			fd.wtimer = nil
+		}
+		fd.wtimedout = false
+	}
+	if !t.IsZero() && d > 0 {
+		// Interrupt I/O operation once timer has expired
+		if mode == 'r' || mode == 'r'+'w' {
+			var timer *time.Timer
+			timer = time.AfterFunc(d, func() {
+				fd.rmu.Lock()
+				defer fd.rmu.Unlock()
+				if fd.rtimer != timer {
+					// deadline was changed
+					return
+				}
+				fd.rtimedout = true
+				if fd.raio != nil {
+					fd.raio.Cancel()
+				}
+			})
+			fd.rtimer = timer
+		}
+		if mode == 'w' || mode == 'r'+'w' {
+			var timer *time.Timer
+			timer = time.AfterFunc(d, func() {
+				fd.wmu.Lock()
+				defer fd.wmu.Unlock()
+				if fd.wtimer != timer {
+					// deadline was changed
+					return
+				}
+				fd.wtimedout = true
+				if fd.waio != nil {
+					fd.waio.Cancel()
+				}
+			})
+			fd.wtimer = timer
+		}
+	}
+	if !t.IsZero() && d <= 0 {
+		// Interrupt current I/O operation
+		if mode == 'r' || mode == 'r'+'w' {
+			fd.rtimedout = true
+			if fd.raio != nil {
+				fd.raio.Cancel()
+			}
+		}
+		if mode == 'w' || mode == 'r'+'w' {
+			fd.wtimedout = true
+			if fd.waio != nil {
+				fd.waio.Cancel()
+			}
+		}
+	}
+	return nil
+}
+
+// On Plan 9 only, expose the locking for the net code.
+
+// ReadLock wraps FD.readLock.
+func (fd *FD) ReadLock() error {
+	return fd.readLock()
+}
+
+// ReadUnlock wraps FD.readUnlock.
+func (fd *FD) ReadUnlock() {
+	fd.readUnlock()
+}
+
+func isHangup(err error) bool {
+	return err != nil && stringslite.HasSuffix(err.Error(), "Hangup")
+}
+
+func isInterrupted(err error) bool {
+	return err != nil && stringslite.HasSuffix(err.Error(), "interrupted")
+}
+
+// IsPollDescriptor reports whether fd is the descriptor being used by the poller.
+// This is only used for testing.
+func IsPollDescriptor(fd uintptr) bool {
+	return false
+}
+
+// RawControl invokes the user-defined function f for a non-IO
+// operation.
+func (fd *FD) RawControl(f func(uintptr)) error {
+	return errors.New("not implemented")
+}
+
+// RawRead invokes the user-defined function f for a read operation.
+func (fd *FD) RawRead(f func(uintptr) bool) error {
+	return errors.New("not implemented")
+}
+
+// RawWrite invokes the user-defined function f for a write operation.
+func (fd *FD) RawWrite(f func(uintptr) bool) error {
+	return errors.New("not implemented")
+}
+
+func DupCloseOnExec(fd int) (int, string, error) {
+	nfd, err := syscall.Dup(int(fd), -1)
+	if err != nil {
+		return 0, "dup", err
+	}
+	// Plan9 has no syscall.CloseOnExec but
+	// its forkAndExecInChild closes all fds
+	// not related to the fork+exec.
+	return nfd, "", nil
+}
--- a/src/internal/poll/fd_poll_js.go
+++ b/src/internal/poll/fd_poll_js.go
@@ -0,0 +1,99 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build js && wasm
+
+package poll
+
+import (
+	"syscall"
+	"time"
+)
+
+type pollDesc struct {
+	fd      *FD
+	closing bool
+}
+
+func (pd *pollDesc) init(fd *FD) error { pd.fd = fd; return nil }
+
+func (pd *pollDesc) close() {}
+
+func (pd *pollDesc) evict() {
+	pd.closing = true
+	if pd.fd != nil {
+		syscall.StopIO(pd.fd.Sysfd)
+	}
+}
+
+func (pd *pollDesc) prepare(mode int, isFile bool) error {
+	if pd.closing {
+		return errClosing(isFile)
+	}
+	return nil
+}
+
+func (pd *pollDesc) prepareRead(isFile bool) error { return pd.prepare('r', isFile) }
+
+func (pd *pollDesc) prepareWrite(isFile bool) error { return pd.prepare('w', isFile) }
+
+func (pd *pollDesc) wait(mode int, isFile bool) error {
+	if pd.closing {
+		return errClosing(isFile)
+	}
+	if isFile { // TODO(neelance): js/wasm: Use callbacks from JS to block until the read/write finished.
+		return nil
+	}
+	return ErrDeadlineExceeded
+}
+
+func (pd *pollDesc) waitRead(isFile bool) error { return pd.wait('r', isFile) }
+
+func (pd *pollDesc) waitWrite(isFile bool) error { return pd.wait('w', isFile) }
+
+func (pd *pollDesc) waitCanceled(mode int) {}
+
+func (pd *pollDesc) pollable() bool { return true }
+
+// SetDeadline sets the read and write deadlines associated with fd.
+func (fd *FD) SetDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'r'+'w')
+}
+
+// SetReadDeadline sets the read deadline associated with fd.
+func (fd *FD) SetReadDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'r')
+}
+
+// SetWriteDeadline sets the write deadline associated with fd.
+func (fd *FD) SetWriteDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'w')
+}
+
+func setDeadlineImpl(fd *FD, t time.Time, mode int) error {
+	d := t.UnixNano()
+	if t.IsZero() {
+		d = 0
+	}
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	switch mode {
+	case 'r':
+		syscall.SetReadDeadline(fd.Sysfd, d)
+	case 'w':
+		syscall.SetWriteDeadline(fd.Sysfd, d)
+	case 'r' + 'w':
+		syscall.SetReadDeadline(fd.Sysfd, d)
+		syscall.SetWriteDeadline(fd.Sysfd, d)
+	}
+	fd.decref()
+	return nil
+}
+
+// IsPollDescriptor reports whether fd is the descriptor being used by the poller.
+// This is only used for testing.
+func IsPollDescriptor(fd uintptr) bool {
+	return false
+}
--- a/src/internal/poll/fd_poll_runtime.go
+++ b/src/internal/poll/fd_poll_runtime.go
@@ -0,0 +1,179 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix || windows || wasip1
+
+package poll
+
+import (
+	"errors"
+	"sync"
+	"syscall"
+	"time"
+	_ "unsafe" // for go:linkname
+)
+
+// runtimeNano returns the current value of the runtime clock in nanoseconds.
+//
+//go:linkname runtimeNano runtime.nanotime
+func runtimeNano() int64
+
+func runtime_pollServerInit()
+func runtime_pollOpen(fd uintptr) (uintptr, int)
+func runtime_pollClose(ctx uintptr)
+func runtime_pollWait(ctx uintptr, mode int) int
+func runtime_pollWaitCanceled(ctx uintptr, mode int)
+func runtime_pollReset(ctx uintptr, mode int) int
+func runtime_pollSetDeadline(ctx uintptr, d int64, mode int)
+func runtime_pollUnblock(ctx uintptr)
+func runtime_isPollServerDescriptor(fd uintptr) bool
+
+type pollDesc struct {
+	runtimeCtx uintptr
+}
+
+var serverInit sync.Once
+
+func (pd *pollDesc) init(fd *FD) error {
+	serverInit.Do(runtime_pollServerInit)
+	ctx, errno := runtime_pollOpen(uintptr(fd.Sysfd))
+	if errno != 0 {
+		return errnoErr(syscall.Errno(errno))
+	}
+	pd.runtimeCtx = ctx
+	return nil
+}
+
+func (pd *pollDesc) close() {
+	if pd.runtimeCtx == 0 {
+		return
+	}
+	runtime_pollClose(pd.runtimeCtx)
+	pd.runtimeCtx = 0
+}
+
+// Evict evicts fd from the pending list, unblocking any I/O running on fd.
+func (pd *pollDesc) evict() {
+	if pd.runtimeCtx == 0 {
+		return
+	}
+	runtime_pollUnblock(pd.runtimeCtx)
+}
+
+func (pd *pollDesc) prepare(mode int, isFile bool) error {
+	if pd.runtimeCtx == 0 {
+		return nil
+	}
+	res := runtime_pollReset(pd.runtimeCtx, mode)
+	return convertErr(res, isFile)
+}
+
+func (pd *pollDesc) prepareRead(isFile bool) error {
+	return pd.prepare('r', isFile)
+}
+
+func (pd *pollDesc) prepareWrite(isFile bool) error {
+	return pd.prepare('w', isFile)
+}
+
+func (pd *pollDesc) wait(mode int, isFile bool) error {
+	if pd.runtimeCtx == 0 {
+		return errors.New("waiting for unsupported file type")
+	}
+	res := runtime_pollWait(pd.runtimeCtx, mode)
+	return convertErr(res, isFile)
+}
+
+func (pd *pollDesc) waitRead(isFile bool) error {
+	return pd.wait('r', isFile)
+}
+
+func (pd *pollDesc) waitWrite(isFile bool) error {
+	return pd.wait('w', isFile)
+}
+
+func (pd *pollDesc) waitCanceled(mode int) {
+	if pd.runtimeCtx == 0 {
+		return
+	}
+	runtime_pollWaitCanceled(pd.runtimeCtx, mode)
+}
+
+func (pd *pollDesc) pollable() bool {
+	return pd.runtimeCtx != 0
+}
+
+// Error values returned by runtime_pollReset and runtime_pollWait.
+// These must match the values in runtime/netpoll.go.
+const (
+	pollNoError        = 0
+	pollErrClosing     = 1
+	pollErrTimeout     = 2
+	pollErrNotPollable = 3
+)
+
+func convertErr(res int, isFile bool) error {
+	switch res {
+	case pollNoError:
+		return nil
+	case pollErrClosing:
+		return errClosing(isFile)
+	case pollErrTimeout:
+		return ErrDeadlineExceeded
+	case pollErrNotPollable:
+		return ErrNotPollable
+	}
+	println("unreachable: ", res)
+	panic("unreachable")
+}
+
+// SetDeadline sets the read and write deadlines associated with fd.
+func (fd *FD) SetDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'r'+'w')
+}
+
+// SetReadDeadline sets the read deadline associated with fd.
+func (fd *FD) SetReadDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'r')
+}
+
+// SetWriteDeadline sets the write deadline associated with fd.
+func (fd *FD) SetWriteDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'w')
+}
+
+func setDeadlineImpl(fd *FD, t time.Time, mode int) error {
+	var d int64
+	if !t.IsZero() {
+		d = int64(time.Until(t))
+		if d == 0 {
+			d = -1 // don't confuse deadline right now with no deadline
+		}
+	}
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	if fd.pd.runtimeCtx == 0 {
+		return ErrNoDeadline
+	}
+	runtime_pollSetDeadline(fd.pd.runtimeCtx, d, mode)
+	return nil
+}
+
+// IsPollDescriptor reports whether fd is the descriptor being used by the poller.
+// This is only used for testing.
+//
+// IsPollDescriptor should be an internal detail,
+// but widely used packages access it using linkname.
+// Notable members of the hall of shame include:
+//   - github.com/opencontainers/runc
+//
+// Do not remove or change the type signature.
+// See go.dev/issue/67401.
+//
+//go:linkname IsPollDescriptor
+func IsPollDescriptor(fd uintptr) bool {
+	return runtime_isPollServerDescriptor(fd)
+}
--- a/src/internal/poll/fd_posix.go
+++ b/src/internal/poll/fd_posix.go
@@ -0,0 +1,79 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix || (js && wasm) || wasip1 || windows
+
+package poll
+
+import (
+	"io"
+	"syscall"
+)
+
+// eofError returns io.EOF when fd is available for reading end of
+// file.
+func (fd *FD) eofError(n int, err error) error {
+	if n == 0 && err == nil && fd.ZeroReadIsEOF {
+		return io.EOF
+	}
+	return err
+}
+
+// Shutdown wraps syscall.Shutdown.
+func (fd *FD) Shutdown(how int) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.Shutdown(fd.Sysfd, how)
+}
+
+// Fchown wraps syscall.Fchown.
+func (fd *FD) Fchown(uid, gid int) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return ignoringEINTR(func() error {
+		return syscall.Fchown(fd.Sysfd, uid, gid)
+	})
+}
+
+// Ftruncate wraps syscall.Ftruncate.
+func (fd *FD) Ftruncate(size int64) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return ignoringEINTR(func() error {
+		return syscall.Ftruncate(fd.Sysfd, size)
+	})
+}
+
+// RawControl invokes the user-defined function f for a non-IO
+// operation.
+func (fd *FD) RawControl(f func(uintptr)) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	f(uintptr(fd.Sysfd))
+	return nil
+}
+
+// ignoringEINTR makes a function call and repeats it if it returns
+// an EINTR error. This appears to be required even though we install all
+// signal handlers with SA_RESTART: see #22838, #38033, #38836, #40846.
+// Also #20400 and #36644 are issues in which a signal handler is
+// installed without setting SA_RESTART. None of these are the common case,
+// but there are enough of them that it seems that we can't avoid
+// an EINTR loop.
+func ignoringEINTR(fn func() error) error {
+	for {
+		err := fn()
+		if err != syscall.EINTR {
+			return err
+		}
+	}
+}
--- a/src/internal/poll/fd_posix_test.go
+++ b/src/internal/poll/fd_posix_test.go
@@ -0,0 +1,43 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix || windows
+
+package poll_test
+
+import (
+	. "internal/poll"
+	"io"
+	"testing"
+)
+
+var eofErrorTests = []struct {
+	n        int
+	err      error
+	fd       *FD
+	expected error
+}{
+	{100, nil, &FD{ZeroReadIsEOF: true}, nil},
+	{100, io.EOF, &FD{ZeroReadIsEOF: true}, io.EOF},
+	{100, ErrNetClosing, &FD{ZeroReadIsEOF: true}, ErrNetClosing},
+	{0, nil, &FD{ZeroReadIsEOF: true}, io.EOF},
+	{0, io.EOF, &FD{ZeroReadIsEOF: true}, io.EOF},
+	{0, ErrNetClosing, &FD{ZeroReadIsEOF: true}, ErrNetClosing},
+
+	{100, nil, &FD{ZeroReadIsEOF: false}, nil},
+	{100, io.EOF, &FD{ZeroReadIsEOF: false}, io.EOF},
+	{100, ErrNetClosing, &FD{ZeroReadIsEOF: false}, ErrNetClosing},
+	{0, nil, &FD{ZeroReadIsEOF: false}, nil},
+	{0, io.EOF, &FD{ZeroReadIsEOF: false}, io.EOF},
+	{0, ErrNetClosing, &FD{ZeroReadIsEOF: false}, ErrNetClosing},
+}
+
+func TestEOFError(t *testing.T) {
+	for _, tt := range eofErrorTests {
+		actual := tt.fd.EOFError(tt.n, tt.err)
+		if actual != tt.expected {
+			t.Errorf("eofError(%v, %v, %v): expected %v, actual %v", tt.n, tt.err, tt.fd.ZeroReadIsEOF, tt.expected, actual)
+		}
+	}
+}
--- a/src/internal/poll/fd_unix.go
+++ b/src/internal/poll/fd_unix.go
@@ -0,0 +1,750 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix || (js && wasm) || wasip1
+
+package poll
+
+import (
+	"internal/itoa"
+	"internal/syscall/unix"
+	"io"
+	"sync/atomic"
+	"syscall"
+)
+
+// FD is a file descriptor. The net and os packages use this type as a
+// field of a larger type representing a network connection or OS file.
+type FD struct {
+	// Lock sysfd and serialize access to Read and Write methods.
+	fdmu fdMutex
+
+	// System file descriptor. Immutable until Close.
+	Sysfd int
+
+	// Platform dependent state of the file descriptor.
+	SysFile
+
+	// I/O poller.
+	pd pollDesc
+
+	// Semaphore signaled when file is closed.
+	csema uint32
+
+	// Non-zero if this file has been set to blocking mode.
+	isBlocking uint32
+
+	// Whether this is a streaming descriptor, as opposed to a
+	// packet-based descriptor like a UDP socket. Immutable.
+	IsStream bool
+
+	// Whether a zero byte read indicates EOF. This is false for a
+	// message based socket connection.
+	ZeroReadIsEOF bool
+
+	// Whether this is a file rather than a network socket.
+	isFile bool
+}
+
+// Init initializes the FD. The Sysfd field should already be set.
+// This can be called multiple times on a single FD.
+// The net argument is a network name from the net package (e.g., "tcp"),
+// or "file".
+// Set pollable to true if fd should be managed by runtime netpoll.
+func (fd *FD) Init(net string, pollable bool) error {
+	fd.SysFile.init()
+
+	// We don't actually care about the various network types.
+	if net == "file" {
+		fd.isFile = true
+	}
+	if !pollable {
+		fd.isBlocking = 1
+		return nil
+	}
+	err := fd.pd.init(fd)
+	if err != nil {
+		// If we could not initialize the runtime poller,
+		// assume we are using blocking mode.
+		fd.isBlocking = 1
+	}
+	return err
+}
+
+// Destroy closes the file descriptor. This is called when there are
+// no remaining references.
+func (fd *FD) destroy() error {
+	// Poller may want to unregister fd in readiness notification mechanism,
+	// so this must be executed before CloseFunc.
+	fd.pd.close()
+
+	err := fd.SysFile.destroy(fd.Sysfd)
+
+	fd.Sysfd = -1
+	runtime_Semrelease(&fd.csema)
+	return err
+}
+
+// Close closes the FD. The underlying file descriptor is closed by the
+// destroy method when there are no remaining references.
+func (fd *FD) Close() error {
+	if !fd.fdmu.increfAndClose() {
+		return errClosing(fd.isFile)
+	}
+
+	// Unblock any I/O.  Once it all unblocks and returns,
+	// so that it cannot be referring to fd.sysfd anymore,
+	// the final decref will close fd.sysfd. This should happen
+	// fairly quickly, since all the I/O is non-blocking, and any
+	// attempts to block in the pollDesc will return errClosing(fd.isFile).
+	fd.pd.evict()
+
+	// The call to decref will call destroy if there are no other
+	// references.
+	err := fd.decref()
+
+	// Wait until the descriptor is closed. If this was the only
+	// reference, it is already closed. Only wait if the file has
+	// not been set to blocking mode, as otherwise any current I/O
+	// may be blocking, and that would block the Close.
+	// No need for an atomic read of isBlocking, increfAndClose means
+	// we have exclusive access to fd.
+	if fd.isBlocking == 0 {
+		runtime_Semacquire(&fd.csema)
+	}
+
+	return err
+}
+
+// SetBlocking puts the file into blocking mode.
+func (fd *FD) SetBlocking() error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	// Atomic store so that concurrent calls to SetBlocking
+	// do not cause a race condition. isBlocking only ever goes
+	// from 0 to 1 so there is no real race here.
+	atomic.StoreUint32(&fd.isBlocking, 1)
+	return syscall.SetNonblock(fd.Sysfd, false)
+}
+
+// Darwin and FreeBSD can't read or write 2GB+ files at a time,
+// even on 64-bit systems.
+// The same is true of socket implementations on many systems.
+// See golang.org/issue/7812 and golang.org/issue/16266.
+// Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned.
+const maxRW = 1 << 30
+
+// Read implements io.Reader.
+func (fd *FD) Read(p []byte) (int, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, err
+	}
+	defer fd.readUnlock()
+	if len(p) == 0 {
+		// If the caller wanted a zero byte read, return immediately
+		// without trying (but after acquiring the readLock).
+		// Otherwise syscall.Read returns 0, nil which looks like
+		// io.EOF.
+		// TODO(bradfitz): make it wait for readability? (Issue 15735)
+		return 0, nil
+	}
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return 0, err
+	}
+	if fd.IsStream && len(p) > maxRW {
+		p = p[:maxRW]
+	}
+	for {
+		n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p)
+		if err != nil {
+			n = 0
+			if err == syscall.EAGAIN && fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		}
+		err = fd.eofError(n, err)
+		return n, err
+	}
+}
+
+// Pread wraps the pread system call.
+func (fd *FD) Pread(p []byte, off int64) (int, error) {
+	// Call incref, not readLock, because since pread specifies the
+	// offset it is independent from other reads.
+	// Similarly, using the poller doesn't make sense for pread.
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	if fd.IsStream && len(p) > maxRW {
+		p = p[:maxRW]
+	}
+	var (
+		n   int
+		err error
+	)
+	for {
+		n, err = syscall.Pread(fd.Sysfd, p, off)
+		if err != syscall.EINTR {
+			break
+		}
+	}
+	if err != nil {
+		n = 0
+	}
+	fd.decref()
+	err = fd.eofError(n, err)
+	return n, err
+}
+
+// ReadFrom wraps the recvfrom network call.
+func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, nil, err
+	}
+	defer fd.readUnlock()
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return 0, nil, err
+	}
+	for {
+		n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0)
+		if err != nil {
+			if err == syscall.EINTR {
+				continue
+			}
+			n = 0
+			if err == syscall.EAGAIN && fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		}
+		err = fd.eofError(n, err)
+		return n, sa, err
+	}
+}
+
+// ReadFromInet4 wraps the recvfrom network call for IPv4.
+func (fd *FD) ReadFromInet4(p []byte, from *syscall.SockaddrInet4) (int, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, err
+	}
+	defer fd.readUnlock()
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return 0, err
+	}
+	for {
+		n, err := unix.RecvfromInet4(fd.Sysfd, p, 0, from)
+		if err != nil {
+			if err == syscall.EINTR {
+				continue
+			}
+			n = 0
+			if err == syscall.EAGAIN && fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		}
+		err = fd.eofError(n, err)
+		return n, err
+	}
+}
+
+// ReadFromInet6 wraps the recvfrom network call for IPv6.
+func (fd *FD) ReadFromInet6(p []byte, from *syscall.SockaddrInet6) (int, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, err
+	}
+	defer fd.readUnlock()
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return 0, err
+	}
+	for {
+		n, err := unix.RecvfromInet6(fd.Sysfd, p, 0, from)
+		if err != nil {
+			if err == syscall.EINTR {
+				continue
+			}
+			n = 0
+			if err == syscall.EAGAIN && fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		}
+		err = fd.eofError(n, err)
+		return n, err
+	}
+}
+
+// ReadMsg wraps the recvmsg network call.
+func (fd *FD) ReadMsg(p []byte, oob []byte, flags int) (int, int, int, syscall.Sockaddr, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, 0, 0, nil, err
+	}
+	defer fd.readUnlock()
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return 0, 0, 0, nil, err
+	}
+	for {
+		n, oobn, sysflags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, flags)
+		if err != nil {
+			if err == syscall.EINTR {
+				continue
+			}
+			// TODO(dfc) should n and oobn be set to 0
+			if err == syscall.EAGAIN && fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		}
+		err = fd.eofError(n, err)
+		return n, oobn, sysflags, sa, err
+	}
+}
+
+// ReadMsgInet4 is ReadMsg, but specialized for syscall.SockaddrInet4.
+func (fd *FD) ReadMsgInet4(p []byte, oob []byte, flags int, sa4 *syscall.SockaddrInet4) (int, int, int, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, 0, 0, err
+	}
+	defer fd.readUnlock()
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return 0, 0, 0, err
+	}
+	for {
+		n, oobn, sysflags, err := unix.RecvmsgInet4(fd.Sysfd, p, oob, flags, sa4)
+		if err != nil {
+			if err == syscall.EINTR {
+				continue
+			}
+			// TODO(dfc) should n and oobn be set to 0
+			if err == syscall.EAGAIN && fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		}
+		err = fd.eofError(n, err)
+		return n, oobn, sysflags, err
+	}
+}
+
+// ReadMsgInet6 is ReadMsg, but specialized for syscall.SockaddrInet6.
+func (fd *FD) ReadMsgInet6(p []byte, oob []byte, flags int, sa6 *syscall.SockaddrInet6) (int, int, int, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, 0, 0, err
+	}
+	defer fd.readUnlock()
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return 0, 0, 0, err
+	}
+	for {
+		n, oobn, sysflags, err := unix.RecvmsgInet6(fd.Sysfd, p, oob, flags, sa6)
+		if err != nil {
+			if err == syscall.EINTR {
+				continue
+			}
+			// TODO(dfc) should n and oobn be set to 0
+			if err == syscall.EAGAIN && fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		}
+		err = fd.eofError(n, err)
+		return n, oobn, sysflags, err
+	}
+}
+
+// Write implements io.Writer.
+func (fd *FD) Write(p []byte) (int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+		return 0, err
+	}
+	var nn int
+	for {
+		max := len(p)
+		if fd.IsStream && max-nn > maxRW {
+			max = nn + maxRW
+		}
+		n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max])
+		if n > 0 {
+			if n > max-nn {
+				// This can reportedly happen when using
+				// some VPN software. Issue #61060.
+				// If we don't check this we will panic
+				// with slice bounds out of range.
+				// Use a more informative panic.
+				panic("invalid return from write: got " + itoa.Itoa(n) + " from a write of " + itoa.Itoa(max-nn))
+			}
+			nn += n
+		}
+		if nn == len(p) {
+			return nn, err
+		}
+		if err == syscall.EAGAIN && fd.pd.pollable() {
+			if err = fd.pd.waitWrite(fd.isFile); err == nil {
+				continue
+			}
+		}
+		if err != nil {
+			return nn, err
+		}
+		if n == 0 {
+			return nn, io.ErrUnexpectedEOF
+		}
+	}
+}
+
+// Pwrite wraps the pwrite system call.
+func (fd *FD) Pwrite(p []byte, off int64) (int, error) {
+	// Call incref, not writeLock, because since pwrite specifies the
+	// offset it is independent from other writes.
+	// Similarly, using the poller doesn't make sense for pwrite.
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	defer fd.decref()
+	var nn int
+	for {
+		max := len(p)
+		if fd.IsStream && max-nn > maxRW {
+			max = nn + maxRW
+		}
+		n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn))
+		if err == syscall.EINTR {
+			continue
+		}
+		if n > 0 {
+			nn += n
+		}
+		if nn == len(p) {
+			return nn, err
+		}
+		if err != nil {
+			return nn, err
+		}
+		if n == 0 {
+			return nn, io.ErrUnexpectedEOF
+		}
+	}
+}
+
+// WriteToInet4 wraps the sendto network call for IPv4 addresses.
+func (fd *FD) WriteToInet4(p []byte, sa *syscall.SockaddrInet4) (int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+		return 0, err
+	}
+	for {
+		err := unix.SendtoInet4(fd.Sysfd, p, 0, sa)
+		if err == syscall.EINTR {
+			continue
+		}
+		if err == syscall.EAGAIN && fd.pd.pollable() {
+			if err = fd.pd.waitWrite(fd.isFile); err == nil {
+				continue
+			}
+		}
+		if err != nil {
+			return 0, err
+		}
+		return len(p), nil
+	}
+}
+
+// WriteToInet6 wraps the sendto network call for IPv6 addresses.
+func (fd *FD) WriteToInet6(p []byte, sa *syscall.SockaddrInet6) (int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+		return 0, err
+	}
+	for {
+		err := unix.SendtoInet6(fd.Sysfd, p, 0, sa)
+		if err == syscall.EINTR {
+			continue
+		}
+		if err == syscall.EAGAIN && fd.pd.pollable() {
+			if err = fd.pd.waitWrite(fd.isFile); err == nil {
+				continue
+			}
+		}
+		if err != nil {
+			return 0, err
+		}
+		return len(p), nil
+	}
+}
+
+// WriteTo wraps the sendto network call.
+func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+		return 0, err
+	}
+	for {
+		err := syscall.Sendto(fd.Sysfd, p, 0, sa)
+		if err == syscall.EINTR {
+			continue
+		}
+		if err == syscall.EAGAIN && fd.pd.pollable() {
+			if err = fd.pd.waitWrite(fd.isFile); err == nil {
+				continue
+			}
+		}
+		if err != nil {
+			return 0, err
+		}
+		return len(p), nil
+	}
+}
+
+// WriteMsg wraps the sendmsg network call.
+func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, 0, err
+	}
+	defer fd.writeUnlock()
+	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+		return 0, 0, err
+	}
+	for {
+		n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0)
+		if err == syscall.EINTR {
+			continue
+		}
+		if err == syscall.EAGAIN && fd.pd.pollable() {
+			if err = fd.pd.waitWrite(fd.isFile); err == nil {
+				continue
+			}
+		}
+		if err != nil {
+			return n, 0, err
+		}
+		return n, len(oob), err
+	}
+}
+
+// WriteMsgInet4 is WriteMsg specialized for syscall.SockaddrInet4.
+func (fd *FD) WriteMsgInet4(p []byte, oob []byte, sa *syscall.SockaddrInet4) (int, int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, 0, err
+	}
+	defer fd.writeUnlock()
+	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+		return 0, 0, err
+	}
+	for {
+		n, err := unix.SendmsgNInet4(fd.Sysfd, p, oob, sa, 0)
+		if err == syscall.EINTR {
+			continue
+		}
+		if err == syscall.EAGAIN && fd.pd.pollable() {
+			if err = fd.pd.waitWrite(fd.isFile); err == nil {
+				continue
+			}
+		}
+		if err != nil {
+			return n, 0, err
+		}
+		return n, len(oob), err
+	}
+}
+
+// WriteMsgInet6 is WriteMsg specialized for syscall.SockaddrInet6.
+func (fd *FD) WriteMsgInet6(p []byte, oob []byte, sa *syscall.SockaddrInet6) (int, int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, 0, err
+	}
+	defer fd.writeUnlock()
+	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+		return 0, 0, err
+	}
+	for {
+		n, err := unix.SendmsgNInet6(fd.Sysfd, p, oob, sa, 0)
+		if err == syscall.EINTR {
+			continue
+		}
+		if err == syscall.EAGAIN && fd.pd.pollable() {
+			if err = fd.pd.waitWrite(fd.isFile); err == nil {
+				continue
+			}
+		}
+		if err != nil {
+			return n, 0, err
+		}
+		return n, len(oob), err
+	}
+}
+
+// Accept wraps the accept network call.
+func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) {
+	if err := fd.readLock(); err != nil {
+		return -1, nil, "", err
+	}
+	defer fd.readUnlock()
+
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return -1, nil, "", err
+	}
+	for {
+		s, rsa, errcall, err := accept(fd.Sysfd)
+		if err == nil {
+			return s, rsa, "", err
+		}
+		switch err {
+		case syscall.EINTR:
+			continue
+		case syscall.EAGAIN:
+			if fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		case syscall.ECONNABORTED:
+			// This means that a socket on the listen
+			// queue was closed before we Accept()ed it;
+			// it's a silly error, so try again.
+			continue
+		}
+		return -1, nil, errcall, err
+	}
+}
+
+// Fchmod wraps syscall.Fchmod.
+func (fd *FD) Fchmod(mode uint32) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return ignoringEINTR(func() error {
+		return syscall.Fchmod(fd.Sysfd, mode)
+	})
+}
+
+// Fstat wraps syscall.Fstat
+func (fd *FD) Fstat(s *syscall.Stat_t) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return ignoringEINTR(func() error {
+		return syscall.Fstat(fd.Sysfd, s)
+	})
+}
+
+// dupCloexecUnsupported indicates whether F_DUPFD_CLOEXEC is supported by the kernel.
+var dupCloexecUnsupported atomic.Bool
+
+// DupCloseOnExec dups fd and marks it close-on-exec.
+func DupCloseOnExec(fd int) (int, string, error) {
+	if syscall.F_DUPFD_CLOEXEC != 0 && !dupCloexecUnsupported.Load() {
+		r0, err := unix.Fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0)
+		if err == nil {
+			return r0, "", nil
+		}
+		switch err {
+		case syscall.EINVAL, syscall.ENOSYS:
+			// Old kernel, or js/wasm (which returns
+			// ENOSYS). Fall back to the portable way from
+			// now on.
+			dupCloexecUnsupported.Store(true)
+		default:
+			return -1, "fcntl", err
+		}
+	}
+	return dupCloseOnExecOld(fd)
+}
+
+// Dup duplicates the file descriptor.
+func (fd *FD) Dup() (int, string, error) {
+	if err := fd.incref(); err != nil {
+		return -1, "", err
+	}
+	defer fd.decref()
+	return DupCloseOnExec(fd.Sysfd)
+}
+
+// On Unix variants only, expose the IO event for the net code.
+
+// WaitWrite waits until data can be written to fd.
+func (fd *FD) WaitWrite() error {
+	return fd.pd.waitWrite(fd.isFile)
+}
+
+// WriteOnce is for testing only. It makes a single write call.
+func (fd *FD) WriteOnce(p []byte) (int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+	return ignoringEINTRIO(syscall.Write, fd.Sysfd, p)
+}
+
+// RawRead invokes the user-defined function f for a read operation.
+func (fd *FD) RawRead(f func(uintptr) bool) error {
+	if err := fd.readLock(); err != nil {
+		return err
+	}
+	defer fd.readUnlock()
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return err
+	}
+	for {
+		if f(uintptr(fd.Sysfd)) {
+			return nil
+		}
+		if err := fd.pd.waitRead(fd.isFile); err != nil {
+			return err
+		}
+	}
+}
+
+// RawWrite invokes the user-defined function f for a write operation.
+func (fd *FD) RawWrite(f func(uintptr) bool) error {
+	if err := fd.writeLock(); err != nil {
+		return err
+	}
+	defer fd.writeUnlock()
+	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+		return err
+	}
+	for {
+		if f(uintptr(fd.Sysfd)) {
+			return nil
+		}
+		if err := fd.pd.waitWrite(fd.isFile); err != nil {
+			return err
+		}
+	}
+}
+
+// ignoringEINTRIO is like ignoringEINTR, but just for IO calls.
+func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) {
+	for {
+		n, err := fn(fd, p)
+		if err != syscall.EINTR {
+			return n, err
+		}
+	}
+}
--- a/src/internal/poll/fd_unixjs.go
+++ b/src/internal/poll/fd_unixjs.go
@@ -0,0 +1,79 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix || (js && wasm)
+
+package poll
+
+import "syscall"
+
+type SysFile struct {
+	// Writev cache.
+	iovecs *[]syscall.Iovec
+}
+
+func (s *SysFile) init() {}
+
+func (s *SysFile) destroy(fd int) error {
+	// We don't use ignoringEINTR here because POSIX does not define
+	// whether the descriptor is closed if close returns EINTR.
+	// If the descriptor is indeed closed, using a loop would race
+	// with some other goroutine opening a new descriptor.
+	// (The Linux kernel guarantees that it is closed on an EINTR error.)
+	return CloseFunc(fd)
+}
+
+// dupCloseOnExecOld is the traditional way to dup an fd and
+// set its O_CLOEXEC bit, using two system calls.
+func dupCloseOnExecOld(fd int) (int, string, error) {
+	syscall.ForkLock.RLock()
+	defer syscall.ForkLock.RUnlock()
+	newfd, err := syscall.Dup(fd)
+	if err != nil {
+		return -1, "dup", err
+	}
+	syscall.CloseOnExec(newfd)
+	return newfd, "", nil
+}
+
+// Fchdir wraps syscall.Fchdir.
+func (fd *FD) Fchdir() error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.Fchdir(fd.Sysfd)
+}
+
+// ReadDirent wraps syscall.ReadDirent.
+// We treat this like an ordinary system call rather than a call
+// that tries to fill the buffer.
+func (fd *FD) ReadDirent(buf []byte) (int, error) {
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	defer fd.decref()
+	for {
+		n, err := ignoringEINTRIO(syscall.ReadDirent, fd.Sysfd, buf)
+		if err != nil {
+			n = 0
+			if err == syscall.EAGAIN && fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		}
+		// Do not call eofError; caller does not expect to see io.EOF.
+		return n, err
+	}
+}
+
+// Seek wraps syscall.Seek.
+func (fd *FD) Seek(offset int64, whence int) (int64, error) {
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	defer fd.decref()
+	return syscall.Seek(fd.Sysfd, offset, whence)
+}
--- a/src/internal/poll/fd_wasip1.go
+++ b/src/internal/poll/fd_wasip1.go
@@ -0,0 +1,236 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"internal/byteorder"
+	"sync/atomic"
+	"syscall"
+	"unsafe"
+)
+
+type SysFile struct {
+	// RefCountPtr is a pointer to the reference count of Sysfd.
+	//
+	// WASI preview 1 lacks a dup(2) system call. When the os and net packages
+	// need to share a file/socket, instead of duplicating the underlying file
+	// descriptor, we instead provide a way to copy FD instances and manage the
+	// underlying file descriptor with reference counting.
+	RefCountPtr *int32
+
+	// RefCount is the reference count of Sysfd. When a copy of an FD is made,
+	// it points to the reference count of the original FD instance.
+	RefCount int32
+
+	// Cache for the file type, lazily initialized when Seek is called.
+	Filetype uint32
+
+	// If the file represents a directory, this field contains the current
+	// readdir position. It is reset to zero if the program calls Seek(0, 0).
+	Dircookie uint64
+
+	// Absolute path of the file, as returned by syscall.PathOpen;
+	// this is used by Fchdir to emulate setting the current directory
+	// to an open file descriptor.
+	Path string
+
+	// TODO(achille): it could be meaningful to move isFile from FD to a method
+	// on this struct type, and expose it as `IsFile() bool` which derives the
+	// result from the Filetype field. We would need to ensure that Filetype is
+	// always set instead of being lazily initialized.
+}
+
+func (s *SysFile) init() {
+	if s.RefCountPtr == nil {
+		s.RefCount = 1
+		s.RefCountPtr = &s.RefCount
+	}
+}
+
+func (s *SysFile) ref() SysFile {
+	atomic.AddInt32(s.RefCountPtr, +1)
+	return SysFile{RefCountPtr: s.RefCountPtr}
+}
+
+func (s *SysFile) destroy(fd int) error {
+	if s.RefCountPtr != nil && atomic.AddInt32(s.RefCountPtr, -1) > 0 {
+		return nil
+	}
+
+	// We don't use ignoringEINTR here because POSIX does not define
+	// whether the descriptor is closed if close returns EINTR.
+	// If the descriptor is indeed closed, using a loop would race
+	// with some other goroutine opening a new descriptor.
+	// (The Linux kernel guarantees that it is closed on an EINTR error.)
+	return CloseFunc(fd)
+}
+
+// Copy creates a copy of the FD.
+//
+// The FD instance points to the same underlying file descriptor. The file
+// descriptor isn't closed until all FD instances that refer to it have been
+// closed/destroyed.
+func (fd *FD) Copy() FD {
+	return FD{
+		Sysfd:         fd.Sysfd,
+		SysFile:       fd.SysFile.ref(),
+		IsStream:      fd.IsStream,
+		ZeroReadIsEOF: fd.ZeroReadIsEOF,
+		isBlocking:    fd.isBlocking,
+		isFile:        fd.isFile,
+	}
+}
+
+// dupCloseOnExecOld always errors on wasip1 because there is no mechanism to
+// duplicate file descriptors.
+func dupCloseOnExecOld(fd int) (int, string, error) {
+	return -1, "dup", syscall.ENOSYS
+}
+
+// Fchdir wraps syscall.Fchdir.
+func (fd *FD) Fchdir() error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.Chdir(fd.Path)
+}
+
+// ReadDir wraps syscall.ReadDir.
+// We treat this like an ordinary system call rather than a call
+// that tries to fill the buffer.
+func (fd *FD) ReadDir(buf []byte, cookie syscall.Dircookie) (int, error) {
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	defer fd.decref()
+	for {
+		n, err := syscall.ReadDir(fd.Sysfd, buf, cookie)
+		if err != nil {
+			n = 0
+			if err == syscall.EAGAIN && fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		}
+		// Do not call eofError; caller does not expect to see io.EOF.
+		return n, err
+	}
+}
+
+func (fd *FD) ReadDirent(buf []byte) (int, error) {
+	n, err := fd.ReadDir(buf, fd.Dircookie)
+	if err != nil {
+		return 0, err
+	}
+	if n <= 0 {
+		return n, nil // EOF
+	}
+
+	// We assume that the caller of ReadDirent will consume the entire buffer
+	// up to the last full entry, so we scan through the buffer looking for the
+	// value of the last next cookie.
+	b := buf[:n]
+
+	for len(b) > 0 {
+		next, ok := direntNext(b)
+		if !ok {
+			break
+		}
+		size, ok := direntReclen(b)
+		if !ok {
+			break
+		}
+		if size > uint64(len(b)) {
+			break
+		}
+		fd.Dircookie = syscall.Dircookie(next)
+		b = b[size:]
+	}
+
+	// Trim a potentially incomplete trailing entry; this is necessary because
+	// the code in src/os/dir_unix.go does not deal well with partial values in
+	// calls to direntReclen, etc... and ends up causing an early EOF before all
+	// directory entries were consumed. ReadDirent is called with a large enough
+	// buffer (8 KiB) that at least one entry should always fit, tho this seems
+	// a bit brittle but cannot be addressed without a large change of the
+	// algorithm in the os.(*File).readdir method.
+	return n - len(b), nil
+}
+
+// Seek wraps syscall.Seek.
+func (fd *FD) Seek(offset int64, whence int) (int64, error) {
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	defer fd.decref()
+	// syscall.Filetype is a uint8 but we store it as a uint32 in SysFile in
+	// order to use atomic load/store on the field, which is why we have to
+	// perform this type conversion.
+	fileType := syscall.Filetype(atomic.LoadUint32(&fd.Filetype))
+
+	if fileType == syscall.FILETYPE_UNKNOWN {
+		var stat syscall.Stat_t
+		if err := fd.Fstat(&stat); err != nil {
+			return 0, err
+		}
+		fileType = stat.Filetype
+		atomic.StoreUint32(&fd.Filetype, uint32(fileType))
+	}
+
+	if fileType == syscall.FILETYPE_DIRECTORY {
+		// If the file descriptor is opened on a directory, we reset the readdir
+		// cookie when seeking back to the beginning to allow reusing the file
+		// descriptor to scan the directory again.
+		if offset == 0 && whence == 0 {
+			fd.Dircookie = 0
+			return 0, nil
+		} else {
+			return 0, syscall.EINVAL
+		}
+	}
+
+	return syscall.Seek(fd.Sysfd, offset, whence)
+}
+
+// https://github.com/WebAssembly/WASI/blob/main/legacy/preview1/docs.md#-dirent-record
+const sizeOfDirent = 24
+
+func direntReclen(buf []byte) (uint64, bool) {
+	namelen, ok := direntNamlen(buf)
+	return sizeOfDirent + namelen, ok
+}
+
+func direntNamlen(buf []byte) (uint64, bool) {
+	return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Namlen), unsafe.Sizeof(syscall.Dirent{}.Namlen))
+}
+
+func direntNext(buf []byte) (uint64, bool) {
+	return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Next), unsafe.Sizeof(syscall.Dirent{}.Next))
+}
+
+// readInt returns the size-bytes unsigned integer in native byte order at offset off.
+func readInt(b []byte, off, size uintptr) (u uint64, ok bool) {
+	if len(b) < int(off+size) {
+		return 0, false
+	}
+	return readIntLE(b[off:], size), true
+}
+
+func readIntLE(b []byte, size uintptr) uint64 {
+	switch size {
+	case 1:
+		return uint64(b[0])
+	case 2:
+		return uint64(byteorder.LeUint16(b))
+	case 4:
+		return uint64(byteorder.LeUint32(b))
+	case 8:
+		return uint64(byteorder.LeUint64(b))
+	default:
+		panic("internal/poll: readInt with unsupported size")
+	}
+}
--- a/src/internal/poll/fd_windows.go
+++ b/src/internal/poll/fd_windows.go
--- a/src/internal/poll/fd_windows_test.go
+++ b/src/internal/poll/fd_windows_test.go
@@ -0,0 +1,187 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	"errors"
+	"fmt"
+	"internal/poll"
+	"internal/syscall/windows"
+	"os"
+	"sync"
+	"syscall"
+	"testing"
+	"unsafe"
+)
+
+type loggedFD struct {
+	Net string
+	FD  *poll.FD
+	Err error
+}
+
+var (
+	logMu     sync.Mutex
+	loggedFDs map[syscall.Handle]*loggedFD
+)
+
+func logFD(net string, fd *poll.FD, err error) {
+	logMu.Lock()
+	defer logMu.Unlock()
+
+	loggedFDs[fd.Sysfd] = &loggedFD{
+		Net: net,
+		FD:  fd,
+		Err: err,
+	}
+}
+
+func init() {
+	loggedFDs = make(map[syscall.Handle]*loggedFD)
+	*poll.LogInitFD = logFD
+
+	poll.InitWSA()
+}
+
+func findLoggedFD(h syscall.Handle) (lfd *loggedFD, found bool) {
+	logMu.Lock()
+	defer logMu.Unlock()
+
+	lfd, found = loggedFDs[h]
+	return lfd, found
+}
+
+// checkFileIsNotPartOfNetpoll verifies that f is not managed by netpoll.
+// It returns error, if check fails.
+func checkFileIsNotPartOfNetpoll(f *os.File) error {
+	lfd, found := findLoggedFD(syscall.Handle(f.Fd()))
+	if !found {
+		return fmt.Errorf("%v fd=%v: is not found in the log", f.Name(), f.Fd())
+	}
+	if lfd.FD.IsPartOfNetpoll() {
+		return fmt.Errorf("%v fd=%v: is part of netpoll, but should not be (logged: net=%v err=%v)", f.Name(), f.Fd(), lfd.Net, lfd.Err)
+	}
+	return nil
+}
+
+func TestFileFdsAreInitialised(t *testing.T) {
+	exe, err := os.Executable()
+	if err != nil {
+		t.Fatal(err)
+	}
+	f, err := os.Open(exe)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+
+	err = checkFileIsNotPartOfNetpoll(f)
+	if err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestSerialFdsAreInitialised(t *testing.T) {
+	for _, name := range []string{"COM1", "COM2", "COM3", "COM4"} {
+		t.Run(name, func(t *testing.T) {
+			h, err := syscall.CreateFile(syscall.StringToUTF16Ptr(name),
+				syscall.GENERIC_READ|syscall.GENERIC_WRITE,
+				0,
+				nil,
+				syscall.OPEN_EXISTING,
+				syscall.FILE_ATTRIBUTE_NORMAL|syscall.FILE_FLAG_OVERLAPPED,
+				0)
+			if err != nil {
+				if errno, ok := err.(syscall.Errno); ok {
+					switch errno {
+					case syscall.ERROR_FILE_NOT_FOUND,
+						syscall.ERROR_ACCESS_DENIED:
+						t.Log("Skipping: ", err)
+						return
+					}
+				}
+				t.Fatal(err)
+			}
+			f := os.NewFile(uintptr(h), name)
+			defer f.Close()
+
+			err = checkFileIsNotPartOfNetpoll(f)
+			if err != nil {
+				t.Fatal(err)
+			}
+		})
+	}
+}
+
+func TestWSASocketConflict(t *testing.T) {
+	s, err := windows.WSASocket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_TCP, nil, 0, windows.WSA_FLAG_OVERLAPPED)
+	if err != nil {
+		t.Fatal(err)
+	}
+	fd := poll.FD{Sysfd: s, IsStream: true, ZeroReadIsEOF: true}
+	_, err = fd.Init("tcp", true)
+	if err != nil {
+		syscall.CloseHandle(s)
+		t.Fatal(err)
+	}
+	defer fd.Close()
+
+	const SIO_TCP_INFO = syscall.IOC_INOUT | syscall.IOC_VENDOR | 39
+	inbuf := uint32(0)
+	var outbuf _TCP_INFO_v0
+	cbbr := uint32(0)
+
+	var ov syscall.Overlapped
+	// Create an event so that we can efficiently wait for completion
+	// of a requested overlapped I/O operation.
+	ov.HEvent, _ = windows.CreateEvent(nil, 0, 0, nil)
+	if ov.HEvent == 0 {
+		t.Fatalf("could not create the event!")
+	}
+	defer syscall.CloseHandle(ov.HEvent)
+
+	if err = fd.WSAIoctl(
+		SIO_TCP_INFO,
+		(*byte)(unsafe.Pointer(&inbuf)),
+		uint32(unsafe.Sizeof(inbuf)),
+		(*byte)(unsafe.Pointer(&outbuf)),
+		uint32(unsafe.Sizeof(outbuf)),
+		&cbbr,
+		&ov,
+		0,
+	); err != nil && !errors.Is(err, syscall.ERROR_IO_PENDING) {
+		t.Fatalf("could not perform the WSAIoctl: %v", err)
+	}
+
+	if err != nil && errors.Is(err, syscall.ERROR_IO_PENDING) {
+		// It is possible that the overlapped I/O operation completed
+		// immediately so there is no need to wait for it to complete.
+		if res, err := syscall.WaitForSingleObject(ov.HEvent, syscall.INFINITE); res != 0 {
+			t.Fatalf("waiting for the completion of the overlapped IO failed: %v", err)
+		}
+	}
+}
+
+type _TCP_INFO_v0 struct {
+	State             uint32
+	Mss               uint32
+	ConnectionTimeMs  uint64
+	TimestampsEnabled bool
+	RttUs             uint32
+	MinRttUs          uint32
+	BytesInFlight     uint32
+	Cwnd              uint32
+	SndWnd            uint32
+	RcvWnd            uint32
+	RcvBuf            uint32
+	BytesOut          uint64
+	BytesIn           uint64
+	BytesReordered    uint32
+	BytesRetrans      uint32
+	FastRetrans       uint32
+	DupAcksIn         uint32
+	TimeoutEpisodes   uint32
+	SynRetrans        uint8
+}
--- a/src/internal/poll/fd_writev_libc.go
+++ b/src/internal/poll/fd_writev_libc.go
@@ -0,0 +1,15 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || (openbsd && !mips64) || solaris
+
+package poll
+
+import (
+	"syscall"
+	_ "unsafe" // for go:linkname
+)
+
+//go:linkname writev syscall.writev
+func writev(fd int, iovecs []syscall.Iovec) (uintptr, error)
--- a/src/internal/poll/fd_writev_unix.go
+++ b/src/internal/poll/fd_writev_unix.go
@@ -0,0 +1,29 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build dragonfly || freebsd || linux || netbsd || (openbsd && mips64)
+
+package poll
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+func writev(fd int, iovecs []syscall.Iovec) (uintptr, error) {
+	var (
+		r uintptr
+		e syscall.Errno
+	)
+	for {
+		r, _, e = syscall.Syscall(syscall.SYS_WRITEV, uintptr(fd), uintptr(unsafe.Pointer(&iovecs[0])), uintptr(len(iovecs)))
+		if e != syscall.EINTR {
+			break
+		}
+	}
+	if e != 0 {
+		return r, e
+	}
+	return r, nil
+}
--- a/src/internal/poll/file_plan9.go
+++ b/src/internal/poll/file_plan9.go
@@ -0,0 +1,42 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+// Expose fdMutex for use by the os package on Plan 9.
+// On Plan 9 we don't want to use async I/O for file operations,
+// but we still want the locking semantics that fdMutex provides.
+
+// FDMutex is an exported fdMutex, only for Plan 9.
+type FDMutex struct {
+	fdmu fdMutex
+}
+
+func (fdmu *FDMutex) Incref() bool {
+	return fdmu.fdmu.incref()
+}
+
+func (fdmu *FDMutex) Decref() bool {
+	return fdmu.fdmu.decref()
+}
+
+func (fdmu *FDMutex) IncrefAndClose() bool {
+	return fdmu.fdmu.increfAndClose()
+}
+
+func (fdmu *FDMutex) ReadLock() bool {
+	return fdmu.fdmu.rwlock(true)
+}
+
+func (fdmu *FDMutex) ReadUnlock() bool {
+	return fdmu.fdmu.rwunlock(true)
+}
+
+func (fdmu *FDMutex) WriteLock() bool {
+	return fdmu.fdmu.rwlock(false)
+}
+
+func (fdmu *FDMutex) WriteUnlock() bool {
+	return fdmu.fdmu.rwunlock(false)
+}
--- a/src/internal/poll/hook_cloexec.go
+++ b/src/internal/poll/hook_cloexec.go
@@ -0,0 +1,12 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build dragonfly || freebsd || linux || netbsd || openbsd || solaris
+
+package poll
+
+import "syscall"
+
+// Accept4Func is used to hook the accept4 call.
+var Accept4Func func(int, int) (int, syscall.Sockaddr, error) = syscall.Accept4
--- a/src/internal/poll/hook_unix.go
+++ b/src/internal/poll/hook_unix.go
@@ -0,0 +1,15 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix || (js && wasm) || wasip1
+
+package poll
+
+import "syscall"
+
+// CloseFunc is used to hook the close call.
+var CloseFunc func(int) error = syscall.Close
+
+// AcceptFunc is used to hook the accept call.
+var AcceptFunc func(int) (int, syscall.Sockaddr, error) = syscall.Accept
--- a/src/internal/poll/hook_windows.go
+++ b/src/internal/poll/hook_windows.go
@@ -0,0 +1,16 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "syscall"
+
+// CloseFunc is used to hook the close call.
+var CloseFunc func(syscall.Handle) error = syscall.Closesocket
+
+// AcceptFunc is used to hook the accept call.
+var AcceptFunc func(syscall.Handle, syscall.Handle, *byte, uint32, uint32, uint32, *uint32, *syscall.Overlapped) error = syscall.AcceptEx
+
+// ConnectExFunc is used to hook the ConnectEx call.
+var ConnectExFunc func(syscall.Handle, syscall.Sockaddr, *byte, uint32, *uint32, *syscall.Overlapped) error = syscall.ConnectEx
--- a/src/internal/poll/iovec_solaris.go
+++ b/src/internal/poll/iovec_solaris.go
@@ -0,0 +1,14 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+func newIovecWithBase(base *byte) syscall.Iovec {
+	return syscall.Iovec{Base: (*int8)(unsafe.Pointer(base))}
+}
--- a/src/internal/poll/iovec_unix.go
+++ b/src/internal/poll/iovec_unix.go
@@ -0,0 +1,13 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd
+
+package poll
+
+import "syscall"
+
+func newIovecWithBase(base *byte) syscall.Iovec {
+	return syscall.Iovec{Base: base}
+}
--- a/src/internal/poll/read_test.go
+++ b/src/internal/poll/read_test.go
@@ -0,0 +1,61 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	"os"
+	"runtime"
+	"sync"
+	"testing"
+	"time"
+)
+
+func TestRead(t *testing.T) {
+	t.Run("SpecialFile", func(t *testing.T) {
+		var wg sync.WaitGroup
+		for _, p := range specialFiles() {
+			for i := 0; i < 4; i++ {
+				wg.Add(1)
+				go func(p string) {
+					defer wg.Done()
+					for i := 0; i < 100; i++ {
+						if _, err := os.ReadFile(p); err != nil {
+							t.Error(err)
+							return
+						}
+						time.Sleep(time.Nanosecond)
+					}
+				}(p)
+			}
+		}
+		wg.Wait()
+	})
+}
+
+func specialFiles() []string {
+	var ps []string
+	switch runtime.GOOS {
+	case "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd":
+		ps = []string{
+			"/dev/null",
+		}
+	case "linux":
+		ps = []string{
+			"/dev/null",
+			"/proc/stat",
+			"/sys/devices/system/cpu/online",
+		}
+	}
+	nps := ps[:0]
+	for _, p := range ps {
+		f, err := os.Open(p)
+		if err != nil {
+			continue
+		}
+		f.Close()
+		nps = append(nps, p)
+	}
+	return nps
+}
--- a/src/internal/poll/sendfile.go
+++ b/src/internal/poll/sendfile.go
@@ -0,0 +1,7 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+var TestHookDidSendFile = func(dstFD *FD, src int, written int64, err error, handled bool) {}
--- a/src/internal/poll/sendfile_bsd.go
+++ b/src/internal/poll/sendfile_bsd.go
@@ -0,0 +1,59 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build darwin || dragonfly || freebsd
+
+package poll
+
+import "syscall"
+
+// maxSendfileSize is the largest chunk size we ask the kernel to copy
+// at a time.
+const maxSendfileSize int = 4 << 20
+
+// SendFile wraps the sendfile system call.
+func SendFile(dstFD *FD, src int, pos, remain int64) (written int64, err error, handled bool) {
+	defer func() {
+		TestHookDidSendFile(dstFD, src, written, err, handled)
+	}()
+	if err := dstFD.writeLock(); err != nil {
+		return 0, err, false
+	}
+	defer dstFD.writeUnlock()
+
+	if err := dstFD.pd.prepareWrite(dstFD.isFile); err != nil {
+		return 0, err, false
+	}
+
+	dst := dstFD.Sysfd
+	for remain > 0 {
+		n := maxSendfileSize
+		if int64(n) > remain {
+			n = int(remain)
+		}
+		pos1 := pos
+		n, err = syscall.Sendfile(dst, src, &pos1, n)
+		if n > 0 {
+			pos += int64(n)
+			written += int64(n)
+			remain -= int64(n)
+		}
+		if err == syscall.EINTR {
+			continue
+		}
+		// This includes syscall.ENOSYS (no kernel
+		// support) and syscall.EINVAL (fd types which
+		// don't implement sendfile), and other errors.
+		// We should end the loop when there is no error
+		// returned from sendfile(2) or it is not a retryable error.
+		if err != syscall.EAGAIN {
+			break
+		}
+		if err = dstFD.pd.waitWrite(dstFD.isFile); err != nil {
+			break
+		}
+	}
+	handled = written != 0 || (err != syscall.ENOSYS && err != syscall.EINVAL)
+	return
+}
--- a/src/internal/poll/sendfile_linux.go
+++ b/src/internal/poll/sendfile_linux.go
@@ -0,0 +1,55 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "syscall"
+
+// maxSendfileSize is the largest chunk size we ask the kernel to copy
+// at a time.
+const maxSendfileSize int = 4 << 20
+
+// SendFile wraps the sendfile system call.
+func SendFile(dstFD *FD, src int, remain int64) (written int64, err error, handled bool) {
+	defer func() {
+		TestHookDidSendFile(dstFD, src, written, err, handled)
+	}()
+	if err := dstFD.writeLock(); err != nil {
+		return 0, err, false
+	}
+	defer dstFD.writeUnlock()
+
+	if err := dstFD.pd.prepareWrite(dstFD.isFile); err != nil {
+		return 0, err, false
+	}
+
+	dst := dstFD.Sysfd
+	for remain > 0 {
+		n := maxSendfileSize
+		if int64(n) > remain {
+			n = int(remain)
+		}
+		n, err = syscall.Sendfile(dst, src, nil, n)
+		if n > 0 {
+			written += int64(n)
+			remain -= int64(n)
+			continue
+		} else if err != syscall.EAGAIN && err != syscall.EINTR {
+			// This includes syscall.ENOSYS (no kernel
+			// support) and syscall.EINVAL (fd types which
+			// don't implement sendfile), and other errors.
+			// We should end the loop when there is no error
+			// returned from sendfile(2) or it is not a retryable error.
+			break
+		}
+		if err == syscall.EINTR {
+			continue
+		}
+		if err = dstFD.pd.waitWrite(dstFD.isFile); err != nil {
+			break
+		}
+	}
+	handled = written != 0 || (err != syscall.ENOSYS && err != syscall.EINVAL)
+	return
+}
--- a/src/internal/poll/sendfile_solaris.go
+++ b/src/internal/poll/sendfile_solaris.go
@@ -0,0 +1,66 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "syscall"
+
+// Not strictly needed, but very helpful for debugging, see issue #10221.
+//
+//go:cgo_import_dynamic _ _ "libsendfile.so"
+//go:cgo_import_dynamic _ _ "libsocket.so"
+
+// maxSendfileSize is the largest chunk size we ask the kernel to copy
+// at a time.
+const maxSendfileSize int = 4 << 20
+
+// SendFile wraps the sendfile system call.
+func SendFile(dstFD *FD, src int, pos, remain int64) (written int64, err error, handled bool) {
+	defer func() {
+		TestHookDidSendFile(dstFD, src, written, err, handled)
+	}()
+	if err := dstFD.writeLock(); err != nil {
+		return 0, err, false
+	}
+	defer dstFD.writeUnlock()
+
+	if err := dstFD.pd.prepareWrite(dstFD.isFile); err != nil {
+		return 0, err, false
+	}
+
+	dst := dstFD.Sysfd
+	for remain > 0 {
+		n := maxSendfileSize
+		if int64(n) > remain {
+			n = int(remain)
+		}
+		pos1 := pos
+		n, err = syscall.Sendfile(dst, src, &pos1, n)
+		if err == syscall.EAGAIN || err == syscall.EINTR {
+			// partial write may have occurred
+			n = int(pos1 - pos)
+		}
+		if n > 0 {
+			pos += int64(n)
+			written += int64(n)
+			remain -= int64(n)
+			continue
+		} else if err != syscall.EAGAIN && err != syscall.EINTR {
+			// This includes syscall.ENOSYS (no kernel
+			// support) and syscall.EINVAL (fd types which
+			// don't implement sendfile), and other errors.
+			// We should end the loop when there is no error
+			// returned from sendfile(2) or it is not a retryable error.
+			break
+		}
+		if err == syscall.EINTR {
+			continue
+		}
+		if err = dstFD.pd.waitWrite(dstFD.isFile); err != nil {
+			break
+		}
+	}
+	handled = written != 0 || (err != syscall.ENOSYS && err != syscall.EINVAL)
+	return
+}
--- a/src/internal/poll/sendfile_windows.go
+++ b/src/internal/poll/sendfile_windows.go
@@ -0,0 +1,87 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"io"
+	"syscall"
+)
+
+// SendFile wraps the TransmitFile call.
+func SendFile(fd *FD, src syscall.Handle, n int64) (written int64, err error) {
+	defer func() {
+		TestHookDidSendFile(fd, 0, written, err, written > 0)
+	}()
+	if fd.kind == kindPipe {
+		// TransmitFile does not work with pipes
+		return 0, syscall.ESPIPE
+	}
+	if ft, _ := syscall.GetFileType(src); ft == syscall.FILE_TYPE_PIPE {
+		return 0, syscall.ESPIPE
+	}
+
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+
+	o := &fd.wop
+	o.handle = src
+
+	// TODO(brainman): skip calling syscall.Seek if OS allows it
+	curpos, err := syscall.Seek(o.handle, 0, io.SeekCurrent)
+	if err != nil {
+		return 0, err
+	}
+
+	if n <= 0 { // We don't know the size of the file so infer it.
+		// Find the number of bytes offset from curpos until the end of the file.
+		n, err = syscall.Seek(o.handle, -curpos, io.SeekEnd)
+		if err != nil {
+			return
+		}
+		// Now seek back to the original position.
+		if _, err = syscall.Seek(o.handle, curpos, io.SeekStart); err != nil {
+			return
+		}
+	}
+
+	// TransmitFile can be invoked in one call with at most
+	// 2,147,483,646 bytes: the maximum value for a 32-bit integer minus 1.
+	// See https://docs.microsoft.com/en-us/windows/win32/api/mswsock/nf-mswsock-transmitfile
+	const maxChunkSizePerCall = int64(0x7fffffff - 1)
+
+	for n > 0 {
+		chunkSize := maxChunkSizePerCall
+		if chunkSize > n {
+			chunkSize = n
+		}
+
+		o.qty = uint32(chunkSize)
+		o.o.Offset = uint32(curpos)
+		o.o.OffsetHigh = uint32(curpos >> 32)
+
+		nw, err := execIO(o, func(o *operation) error {
+			return syscall.TransmitFile(o.fd.Sysfd, o.handle, o.qty, 0, &o.o, nil, syscall.TF_WRITE_BEHIND)
+		})
+		if err != nil {
+			return written, err
+		}
+
+		curpos += int64(nw)
+
+		// Some versions of Windows (Windows 10 1803) do not set
+		// file position after TransmitFile completes.
+		// So just use Seek to set file position.
+		if _, err = syscall.Seek(o.handle, curpos, io.SeekStart); err != nil {
+			return written, err
+		}
+
+		n -= int64(nw)
+		written += int64(nw)
+	}
+
+	return
+}
--- a/src/internal/poll/sock_cloexec.go
+++ b/src/internal/poll/sock_cloexec.go
@@ -0,0 +1,22 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements accept for platforms that provide a fast path for
+// setting SetNonblock and CloseOnExec.
+
+//go:build dragonfly || freebsd || (linux && !arm) || netbsd || openbsd
+
+package poll
+
+import "syscall"
+
+// Wrapper around the accept system call that marks the returned file
+// descriptor as nonblocking and close-on-exec.
+func accept(s int) (int, syscall.Sockaddr, string, error) {
+	ns, sa, err := Accept4Func(s, syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC)
+	if err != nil {
+		return -1, nil, "accept4", err
+	}
+	return ns, sa, "", nil
+}
--- a/src/internal/poll/sock_cloexec_accept.go
+++ b/src/internal/poll/sock_cloexec_accept.go
@@ -0,0 +1,51 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements accept for platforms that provide a fast path for
+// setting SetNonblock and CloseOnExec, but don't necessarily have accept4.
+// This is the code we used for accept in Go 1.17 and earlier.
+// On Linux the accept4 system call was introduced in 2.6.28 kernel,
+// and our minimum requirement is 2.6.32, so we simplified the function.
+// Unfortunately, on ARM accept4 wasn't added until 2.6.36, so for ARM
+// only we continue using the older code.
+
+//go:build linux && arm
+
+package poll
+
+import "syscall"
+
+// Wrapper around the accept system call that marks the returned file
+// descriptor as nonblocking and close-on-exec.
+func accept(s int) (int, syscall.Sockaddr, string, error) {
+	ns, sa, err := Accept4Func(s, syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC)
+	switch err {
+	case nil:
+		return ns, sa, "", nil
+	default: // errors other than the ones listed
+		return -1, sa, "accept4", err
+	case syscall.ENOSYS: // syscall missing
+	case syscall.EINVAL: // some Linux use this instead of ENOSYS
+	case syscall.EACCES: // some Linux use this instead of ENOSYS
+	case syscall.EFAULT: // some Linux use this instead of ENOSYS
+	}
+
+	// See ../syscall/exec_unix.go for description of ForkLock.
+	// It is probably okay to hold the lock across syscall.Accept
+	// because we have put fd.sysfd into non-blocking mode.
+	// However, a call to the File method will put it back into
+	// blocking mode. We can't take that risk, so no use of ForkLock here.
+	ns, sa, err = AcceptFunc(s)
+	if err == nil {
+		syscall.CloseOnExec(ns)
+	}
+	if err != nil {
+		return -1, nil, "accept", err
+	}
+	if err = syscall.SetNonblock(ns, true); err != nil {
+		CloseFunc(ns)
+		return -1, nil, "setnonblock", err
+	}
+	return ns, sa, "", nil
+}
--- a/src/internal/poll/sock_cloexec_solaris.go
+++ b/src/internal/poll/sock_cloexec_solaris.go
@@ -0,0 +1,47 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements accept for platforms that provide a fast path for
+// setting SetNonblock and CloseOnExec, but don't necessarily have accept4.
+// The accept4(3c) function was added to Oracle Solaris in the Solaris 11.4.0
+// release. Thus, on releases prior to 11.4, we fall back to the combination
+// of accept(3c) and fcntl(2).
+
+package poll
+
+import (
+	"internal/syscall/unix"
+	"syscall"
+)
+
+// Wrapper around the accept system call that marks the returned file
+// descriptor as nonblocking and close-on-exec.
+func accept(s int) (int, syscall.Sockaddr, string, error) {
+	// Perform a cheap test and try the fast path first.
+	if unix.SupportAccept4() {
+		ns, sa, err := Accept4Func(s, syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC)
+		if err != nil {
+			return -1, nil, "accept4", err
+		}
+		return ns, sa, "", nil
+	}
+
+	// See ../syscall/exec_unix.go for description of ForkLock.
+	// It is probably okay to hold the lock across syscall.Accept
+	// because we have put fd.sysfd into non-blocking mode.
+	// However, a call to the File method will put it back into
+	// blocking mode. We can't take that risk, so no use of ForkLock here.
+	ns, sa, err := AcceptFunc(s)
+	if err == nil {
+		syscall.CloseOnExec(ns)
+	}
+	if err != nil {
+		return -1, nil, "accept", err
+	}
+	if err = syscall.SetNonblock(ns, true); err != nil {
+		CloseFunc(ns)
+		return -1, nil, "setnonblock", err
+	}
+	return ns, sa, "", nil
+}
--- a/src/internal/poll/sockopt.go
+++ b/src/internal/poll/sockopt.go
@@ -0,0 +1,45 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix || windows
+
+package poll
+
+import "syscall"
+
+// SetsockoptInt wraps the setsockopt network call with an int argument.
+func (fd *FD) SetsockoptInt(level, name, arg int) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptInt(fd.Sysfd, level, name, arg)
+}
+
+// SetsockoptInet4Addr wraps the setsockopt network call with an IPv4 address.
+func (fd *FD) SetsockoptInet4Addr(level, name int, arg [4]byte) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptInet4Addr(fd.Sysfd, level, name, arg)
+}
+
+// SetsockoptLinger wraps the setsockopt network call with a Linger argument.
+func (fd *FD) SetsockoptLinger(level, name int, l *syscall.Linger) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptLinger(fd.Sysfd, level, name, l)
+}
+
+// GetsockoptInt wraps the getsockopt network call with an int argument.
+func (fd *FD) GetsockoptInt(level, name int) (int, error) {
+	if err := fd.incref(); err != nil {
+		return -1, err
+	}
+	defer fd.decref()
+	return syscall.GetsockoptInt(fd.Sysfd, level, name)
+}
--- a/src/internal/poll/sockopt_linux.go
+++ b/src/internal/poll/sockopt_linux.go
@@ -0,0 +1,16 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "syscall"
+
+// SetsockoptIPMreqn wraps the setsockopt network call with an IPMreqn argument.
+func (fd *FD) SetsockoptIPMreqn(level, name int, mreq *syscall.IPMreqn) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptIPMreqn(fd.Sysfd, level, name, mreq)
+}
--- a/src/internal/poll/sockopt_unix.go
+++ b/src/internal/poll/sockopt_unix.go
@@ -0,0 +1,18 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix
+
+package poll
+
+import "syscall"
+
+// SetsockoptByte wraps the setsockopt network call with a byte argument.
+func (fd *FD) SetsockoptByte(level, name int, arg byte) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptByte(fd.Sysfd, level, name, arg)
+}
--- a/src/internal/poll/sockopt_windows.go
+++ b/src/internal/poll/sockopt_windows.go
@@ -0,0 +1,16 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "syscall"
+
+// WSAIoctl wraps the WSAIoctl network call.
+func (fd *FD) WSAIoctl(iocc uint32, inbuf *byte, cbif uint32, outbuf *byte, cbob uint32, cbbr *uint32, overlapped *syscall.Overlapped, completionRoutine uintptr) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.WSAIoctl(fd.Sysfd, iocc, inbuf, cbif, outbuf, cbob, cbbr, overlapped, completionRoutine)
+}
--- a/src/internal/poll/sockoptip.go
+++ b/src/internal/poll/sockoptip.go
@@ -0,0 +1,27 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix || windows
+
+package poll
+
+import "syscall"
+
+// SetsockoptIPMreq wraps the setsockopt network call with an IPMreq argument.
+func (fd *FD) SetsockoptIPMreq(level, name int, mreq *syscall.IPMreq) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptIPMreq(fd.Sysfd, level, name, mreq)
+}
+
+// SetsockoptIPv6Mreq wraps the setsockopt network call with an IPv6Mreq argument.
+func (fd *FD) SetsockoptIPv6Mreq(level, name int, mreq *syscall.IPv6Mreq) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptIPv6Mreq(fd.Sysfd, level, name, mreq)
+}
--- a/src/internal/poll/splice_linux.go
+++ b/src/internal/poll/splice_linux.go
@@ -0,0 +1,245 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"internal/syscall/unix"
+	"runtime"
+	"sync"
+	"syscall"
+	"unsafe"
+)
+
+const (
+	// spliceNonblock doesn't make the splice itself necessarily nonblocking
+	// (because the actual file descriptors that are spliced from/to may block
+	// unless they have the O_NONBLOCK flag set), but it makes the splice pipe
+	// operations nonblocking.
+	spliceNonblock = 0x2
+
+	// maxSpliceSize is the maximum amount of data Splice asks
+	// the kernel to move in a single call to splice(2).
+	// We use 1MB as Splice writes data through a pipe, and 1MB is the default maximum pipe buffer size,
+	// which is determined by /proc/sys/fs/pipe-max-size.
+	maxSpliceSize = 1 << 20
+)
+
+// Splice transfers at most remain bytes of data from src to dst, using the
+// splice system call to minimize copies of data from and to userspace.
+//
+// Splice gets a pipe buffer from the pool or creates a new one if needed, to serve as a buffer for the data transfer.
+// src and dst must both be stream-oriented sockets.
+func Splice(dst, src *FD, remain int64) (written int64, handled bool, err error) {
+	p, err := getPipe()
+	if err != nil {
+		return 0, false, err
+	}
+	defer putPipe(p)
+	var inPipe, n int
+	for err == nil && remain > 0 {
+		max := maxSpliceSize
+		if int64(max) > remain {
+			max = int(remain)
+		}
+		inPipe, err = spliceDrain(p.wfd, src, max)
+		// The operation is considered handled if splice returns no
+		// error, or an error other than EINVAL. An EINVAL means the
+		// kernel does not support splice for the socket type of src.
+		// The failed syscall does not consume any data so it is safe
+		// to fall back to a generic copy.
+		//
+		// spliceDrain should never return EAGAIN, so if err != nil,
+		// Splice cannot continue.
+		//
+		// If inPipe == 0 && err == nil, src is at EOF, and the
+		// transfer is complete.
+		handled = handled || (err != syscall.EINVAL)
+		if err != nil || inPipe == 0 {
+			break
+		}
+		p.data += inPipe
+
+		n, err = splicePump(dst, p.rfd, inPipe)
+		if n > 0 {
+			written += int64(n)
+			remain -= int64(n)
+			p.data -= n
+		}
+	}
+	if err != nil {
+		return written, handled, err
+	}
+	return written, true, nil
+}
+
+// spliceDrain moves data from a socket to a pipe.
+//
+// Invariant: when entering spliceDrain, the pipe is empty. It is either in its
+// initial state, or splicePump has emptied it previously.
+//
+// Given this, spliceDrain can reasonably assume that the pipe is ready for
+// writing, so if splice returns EAGAIN, it must be because the socket is not
+// ready for reading.
+//
+// If spliceDrain returns (0, nil), src is at EOF.
+func spliceDrain(pipefd int, sock *FD, max int) (int, error) {
+	if err := sock.readLock(); err != nil {
+		return 0, err
+	}
+	defer sock.readUnlock()
+	if err := sock.pd.prepareRead(sock.isFile); err != nil {
+		return 0, err
+	}
+	for {
+		// In theory calling splice(2) with SPLICE_F_NONBLOCK could end up an infinite loop here,
+		// because it could return EAGAIN ceaselessly when the write end of the pipe is full,
+		// but this shouldn't be a concern here, since the pipe buffer must be sufficient for
+		// this data transmission on the basis of the workflow in Splice.
+		n, err := splice(pipefd, sock.Sysfd, max, spliceNonblock)
+		if err == syscall.EINTR {
+			continue
+		}
+		if err != syscall.EAGAIN {
+			return n, err
+		}
+		if sock.pd.pollable() {
+			if err := sock.pd.waitRead(sock.isFile); err != nil {
+				return n, err
+			}
+		}
+	}
+}
+
+// splicePump moves all the buffered data from a pipe to a socket.
+//
+// Invariant: when entering splicePump, there are exactly inPipe
+// bytes of data in the pipe, from a previous call to spliceDrain.
+//
+// By analogy to the condition from spliceDrain, splicePump
+// only needs to poll the socket for readiness, if splice returns
+// EAGAIN.
+//
+// If splicePump cannot move all the data in a single call to
+// splice(2), it loops over the buffered data until it has written
+// all of it to the socket. This behavior is similar to the Write
+// step of an io.Copy in userspace.
+func splicePump(sock *FD, pipefd int, inPipe int) (int, error) {
+	if err := sock.writeLock(); err != nil {
+		return 0, err
+	}
+	defer sock.writeUnlock()
+	if err := sock.pd.prepareWrite(sock.isFile); err != nil {
+		return 0, err
+	}
+	written := 0
+	for inPipe > 0 {
+		// In theory calling splice(2) with SPLICE_F_NONBLOCK could end up an infinite loop here,
+		// because it could return EAGAIN ceaselessly when the read end of the pipe is empty,
+		// but this shouldn't be a concern here, since the pipe buffer must contain inPipe size of
+		// data on the basis of the workflow in Splice.
+		n, err := splice(sock.Sysfd, pipefd, inPipe, spliceNonblock)
+		if err == syscall.EINTR {
+			continue
+		}
+		// Here, the condition n == 0 && err == nil should never be
+		// observed, since Splice controls the write side of the pipe.
+		if n > 0 {
+			inPipe -= n
+			written += n
+			continue
+		}
+		if err != syscall.EAGAIN {
+			return written, err
+		}
+		if sock.pd.pollable() {
+			if err := sock.pd.waitWrite(sock.isFile); err != nil {
+				return written, err
+			}
+		}
+	}
+	return written, nil
+}
+
+// splice wraps the splice system call. Since the current implementation
+// only uses splice on sockets and pipes, the offset arguments are unused.
+// splice returns int instead of int64, because callers never ask it to
+// move more data in a single call than can fit in an int32.
+func splice(out int, in int, max int, flags int) (int, error) {
+	n, err := syscall.Splice(in, nil, out, nil, max, flags)
+	return int(n), err
+}
+
+type splicePipeFields struct {
+	rfd  int
+	wfd  int
+	data int
+}
+
+type splicePipe struct {
+	splicePipeFields
+
+	// We want to use a finalizer, so ensure that the size is
+	// large enough to not use the tiny allocator.
+	_ [24 - unsafe.Sizeof(splicePipeFields{})%24]byte
+}
+
+// splicePipePool caches pipes to avoid high-frequency construction and destruction of pipe buffers.
+// The garbage collector will free all pipes in the sync.Pool periodically, thus we need to set up
+// a finalizer for each pipe to close its file descriptors before the actual GC.
+var splicePipePool = sync.Pool{New: newPoolPipe}
+
+func newPoolPipe() any {
+	// Discard the error which occurred during the creation of pipe buffer,
+	// redirecting the data transmission to the conventional way utilizing read() + write() as a fallback.
+	p := newPipe()
+	if p == nil {
+		return nil
+	}
+	runtime.SetFinalizer(p, destroyPipe)
+	return p
+}
+
+// getPipe tries to acquire a pipe buffer from the pool or create a new one with newPipe() if it gets nil from the cache.
+func getPipe() (*splicePipe, error) {
+	v := splicePipePool.Get()
+	if v == nil {
+		return nil, syscall.EINVAL
+	}
+	return v.(*splicePipe), nil
+}
+
+func putPipe(p *splicePipe) {
+	// If there is still data left in the pipe,
+	// then close and discard it instead of putting it back into the pool.
+	if p.data != 0 {
+		runtime.SetFinalizer(p, nil)
+		destroyPipe(p)
+		return
+	}
+	splicePipePool.Put(p)
+}
+
+// newPipe sets up a pipe for a splice operation.
+func newPipe() *splicePipe {
+	var fds [2]int
+	if err := syscall.Pipe2(fds[:], syscall.O_CLOEXEC|syscall.O_NONBLOCK); err != nil {
+		return nil
+	}
+
+	// Splice will loop writing maxSpliceSize bytes from the source to the pipe,
+	// and then write those bytes from the pipe to the destination.
+	// Set the pipe buffer size to maxSpliceSize to optimize that.
+	// Ignore errors here, as a smaller buffer size will work,
+	// although it will require more system calls.
+	unix.Fcntl(fds[0], syscall.F_SETPIPE_SZ, maxSpliceSize)
+
+	return &splicePipe{splicePipeFields: splicePipeFields{rfd: fds[0], wfd: fds[1]}}
+}
+
+// destroyPipe destroys a pipe.
+func destroyPipe(p *splicePipe) {
+	CloseFunc(p.rfd)
+	CloseFunc(p.wfd)
+}
--- a/src/internal/poll/splice_linux_test.go
+++ b/src/internal/poll/splice_linux_test.go
@@ -0,0 +1,136 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	"internal/poll"
+	"runtime"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+var closeHook atomic.Value // func(fd int)
+
+func init() {
+	closeFunc := poll.CloseFunc
+	poll.CloseFunc = func(fd int) (err error) {
+		if v := closeHook.Load(); v != nil {
+			if hook := v.(func(int)); hook != nil {
+				hook(fd)
+			}
+		}
+		return closeFunc(fd)
+	}
+}
+
+func TestSplicePipePool(t *testing.T) {
+	const N = 64
+	var (
+		p          *poll.SplicePipe
+		ps         []*poll.SplicePipe
+		allFDs     []int
+		pendingFDs sync.Map // fd → struct{}{}
+		err        error
+	)
+
+	closeHook.Store(func(fd int) { pendingFDs.Delete(fd) })
+	t.Cleanup(func() { closeHook.Store((func(int))(nil)) })
+
+	for i := 0; i < N; i++ {
+		p, err = poll.GetPipe()
+		if err != nil {
+			t.Skipf("failed to create pipe due to error(%v), skip this test", err)
+		}
+		_, pwfd := poll.GetPipeFds(p)
+		allFDs = append(allFDs, pwfd)
+		pendingFDs.Store(pwfd, struct{}{})
+		ps = append(ps, p)
+	}
+	for _, p = range ps {
+		poll.PutPipe(p)
+	}
+	ps = nil
+	p = nil
+
+	// Exploit the timeout of "go test" as a timer for the subsequent verification.
+	timeout := 5 * time.Minute
+	if deadline, ok := t.Deadline(); ok {
+		timeout = deadline.Sub(time.Now())
+		timeout -= timeout / 10 // Leave 10% headroom for cleanup.
+	}
+	expiredTime := time.NewTimer(timeout)
+	defer expiredTime.Stop()
+
+	// Trigger garbage collection repeatedly, waiting for all pipes in sync.Pool
+	// to either be deallocated and closed, or to time out.
+	for {
+		runtime.GC()
+		time.Sleep(10 * time.Millisecond)
+
+		// Detect whether all pipes are closed properly.
+		var leakedFDs []int
+		pendingFDs.Range(func(k, v any) bool {
+			leakedFDs = append(leakedFDs, k.(int))
+			return true
+		})
+		if len(leakedFDs) == 0 {
+			break
+		}
+
+		select {
+		case <-expiredTime.C:
+			t.Logf("all descriptors: %v", allFDs)
+			t.Fatalf("leaked descriptors: %v", leakedFDs)
+		default:
+		}
+	}
+}
+
+func BenchmarkSplicePipe(b *testing.B) {
+	b.Run("SplicePipeWithPool", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			p, err := poll.GetPipe()
+			if err != nil {
+				continue
+			}
+			poll.PutPipe(p)
+		}
+	})
+	b.Run("SplicePipeWithoutPool", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			p := poll.NewPipe()
+			if p == nil {
+				b.Skip("newPipe returned nil")
+			}
+			poll.DestroyPipe(p)
+		}
+	})
+}
+
+func BenchmarkSplicePipePoolParallel(b *testing.B) {
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			p, err := poll.GetPipe()
+			if err != nil {
+				continue
+			}
+			poll.PutPipe(p)
+		}
+	})
+}
+
+func BenchmarkSplicePipeNativeParallel(b *testing.B) {
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			p := poll.NewPipe()
+			if p == nil {
+				b.Skip("newPipe returned nil")
+			}
+			poll.DestroyPipe(p)
+		}
+	})
+}
--- a/src/internal/poll/sys_cloexec.go
+++ b/src/internal/poll/sys_cloexec.go
@@ -0,0 +1,36 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements accept for platforms that do not provide a fast path for
+// setting SetNonblock and CloseOnExec.
+
+//go:build aix || darwin || (js && wasm) || wasip1
+
+package poll
+
+import (
+	"syscall"
+)
+
+// Wrapper around the accept system call that marks the returned file
+// descriptor as nonblocking and close-on-exec.
+func accept(s int) (int, syscall.Sockaddr, string, error) {
+	// See ../syscall/exec_unix.go for description of ForkLock.
+	// It is probably okay to hold the lock across syscall.Accept
+	// because we have put fd.sysfd into non-blocking mode.
+	// However, a call to the File method will put it back into
+	// blocking mode. We can't take that risk, so no use of ForkLock here.
+	ns, sa, err := AcceptFunc(s)
+	if err == nil {
+		syscall.CloseOnExec(ns)
+	}
+	if err != nil {
+		return -1, nil, "accept", err
+	}
+	if err = syscall.SetNonblock(ns, true); err != nil {
+		CloseFunc(ns)
+		return -1, nil, "setnonblock", err
+	}
+	return ns, sa, "", nil
+}
--- a/src/internal/poll/writev.go
+++ b/src/internal/poll/writev.go
@@ -0,0 +1,90 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build unix
+
+package poll
+
+import (
+	"io"
+	"runtime"
+	"syscall"
+)
+
+// Writev wraps the writev system call.
+func (fd *FD) Writev(v *[][]byte) (int64, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+		return 0, err
+	}
+
+	var iovecs []syscall.Iovec
+	if fd.iovecs != nil {
+		iovecs = *fd.iovecs
+	}
+	// TODO: read from sysconf(_SC_IOV_MAX)? The Linux default is
+	// 1024 and this seems conservative enough for now. Darwin's
+	// UIO_MAXIOV also seems to be 1024.
+	maxVec := 1024
+	if runtime.GOOS == "aix" || runtime.GOOS == "solaris" {
+		// IOV_MAX is set to XOPEN_IOV_MAX on AIX and Solaris.
+		maxVec = 16
+	}
+
+	var n int64
+	var err error
+	for len(*v) > 0 {
+		iovecs = iovecs[:0]
+		for _, chunk := range *v {
+			if len(chunk) == 0 {
+				continue
+			}
+			iovecs = append(iovecs, newIovecWithBase(&chunk[0]))
+			if fd.IsStream && len(chunk) > 1<<30 {
+				iovecs[len(iovecs)-1].SetLen(1 << 30)
+				break // continue chunk on next writev
+			}
+			iovecs[len(iovecs)-1].SetLen(len(chunk))
+			if len(iovecs) == maxVec {
+				break
+			}
+		}
+		if len(iovecs) == 0 {
+			break
+		}
+		if fd.iovecs == nil {
+			fd.iovecs = new([]syscall.Iovec)
+		}
+		*fd.iovecs = iovecs // cache
+
+		var wrote uintptr
+		wrote, err = writev(fd.Sysfd, iovecs)
+		if wrote == ^uintptr(0) {
+			wrote = 0
+		}
+		TestHookDidWritev(int(wrote))
+		n += int64(wrote)
+		consume(v, int64(wrote))
+		clear(iovecs)
+		if err != nil {
+			if err == syscall.EINTR {
+				continue
+			}
+			if err == syscall.EAGAIN {
+				if err = fd.pd.waitWrite(fd.isFile); err == nil {
+					continue
+				}
+			}
+			break
+		}
+		if n == 0 {
+			err = io.ErrUnexpectedEOF
+			break
+		}
+	}
+	return n, err
+}
--- a/src/internal/poll/writev_test.go
+++ b/src/internal/poll/writev_test.go
@@ -0,0 +1,62 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	"internal/poll"
+	"reflect"
+	"testing"
+)
+
+func TestConsume(t *testing.T) {
+	tests := []struct {
+		in      [][]byte
+		consume int64
+		want    [][]byte
+	}{
+		{
+			in:      [][]byte{[]byte("foo"), []byte("bar")},
+			consume: 0,
+			want:    [][]byte{[]byte("foo"), []byte("bar")},
+		},
+		{
+			in:      [][]byte{[]byte("foo"), []byte("bar")},
+			consume: 2,
+			want:    [][]byte{[]byte("o"), []byte("bar")},
+		},
+		{
+			in:      [][]byte{[]byte("foo"), []byte("bar")},
+			consume: 3,
+			want:    [][]byte{[]byte("bar")},
+		},
+		{
+			in:      [][]byte{[]byte("foo"), []byte("bar")},
+			consume: 4,
+			want:    [][]byte{[]byte("ar")},
+		},
+		{
+			in:      [][]byte{nil, nil, nil, []byte("bar")},
+			consume: 1,
+			want:    [][]byte{[]byte("ar")},
+		},
+		{
+			in:      [][]byte{nil, nil, nil, []byte("foo")},
+			consume: 0,
+			want:    [][]byte{[]byte("foo")},
+		},
+		{
+			in:      [][]byte{nil, nil, nil},
+			consume: 0,
+			want:    [][]byte{},
+		},
+	}
+	for i, tt := range tests {
+		in := tt.in
+		poll.Consume(&in, tt.consume)
+		if !reflect.DeepEqual(in, tt.want) {
+			t.Errorf("%d. after consume(%d) = %+v, want %+v", i, tt.consume, in, tt.want)
+		}
+	}
+}