Skip to content

kvm, eventfd: support KVM_IOEVENTFD #11661

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions nogo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ analyzers:
internal:
exclude:
- ".*_test.go" # Exclude tests.
- pkg/eventfd/eventfd_unsafe.go # Special case.
- "pkg/flipcall/.*_unsafe.go" # Special case.
- pkg/gohacks/noescape_unsafe.go # Special case.
- pkg/ring0/pagetables/allocator_unsafe.go # Special case.
Expand Down
1 change: 1 addition & 0 deletions pkg/eventfd/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ go_library(
deps = [
"//pkg/hostarch",
"//pkg/rawfile",
"//pkg/safecopy",
"@org_golang_x_sys//unix:go_default_library",
],
)
Expand Down
72 changes: 68 additions & 4 deletions pkg/eventfd/eventfd.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,16 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/rawfile"
"gvisor.dev/gvisor/pkg/safecopy"
)

const sizeofUint64 = 8

// Eventfd represents a Linux eventfd object.
//
// Besides the file descriptor, an Eventfd may carry an optional MMIO address
// and controller (see EnableMMIO) that let Write bypass the write syscall.
type Eventfd struct {
// fd is the file descriptor of the underlying eventfd.
fd int
fd int
// mmioAddr is the address set by EnableMMIO; zero means no MMIO address
// is set.
mmioAddr uintptr
// mmioCtrl is the controller passed to EnableMMIO; nil means none is
// attached.
mmioCtrl MMIOController
}

// Create returns an initialized eventfd.
Expand All @@ -41,16 +44,19 @@ func Create() (Eventfd, error) {
unix.Close(int(fd))
return Eventfd{}, err
}
return Eventfd{int(fd)}, nil
return Eventfd{fd: int(fd)}, nil
}

// Wrap returns an initialized Eventfd using the provided fd. Only the file
// descriptor is populated; no MMIO address or controller is set on the
// returned value.
func Wrap(fd int) Eventfd {
return Eventfd{fd}
return Eventfd{fd: fd}
}

// Close releases the eventfd; the Eventfd must not be used afterwards. If an
// MMIO controller is attached, it is notified before the file descriptor is
// closed.
func (ev Eventfd) Close() error {
	if ctrl := ev.mmioCtrl; ctrl != nil {
		ctrl.Close(ev)
	}
	return unix.Close(ev.fd)
}

Expand All @@ -60,7 +66,7 @@ func (ev Eventfd) Dup() (Eventfd, error) {
if err != nil {
return Eventfd{}, fmt.Errorf("failed to dup: %v", other)
}
return Eventfd{other}, nil
return Eventfd{fd: other}, nil
}

// Notify alerts other users of the eventfd. Users can receive alerts by
Expand All @@ -73,6 +79,12 @@ func (ev Eventfd) Notify() error {
func (ev Eventfd) Write(val uint64) error {
var buf [sizeofUint64]byte
hostarch.ByteOrder.PutUint64(buf[:], val)
if ev.mmioAddr != 0 && ev.mmioCtrl.Enabled() {
if _, err := safecopy.CopyOut(ev.mmioPtr(), buf[:]); err == nil {
return nil
}
// Fall back to using a syscall.
}
for {
n, err := nonBlockingWrite(ev.fd, buf[:])
if err == unix.EINTR {
Expand All @@ -85,6 +97,22 @@ func (ev Eventfd) Write(val uint64) error {
}
}

// MMIOWrite behaves like Write, except that it never falls back to a syscall:
// if the value cannot be written through the address set by EnableMMIO, an
// error is returned instead. This is primarily useful for testing.
func (ev Eventfd) MMIOWrite(val uint64) error {
	// Reject the call up front when the memory-mapped path is unavailable.
	switch {
	case ev.mmioAddr == 0:
		return fmt.Errorf("no MMIO address set")
	case !ev.mmioCtrl.Enabled():
		return fmt.Errorf("MMIO is temporarily disabled")
	}

	// Encode val in host byte order and copy it to the MMIO address.
	var encoded [sizeofUint64]byte
	hostarch.ByteOrder.PutUint64(encoded[:], val)
	_, err := safecopy.CopyOut(ev.mmioPtr(), encoded[:])
	return err
}

// Wait blocks until eventfd is non-zero (i.e. someone calls Notify or Write).
func (ev Eventfd) Wait() error {
_, err := ev.Read()
Expand Down Expand Up @@ -113,3 +141,39 @@ func (ev Eventfd) Read() (uint64, error) {
func (ev Eventfd) FD() int {
return ev.fd
}

// MMIOController controls eventfd memory-mapped I/O. An implementation is
// attached to an Eventfd via EnableMMIO and consulted on each Write and
// MMIOWrite.
type MMIOController interface {
// Enabled returns true if writing to the associated MMIO address can
// succeed. This is inherently racy, so if the memory-mapped write faults,
// the eventfd will fall back to writing using a syscall.
Enabled() bool

// Close is called when the associated Eventfd is closed, before the
// underlying file descriptor itself is closed.
Close(ev Eventfd)
}

// EnableMMIO makes subsequent calls to ev.Write() attempt a memory-mapped
// write to addr, subject to ctrl. It must not be called concurrently with
// Write, MMIOWrite, or MMIOAddr.
//
// The feature exists to support KVM ioeventfds. That use requires addr to be
// mapped read-only or with no permissions in the host virtual address space
// (so that writes in host mode fault); such an address cannot reasonably be
// Go-managed memory, which is why it is safe to type it as uintptr rather
// than a pointer.
func (ev *Eventfd) EnableMMIO(addr uintptr, ctrl MMIOController) {
	ev.mmioAddr, ev.mmioCtrl = addr, ctrl
}

// DisableMMIO reverts a previous EnableMMIO by clearing both the MMIO address
// and the controller. It must not be called concurrently with Write,
// MMIOWrite, or MMIOAddr.
func (ev *Eventfd) DisableMMIO() {
	ev.mmioAddr, ev.mmioCtrl = 0, nil
}

// MMIOAddr returns the address set by the last call to EnableMMIO. It returns
// 0 if no address is currently set, e.g. after DisableMMIO.
func (ev Eventfd) MMIOAddr() uintptr {
return ev.mmioAddr
}
4 changes: 4 additions & 0 deletions pkg/eventfd/eventfd_unsafe.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ import (
"golang.org/x/sys/unix"
)

// mmioPtr returns ev.mmioAddr converted to an unsafe.Pointer, for use as the
// destination of a memory-mapped write.
//
// Converting a stored uintptr to unsafe.Pointer is normally suspect; it is
// tolerated here because, per EnableMMIO's contract, the address cannot
// reasonably be Go-managed memory.
func (ev Eventfd) mmioPtr() unsafe.Pointer {
return unsafe.Pointer(ev.mmioAddr)
}

// nonBlockingWrite writes the given buffer to a file descriptor. It fails if
// partial data is written.
func nonBlockingWrite(fd int, buf []byte) (int, error) {
Expand Down
6 changes: 6 additions & 0 deletions pkg/sentry/platform/kvm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ go_library(
"filters.go",
"filters_amd64.go",
"filters_arm64.go",
"ioeventfd.go",
"ioeventfd_unsafe.go",
"kvm.go",
"kvm_amd64.go",
"kvm_amd64_unsafe.go",
Expand Down Expand Up @@ -73,8 +75,10 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/atomicbitops",
"//pkg/bitmap",
"//pkg/context",
"//pkg/cpuid",
"//pkg/eventfd",
"//pkg/fd",
"//pkg/hostarch",
"//pkg/hostos",
Expand Down Expand Up @@ -120,6 +124,7 @@ go_test(
deps = [
"//pkg/abi/linux",
"//pkg/cpuid",
"//pkg/eventfd",
"//pkg/hostarch",
"//pkg/memutil",
"//pkg/ring0",
Expand Down Expand Up @@ -154,6 +159,7 @@ go_test(
deps = [
"//pkg/abi/linux",
"//pkg/cpuid",
"//pkg/eventfd",
"//pkg/hostarch",
"//pkg/memutil",
"//pkg/ring0",
Expand Down
4 changes: 4 additions & 0 deletions pkg/sentry/platform/kvm/bluepill_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,7 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) {
// from this new pointer value.
context.Fpstate = uint64(uintptrValue(c.FloatingPointState().BytePointer())) // escapes: no.
}

// inKernelMode reports whether the two low-order bits of the value returned
// by getcs are both zero.
//
// NOTE(review): getcs presumably returns the CS segment selector, whose low
// two bits hold the current privilege level (0 == ring 0) -- confirm against
// the getcs implementation.
func inKernelMode() bool {
	const privilegeMask = 3
	return getcs()&privilegeMask == 0
}
6 changes: 6 additions & 0 deletions pkg/sentry/platform/kvm/bluepill_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,12 @@ func (c *vCPU) KernelException(vector ring0.Vector) {
func (c *vCPU) hltSanityCheck() {
}

// currentEL returns the current exception level. It has no Go body; the
// implementation is in assembly (bluepill_arm64.s).
func currentEL() uint64

// inKernelMode reports whether the current exception level, as returned by
// currentEL, is 1.
//
// NOTE(review): presumably EL1 is the level at which the ring0 kernel code
// runs on arm64 -- confirm.
func inKernelMode() bool {
	const kernelEL = 1
	return currentEL() == kernelEL
}

func init() {
// Install the handler.
if err := sighandling.ReplaceSignalHandler(bluepillSignal, addrOfSighandler(), &savedHandler); err != nil {
Expand Down
7 changes: 7 additions & 0 deletions pkg/sentry/platform/kvm/bluepill_arm64.s
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,10 @@ TEXT ·addrOfDieTrampoline(SB), $0-8
MOVD $·dieTrampoline(SB), R0
MOVD R0, ret+0(FP)
RET

// currentEL returns the current exception level.
//
// func currentEL() uint64
TEXT ·currentEL(SB),NOSPLIT,$0-8
// Read the CurrentEL system register into R1.
MRS CurrentEL, R1
// Extract the 2-bit field starting at bit 2 (bits [3:2]) into R1.
UBFX $2, R1, $2, R1
// Store the extracted value as the uint64 result.
MOVD R1, ret+0(FP)
RET
4 changes: 4 additions & 0 deletions pkg/sentry/platform/kvm/filters.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ func (k *KVM) SeccompInfo() platform.SeccompInfo {
seccomp.NonNegativeFD{},
seccomp.EqualTo(KVM_SET_USER_MEMORY_REGION),
},
seccomp.PerArg{
seccomp.NonNegativeFD{},
seccomp.EqualTo(KVM_IOEVENTFD),
},
seccomp.PerArg{
seccomp.NonNegativeFD{},
seccomp.EqualTo(KVM_GET_REGS),
Expand Down
Loading
Loading