nvidia-container-toolkit/vendor/github.com/container-orchestrated-devices/container-device-interface/pkg/cdi/cache.go

582 lines
13 KiB
Go
Raw Normal View History

/*
Copyright © 2021 The CDI Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cdi
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"sort"
"strings"
"sync"
"github.com/container-orchestrated-devices/container-device-interface/internal/multierror"
cdi "github.com/container-orchestrated-devices/container-device-interface/specs-go"
"github.com/fsnotify/fsnotify"
oci "github.com/opencontainers/runtime-spec/specs-go"
)
// Option is an option to change some aspect of default CDI behavior.
type Option func(*Cache) error
// Cache stores CDI Specs loaded from Spec directories.
type Cache struct {
sync.Mutex
specDirs []string
specs map[string][]*Spec
devices map[string]*Device
errors map[string][]error
dirErrors map[string]error
autoRefresh bool
watch *watch
}
// WithAutoRefresh returns an option to control automatic Cache refresh.
// By default auto-refresh is enabled, the list of Spec directories are
// monitored and the Cache is automatically refreshed whenever a change
// is detected. This option can be used to disable this behavior when a
// manually refreshed mode is preferable.
func WithAutoRefresh(autoRefresh bool) Option {
return func(c *Cache) error {
c.autoRefresh = autoRefresh
return nil
}
}
// NewCache creates a new CDI Cache. The cache is populated from a set
// of CDI Spec directories. These can be specified using a WithSpecDirs
// option. The default set of directories is exposed in DefaultSpecDirs.
func NewCache(options ...Option) (*Cache, error) {
c := &Cache{
autoRefresh: true,
watch: &watch{},
}
WithSpecDirs(DefaultSpecDirs...)(c)
c.Lock()
defer c.Unlock()
return c, c.configure(options...)
}
// Configure applies options to the Cache. Updates and refreshes the
// Cache if options have changed.
func (c *Cache) Configure(options ...Option) error {
if len(options) == 0 {
return nil
}
c.Lock()
defer c.Unlock()
return c.configure(options...)
}
// Configure the Cache. Start/stop CDI Spec directory watch, refresh
// the Cache if necessary.
func (c *Cache) configure(options ...Option) error {
var err error
for _, o := range options {
if err = o(c); err != nil {
return fmt.Errorf("failed to apply cache options: %w", err)
}
}
c.dirErrors = make(map[string]error)
c.watch.stop()
if c.autoRefresh {
c.watch.setup(c.specDirs, c.dirErrors)
c.watch.start(&c.Mutex, c.refresh, c.dirErrors)
}
c.refresh()
return nil
}
// Refresh rescans the CDI Spec directories and refreshes the Cache.
// In manual refresh mode the cache is always refreshed. In auto-
// refresh mode the cache is only refreshed if it is out of date.
func (c *Cache) Refresh() error {
c.Lock()
defer c.Unlock()
// force a refresh in manual mode
if refreshed, err := c.refreshIfRequired(!c.autoRefresh); refreshed {
return err
}
// collect and return cached errors, much like refresh() does it
var result error
for _, errors := range c.errors {
result = multierror.Append(result, errors...)
}
return result
}
// Refresh the Cache by rescanning CDI Spec directories and files.
func (c *Cache) refresh() error {
var (
specs = map[string][]*Spec{}
devices = map[string]*Device{}
conflicts = map[string]struct{}{}
specErrors = map[string][]error{}
result []error
)
// collect errors per spec file path and once globally
collectError := func(err error, paths ...string) {
result = append(result, err)
for _, path := range paths {
specErrors[path] = append(specErrors[path], err)
}
}
// resolve conflicts based on device Spec priority (order of precedence)
resolveConflict := func(name string, dev *Device, old *Device) bool {
devSpec, oldSpec := dev.GetSpec(), old.GetSpec()
devPrio, oldPrio := devSpec.GetPriority(), oldSpec.GetPriority()
switch {
case devPrio > oldPrio:
return false
case devPrio == oldPrio:
devPath, oldPath := devSpec.GetPath(), oldSpec.GetPath()
collectError(fmt.Errorf("conflicting device %q (specs %q, %q)",
name, devPath, oldPath), devPath, oldPath)
conflicts[name] = struct{}{}
}
return true
}
_ = scanSpecDirs(c.specDirs, func(path string, priority int, spec *Spec, err error) error {
path = filepath.Clean(path)
if err != nil {
collectError(fmt.Errorf("failed to load CDI Spec %w", err), path)
return nil
}
vendor := spec.GetVendor()
specs[vendor] = append(specs[vendor], spec)
for _, dev := range spec.devices {
qualified := dev.GetQualifiedName()
other, ok := devices[qualified]
if ok {
if resolveConflict(qualified, dev, other) {
continue
}
}
devices[qualified] = dev
}
return nil
})
for conflict := range conflicts {
delete(devices, conflict)
}
c.specs = specs
c.devices = devices
c.errors = specErrors
return multierror.New(result...)
}
// RefreshIfRequired triggers a refresh if necessary.
func (c *Cache) refreshIfRequired(force bool) (bool, error) {
// We need to refresh if
// - it's forced by an explicitly call to Refresh() in manual mode
// - a missing Spec dir appears (added to watch) in auto-refresh mode
if force || (c.autoRefresh && c.watch.update(c.dirErrors)) {
return true, c.refresh()
}
return false, nil
}
// InjectDevices injects the given qualified devices to an OCI Spec. It
// returns any unresolvable devices and an error if injection fails for
// any of the devices.
func (c *Cache) InjectDevices(ociSpec *oci.Spec, devices ...string) ([]string, error) {
var unresolved []string
if ociSpec == nil {
return devices, fmt.Errorf("can't inject devices, nil OCI Spec")
}
c.Lock()
defer c.Unlock()
c.refreshIfRequired(false)
edits := &ContainerEdits{}
specs := map[*Spec]struct{}{}
for _, device := range devices {
d := c.devices[device]
if d == nil {
unresolved = append(unresolved, device)
continue
}
if _, ok := specs[d.GetSpec()]; !ok {
specs[d.GetSpec()] = struct{}{}
edits.Append(d.GetSpec().edits())
}
edits.Append(d.edits())
}
if unresolved != nil {
return unresolved, fmt.Errorf("unresolvable CDI devices %s",
strings.Join(devices, ", "))
}
if err := edits.Apply(ociSpec); err != nil {
return nil, fmt.Errorf("failed to inject devices: %w", err)
}
return nil, nil
}
// highestPrioritySpecDir returns the Spec directory with highest priority
// and its priority.
func (c *Cache) highestPrioritySpecDir() (string, int) {
if len(c.specDirs) == 0 {
return "", -1
}
prio := len(c.specDirs) - 1
dir := c.specDirs[prio]
return dir, prio
}
// WriteSpec writes a Spec file with the given content into the highest
// priority Spec directory. If name has a "json" or "yaml" extension it
// choses the encoding. Otherwise the default YAML encoding is used.
func (c *Cache) WriteSpec(raw *cdi.Spec, name string) error {
var (
specDir string
path string
prio int
spec *Spec
err error
)
specDir, prio = c.highestPrioritySpecDir()
if specDir == "" {
return errors.New("no Spec directories to write to")
}
path = filepath.Join(specDir, name)
if ext := filepath.Ext(path); ext != ".json" && ext != ".yaml" {
path += defaultSpecExt
}
spec, err = newSpec(raw, path, prio)
if err != nil {
return err
}
return spec.write(true)
}
// RemoveSpec removes a Spec with the given name from the highest
// priority Spec directory. This function can be used to remove a
// Spec previously written by WriteSpec(). If the file exists and
// its removal fails RemoveSpec returns an error.
func (c *Cache) RemoveSpec(name string) error {
var (
specDir string
path string
err error
)
specDir, _ = c.highestPrioritySpecDir()
if specDir == "" {
return errors.New("no Spec directories to remove from")
}
path = filepath.Join(specDir, name)
if ext := filepath.Ext(path); ext != ".json" && ext != ".yaml" {
path += defaultSpecExt
}
err = os.Remove(path)
if err != nil && errors.Is(err, fs.ErrNotExist) {
err = nil
}
return err
}
// GetDevice returns the cached device for the given qualified name.
func (c *Cache) GetDevice(device string) *Device {
c.Lock()
defer c.Unlock()
c.refreshIfRequired(false)
return c.devices[device]
}
// ListDevices lists all cached devices by qualified name.
func (c *Cache) ListDevices() []string {
var devices []string
c.Lock()
defer c.Unlock()
c.refreshIfRequired(false)
for name := range c.devices {
devices = append(devices, name)
}
sort.Strings(devices)
return devices
}
// ListVendors lists all vendors known to the cache.
func (c *Cache) ListVendors() []string {
var vendors []string
c.Lock()
defer c.Unlock()
c.refreshIfRequired(false)
for vendor := range c.specs {
vendors = append(vendors, vendor)
}
sort.Strings(vendors)
return vendors
}
// ListClasses lists all device classes known to the cache.
func (c *Cache) ListClasses() []string {
var (
cmap = map[string]struct{}{}
classes []string
)
c.Lock()
defer c.Unlock()
c.refreshIfRequired(false)
for _, specs := range c.specs {
for _, spec := range specs {
cmap[spec.GetClass()] = struct{}{}
}
}
for class := range cmap {
classes = append(classes, class)
}
sort.Strings(classes)
return classes
}
// GetVendorSpecs returns all specs for the given vendor.
func (c *Cache) GetVendorSpecs(vendor string) []*Spec {
c.Lock()
defer c.Unlock()
c.refreshIfRequired(false)
return c.specs[vendor]
}
// GetSpecErrors returns all errors encountered for the spec during the
// last cache refresh.
func (c *Cache) GetSpecErrors(spec *Spec) []error {
var errors []error
c.Lock()
defer c.Unlock()
if errs, ok := c.errors[spec.GetPath()]; ok {
errors = make([]error, len(errs))
copy(errors, errs)
}
return errors
}
// GetErrors returns all errors encountered during the last
// cache refresh.
func (c *Cache) GetErrors() map[string][]error {
c.Lock()
defer c.Unlock()
errors := map[string][]error{}
for path, errs := range c.errors {
errors[path] = errs
}
for path, err := range c.dirErrors {
errors[path] = []error{err}
}
return errors
}
// GetSpecDirectories returns the CDI Spec directories currently in use.
func (c *Cache) GetSpecDirectories() []string {
c.Lock()
defer c.Unlock()
dirs := make([]string, len(c.specDirs))
copy(dirs, c.specDirs)
return dirs
}
// GetSpecDirErrors returns any errors related to configured Spec directories.
func (c *Cache) GetSpecDirErrors() map[string]error {
if c.dirErrors == nil {
return nil
}
c.Lock()
defer c.Unlock()
errors := make(map[string]error)
for dir, err := range c.dirErrors {
errors[dir] = err
}
return errors
}
// Our fsnotify helper wrapper.
type watch struct {
watcher *fsnotify.Watcher
tracked map[string]bool
}
// Setup monitoring for the given Spec directories.
func (w *watch) setup(dirs []string, dirErrors map[string]error) {
var (
dir string
err error
)
w.tracked = make(map[string]bool)
for _, dir = range dirs {
w.tracked[dir] = false
}
w.watcher, err = fsnotify.NewWatcher()
if err != nil {
for _, dir := range dirs {
dirErrors[dir] = fmt.Errorf("failed to create watcher: %w", err)
}
return
}
w.update(dirErrors)
}
// Start watching Spec directories for relevant changes.
func (w *watch) start(m *sync.Mutex, refresh func() error, dirErrors map[string]error) {
go w.watch(w.watcher, m, refresh, dirErrors)
}
// Stop watching directories.
func (w *watch) stop() {
if w.watcher == nil {
return
}
w.watcher.Close()
w.tracked = nil
}
// Watch Spec directory changes, triggering a refresh if necessary.
func (w *watch) watch(fsw *fsnotify.Watcher, m *sync.Mutex, refresh func() error, dirErrors map[string]error) {
watch := fsw
if watch == nil {
return
}
for {
select {
case event, ok := <-watch.Events:
if !ok {
return
}
if (event.Op & (fsnotify.Rename | fsnotify.Remove | fsnotify.Write)) == 0 {
continue
}
if event.Op == fsnotify.Write {
if ext := filepath.Ext(event.Name); ext != ".json" && ext != ".yaml" {
continue
}
}
m.Lock()
if event.Op == fsnotify.Remove && w.tracked[event.Name] {
w.update(dirErrors, event.Name)
} else {
w.update(dirErrors)
}
refresh()
m.Unlock()
case _, ok := <-watch.Errors:
if !ok {
return
}
}
}
}
// Update watch with pending/missing or removed directories.
func (w *watch) update(dirErrors map[string]error, removed ...string) bool {
var (
dir string
ok bool
err error
update bool
)
for dir, ok = range w.tracked {
if ok {
continue
}
err = w.watcher.Add(dir)
if err == nil {
w.tracked[dir] = true
delete(dirErrors, dir)
update = true
} else {
w.tracked[dir] = false
dirErrors[dir] = fmt.Errorf("failed to monitor for changes: %w", err)
}
}
for _, dir = range removed {
w.tracked[dir] = false
dirErrors[dir] = errors.New("directory removed")
update = true
}
return update
}