From 40551dfd1b9cc324b57cb878964fc123bca9d954 Mon Sep 17 00:00:00 2001 From: Denys Knertser Date: Wed, 25 Jan 2023 22:38:52 +0100 Subject: [PATCH] pre-allocate storage for metadata json files, see containers/podman#13967 Keeping a temporary file of at least the same size as the target file for atomic writes helps reduce the probability of running out of space when deleting entities from corresponding metadata in a disk full scenario. This strategy is applied to writing containers.json, layers.json, images.json and mountpoints.json. Signed-off-by: Denys Knertser --- containers.go | 10 +-- images.go | 5 +- layers.go | 9 ++- pkg/ioutils/fswriters.go | 109 ++++++++++++++++++++++----- pkg/ioutils/fswriters_linux.go | 9 +++ pkg/ioutils/fswriters_unsupported.go | 4 + 6 files changed, 118 insertions(+), 28 deletions(-) diff --git a/containers.go b/containers.go index 106d1d152e..ee9296dbab 100644 --- a/containers.go +++ b/containers.go @@ -541,13 +541,13 @@ func (r *containerStore) save(saveLocations containerLocations) error { if err != nil { return err } - var opts *ioutils.AtomicFileWriterOptions + opts := ioutils.AtomicFileWriterOptions{ + PreAllocate: true, + } if location == volatileContainerLocation { - opts = &ioutils.AtomicFileWriterOptions{ - NoSync: true, - } + opts.NoSync = true } - if err := ioutils.AtomicWriteFileWithOpts(rpath, jdata, 0600, opts); err != nil { + if err := ioutils.AtomicWriteFileWithOpts(rpath, jdata, 0600, &opts); err != nil { return err } } diff --git a/images.go b/images.go index 577b6f8ed4..46c75dc8f6 100644 --- a/images.go +++ b/images.go @@ -536,7 +536,10 @@ func (r *imageStore) Save() error { if err != nil { return err } - if err := ioutils.AtomicWriteFile(rpath, jdata, 0600); err != nil { + opts := ioutils.AtomicFileWriterOptions{ + PreAllocate: true, + } + if err := ioutils.AtomicWriteFileWithOpts(rpath, jdata, 0600, &opts); err != nil { return err } lw, err := r.lockfile.RecordWrite() diff --git a/layers.go b/layers.go index f14108be5a..ce4eea787e 100644 --- a/layers.go +++ b/layers.go @@ -942,7 +942,9 @@ func (r *layerStore) saveLayers(saveLocations layerLocations) error { if err != nil { return err } - opts := ioutils.AtomicFileWriterOptions{} + opts := ioutils.AtomicFileWriterOptions{ + PreAllocate: true, + } if location == volatileLayerLocation { opts.NoSync = true } @@ -984,7 +986,10 @@ func (r *layerStore) saveMounts() error { if err != nil { return err } - if err = ioutils.AtomicWriteFile(mpath, jmdata, 0600); err != nil { + opts := ioutils.AtomicFileWriterOptions{ + PreAllocate: true, + } + if err = ioutils.AtomicWriteFileWithOpts(mpath, jmdata, 0600, &opts); err != nil { return err } lw, err := r.mountsLockfile.RecordWrite() diff --git a/pkg/ioutils/fswriters.go b/pkg/ioutils/fswriters.go index 231d1c47b2..feadea48e2 100644 --- a/pkg/ioutils/fswriters.go +++ b/pkg/ioutils/fswriters.go @@ -2,8 +2,10 @@ package ioutils import ( "io" + "math/rand" "os" "path/filepath" + "strconv" "time" ) @@ -17,6 +19,9 @@ type AtomicFileWriterOptions struct { // On successful return from Close() this is set to the mtime of the // newly written file. ModTime time.Time + // Whenever an atomic file is successfully written create a temporary + // file of the same size in order to pre-allocate storage for the next write + PreAllocate bool } var defaultWriterOptions = AtomicFileWriterOptions{} @@ -38,22 +43,33 @@ func NewAtomicFileWriterWithOpts(filename string, perm os.FileMode, opts *Atomic // temporary file and closing it atomically changes the temporary file to // destination path. Writing and closing concurrently is not allowed. func newAtomicFileWriter(filename string, perm os.FileMode, opts *AtomicFileWriterOptions) (*atomicFileWriter, error) { - f, err := os.CreateTemp(filepath.Dir(filename), ".tmp-"+filepath.Base(filename)) - if err != nil { - return nil, err - } + dir := filepath.Dir(filename) + base := filepath.Base(filename) if opts == nil { opts = &defaultWriterOptions } + random := "" + // need predictable name when pre-allocated + if !opts.PreAllocate { + random = strconv.FormatUint(rand.Uint64(), 36) + } + tmp := filepath.Join(dir, ".tmp"+random+"-"+base) + // if pre-allocated the temporary file exists and contains some data + // do not truncate it here, instead truncate at Close() + f, err := os.OpenFile(tmp, os.O_WRONLY|os.O_CREATE, perm) + if err != nil { + return nil, err + } abspath, err := filepath.Abs(filename) if err != nil { return nil, err } return &atomicFileWriter{ - f: f, - fn: abspath, - perm: perm, - noSync: opts.NoSync, + f: f, + fn: abspath, + perm: perm, + noSync: opts.NoSync, + preAllocate: opts.PreAllocate, }, nil } @@ -91,12 +107,13 @@ func AtomicWriteFile(filename string, data []byte, perm os.FileMode) error { } type atomicFileWriter struct { - f *os.File - fn string - writeErr error - perm os.FileMode - noSync bool - modTime time.Time + f *os.File + fn string + writeErr error + perm os.FileMode + noSync bool + preAllocate bool + modTime time.Time } func (w *atomicFileWriter) Write(dt []byte) (int, error) { @@ -108,11 +125,22 @@ func (w *atomicFileWriter) Write(dt []byte) (int, error) { } func (w *atomicFileWriter) Close() (retErr error) { - defer func() { - if retErr != nil || w.writeErr != nil { - os.Remove(w.f.Name()) + if !w.preAllocate { + defer func() { + if retErr != nil || w.writeErr != nil { + os.Remove(w.f.Name()) + } + }() + } else { + truncateAt, err := w.f.Seek(0, io.SeekCurrent) + if err != nil { + return err + } + err = w.f.Truncate(truncateAt) + if err != nil { + return err } - }() + } if !w.noSync { if err := fdatasync(w.f); err != nil { w.f.Close() @@ -120,10 +148,12 @@ func (w *atomicFileWriter) Close() (retErr error) { } } + var size int64 = 0 // fstat before closing the fd info, statErr := w.f.Stat() if statErr == nil { w.modTime = info.ModTime() + size = info.Size() } // We delay error reporting until after the real call to close() // to match the traditional linux close() behaviour that an fd @@ -138,11 +168,50 @@ func (w *atomicFileWriter) Close() (retErr error) { return statErr } - if err := os.Chmod(w.f.Name(), w.perm); err != nil { + tmpName := w.f.Name() + if err := os.Chmod(tmpName, w.perm); err != nil { return err } if w.writeErr == nil { - return os.Rename(w.f.Name(), w.fn) + if w.preAllocate { + err := swapOrMove(tmpName, w.fn) + if err != nil { + return err + } + // ignore errors, this is a best effort operation + preAllocate(tmpName, size, w.perm) + return nil + } + return os.Rename(tmpName, w.fn) + } + return nil +} + +// ensure that the file is of at least indicated size +func preAllocate(filename string, size int64, perm os.FileMode) error { + f, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_APPEND, perm) + if err != nil { + return err + } + defer f.Close() + info, err := f.Stat() + if err != nil { + return err + } + extendBytes := size - info.Size() + if extendBytes > 0 { + var blockSize int64 = 65536 + block := make([]byte, blockSize) + for extendBytes > 0 { + if blockSize > extendBytes { + blockSize = extendBytes + } + _, err := f.Write(block[:blockSize]) + if err != nil { + return err + } + extendBytes -= blockSize + } } return nil } diff --git a/pkg/ioutils/fswriters_linux.go b/pkg/ioutils/fswriters_linux.go index 0da78a063d..d2b8db44f2 100644 --- a/pkg/ioutils/fswriters_linux.go +++ b/pkg/ioutils/fswriters_linux.go @@ -9,3 +9,12 @@ import ( func fdatasync(f *os.File) error { return unix.Fdatasync(int(f.Fd())) } + +func swapOrMove(oldpath string, newpath string) error { + err := unix.Renameat2(unix.AT_FDCWD, oldpath, unix.AT_FDCWD, newpath, unix.RENAME_EXCHANGE) + if err != nil { + // unlikely that rename will succeed if renameat2 failed, but just in case + err = os.Rename(oldpath, newpath) + } + return err +} diff --git a/pkg/ioutils/fswriters_unsupported.go b/pkg/ioutils/fswriters_unsupported.go index 635489280d..8e32301ffe 100644 --- a/pkg/ioutils/fswriters_unsupported.go +++ b/pkg/ioutils/fswriters_unsupported.go @@ -10,3 +10,7 @@ import ( func fdatasync(f *os.File) error { return f.Sync() } + +func swapOrMove(oldpath string, newpath string) error { + return os.Rename(oldpath, newpath) +}