From ca7cf8fcf65e21bc5d99ec8a36ec10e36207cafd Mon Sep 17 00:00:00 2001
From: Jason Bedard
Date: Mon, 19 Aug 2024 22:39:15 -0700
Subject: [PATCH] feat: add option to persist rule index

---
 cmd/gazelle/fix-update.go | 43 ++++++++++++++++++++++++++--
 resolve/index.go          | 56 ++++++++++++++++++++++++++++++++++
 walk/walk.go              | 24 +++++++++++++++
 3 files changed, 120 insertions(+), 3 deletions(-)

diff --git a/cmd/gazelle/fix-update.go b/cmd/gazelle/fix-update.go
index c0c8a72ff..fa4b4cb15 100644
--- a/cmd/gazelle/fix-update.go
+++ b/cmd/gazelle/fix-update.go
@@ -55,6 +55,9 @@ type updateConfig struct {
 	patchBuffer    bytes.Buffer
 	print0         bool
 	profile        profiler
+
+	// EXPERIMENTAL: path of the file used to cache the rule index across runs
+	ruleIndexFile string
 }
 
 type emitFunc func(c *config.Config, f *rule.File) error
@@ -96,6 +99,7 @@ func (ucr *updateConfigurer) RegisterFlags(fs *flag.FlagSet, cmd string, c *conf
 	fs.StringVar(&ucr.memProfile, "memprofile", "", "write memory profile to `file`")
 	fs.Var(&gzflag.MultiFlag{Values: &ucr.knownImports}, "known_import", "import path for which external resolution is skipped (can specify multiple times)")
 	fs.StringVar(&ucr.repoConfigPath, "repo_config", "", "file where Gazelle should load repository configuration. Defaults to WORKSPACE.")
+	fs.StringVar(&uc.ruleIndexFile, "indexdb", "", "EXPERIMENTAL: file to cache the rule index")
 }
 
 func (ucr *updateConfigurer) CheckFlags(fs *flag.FlagSet, c *config.Config) error {
@@ -112,6 +116,9 @@ func (ucr *updateConfigurer) CheckFlags(fs *flag.FlagSet, c *config.Config) erro
 	if uc.patchPath != "" && !filepath.IsAbs(uc.patchPath) {
 		uc.patchPath = filepath.Join(c.WorkDir, uc.patchPath)
 	}
+	if uc.ruleIndexFile != "" && !filepath.IsAbs(uc.ruleIndexFile) {
+		uc.ruleIndexFile = filepath.Join(c.WorkDir, uc.ruleIndexFile)
+	}
 	p, err := newProfiler(ucr.cpuProfile, ucr.memProfile)
 	if err != nil {
 		return err
@@ -313,15 +320,38 @@ func runFixUpdate(wd string, cmd command, args []string) (err error) {
 		}
 	}()
 
+	walkMode := uc.walkMode
+
+	updateRels := walk.NewUpdateFilter(c.RepoRoot, uc.dirs, uc.walkMode)
+	preindexed := false
+
+	// Load the rule index file if it exists.
+	if c.IndexLibraries && uc.ruleIndexFile != "" {
+		// Do not load index entries from directories that are being updated.
+		indexLoaded, err := ruleIndex.LoadIndex(uc.ruleIndexFile, updateRels.ShouldReIndex)
+		if err != nil {
+			log.Printf("Failed to load index file %s: %v", uc.ruleIndexFile, err)
+		} else if indexLoaded {
+			// Drop "visit all" since indexing has been loaded from disk.
+			if walkMode == walk.VisitAllUpdateSubdirsMode {
+				walkMode = walk.UpdateSubdirsMode
+			} else if walkMode == walk.VisitAllUpdateDirsMode {
+				walkMode = walk.UpdateDirsMode
+			}
+
+			preindexed = true
+		}
+	}
+
 	var errorsFromWalk []error
-	walk.Walk(c, cexts, uc.dirs, uc.walkMode, func(dir, rel string, c *config.Config, update bool, f *rule.File, subdirs, regularFiles, genFiles []string) {
+	walk.Walk(c, cexts, uc.dirs, walkMode, func(dir, rel string, c *config.Config, update bool, f *rule.File, subdirs, regularFiles, genFiles []string) {
 		// If this file is ignored or if Gazelle was not asked to update this
 		// directory, just index the build file and move on.
 		if !update {
 			for _, repl := range c.KindMap {
 				mrslv.MappedKind(rel, repl)
 			}
-			if c.IndexLibraries && f != nil {
+			if c.IndexLibraries && f != nil && (!preindexed || updateRels.ShouldReIndex(rel)) {
 				for _, r := range f.Rules {
 					ruleIndex.AddRule(c, r, f)
 				}
@@ -448,7 +478,7 @@ func runFixUpdate(wd string, cmd command, args []string) (err error) {
 		})
 
 		// Add library rules to the dependency resolution table.
-		if c.IndexLibraries {
+		if c.IndexLibraries && (!preindexed || updateRels.ShouldReIndex(rel)) {
 			for _, r := range f.Rules {
 				ruleIndex.AddRule(c, r, f)
 			}
@@ -477,6 +507,13 @@ func runFixUpdate(wd string, cmd command, args []string) (err error) {
 	// Finish building the index for dependency resolution.
 	ruleIndex.Finish()
 
+	// Persist the index for future runs.
+	if c.IndexLibraries && uc.ruleIndexFile != "" {
+		if err := ruleIndex.SaveIndex(uc.ruleIndexFile); err != nil {
+			log.Printf("Failed to save index file %s: %v", uc.ruleIndexFile, err)
+		}
+	}
+
 	// Resolve dependencies.
 	rc, cleanupRc := repo.NewRemoteCache(uc.repos)
 	defer func() {
diff --git a/resolve/index.go b/resolve/index.go
index 109fa6557..b566d0a7d 100644
--- a/resolve/index.go
+++ b/resolve/index.go
@@ -16,7 +16,9 @@ limitations under the License.
 package resolve
 
 import (
+	"encoding/json"
 	"log"
+	"os"
 
 	"github.com/bazelbuild/bazel-gazelle/config"
 	"github.com/bazelbuild/bazel-gazelle/label"
@@ -145,6 +147,60 @@ func NewRuleIndex(mrslv func(ruleKind, pkgRel string) Resolver, exts ...interfac
 	}
 }
 
+// LoadIndex replaces the records in the index with those previously written
+// to indexDbPath by SaveIndex, dropping every record whose package
+// isPkgExcluded reports true for. It returns false with a nil error when
+// indexDbPath does not exist, and true once the records are loaded. It must
+// be called before Finish.
+func (ix *RuleIndex) LoadIndex(indexDbPath string, isPkgExcluded func(string) bool) (bool, error) {
+	if ix.indexed {
+		log.Fatal("LoadIndex called after Finish")
+	}
+
+	indexDbContent, err := os.ReadFile(indexDbPath)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return false, nil
+		}
+		return false, err
+	}
+
+	// TODO: read & verify index version
+
+	var rules []*ruleRecord
+	if err := json.Unmarshal(indexDbContent, &rules); err != nil {
+		return false, err
+	}
+
+	ix.rules = make([]*ruleRecord, 0, len(rules))
+	for _, r := range rules {
+		// A JSON null element decodes to a nil record; skip it rather than
+		// dereference it.
+		if r == nil || isPkgExcluded(r.Label.Pkg) {
+			continue
+		}
+		ix.rules = append(ix.rules, r)
+	}
+
+	return true, nil
+}
+
+// SaveIndex writes the records in the index to indexDbPath as JSON, creating
+// the file or truncating an existing one, so that a later run can restore
+// them with LoadIndex.
+func (ix *RuleIndex) SaveIndex(indexDbPath string) error {
+	// TODO: write index version
+
+	indexDbContent, err := json.Marshal(ix.rules)
+	if err != nil {
+		return err
+	}
+
+	// os.WriteFile also reports a failure to close the file, so a truncated
+	// index is never mistaken for a successful save.
+	return os.WriteFile(indexDbPath, indexDbContent, 0o666)
+}
+
 // AddRule adds a rule r to the index. The rule will only be indexed if there
 // is a known resolver for the rule's kind and Resolver.Imports returns a
 // non-nil slice.
diff --git a/walk/walk.go b/walk/walk.go
index 6a3778008..ea7e686fd 100644
--- a/walk/walk.go
+++ b/walk/walk.go
@@ -235,6 +235,30 @@ func NewUpdateFilter(root string, dirs []string, mode Mode) *UpdateFilter {
 	return &UpdateFilter{mode, relMap}
 }
 
+// ShouldReIndex reports whether rules in the package rel must be indexed
+// from its build file instead of being taken from a saved rule index. The
+// answer is the entry for rel in u.updateRels; in the "update subdirs" modes
+// a package without an entry takes the answer of its nearest ancestor that
+// has one, and false is returned when no entry is found.
+func (u *UpdateFilter) ShouldReIndex(rel string) bool {
+	inherit := u.mode == UpdateSubdirsMode || u.mode == VisitAllUpdateSubdirsMode
+	for {
+		if rel == "." {
+			rel = ""
+		}
+		if should, found := u.updateRels[rel]; found {
+			return should
+		}
+		parent := path.Dir(rel)
+		// path.Dir maps a rooted path such as "/" to itself; stop there
+		// instead of looping forever.
+		if !inherit || rel == "" || parent == rel {
+			return false
+		}
+		rel = parent
+	}
+}
+
 // shouldCall returns true if Walk should call the callback in the
 // directory rel.
 func (u *UpdateFilter) shouldCall(rel string, updateParent bool) bool {