diff options
author | Daniel Baumann <daniel@debian.org> | 2024-10-18 20:33:49 +0200 |
---|---|---|
committer | Daniel Baumann <daniel@debian.org> | 2024-12-12 23:57:56 +0100 |
commit | e68b9d00a6e05b3a941f63ffb696f91e554ac5ec (patch) | |
tree | 97775d6c13b0f416af55314eb6a89ef792474615 /modules/indexer/stats | |
parent | Initial commit. (diff) | |
download | forgejo-e68b9d00a6e05b3a941f63ffb696f91e554ac5ec.tar.xz forgejo-e68b9d00a6e05b3a941f63ffb696f91e554ac5ec.zip |
Adding upstream version 9.0.3.
Signed-off-by: Daniel Baumann <daniel@debian.org>
Diffstat (limited to '')
-rw-r--r-- | modules/indexer/stats/db.go | 84 | ||||
-rw-r--r-- | modules/indexer/stats/indexer.go | 88 | ||||
-rw-r--r-- | modules/indexer/stats/indexer_test.go | 52 | ||||
-rw-r--r-- | modules/indexer/stats/queue.go | 49 |
4 files changed, 273 insertions, 0 deletions
diff --git a/modules/indexer/stats/db.go b/modules/indexer/stats/db.go new file mode 100644 index 0000000..98a977c --- /dev/null +++ b/modules/indexer/stats/db.go @@ -0,0 +1,84 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package stats + +import ( + "fmt" + + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/process" + "code.gitea.io/gitea/modules/setting" +) + +// DBIndexer implements Indexer interface to use database's like search +type DBIndexer struct{} + +// Index repository status function +func (db *DBIndexer) Index(id int64) error { + ctx, _, finished := process.GetManager().AddContext(graceful.GetManager().ShutdownContext(), fmt.Sprintf("Stats.DB Index Repo[%d]", id)) + defer finished() + + repo, err := repo_model.GetRepositoryByID(ctx, id) + if err != nil { + return err + } + if repo.IsEmpty { + return nil + } + + status, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeStats) + if err != nil { + return err + } + + gitRepo, err := gitrepo.OpenRepository(ctx, repo) + if err != nil { + if err.Error() == "no such file or directory" { + return nil + } + return err + } + defer gitRepo.Close() + + // Get latest commit for default branch + commitID, err := gitRepo.GetBranchCommitID(repo.DefaultBranch) + if err != nil { + if git.IsErrBranchNotExist(err) || git.IsErrNotExist(err) || setting.IsInTesting { + log.Debug("Unable to get commit ID for default branch %s in %s ... skipping this repository", repo.DefaultBranch, repo.RepoPath()) + return nil + } + log.Error("Unable to get commit ID for default branch %s in %s. Error: %v", repo.DefaultBranch, repo.RepoPath(), err) + return err + } + + // Do not recalculate stats if already calculated for this commit + if status.CommitSha == commitID { + return nil + } + + // Calculate and save language statistics to database + stats, err := gitRepo.GetLanguageStats(commitID) + if err != nil { + if !setting.IsInTesting { + log.Error("Unable to get language stats for ID %s for default branch %s in %s. Error: %v", commitID, repo.DefaultBranch, repo.RepoPath(), err) + } + return err + } + err = repo_model.UpdateLanguageStats(ctx, repo, commitID, stats) + if err != nil { + log.Error("Unable to update language stats for ID %s for default branch %s in %s. Error: %v", commitID, repo.DefaultBranch, repo.RepoPath(), err) + return err + } + + log.Debug("DBIndexer completed language stats for ID %s for default branch %s in %s. stats count: %d", commitID, repo.DefaultBranch, repo.RepoPath(), len(stats)) + return nil +} + +// Close dummy function +func (db *DBIndexer) Close() { +} diff --git a/modules/indexer/stats/indexer.go b/modules/indexer/stats/indexer.go new file mode 100644 index 0000000..7ec89e2 --- /dev/null +++ b/modules/indexer/stats/indexer.go @@ -0,0 +1,88 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package stats + +import ( + "context" + + "code.gitea.io/gitea/models/db" + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/log" +) + +// Indexer defines an interface to index repository stats +// TODO: this indexer is quite different from the others, maybe this package should be moved out from module/indexer +type Indexer interface { + Index(id int64) error + Close() +} + +// indexer represents a indexer instance +var indexer Indexer + +// Init initialize the repo indexer +func Init() error { + indexer = &DBIndexer{} + + if err := initStatsQueue(); err != nil { + return err + } + + go populateRepoIndexer(db.DefaultContext) + + return nil +} + +// populateRepoIndexer populate the repo indexer with pre-existing data. This +// should only be run when the indexer is created for the first time. +func populateRepoIndexer(ctx context.Context) { + log.Info("Populating the repo stats indexer with existing repositories") + + isShutdown := graceful.GetManager().IsShutdown() + + exist, err := db.IsTableNotEmpty("repository") + if err != nil { + log.Fatal("System error: %v", err) + } else if !exist { + return + } + + var maxRepoID int64 + if maxRepoID, err = db.GetMaxID("repository"); err != nil { + log.Fatal("System error: %v", err) + } + + // start with the maximum existing repo ID and work backwards, so that we + // don't include repos that are created after gitea starts; such repos will + // already be added to the indexer, and we don't need to add them again. + for maxRepoID > 0 { + select { + case <-isShutdown: + log.Info("Repository Stats Indexer population shutdown before completion") + return + default: + } + ids, err := repo_model.GetUnindexedRepos(ctx, repo_model.RepoIndexerTypeStats, maxRepoID, 0, 50) + if err != nil { + log.Error("populateRepoIndexer: %v", err) + return + } else if len(ids) == 0 { + break + } + for _, id := range ids { + select { + case <-isShutdown: + log.Info("Repository Stats Indexer population shutdown before completion") + return + default: + } + if err := statsQueue.Push(id); err != nil { + log.Error("statsQueue.Push: %v", err) + } + maxRepoID = id - 1 + } + } + log.Info("Done (re)populating the repo stats indexer with existing repositories") +} diff --git a/modules/indexer/stats/indexer_test.go b/modules/indexer/stats/indexer_test.go new file mode 100644 index 0000000..3ab2e58 --- /dev/null +++ b/modules/indexer/stats/indexer_test.go @@ -0,0 +1,52 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package stats + +import ( + "context" + "testing" + "time" + + "code.gitea.io/gitea/models/db" + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/models/unittest" + "code.gitea.io/gitea/modules/queue" + "code.gitea.io/gitea/modules/setting" + + _ "code.gitea.io/gitea/models" + _ "code.gitea.io/gitea/models/actions" + _ "code.gitea.io/gitea/models/activities" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMain(m *testing.M) { + unittest.MainTest(m) +} + +func TestRepoStatsIndex(t *testing.T) { + require.NoError(t, unittest.PrepareTestDatabase()) + setting.CfgProvider, _ = setting.NewConfigProviderFromData("") + + setting.LoadQueueSettings() + + err := Init() + require.NoError(t, err) + + repo, err := repo_model.GetRepositoryByID(db.DefaultContext, 1) + require.NoError(t, err) + + err = UpdateRepoIndexer(repo) + require.NoError(t, err) + + require.NoError(t, queue.GetManager().FlushAll(context.Background(), 5*time.Second)) + + status, err := repo_model.GetIndexerStatus(db.DefaultContext, repo, repo_model.RepoIndexerTypeStats) + require.NoError(t, err) + assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha) + langs, err := repo_model.GetTopLanguageStats(db.DefaultContext, repo, 5) + require.NoError(t, err) + assert.Empty(t, langs) +} diff --git a/modules/indexer/stats/queue.go b/modules/indexer/stats/queue.go new file mode 100644 index 0000000..d002bd5 --- /dev/null +++ b/modules/indexer/stats/queue.go @@ -0,0 +1,49 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package stats + +import ( + "fmt" + + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/queue" + "code.gitea.io/gitea/modules/setting" +) + +// statsQueue represents a queue to handle repository stats updates +var statsQueue *queue.WorkerPoolQueue[int64] + +// handle passed PR IDs and test the PRs +func handler(items ...int64) []int64 { + for _, opts := range items { + if err := indexer.Index(opts); err != nil { + if !setting.IsInTesting { + log.Error("stats queue indexer.Index(%d) failed: %v", opts, err) + } + } + } + return nil +} + +func initStatsQueue() error { + statsQueue = queue.CreateUniqueQueue(graceful.GetManager().ShutdownContext(), "repo_stats_update", handler) + if statsQueue == nil { + return fmt.Errorf("unable to create repo_stats_update queue") + } + go graceful.GetManager().RunWithCancel(statsQueue) + return nil +} + +// UpdateRepoIndexer update a repository's entries in the indexer +func UpdateRepoIndexer(repo *repo_model.Repository) error { + if err := statsQueue.Push(repo.ID); err != nil { + if err != queue.ErrAlreadyInQueue { + return err + } + log.Debug("Repo ID: %d already queued", repo.ID) + } + return nil +} |