summary | refs | log | tree | commit | diff | stats
path: root/modules/indexer/stats
diff options
context:
space:
mode:
author: Daniel Baumann <daniel@debian.org> 2024-10-18 20:33:49 +0200
committer: Daniel Baumann <daniel@debian.org> 2024-10-18 20:33:49 +0200
commit: dd136858f1ea40ad3c94191d647487fa4f31926c (patch)
tree: 58fec94a7b2a12510c9664b21793f1ed560c6518 /modules/indexer/stats
parent: Initial commit. (diff)
download: forgejo-dd136858f1ea40ad3c94191d647487fa4f31926c.tar.xz
forgejo-dd136858f1ea40ad3c94191d647487fa4f31926c.zip
Adding upstream version 9.0.0.
Signed-off-by: Daniel Baumann <daniel@debian.org>
Diffstat (limited to 'modules/indexer/stats')
-rw-r--r-- modules/indexer/stats/db.go 84
-rw-r--r-- modules/indexer/stats/indexer.go 88
-rw-r--r-- modules/indexer/stats/indexer_test.go 52
-rw-r--r-- modules/indexer/stats/queue.go 49
4 files changed, 273 insertions, 0 deletions
diff --git a/modules/indexer/stats/db.go b/modules/indexer/stats/db.go
new file mode 100644
index 0000000..98a977c
--- /dev/null
+++ b/modules/indexer/stats/db.go
@@ -0,0 +1,84 @@
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package stats
+
+import (
+ "fmt"
+
+ repo_model "code.gitea.io/gitea/models/repo"
+ "code.gitea.io/gitea/modules/git"
+ "code.gitea.io/gitea/modules/gitrepo"
+ "code.gitea.io/gitea/modules/graceful"
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/process"
+ "code.gitea.io/gitea/modules/setting"
+)
+
+// DBIndexer implements the Indexer interface. Its Index method calculates a
+// repository's language statistics and saves them to the database.
+type DBIndexer struct{}
+
+// Index calculates the language statistics of the repository with the given
+// ID at the head of its default branch and saves them to the database.
+//
+// It returns nil without doing any work when the repository is empty, when
+// opening the git repository fails with "no such file or directory", when the
+// default branch commit cannot be resolved because the branch or object does
+// not exist (or for any resolution error while setting.IsInTesting is set),
+// and when the stored indexer status already records the current commit.
+// Every other failure is returned to the caller.
+func (db *DBIndexer) Index(id int64) error {
+	desc := fmt.Sprintf("Stats.DB Index Repo[%d]", id)
+	ctx, _, finished := process.GetManager().AddContext(graceful.GetManager().ShutdownContext(), desc)
+	defer finished()
+
+	repo, err := repo_model.GetRepositoryByID(ctx, id)
+	switch {
+	case err != nil:
+		return err
+	case repo.IsEmpty:
+		return nil
+	}
+
+	indexerStatus, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeStats)
+	if err != nil {
+		return err
+	}
+
+	gitRepo, err := gitrepo.OpenRepository(ctx, repo)
+	if err != nil {
+		// Only the exact "missing directory" message is treated as benign.
+		if err.Error() != "no such file or directory" {
+			return err
+		}
+		return nil
+	}
+	defer gitRepo.Close()
+
+	// Resolve the commit at the tip of the default branch.
+	headCommitID, err := gitRepo.GetBranchCommitID(repo.DefaultBranch)
+	if err != nil {
+		skip := git.IsErrBranchNotExist(err) || git.IsErrNotExist(err) || setting.IsInTesting
+		if !skip {
+			log.Error("Unable to get commit ID for default branch %s in %s. Error: %v", repo.DefaultBranch, repo.RepoPath(), err)
+			return err
+		}
+		log.Debug("Unable to get commit ID for default branch %s in %s ... skipping this repository", repo.DefaultBranch, repo.RepoPath())
+		return nil
+	}
+
+	// Stats already recorded for this commit: nothing to recalculate.
+	if indexerStatus.CommitSha == headCommitID {
+		return nil
+	}
+
+	// Compute the language statistics, then persist them.
+	langStats, err := gitRepo.GetLanguageStats(headCommitID)
+	if err != nil {
+		if !setting.IsInTesting {
+			log.Error("Unable to get language stats for ID %s for default branch %s in %s. Error: %v", headCommitID, repo.DefaultBranch, repo.RepoPath(), err)
+		}
+		return err
+	}
+	if err = repo_model.UpdateLanguageStats(ctx, repo, headCommitID, langStats); err != nil {
+		log.Error("Unable to update language stats for ID %s for default branch %s in %s. Error: %v", headCommitID, repo.DefaultBranch, repo.RepoPath(), err)
+		return err
+	}
+
+	log.Debug("DBIndexer completed language stats for ID %s for default branch %s in %s. stats count: %d", headCommitID, repo.DefaultBranch, repo.RepoPath(), len(langStats))
+	return nil
+}
+
+// Close is a no-op; it exists so that DBIndexer provides the Close method.
+func (db *DBIndexer) Close() {}
diff --git a/modules/indexer/stats/indexer.go b/modules/indexer/stats/indexer.go
new file mode 100644
index 0000000..7ec89e2
--- /dev/null
+++ b/modules/indexer/stats/indexer.go
@@ -0,0 +1,88 @@
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package stats
+
+import (
+ "context"
+
+ "code.gitea.io/gitea/models/db"
+ repo_model "code.gitea.io/gitea/models/repo"
+ "code.gitea.io/gitea/modules/graceful"
+ "code.gitea.io/gitea/modules/log"
+)
+
+// Indexer defines an interface to index repository stats
+// TODO: this indexer is quite different from the others, maybe this package should be moved out from module/indexer
+type Indexer interface {
+ // Index indexes the stats of the repository identified by id.
+ Index(id int64) error
+ // Close shuts the indexer down; the DBIndexer implementation does nothing.
+ Close()
+}
+
+// indexer is the package-level Indexer instance. Init assigns it and the
+// stats queue handler calls its Index method.
+var indexer Indexer
+
+// Init sets up the repository stats indexer: it installs a DBIndexer as the
+// package indexer, creates the stats queue and, when that succeeded, starts a
+// goroutine that queues the already existing repositories. The error from the
+// queue setup, if any, is returned.
+func Init() error {
+	indexer = new(DBIndexer)
+
+	err := initStatsQueue()
+	if err == nil {
+		go populateRepoIndexer(db.DefaultContext)
+	}
+	return err
+}
+
+// populateRepoIndexer populate the repo indexer with pre-existing data. This
+// should only be run when the indexer is created for the first time.
+func populateRepoIndexer(ctx context.Context) {
+ log.Info("Populating the repo stats indexer with existing repositories")
+
+ isShutdown := graceful.GetManager().IsShutdown()
+
+ exist, err := db.IsTableNotEmpty("repository")
+ if err != nil {
+ log.Fatal("System error: %v", err)
+ } else if !exist {
+ return
+ }
+
+ var maxRepoID int64
+ if maxRepoID, err = db.GetMaxID("repository"); err != nil {
+ log.Fatal("System error: %v", err)
+ }
+
+ // start with the maximum existing repo ID and work backwards, so that we
+ // don't include repos that are created after gitea starts; such repos will
+ // already be added to the indexer, and we don't need to add them again.
+ for maxRepoID > 0 {
+ select {
+ case <-isShutdown:
+ log.Info("Repository Stats Indexer population shutdown before completion")
+ return
+ default:
+ }
+ ids, err := repo_model.GetUnindexedRepos(ctx, repo_model.RepoIndexerTypeStats, maxRepoID, 0, 50)
+ if err != nil {
+ log.Error("populateRepoIndexer: %v", err)
+ return
+ } else if len(ids) == 0 {
+ break
+ }
+ for _, id := range ids {
+ select {
+ case <-isShutdown:
+ log.Info("Repository Stats Indexer population shutdown before completion")
+ return
+ default:
+ }
+ if err := statsQueue.Push(id); err != nil {
+ log.Error("statsQueue.Push: %v", err)
+ }
+ maxRepoID = id - 1
+ }
+ }
+ log.Info("Done (re)populating the repo stats indexer with existing repositories")
+}
diff --git a/modules/indexer/stats/indexer_test.go b/modules/indexer/stats/indexer_test.go
new file mode 100644
index 0000000..3ab2e58
--- /dev/null
+++ b/modules/indexer/stats/indexer_test.go
@@ -0,0 +1,52 @@
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package stats
+
+import (
+ "context"
+ "testing"
+ "time"
+
+ "code.gitea.io/gitea/models/db"
+ repo_model "code.gitea.io/gitea/models/repo"
+ "code.gitea.io/gitea/models/unittest"
+ "code.gitea.io/gitea/modules/queue"
+ "code.gitea.io/gitea/modules/setting"
+
+ _ "code.gitea.io/gitea/models"
+ _ "code.gitea.io/gitea/models/actions"
+ _ "code.gitea.io/gitea/models/activities"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// TestMain hands the package's test run over to unittest.MainTest.
+func TestMain(m *testing.M) {
+ unittest.MainTest(m)
+}
+
+// TestRepoStatsIndex initialises the stats indexer, queues repository 1,
+// flushes all queues and then verifies the commit recorded in the indexer
+// status and that no top language stats are reported for the repository.
+func TestRepoStatsIndex(t *testing.T) {
+	require.NoError(t, unittest.PrepareTestDatabase())
+
+	// Fail immediately if the empty configuration cannot be loaded, instead
+	// of discarding the error and continuing with whatever was returned.
+	cfg, err := setting.NewConfigProviderFromData("")
+	require.NoError(t, err)
+	setting.CfgProvider = cfg
+
+	setting.LoadQueueSettings()
+
+	err = Init()
+	require.NoError(t, err)
+
+	repo, err := repo_model.GetRepositoryByID(db.DefaultContext, 1)
+	require.NoError(t, err)
+
+	err = UpdateRepoIndexer(repo)
+	require.NoError(t, err)
+
+	// Wait (up to five seconds) for the queued work to be processed.
+	require.NoError(t, queue.GetManager().FlushAll(context.Background(), 5*time.Second))
+
+	status, err := repo_model.GetIndexerStatus(db.DefaultContext, repo, repo_model.RepoIndexerTypeStats)
+	require.NoError(t, err)
+	assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
+	langs, err := repo_model.GetTopLanguageStats(db.DefaultContext, repo, 5)
+	require.NoError(t, err)
+	assert.Empty(t, langs)
+}
diff --git a/modules/indexer/stats/queue.go b/modules/indexer/stats/queue.go
new file mode 100644
index 0000000..d002bd5
--- /dev/null
+++ b/modules/indexer/stats/queue.go
@@ -0,0 +1,49 @@
+// Copyright 2020 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package stats
+
+import (
+ "fmt"
+
+ repo_model "code.gitea.io/gitea/models/repo"
+ "code.gitea.io/gitea/modules/graceful"
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/queue"
+ "code.gitea.io/gitea/modules/setting"
+)
+
+// statsQueue is the unique queue of repository IDs whose stats need updating.
+// It is created by initStatsQueue and its items are processed by handler.
+var statsQueue *queue.WorkerPoolQueue[int64]
+
+// handler is the statsQueue worker function. It runs indexer.Index for every
+// queued repository ID; a failure is logged (unless setting.IsInTesting is
+// set) and never retried, so the returned slice is always nil.
+func handler(items ...int64) []int64 {
+	for _, repoID := range items {
+		err := indexer.Index(repoID)
+		if err == nil || setting.IsInTesting {
+			continue
+		}
+		log.Error("stats queue indexer.Index(%d) failed: %v", repoID, err)
+	}
+	return nil
+}
+
+// initStatsQueue creates the unique "repo_stats_update" queue bound to the
+// graceful manager's shutdown context, stores it in statsQueue and starts it
+// in a goroutine. It returns an error when the queue could not be created.
+func initStatsQueue() error {
+	shutdownCtx := graceful.GetManager().ShutdownContext()
+	if statsQueue = queue.CreateUniqueQueue(shutdownCtx, "repo_stats_update", handler); statsQueue == nil {
+		return fmt.Errorf("unable to create repo_stats_update queue")
+	}
+
+	go graceful.GetManager().RunWithCancel(statsQueue)
+	return nil
+}
+
+// UpdateRepoIndexer queues the given repository for a stats update. A
+// repository that is already queued is not an error: that case is only logged
+// at debug level. Any other push failure is returned.
+func UpdateRepoIndexer(repo *repo_model.Repository) error {
+	switch err := statsQueue.Push(repo.ID); err {
+	case nil:
+		return nil
+	case queue.ErrAlreadyInQueue:
+		log.Debug("Repo ID: %d already queued", repo.ID)
+		return nil
+	default:
+		return err
+	}
+}