summaryrefslogtreecommitdiffstats
path: root/modules/indexer/issues/indexer.go
diff options
context:
space:
mode:
Diffstat (limited to 'modules/indexer/issues/indexer.go')
-rw-r--r--modules/indexer/issues/indexer.go315
1 files changed, 315 insertions, 0 deletions
diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go
new file mode 100644
index 0000000..d731052
--- /dev/null
+++ b/modules/indexer/issues/indexer.go
@@ -0,0 +1,315 @@
+// Copyright 2018 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package issues
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "runtime/pprof"
+ "sync/atomic"
+ "time"
+
+ db_model "code.gitea.io/gitea/models/db"
+ repo_model "code.gitea.io/gitea/models/repo"
+ "code.gitea.io/gitea/modules/graceful"
+ "code.gitea.io/gitea/modules/indexer/issues/bleve"
+ "code.gitea.io/gitea/modules/indexer/issues/db"
+ "code.gitea.io/gitea/modules/indexer/issues/elasticsearch"
+ "code.gitea.io/gitea/modules/indexer/issues/internal"
+ "code.gitea.io/gitea/modules/indexer/issues/meilisearch"
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/optional"
+ "code.gitea.io/gitea/modules/process"
+ "code.gitea.io/gitea/modules/queue"
+ "code.gitea.io/gitea/modules/setting"
+)
+
+// IndexerMetadata is used to send data to the queue, so it contains only the ids.
+// It may look weird, because it has to be compatible with the old queue data format.
+// If the IsDelete flag is true, the IDs specify the issues to delete from the index without querying the database.
+// If the IsDelete flag is false, the ID specify the issue to index, so Indexer will query the database to get the issue data.
+// It should be noted that if the id is not existing in the database, it's index will be deleted too even if IsDelete is false.
+// Valid values:
+// - IsDelete = true, IDs = [1, 2, 3], and ID will be ignored
+// - IsDelete = false, ID = 1, and IDs will be ignored
+type IndexerMetadata struct {
+ ID int64 `json:"id"`
+
+ IsDelete bool `json:"is_delete"`
+ IDs []int64 `json:"ids"`
+}
+
+var (
+ // issueIndexerQueue queue of issue ids to be updated
+ issueIndexerQueue *queue.WorkerPoolQueue[*IndexerMetadata]
+ // globalIndexer is the global indexer, it cannot be nil.
+ // When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready.
+ // So it's always safe use it as *globalIndexer.Load() and call its methods.
+ globalIndexer atomic.Pointer[internal.Indexer]
+ dummyIndexer *internal.Indexer
+)
+
+func init() {
+ i := internal.NewDummyIndexer()
+ dummyIndexer = &i
+ globalIndexer.Store(dummyIndexer)
+}
+
+// InitIssueIndexer initialize issue indexer, syncReindex is true then reindex until
+// all issue index done.
+func InitIssueIndexer(syncReindex bool) {
+ ctx, _, finished := process.GetManager().AddTypedContext(context.Background(), "Service: IssueIndexer", process.SystemProcessType, false)
+
+ indexerInitWaitChannel := make(chan time.Duration, 1)
+
+ // Create the Queue
+ issueIndexerQueue = queue.CreateUniqueQueue(ctx, "issue_indexer", getIssueIndexerQueueHandler(ctx))
+
+ graceful.GetManager().RunAtTerminate(finished)
+
+ // Create the Indexer
+ go func() {
+ pprof.SetGoroutineLabels(ctx)
+ start := time.Now()
+ log.Info("PID %d: Initializing Issue Indexer: %s", os.Getpid(), setting.Indexer.IssueType)
+ var (
+ issueIndexer internal.Indexer
+ existed bool
+ err error
+ )
+ switch setting.Indexer.IssueType {
+ case "bleve":
+ defer func() {
+ if err := recover(); err != nil {
+ log.Error("PANIC whilst initializing issue indexer: %v\nStacktrace: %s", err, log.Stack(2))
+ log.Error("The indexer files are likely corrupted and may need to be deleted")
+ log.Error("You can completely remove the %q directory to make Forgejo recreate the indexes", setting.Indexer.IssuePath)
+ globalIndexer.Store(dummyIndexer)
+ log.Fatal("PID: %d Unable to initialize the Bleve Issue Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.IssuePath, err)
+ }
+ }()
+ issueIndexer = bleve.NewIndexer(setting.Indexer.IssuePath)
+ existed, err = issueIndexer.Init(ctx)
+ if err != nil {
+ log.Fatal("Unable to initialize Bleve Issue Indexer at path: %s Error: %v", setting.Indexer.IssuePath, err)
+ }
+ case "elasticsearch":
+ issueIndexer = elasticsearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName)
+ existed, err = issueIndexer.Init(ctx)
+ if err != nil {
+ log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err)
+ }
+ case "db":
+ issueIndexer = db.NewIndexer()
+ case "meilisearch":
+ issueIndexer = meilisearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName)
+ existed, err = issueIndexer.Init(ctx)
+ if err != nil {
+ log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err)
+ }
+ default:
+ log.Fatal("Unknown issue indexer type: %s", setting.Indexer.IssueType)
+ }
+ globalIndexer.Store(&issueIndexer)
+
+ graceful.GetManager().RunAtTerminate(func() {
+ log.Debug("Closing issue indexer")
+ (*globalIndexer.Load()).Close()
+ log.Info("PID: %d Issue Indexer closed", os.Getpid())
+ })
+
+ // Start processing the queue
+ go graceful.GetManager().RunWithCancel(issueIndexerQueue)
+
+ // Populate the index
+ if !existed {
+ if syncReindex {
+ graceful.GetManager().RunWithShutdownContext(populateIssueIndexer)
+ } else {
+ go graceful.GetManager().RunWithShutdownContext(populateIssueIndexer)
+ }
+ }
+
+ indexerInitWaitChannel <- time.Since(start)
+ close(indexerInitWaitChannel)
+ }()
+
+ if syncReindex {
+ select {
+ case <-indexerInitWaitChannel:
+ case <-graceful.GetManager().IsShutdown():
+ }
+ } else if setting.Indexer.StartupTimeout > 0 {
+ go func() {
+ pprof.SetGoroutineLabels(ctx)
+ timeout := setting.Indexer.StartupTimeout
+ if graceful.GetManager().IsChild() && setting.GracefulHammerTime > 0 {
+ timeout += setting.GracefulHammerTime
+ }
+ select {
+ case duration := <-indexerInitWaitChannel:
+ log.Info("Issue Indexer Initialization took %v", duration)
+ case <-graceful.GetManager().IsShutdown():
+ log.Warn("Shutdown occurred before issue index initialisation was complete")
+ case <-time.After(timeout):
+ issueIndexerQueue.ShutdownWait(5 * time.Second)
+ log.Fatal("Issue Indexer Initialization timed-out after: %v", timeout)
+ }
+ }()
+ }
+}
+
+func getIssueIndexerQueueHandler(ctx context.Context) func(items ...*IndexerMetadata) []*IndexerMetadata {
+ return func(items ...*IndexerMetadata) []*IndexerMetadata {
+ var unhandled []*IndexerMetadata
+
+ indexer := *globalIndexer.Load()
+ for _, item := range items {
+ log.Trace("IndexerMetadata Process: %d %v %t", item.ID, item.IDs, item.IsDelete)
+ if item.IsDelete {
+ if err := indexer.Delete(ctx, item.IDs...); err != nil {
+ log.Error("Issue indexer handler: failed to from index: %v Error: %v", item.IDs, err)
+ unhandled = append(unhandled, item)
+ }
+ continue
+ }
+ data, existed, err := getIssueIndexerData(ctx, item.ID)
+ if err != nil {
+ log.Error("Issue indexer handler: failed to get issue data of %d: %v", item.ID, err)
+ unhandled = append(unhandled, item)
+ continue
+ }
+ if !existed {
+ if err := indexer.Delete(ctx, item.ID); err != nil {
+ log.Error("Issue indexer handler: failed to delete issue %d from index: %v", item.ID, err)
+ unhandled = append(unhandled, item)
+ }
+ continue
+ }
+ if err := indexer.Index(ctx, data); err != nil {
+ log.Error("Issue indexer handler: failed to index issue %d: %v", item.ID, err)
+ unhandled = append(unhandled, item)
+ continue
+ }
+ }
+
+ return unhandled
+ }
+}
+
+// populateIssueIndexer populate the issue indexer with issue data
+func populateIssueIndexer(ctx context.Context) {
+ ctx, _, finished := process.GetManager().AddTypedContext(ctx, "Service: PopulateIssueIndexer", process.SystemProcessType, true)
+ defer finished()
+ ctx = contextWithKeepRetry(ctx) // keep retrying since it's a background task
+ if err := PopulateIssueIndexer(ctx); err != nil {
+ log.Error("Issue indexer population failed: %v", err)
+ }
+}
+
+func PopulateIssueIndexer(ctx context.Context) error {
+ for page := 1; ; page++ {
+ select {
+ case <-ctx.Done():
+ return fmt.Errorf("shutdown before completion: %w", ctx.Err())
+ default:
+ }
+ repos, _, err := repo_model.SearchRepositoryByName(ctx, &repo_model.SearchRepoOptions{
+ ListOptions: db_model.ListOptions{Page: page, PageSize: repo_model.RepositoryListDefaultPageSize},
+ OrderBy: db_model.SearchOrderByID,
+ Private: true,
+ Collaborate: optional.Some(false),
+ })
+ if err != nil {
+ log.Error("SearchRepositoryByName: %v", err)
+ continue
+ }
+ if len(repos) == 0 {
+ log.Debug("Issue Indexer population complete")
+ return nil
+ }
+
+ for _, repo := range repos {
+ if err := updateRepoIndexer(ctx, repo.ID); err != nil {
+ return fmt.Errorf("populate issue indexer for repo %d: %v", repo.ID, err)
+ }
+ }
+ }
+}
+
+// UpdateRepoIndexer add/update all issues of the repositories
+func UpdateRepoIndexer(ctx context.Context, repoID int64) {
+ if err := updateRepoIndexer(ctx, repoID); err != nil {
+ log.Error("Unable to push repo %d to issue indexer: %v", repoID, err)
+ }
+}
+
+// UpdateIssueIndexer add/update an issue to the issue indexer
+func UpdateIssueIndexer(ctx context.Context, issueID int64) {
+ if err := updateIssueIndexer(ctx, issueID); err != nil {
+ log.Error("Unable to push issue %d to issue indexer: %v", issueID, err)
+ }
+}
+
+// DeleteRepoIssueIndexer deletes repo's all issues indexes
+func DeleteRepoIssueIndexer(ctx context.Context, repoID int64) {
+ if err := deleteRepoIssueIndexer(ctx, repoID); err != nil {
+ log.Error("Unable to push deleted repo %d to issue indexer: %v", repoID, err)
+ }
+}
+
+// IsAvailable checks if issue indexer is available
+func IsAvailable(ctx context.Context) bool {
+ return (*globalIndexer.Load()).Ping(ctx) == nil
+}
+
+// SearchOptions indicates the options for searching issues
+type SearchOptions = internal.SearchOptions
+
+const (
+ SortByCreatedDesc = internal.SortByCreatedDesc
+ SortByUpdatedDesc = internal.SortByUpdatedDesc
+ SortByCommentsDesc = internal.SortByCommentsDesc
+ SortByDeadlineDesc = internal.SortByDeadlineDesc
+ SortByCreatedAsc = internal.SortByCreatedAsc
+ SortByUpdatedAsc = internal.SortByUpdatedAsc
+ SortByCommentsAsc = internal.SortByCommentsAsc
+ SortByDeadlineAsc = internal.SortByDeadlineAsc
+)
+
+// SearchIssues search issues by options.
+func SearchIssues(ctx context.Context, opts *SearchOptions) ([]int64, int64, error) {
+ indexer := *globalIndexer.Load()
+
+ if opts.Keyword == "" {
+ // This is a conservative shortcut.
+ // If the keyword is empty, db has better (at least not worse) performance to filter issues.
+ // When the keyword is empty, it tends to listing rather than searching issues.
+ // So if the user creates an issue and list issues immediately, the issue may not be listed because the indexer needs time to index the issue.
+ // Even worse, the external indexer like elastic search may not be available for a while,
+ // and the user may not be able to list issues completely until it is available again.
+ indexer = db.NewIndexer()
+ }
+
+ result, err := indexer.Search(ctx, opts)
+ if err != nil {
+ return nil, 0, err
+ }
+
+ ret := make([]int64, 0, len(result.Hits))
+ for _, hit := range result.Hits {
+ ret = append(ret, hit.ID)
+ }
+
+ return ret, result.Total, nil
+}
+
+// CountIssues counts issues by options. It is a shortcut of SearchIssues(ctx, opts) but only returns the total count.
+func CountIssues(ctx context.Context, opts *SearchOptions) (int64, error) {
+ opts = opts.Copy(func(options *SearchOptions) { options.Paginator = &db_model.ListOptions{PageSize: 0} })
+
+ _, total, err := SearchIssues(ctx, opts)
+ return total, err
+}