summary | refs | log | tree | commit | diff | stats
path: root/modules/indexer
diff options
context:
space:
mode:
author: Ethan Koenig <ethantkoenig@gmail.com>, 2017-09-16 22:16:21 +0200
committer: Lauris BH <lauris@nix.lv>, 2017-09-16 22:16:21 +0200
commit: b0f7457d9ef6e16d4a3197f7544035d5d387e201 (patch)
tree: dbe6a9b3b74f9a4a6eaa0c0cfe59fa08509edccc /modules/indexer
parent: Restructure markup & markdown to prepare for multiple markup language… (#2411) (diff)
download: forgejo-b0f7457d9ef6e16d4a3197f7544035d5d387e201.tar.xz
download: forgejo-b0f7457d9ef6e16d4a3197f7544035d5d387e201.zip
Improve issue search (#2387)
* Improve issue indexer
* Fix new issue sqlite bug
* Different test indexer paths for each db
* Add integration indexer paths to make clean
Diffstat (limited to 'modules/indexer')
-rw-r--r-- modules/indexer/indexer.go 37
-rw-r--r-- modules/indexer/issue.go 143
2 files changed, 176 insertions, 4 deletions
diff --git a/modules/indexer/indexer.go b/modules/indexer/indexer.go
index 2b7b76f7f2..5ee813412d 100644
--- a/modules/indexer/indexer.go
+++ b/modules/indexer/indexer.go
@@ -5,10 +5,39 @@
package indexer
import (
- "code.gitea.io/gitea/models"
+ "fmt"
+ "strconv"
+
+ "github.com/blevesearch/bleve"
+ "github.com/blevesearch/bleve/search/query"
)
-// NewContext start indexer service
-func NewContext() {
- models.InitIssueIndexer()
+// indexerID returns the base-36 string form of id; callers in this package use it as the document ID passed to the bleve index
+func indexerID(id int64) string {
+ return strconv.FormatInt(id, 36) // base 36; idOfIndexerID parses with the same base
+}
+
+// idOfIndexerID parses a base-36 indexer ID back into the integer id it encodes (the inverse of indexerID); returns 0 and a descriptive error when the string does not parse
+func idOfIndexerID(indexerID string) (int64, error) { // the parameter shadows the package-level indexerID function inside this body
+ id, err := strconv.ParseInt(indexerID, 36, 64) // base 36, 64-bit: mirrors strconv.FormatInt(id, 36) in indexerID
+ if err != nil {
+ return 0, fmt.Errorf("Unexpected indexer ID %s: %v", indexerID, err) // %v flattens the parse error into the message text
+ }
+ return id, nil
+}
+
+// numericEqualityQuery builds a numeric range query on field whose lower and upper bounds are both value, with both ends inclusive
+func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
+ f := float64(value) // bounds are passed as *float64; NOTE(review): int64 magnitudes above 2^53 are not exactly representable as float64
+ tru := true // addressable true, because the inclusive flags are passed by pointer
+ q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru) // min == max, both ends inclusive
+ q.SetField(field)
+ return q
+}
+
+func newMatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery { // newMatchPhraseQuery builds a bleve match-phrase query for matchPhrase with its field and analyzer set
+ q := bleve.NewMatchPhraseQuery(matchPhrase)
+ q.FieldVal = field // assigned directly on the struct field
+ q.Analyzer = analyzer // presumably the name of a registered analyzer; callers in this package pass issueIndexerAnalyzer
+ return q
}
diff --git a/modules/indexer/issue.go b/modules/indexer/issue.go
new file mode 100644
index 0000000000..2503a78825
--- /dev/null
+++ b/modules/indexer/issue.go
@@ -0,0 +1,143 @@
+// Copyright 2017 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package indexer
+
+import (
+ "os"
+
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/setting"
+
+ "github.com/blevesearch/bleve"
+ "github.com/blevesearch/bleve/analysis/analyzer/custom"
+ "github.com/blevesearch/bleve/analysis/token/lowercase"
+ "github.com/blevesearch/bleve/analysis/token/unicodenorm"
+ "github.com/blevesearch/bleve/analysis/tokenizer/unicode"
+)
+
+// issueIndexer (thread-safe) index for searching issues; assigned by InitIssueIndexer (bleve.Open) or createIssueIndexer (bleve.New)
+var issueIndexer bleve.Index
+
+// IssueIndexerData is the document stored in the issue indexer for one issue
+type IssueIndexerData struct {
+ RepoID int64 // mapped as a numeric field; SearchIssuesByKeyword filters on it
+ Title string // mapped as a text field; searched by keyword
+ Content string // mapped as a text field; searched by keyword
+ Comments []string // mapped as a text field; searched by keyword
+}
+
+// IssueIndexerUpdate pairs an issue ID with the data to index for that issue
+type IssueIndexerUpdate struct {
+ IssueID int64 // converted with indexerID to form the document ID
+ Data *IssueIndexerData // document passed to the index under that ID
+}
+
+const issueIndexerAnalyzer = "issueIndexer" // name under which createIssueIndexer registers the custom analyzer; also passed to the match-phrase queries
+
+// InitIssueIndexer opens the issue index at setting.Indexer.IssuePath, or creates it and then calls populateIndexer when that path does not exist; every failure is reported through log.Fatal
+func InitIssueIndexer(populateIndexer func() error) {
+ _, err := os.Stat(setting.Indexer.IssuePath)
+ if err != nil {
+ if os.IsNotExist(err) {
+ if err = createIssueIndexer(); err != nil { // nothing at the path yet: build a fresh index
+ log.Fatal(4, "CreateIssuesIndexer: %v", err)
+ }
+ if err = populateIndexer(); err != nil { // caller-supplied callback, invoked only on this fresh-index path
+ log.Fatal(4, "PopulateIssuesIndex: %v", err)
+ }
+ } else { // Stat failed for a reason other than "does not exist"
+ log.Fatal(4, "InitIssuesIndexer: %v", err)
+ }
+ } else { // path exists: open the existing index; populateIndexer is not called
+ issueIndexer, err = bleve.Open(setting.Indexer.IssuePath)
+ if err != nil {
+ log.Error(4, "Unable to open issues indexer (%s)."+
+ " If the error is due to incompatible versions, try deleting the indexer files;"+
+ " gitea will recreate them with the appropriate version the next time it runs."+
+ " Deleting the indexer files will not result in loss of data.",
+ setting.Indexer.IssuePath)
+ log.Fatal(4, "InitIssuesIndexer, open index: %v", err)
+ }
+ }
+}
+
+// createIssueIndexer builds the index mapping and creates a new bleve index at setting.Indexer.IssuePath, storing it in issueIndexer; it does not itself check whether an index already exists (InitIssueIndexer does)
+func createIssueIndexer() error {
+ mapping := bleve.NewIndexMapping()
+ docMapping := bleve.NewDocumentMapping()
+
+ docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())
+
+ textFieldMapping := bleve.NewTextFieldMapping() // one mapping instance shared by the three text fields below
+ docMapping.AddFieldMappingsAt("Title", textFieldMapping)
+ docMapping.AddFieldMappingsAt("Content", textFieldMapping)
+ docMapping.AddFieldMappingsAt("Comments", textFieldMapping)
+
+ const unicodeNormNFC = "unicodeNormNFC" // name the custom token filter is registered under
+ if err := mapping.AddCustomTokenFilter(unicodeNormNFC, map[string]interface{}{
+ "type": unicodenorm.Name,
+ "form": unicodenorm.NFC,
+ }); err != nil {
+ return err
+ } else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]interface{}{ // registered after the token filter it references by name
+ "type": custom.Name,
+ "char_filters": []string{},
+ "tokenizer": unicode.Name,
+ "token_filters": []string{unicodeNormNFC, lowercase.Name}, // the NFC filter is listed before the lowercase filter
+ }); err != nil {
+ return err
+ }
+
+ mapping.DefaultAnalyzer = issueIndexerAnalyzer
+ mapping.AddDocumentMapping("issues", docMapping) // NOTE(review): IssueIndexerData declares no document type in this file; confirm documents actually resolve to the "issues" mapping
+
+ var err error // the err in the if-chain above was scoped to that statement
+ issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping)
+ return err
+}
+
+// UpdateIssue indexes update.Data in the issue indexer under the document ID derived from update.IssueID, returning the index call's error
+func UpdateIssue(update IssueIndexerUpdate) error {
+ return issueIndexer.Index(indexerID(update.IssueID), update.Data)
+}
+
+// BatchUpdateIssues queues every update into one bleve batch and submits it with a single Batch call; if queuing any update fails, that error is returned and the batch is never submitted
+func BatchUpdateIssues(updates ...IssueIndexerUpdate) error {
+ batch := issueIndexer.NewBatch()
+ for _, update := range updates {
+ err := batch.Index(indexerID(update.IssueID), update.Data)
+ if err != nil {
+ return err // early exit: issueIndexer.Batch below is not reached
+ }
+ }
+ return issueIndexer.Batch(batch)
+}
+
+// SearchIssuesByKeyword searches the issue indexer for documents whose RepoID equals repoID and whose Title, Content or Comments match keyword as a phrase.
+// Returns the matching issue IDs, decoded from the hit IDs; a search error or an undecodable hit ID yields a nil slice and the error
+func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) {
+ indexerQuery := bleve.NewConjunctionQuery(
+ numericEqualityQuery(repoID, "RepoID"),
+ bleve.NewDisjunctionQuery(
+ newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer),
+ newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer),
+ newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer),
+ ))
+ search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false) // 2147483647 is the max int32; presumably size, from=0, explain=false, i.e. effectively no cap on hits — TODO confirm against bleve docs
+
+ result, err := issueIndexer.Search(search)
+ if err != nil {
+ return nil, err
+ }
+
+ issueIDs := make([]int64, len(result.Hits))
+ for i, hit := range result.Hits {
+ issueIDs[i], err = idOfIndexerID(hit.ID) // hit IDs are expected to be the base-36 strings produced by indexerID
+ if err != nil {
+ return nil, err
+ }
+ }
+ return issueIDs, nil
+}