diff options
author | Ethan Koenig <ethantkoenig@gmail.com> | 2017-09-16 22:16:21 +0200 |
---|---|---|
committer | Lauris BH <lauris@nix.lv> | 2017-09-16 22:16:21 +0200 |
commit | b0f7457d9ef6e16d4a3197f7544035d5d387e201 (patch) | |
tree | dbe6a9b3b74f9a4a6eaa0c0cfe59fa08509edccc /modules/indexer | |
parent | Restructure markup & markdown to prepare for multiple markup language… (#2411) (diff) | |
download | forgejo-b0f7457d9ef6e16d4a3197f7544035d5d387e201.tar.xz forgejo-b0f7457d9ef6e16d4a3197f7544035d5d387e201.zip |
Improve issue search (#2387)
* Improve issue indexer
* Fix new issue sqlite bug
* Different test indexer paths for each db
* Add integration indexer paths to make clean
Diffstat (limited to 'modules/indexer')
-rw-r--r-- | modules/indexer/indexer.go | 37 | ||||
-rw-r--r-- | modules/indexer/issue.go | 143 |
2 files changed, 176 insertions, 4 deletions
diff --git a/modules/indexer/indexer.go b/modules/indexer/indexer.go index 2b7b76f7f2..5ee813412d 100644 --- a/modules/indexer/indexer.go +++ b/modules/indexer/indexer.go @@ -5,10 +5,39 @@ package indexer import ( - "code.gitea.io/gitea/models" + "fmt" + "strconv" + + "github.com/blevesearch/bleve" + "github.com/blevesearch/bleve/search/query" ) -// NewContext start indexer service -func NewContext() { - models.InitIssueIndexer() +// indexerID a bleve-compatible unique identifier for an integer id +func indexerID(id int64) string { + return strconv.FormatInt(id, 36) +} + +// idOfIndexerID the integer id associated with an indexer id +func idOfIndexerID(indexerID string) (int64, error) { + id, err := strconv.ParseInt(indexerID, 36, 64) + if err != nil { + return 0, fmt.Errorf("Unexpected indexer ID %s: %v", indexerID, err) + } + return id, nil +} + +// numericEqualityQuery a numeric equality query for the given value and field +func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery { + f := float64(value) + tru := true + q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru) + q.SetField(field) + return q +} + +func newMatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery { + q := bleve.NewMatchPhraseQuery(matchPhrase) + q.FieldVal = field + q.Analyzer = analyzer + return q } diff --git a/modules/indexer/issue.go b/modules/indexer/issue.go new file mode 100644 index 0000000000..2503a78825 --- /dev/null +++ b/modules/indexer/issue.go @@ -0,0 +1,143 @@ +// Copyright 2017 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package indexer + +import ( + "os" + + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" + + "github.com/blevesearch/bleve" + "github.com/blevesearch/bleve/analysis/analyzer/custom" + "github.com/blevesearch/bleve/analysis/token/lowercase" + "github.com/blevesearch/bleve/analysis/token/unicodenorm" + "github.com/blevesearch/bleve/analysis/tokenizer/unicode" +) + +// issueIndexer (thread-safe) index for searching issues +var issueIndexer bleve.Index + +// IssueIndexerData data stored in the issue indexer +type IssueIndexerData struct { + RepoID int64 + Title string + Content string + Comments []string +} + +// IssueIndexerUpdate an update to the issue indexer +type IssueIndexerUpdate struct { + IssueID int64 + Data *IssueIndexerData +} + +const issueIndexerAnalyzer = "issueIndexer" + +// InitIssueIndexer initialize issue indexer +func InitIssueIndexer(populateIndexer func() error) { + _, err := os.Stat(setting.Indexer.IssuePath) + if err != nil { + if os.IsNotExist(err) { + if err = createIssueIndexer(); err != nil { + log.Fatal(4, "CreateIssuesIndexer: %v", err) + } + if err = populateIndexer(); err != nil { + log.Fatal(4, "PopulateIssuesIndex: %v", err) + } + } else { + log.Fatal(4, "InitIssuesIndexer: %v", err) + } + } else { + issueIndexer, err = bleve.Open(setting.Indexer.IssuePath) + if err != nil { + log.Error(4, "Unable to open issues indexer (%s)."+ + " If the error is due to incompatible versions, try deleting the indexer files;"+ + " gitea will recreate them with the appropriate version the next time it runs."+ + " Deleting the indexer files will not result in loss of data.", + setting.Indexer.IssuePath) + log.Fatal(4, "InitIssuesIndexer, open index: %v", err) + } + } +} + +// createIssueIndexer create an issue indexer if one does not already exist +func createIssueIndexer() error { + mapping := bleve.NewIndexMapping() + docMapping := bleve.NewDocumentMapping() + + docMapping.AddFieldMappingsAt("RepoID", 
bleve.NewNumericFieldMapping()) + + textFieldMapping := bleve.NewTextFieldMapping() + docMapping.AddFieldMappingsAt("Title", textFieldMapping) + docMapping.AddFieldMappingsAt("Content", textFieldMapping) + docMapping.AddFieldMappingsAt("Comments", textFieldMapping) + + const unicodeNormNFC = "unicodeNormNFC" + if err := mapping.AddCustomTokenFilter(unicodeNormNFC, map[string]interface{}{ + "type": unicodenorm.Name, + "form": unicodenorm.NFC, + }); err != nil { + return err + } else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]interface{}{ + "type": custom.Name, + "char_filters": []string{}, + "tokenizer": unicode.Name, + "token_filters": []string{unicodeNormNFC, lowercase.Name}, + }); err != nil { + return err + } + + mapping.DefaultAnalyzer = issueIndexerAnalyzer + mapping.AddDocumentMapping("issues", docMapping) + + var err error + issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping) + return err +} + +// UpdateIssue update the issue indexer +func UpdateIssue(update IssueIndexerUpdate) error { + return issueIndexer.Index(indexerID(update.IssueID), update.Data) +} + +// BatchUpdateIssues perform a batch update of the issue indexer +func BatchUpdateIssues(updates ...IssueIndexerUpdate) error { + batch := issueIndexer.NewBatch() + for _, update := range updates { + err := batch.Index(indexerID(update.IssueID), update.Data) + if err != nil { + return err + } + } + return issueIndexer.Batch(batch) +} + +// SearchIssuesByKeyword searches for issues by given conditions. 
+// Returns the matching issue IDs +func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) { + indexerQuery := bleve.NewConjunctionQuery( + numericEqualityQuery(repoID, "RepoID"), + bleve.NewDisjunctionQuery( + newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer), + newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer), + newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer), + )) + search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false) + + result, err := issueIndexer.Search(search) + if err != nil { + return nil, err + } + + issueIDs := make([]int64, len(result.Hits)) + for i, hit := range result.Hits { + issueIDs[i], err = idOfIndexerID(hit.ID) + if err != nil { + return nil, err + } + } + return issueIDs, nil +} |