summaryrefslogtreecommitdiffstats
path: root/modules/indexer/issues/meilisearch
diff options
context:
space:
mode:
authorDaniel Baumann <daniel@debian.org>2024-10-18 20:33:49 +0200
committerDaniel Baumann <daniel@debian.org>2024-10-18 20:33:49 +0200
commitdd136858f1ea40ad3c94191d647487fa4f31926c (patch)
tree58fec94a7b2a12510c9664b21793f1ed560c6518 /modules/indexer/issues/meilisearch
parentInitial commit. (diff)
downloadforgejo-dd136858f1ea40ad3c94191d647487fa4f31926c.tar.xz
forgejo-dd136858f1ea40ad3c94191d647487fa4f31926c.zip
Adding upstream version 9.0.0.HEADupstream/9.0.0upstreamdebian
Signed-off-by: Daniel Baumann <daniel@debian.org>
Diffstat (limited to 'modules/indexer/issues/meilisearch')
-rw-r--r--modules/indexer/issues/meilisearch/meilisearch.go301
-rw-r--r--modules/indexer/issues/meilisearch/meilisearch_test.go97
2 files changed, 398 insertions, 0 deletions
diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go
new file mode 100644
index 0000000..7d18444
--- /dev/null
+++ b/modules/indexer/issues/meilisearch/meilisearch.go
@@ -0,0 +1,301 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package meilisearch
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "strconv"
+ "strings"
+
+ indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
+ inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch"
+ "code.gitea.io/gitea/modules/indexer/issues/internal"
+
+ "github.com/meilisearch/meilisearch-go"
+)
+
+const (
+ issueIndexerLatestVersion = 3
+
+ // TODO: make this configurable if necessary
+ maxTotalHits = 10000
+)
+
+// ErrMalformedResponse is never expected as we initialize the indexer ourself and so define the types.
+var ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content")
+
+var _ internal.Indexer = &Indexer{}
+
+// Indexer implements Indexer interface
+type Indexer struct {
+ inner *inner_meilisearch.Indexer
+ indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much
+}
+
+// NewIndexer creates a new meilisearch indexer
+func NewIndexer(url, apiKey, indexerName string) *Indexer {
+ settings := &meilisearch.Settings{
+ // The default ranking rules of meilisearch are: ["words", "typo", "proximity", "attribute", "sort", "exactness"]
+ // So even if we specify the sort order, it could not be respected because the priority of "sort" is so low.
+ // So we need to specify the ranking rules to make sure the sort order is respected.
+ // See https://www.meilisearch.com/docs/learn/core_concepts/relevancy
+ RankingRules: []string{"sort", // make sure "sort" has the highest priority
+ "words", "typo", "proximity", "attribute", "exactness"},
+
+ SearchableAttributes: []string{
+ "title",
+ "content",
+ "comments",
+ },
+ DisplayedAttributes: []string{
+ "id",
+ "title",
+ "content",
+ "comments",
+ },
+ FilterableAttributes: []string{
+ "repo_id",
+ "is_public",
+ "is_pull",
+ "is_closed",
+ "label_ids",
+ "no_label",
+ "milestone_id",
+ "project_id",
+ "project_board_id",
+ "poster_id",
+ "assignee_id",
+ "mention_ids",
+ "reviewed_ids",
+ "review_requested_ids",
+ "subscriber_ids",
+ "updated_unix",
+ },
+ SortableAttributes: []string{
+ "updated_unix",
+ "created_unix",
+ "deadline_unix",
+ "comment_count",
+ "id",
+ },
+ Pagination: &meilisearch.Pagination{
+ MaxTotalHits: maxTotalHits,
+ },
+ }
+
+ inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName, issueIndexerLatestVersion, settings)
+ indexer := &Indexer{
+ inner: inner,
+ Indexer: inner,
+ }
+ return indexer
+}
+
+// Index will save the index data
+func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error {
+ if len(issues) == 0 {
+ return nil
+ }
+ for _, issue := range issues {
+ _, err := b.inner.Client.Index(b.inner.VersionedIndexName()).AddDocuments(issue)
+ if err != nil {
+ return err
+ }
+ }
+ // TODO: bulk send index data
+ return nil
+}
+
+// Delete deletes indexes by ids
+func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
+ if len(ids) == 0 {
+ return nil
+ }
+
+ for _, id := range ids {
+ _, err := b.inner.Client.Index(b.inner.VersionedIndexName()).DeleteDocument(strconv.FormatInt(id, 10))
+ if err != nil {
+ return err
+ }
+ }
+ // TODO: bulk send deletes
+ return nil
+}
+
+// Search searches for issues by given conditions.
+// Returns the matching issue IDs
+func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
+ query := inner_meilisearch.FilterAnd{}
+
+ if len(options.RepoIDs) > 0 {
+ q := &inner_meilisearch.FilterOr{}
+ q.Or(inner_meilisearch.NewFilterIn("repo_id", options.RepoIDs...))
+ if options.AllPublic {
+ q.Or(inner_meilisearch.NewFilterEq("is_public", true))
+ }
+ query.And(q)
+ }
+
+ if options.IsPull.Has() {
+ query.And(inner_meilisearch.NewFilterEq("is_pull", options.IsPull.Value()))
+ }
+ if options.IsClosed.Has() {
+ query.And(inner_meilisearch.NewFilterEq("is_closed", options.IsClosed.Value()))
+ }
+
+ if options.NoLabelOnly {
+ query.And(inner_meilisearch.NewFilterEq("no_label", true))
+ } else {
+ if len(options.IncludedLabelIDs) > 0 {
+ q := &inner_meilisearch.FilterAnd{}
+ for _, labelID := range options.IncludedLabelIDs {
+ q.And(inner_meilisearch.NewFilterEq("label_ids", labelID))
+ }
+ query.And(q)
+ } else if len(options.IncludedAnyLabelIDs) > 0 {
+ query.And(inner_meilisearch.NewFilterIn("label_ids", options.IncludedAnyLabelIDs...))
+ }
+ if len(options.ExcludedLabelIDs) > 0 {
+ q := &inner_meilisearch.FilterAnd{}
+ for _, labelID := range options.ExcludedLabelIDs {
+ q.And(inner_meilisearch.NewFilterNot(inner_meilisearch.NewFilterEq("label_ids", labelID)))
+ }
+ query.And(q)
+ }
+ }
+
+ if len(options.MilestoneIDs) > 0 {
+ query.And(inner_meilisearch.NewFilterIn("milestone_id", options.MilestoneIDs...))
+ }
+
+ if options.ProjectID.Has() {
+ query.And(inner_meilisearch.NewFilterEq("project_id", options.ProjectID.Value()))
+ }
+ if options.ProjectColumnID.Has() {
+ query.And(inner_meilisearch.NewFilterEq("project_board_id", options.ProjectColumnID.Value()))
+ }
+
+ if options.PosterID.Has() {
+ query.And(inner_meilisearch.NewFilterEq("poster_id", options.PosterID.Value()))
+ }
+
+ if options.AssigneeID.Has() {
+ query.And(inner_meilisearch.NewFilterEq("assignee_id", options.AssigneeID.Value()))
+ }
+
+ if options.MentionID.Has() {
+ query.And(inner_meilisearch.NewFilterEq("mention_ids", options.MentionID.Value()))
+ }
+
+ if options.ReviewedID.Has() {
+ query.And(inner_meilisearch.NewFilterEq("reviewed_ids", options.ReviewedID.Value()))
+ }
+ if options.ReviewRequestedID.Has() {
+ query.And(inner_meilisearch.NewFilterEq("review_requested_ids", options.ReviewRequestedID.Value()))
+ }
+
+ if options.SubscriberID.Has() {
+ query.And(inner_meilisearch.NewFilterEq("subscriber_ids", options.SubscriberID.Value()))
+ }
+
+ if options.UpdatedAfterUnix.Has() {
+ query.And(inner_meilisearch.NewFilterGte("updated_unix", options.UpdatedAfterUnix.Value()))
+ }
+ if options.UpdatedBeforeUnix.Has() {
+ query.And(inner_meilisearch.NewFilterLte("updated_unix", options.UpdatedBeforeUnix.Value()))
+ }
+
+ if options.SortBy == "" {
+ options.SortBy = internal.SortByCreatedAsc
+ }
+ sortBy := []string{
+ parseSortBy(options.SortBy),
+ "id:desc",
+ }
+
+ skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits)
+
+ counting := limit == 0
+ if counting {
+ // If set limit to 0, it will be 20 by default, and -1 is not allowed.
+ // See https://www.meilisearch.com/docs/reference/api/search#limit
+ // So set limit to 1 to make the cost as low as possible, then clear the result before returning.
+ limit = 1
+ }
+
+ keyword := options.Keyword
+ if !options.IsFuzzyKeyword {
+ // to make it non fuzzy ("typo tolerance" in meilisearch terms), we have to quote the keyword(s)
+ // https://www.meilisearch.com/docs/reference/api/search#phrase-search
+ keyword = doubleQuoteKeyword(keyword)
+ }
+
+ searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(keyword, &meilisearch.SearchRequest{
+ Filter: query.Statement(),
+ Limit: int64(limit),
+ Offset: int64(skip),
+ Sort: sortBy,
+ MatchingStrategy: meilisearch.All,
+ })
+ if err != nil {
+ return nil, err
+ }
+
+ if counting {
+ searchRes.Hits = nil
+ }
+
+ hits, err := convertHits(searchRes)
+ if err != nil {
+ return nil, err
+ }
+
+ return &internal.SearchResult{
+ Total: searchRes.EstimatedTotalHits,
+ Hits: hits,
+ }, nil
+}
+
+func parseSortBy(sortBy internal.SortBy) string {
+ field := strings.TrimPrefix(string(sortBy), "-")
+ if strings.HasPrefix(string(sortBy), "-") {
+ return field + ":desc"
+ }
+ return field + ":asc"
+}
+
+func doubleQuoteKeyword(k string) string {
+ kp := strings.Split(k, " ")
+ parts := 0
+ for i := range kp {
+ part := strings.Trim(kp[i], "\"")
+ if part != "" {
+ kp[parts] = fmt.Sprintf(`"%s"`, part)
+ parts++
+ }
+ }
+ return strings.Join(kp[:parts], " ")
+}
+
+func convertHits(searchRes *meilisearch.SearchResponse) ([]internal.Match, error) {
+ hits := make([]internal.Match, 0, len(searchRes.Hits))
+ for _, hit := range searchRes.Hits {
+ hit, ok := hit.(map[string]any)
+ if !ok {
+ return nil, ErrMalformedResponse
+ }
+
+ issueID, ok := hit["id"].(float64)
+ if !ok {
+ return nil, ErrMalformedResponse
+ }
+
+ hits = append(hits, internal.Match{
+ ID: int64(issueID),
+ })
+ }
+ return hits, nil
+}
diff --git a/modules/indexer/issues/meilisearch/meilisearch_test.go b/modules/indexer/issues/meilisearch/meilisearch_test.go
new file mode 100644
index 0000000..349102b
--- /dev/null
+++ b/modules/indexer/issues/meilisearch/meilisearch_test.go
@@ -0,0 +1,97 @@
+// Copyright 2023 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package meilisearch
+
+import (
+ "fmt"
+ "net/http"
+ "os"
+ "testing"
+ "time"
+
+ "code.gitea.io/gitea/modules/indexer/issues/internal"
+ "code.gitea.io/gitea/modules/indexer/issues/internal/tests"
+
+ "github.com/meilisearch/meilisearch-go"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestMeilisearchIndexer(t *testing.T) {
+ t.Skip("meilisearch not found in Forgejo test yet")
+ // The meilisearch instance started by pull-db-tests.yml > test-unit > services > meilisearch
+ url := "http://meilisearch:7700"
+ key := "" // auth has been disabled in test environment
+
+ if os.Getenv("CI") == "" {
+ // Make it possible to run tests against a local meilisearch instance
+ url = os.Getenv("TEST_MEILISEARCH_URL")
+ if url == "" {
+ t.Skip("TEST_MEILISEARCH_URL not set and not running in CI")
+ return
+ }
+ key = os.Getenv("TEST_MEILISEARCH_KEY")
+ }
+
+ ok := false
+ for i := 0; i < 60; i++ {
+ resp, err := http.Get(url)
+ if err == nil && resp.StatusCode == http.StatusOK {
+ ok = true
+ break
+ }
+ t.Logf("Waiting for meilisearch to be up: %v", err)
+ time.Sleep(time.Second)
+ }
+ if !ok {
+ t.Fatalf("Failed to wait for meilisearch to be up")
+ return
+ }
+
+ indexer := NewIndexer(url, key, fmt.Sprintf("test_meilisearch_indexer_%d", time.Now().Unix()))
+ defer indexer.Close()
+
+ tests.TestIndexer(t, indexer)
+}
+
+func TestConvertHits(t *testing.T) {
+ _, err := convertHits(&meilisearch.SearchResponse{
+ Hits: []any{"aa", "bb", "cc", "dd"},
+ })
+ require.ErrorIs(t, err, ErrMalformedResponse)
+
+ validResponse := &meilisearch.SearchResponse{
+ Hits: []any{
+ map[string]any{
+ "id": float64(11),
+ "title": "a title",
+ "content": "issue body with no match",
+ "comments": []any{"hey what's up?", "I'm currently bowling", "nice"},
+ },
+ map[string]any{
+ "id": float64(22),
+ "title": "Bowling as title",
+ "content": "",
+ "comments": []any{},
+ },
+ map[string]any{
+ "id": float64(33),
+ "title": "Bowl-ing as fuzzy match",
+ "content": "",
+ "comments": []any{},
+ },
+ },
+ }
+ hits, err := convertHits(validResponse)
+ require.NoError(t, err)
+ assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits)
+}
+
+func TestDoubleQuoteKeyword(t *testing.T) {
+ assert.EqualValues(t, "", doubleQuoteKeyword(""))
+ assert.EqualValues(t, `"a" "b" "c"`, doubleQuoteKeyword("a b c"))
+ assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g"))
+ assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g"))
+ assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword(`a "" "d" """g`))
+}