[FIX] Set max fuzziness to 2 for bleve (#3444)

closes #3443

regression from ab5f0b7558

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/3444
Reviewed-by: Otto <otto@codeberg.org>
Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
(cherry picked from commit a641ebf221)
This commit is contained in:
Shiny Nematoda 2024-04-26 08:08:47 +00:00 committed by GitHub
parent 9cd8cd4874
commit 966975a3e0
4 changed files with 26 additions and 2 deletions

View file

@ -41,6 +41,8 @@ const (
maxBatchSize = 16 maxBatchSize = 16
// fuzzyDenominator determines the levenshtein distance per each character of a keyword // fuzzyDenominator determines the levenshtein distance per each character of a keyword
fuzzyDenominator = 4 fuzzyDenominator = 4
// maxFuzziness caps the computed fuzziness at 2 for bleve; see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311
maxFuzziness = 2
) )
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
@ -246,7 +248,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
phraseQuery.Analyzer = repoIndexerAnalyzer phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery keywordQuery = phraseQuery
if opts.IsKeywordFuzzy { if opts.IsKeywordFuzzy {
phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator phraseQuery.Fuzziness = min(maxFuzziness, len(opts.Keyword)/fuzzyDenominator)
} }
if len(opts.RepoIDs) > 0 { if len(opts.RepoIDs) > 0 {

View file

@ -49,6 +49,12 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
IDs: []int64{}, IDs: []int64{},
Langs: 0, Langs: 0,
}, },
{
RepoIDs: nil,
Keyword: "Description for",
IDs: []int64{repoID},
Langs: 1,
},
{ {
RepoIDs: nil, RepoIDs: nil,
Keyword: "repo1", Keyword: "repo1",

View file

@ -39,6 +39,8 @@ const (
maxBatchSize = 16 maxBatchSize = 16
// fuzzyDenominator determines the levenshtein distance per each character of a keyword // fuzzyDenominator determines the levenshtein distance per each character of a keyword
fuzzyDenominator = 4 fuzzyDenominator = 4
// maxFuzziness caps the computed fuzziness at 2 for bleve; see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311
maxFuzziness = 2
) )
// IndexerData an update to the issue indexer // IndexerData an update to the issue indexer
@ -162,7 +164,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
if options.Keyword != "" { if options.Keyword != "" {
fuzziness := 0 fuzziness := 0
if options.IsFuzzyKeyword { if options.IsFuzzyKeyword {
fuzziness = len(options.Keyword) / fuzzyDenominator fuzziness = min(maxFuzziness, len(options.Keyword)/fuzzyDenominator)
} }
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{

View file

@ -130,6 +130,20 @@ var cases = []*testIndexerCase{
ExpectedIDs: []int64{1002, 1001, 1000}, ExpectedIDs: []int64{1002, 1001, 1000},
ExpectedTotal: 3, ExpectedTotal: 3,
}, },
{
Name: "Keyword Fuzzy",
ExtraData: []*internal.IndexerData{
{ID: 1000, Title: "hi hello world"},
{ID: 1001, Content: "hi hello world"},
{ID: 1002, Comments: []string{"hi", "hello world"}},
},
SearchOptions: &internal.SearchOptions{
Keyword: "hello wrold",
IsFuzzyKeyword: true,
},
ExpectedIDs: []int64{1002, 1001, 1000},
ExpectedTotal: 3,
},
{ {
Name: "RepoIDs", Name: "RepoIDs",
ExtraData: []*internal.IndexerData{ ExtraData: []*internal.IndexerData{