Merge pull request '[v7.0/forgejo] [FIX] Set max fuzziness to 2 for bleve' (#3477) from bp-v7.0/forgejo-a641ebf into v7.0/forgejo

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/3477
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
This commit is contained in:
Earl Warren 2024-04-26 09:36:09 +00:00
commit d58b74d368
4 changed files with 26 additions and 2 deletions

View file

@ -41,6 +41,8 @@ const (
maxBatchSize = 16
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
fuzzyDenominator = 4
+ // see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311
+ maxFuzziness = 2
)
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
@ -246,7 +248,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery
if opts.IsKeywordFuzzy {
- phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator
+ phraseQuery.Fuzziness = min(maxFuzziness, len(opts.Keyword)/fuzzyDenominator)
}
if len(opts.RepoIDs) > 0 {

View file

@ -49,6 +49,12 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
IDs: []int64{},
Langs: 0,
},
+ {
+ RepoIDs: nil,
+ Keyword: "Description for",
+ IDs: []int64{repoID},
+ Langs: 1,
+ },
{
RepoIDs: nil,
Keyword: "repo1",

View file

@ -39,6 +39,8 @@ const (
maxBatchSize = 16
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
fuzzyDenominator = 4
+ // see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311
+ maxFuzziness = 2
)
// IndexerData an update to the issue indexer
@ -162,7 +164,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
if options.Keyword != "" {
fuzziness := 0
if options.IsFuzzyKeyword {
- fuzziness = len(options.Keyword) / fuzzyDenominator
+ fuzziness = min(maxFuzziness, len(options.Keyword)/fuzzyDenominator)
}
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{

View file

@ -130,6 +130,20 @@ var cases = []*testIndexerCase{
ExpectedIDs: []int64{1002, 1001, 1000},
ExpectedTotal: 3,
},
+ {
+ Name: "Keyword Fuzzy",
+ ExtraData: []*internal.IndexerData{
+ {ID: 1000, Title: "hi hello world"},
+ {ID: 1001, Content: "hi hello world"},
+ {ID: 1002, Comments: []string{"hi", "hello world"}},
+ },
+ SearchOptions: &internal.SearchOptions{
+ Keyword: "hello wrold",
+ IsFuzzyKeyword: true,
+ },
+ ExpectedIDs: []int64{1002, 1001, 1000},
+ ExpectedTotal: 3,
+ },
{
Name: "RepoIDs",
ExtraData: []*internal.IndexerData{