Make issue and code search support camel case (#22829)

Fixes #22714 
### Changes:
1. Add a token filter which named "camelCase" between custom unicode
token filter and "to_lower" token filter when add custom analyzer.

### Notice:
If users want this feature to work, they should delete folder under
{giteaPath}/data/indexers and restart application. Then application will
create a new IndexMapping.

### Screenshots:

![image](https://user-images.githubusercontent.com/33891828/217715692-c18c41f2-57a1-4727-861c-470935c8e0c8.png)

### Others:
I originally attempted to give users the ability to configure the
"token_filters" in the "app.ini" file. But I found that if users does
not strictly follow a right order to register "token_filters", they
won't get the expected results. I think it is difficult to ask users to
do this. So I finally give up this idea.

---------

Co-authored-by: Jason Song <i@wolfogre.com>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
This commit is contained in:
sillyguodong 2023-02-12 18:09:03 +08:00 committed by GitHub
parent 00f695da48
commit 34399cfd7a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 4 deletions

View file

@ -27,6 +27,7 @@ import (
"github.com/blevesearch/bleve/v2"
analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
analyzer_keyword "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
@ -107,7 +108,7 @@ func (d *RepoIndexerData) Type() string {
const (
repoIndexerAnalyzer = "repoIndexerAnalyzer"
repoIndexerDocType = "repoIndexerDocType"
repoIndexerLatestVersion = 5
repoIndexerLatestVersion = 6
)
// createBleveIndexer create a bleve repo indexer if one does not already exist
@ -138,7 +139,7 @@ func createBleveIndexer(path string, latestVersion int) (bleve.Index, error) {
"type": analyzer_custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
"token_filters": []string{unicodeNormalizeName, lowercase.Name},
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
}); err != nil {
return nil, err
}

View file

@ -15,6 +15,7 @@ import (
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
@ -27,7 +28,7 @@ import (
const (
issueIndexerAnalyzer = "issueIndexer"
issueIndexerDocType = "issueIndexerDocType"
issueIndexerLatestVersion = 1
issueIndexerLatestVersion = 2
)
// indexerID a bleve-compatible unique identifier for an integer id
@ -134,7 +135,7 @@ func createIssueIndexer(path string, latestVersion int) (bleve.Index, error) {
"type": custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
"token_filters": []string{unicodeNormalizeName, lowercase.Name},
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
}); err != nil {
return nil, err
}