searcherside/core/services/bleve_searcher.go

126 lines
3.1 KiB
Go
Raw Normal View History

2024-06-06 20:08:51 +00:00
package services
import (
"context"
"slices"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/search/query"
"github.com/mitchellh/mapstructure"
"code.icb4dc0.de/prskr/searcherside/core/dto"
"code.icb4dc0.de/prskr/searcherside/core/ports"
)
var (
	// searchFields names the stored document fields that Search requests from
	// the index and for which it emits an entry in every hit's highlight map.
	searchFields = []string{
		"mainTitle",
		"pageTitle",
		"url",
		"headings",
		"content",
		"metaDescription",
		"breadcrumbs",
	}

	// Compile-time assertion that *BleveSearcher satisfies ports.Searcher.
	_ ports.Searcher = (*BleveSearcher)(nil)
)
// NewBleveSearcher opens the bleve index stored at indexPath and wraps it in a
// BleveSearcher. The error reported by bleve.Open is returned unchanged and no
// searcher is created in that case.
func NewBleveSearcher(indexPath string) (*BleveSearcher, error) {
	index, openErr := bleve.Open(indexPath)
	if openErr != nil {
		return nil, openErr
	}

	searcher := &BleveSearcher{idx: index}
	return searcher, nil
}
// BleveSearcher answers search requests from a bleve index.
type BleveSearcher struct {
	// idx is the index handle queried by Search and released by Close.
	idx bleve.Index
}
// Search runs req against the bleve index and converts every hit into a
// dto.SearchHit.
//
// An exact search is issued as a term query, everything else as a fuzzy query.
// A non-positive req.MaxHits falls back to 10 results. ctx is handed to the
// index search.
//
// For each hit the stored fields listed in searchFields are decoded into the
// page data, the first highlighted "content" fragment (if any) replaces the raw
// content as snippet value, and one dto.Match per search field is stored in the
// hit's Highlight map. Fields without match locations get MatchLevelNone.
//
// On any error a zero dto.SearchResponse is returned together with the error.
func (b BleveSearcher) Search(ctx context.Context, req ports.IndexSearchRequest) (resp dto.SearchResponse, err error) {
	var searchQuery query.Query
	if req.ExactSearch {
		searchQuery = bleve.NewTermQuery(req.Query)
	} else {
		searchQuery = bleve.NewFuzzyQuery(req.Query)
	}

	maxHits := req.MaxHits
	if maxHits <= 0 {
		maxHits = 10
	}

	searchRequest := bleve.NewSearchRequestOptions(searchQuery, maxHits, 0, false)
	// Clone so the request can never alias the package-level slice.
	searchRequest.Fields = slices.Clone(searchFields)
	searchRequest.Highlight = bleve.NewHighlight()

	result, err := b.idx.SearchInContext(ctx, searchRequest)
	if err != nil {
		return dto.SearchResponse{}, err
	}

	for _, hit := range result.Hits {
		var page pageToIndex
		if err := mapstructure.Decode(hit.Fields, &page); err != nil {
			// Do not hand out the hits collected so far next to an error.
			return dto.SearchResponse{}, err
		}

		searchHit := dto.SearchHit{
			ObjectID:    hit.ID,
			MainTitle:   page.MainTitle,
			PageTitle:   page.PageTitle,
			Url:         page.Url,
			Breadcrumbs: page.Breadcrumbs,
			Snippet: dto.SnippetResult{
				Content: dto.SnippetContent{
					Value:      page.Content,
					MatchLevel: dto.MatchLevelFull,
				},
			},
			Highlight: make(map[string]dto.Match),
		}

		if contentFragment, ok := hit.Fragments["content"]; ok && len(contentFragment) > 0 {
			searchHit.Snippet.Content.Value = contentFragment[0]
		}

		for _, field := range searchFields {
			rawValue := hit.Fields[field]
			// Checked assertion: a missing or non-string field value yields ""
			// instead of a panic.
			fieldValue, _ := rawValue.(string)

			locationMap, matched := hit.Locations[field]
			if !matched {
				searchHit.Highlight[field] = dto.Match{
					Value:        fieldValue,
					MatchLevel:   dto.MatchLevelNone,
					MatchedWords: make([]string, 0),
				}
				continue
			}

			fieldMatch := dto.Match{
				MatchLevel:   dto.MatchLevelFull,
				Value:        fieldValue,
				MatchedWords: make([]string, 0),
			}
			// Prefer the highlighted fragment; keep the plain value when the
			// highlighter produced none for this field.
			if fragments := hit.Fragments[field]; len(fragments) > 0 {
				fieldMatch.Value = fragments[0]
			}

			for _, locations := range locationMap {
				for _, location := range locations {
					if location == nil {
						continue
					}
					matchedWord, ok := bleveLocationText(rawValue, location.Start, location.End, location.ArrayPositions)
					if !ok {
						continue
					}
					fieldMatch.MatchedWords = insertSortedUnique(fieldMatch.MatchedWords, matchedWord)
				}
			}

			searchHit.Highlight[field] = fieldMatch
		}

		resp.Hits = append(resp.Hits, searchHit)
	}

	return resp, nil
}

// bleveLocationText returns the text that the byte range [start, end) selects
// in a stored field value. It reports false when the value is not textual or
// the range does not fit the text, so callers can skip the location instead of
// panicking on an out-of-range slice.
func bleveLocationText(value any, start, end uint64, arrayPositions []uint64) (string, bool) {
	text, isText := value.(string)
	if !isText {
		// NOTE(review): presumably multi-valued fields arrive as []any and the
		// first array position selects the element the offsets refer to —
		// confirm against the index mapping.
		elements, isList := value.([]any)
		if !isList || len(arrayPositions) == 0 || arrayPositions[0] >= uint64(len(elements)) {
			return "", false
		}
		if text, isText = elements[arrayPositions[0]].(string); !isText {
			return "", false
		}
	}

	if start > end || end > uint64(len(text)) {
		return "", false
	}
	return text[start:end], true
}

// insertSortedUnique inserts word into the ascending slice words unless it is
// already present and returns the resulting slice, which stays sorted.
func insertSortedUnique(words []string, word string) []string {
	pos, found := slices.BinarySearch(words, word)
	if found {
		return words
	}
	return slices.Insert(words, pos, word)
}
// Close closes the underlying bleve index and returns whatever error the index
// reports while closing.
func (b BleveSearcher) Close() error {
	closeErr := b.idx.Close()
	return closeErr
}