126 lines
3.1 KiB
Go
126 lines
3.1 KiB
Go
|
package services
|
||
|
|
||
|
import (
|
||
|
"context"
|
||
|
"slices"
|
||
|
|
||
|
"github.com/blevesearch/bleve/v2"
|
||
|
"github.com/blevesearch/bleve/v2/search/query"
|
||
|
"github.com/mitchellh/mapstructure"
|
||
|
|
||
|
"code.icb4dc0.de/prskr/searcherside/core/dto"
|
||
|
"code.icb4dc0.de/prskr/searcherside/core/ports"
|
||
|
)
|
||
|
|
||
|
var (
|
||
|
_ ports.Searcher = (*BleveSearcher)(nil)
|
||
|
searchFields = []string{
|
||
|
"mainTitle",
|
||
|
"pageTitle",
|
||
|
"url",
|
||
|
"headings",
|
||
|
"content",
|
||
|
"metaDescription",
|
||
|
"breadcrumbs",
|
||
|
}
|
||
|
)
|
||
|
|
||
|
func NewBleveSearcher(indexPath string) (*BleveSearcher, error) {
|
||
|
idx, err := bleve.Open(indexPath)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
return &BleveSearcher{idx: idx}, nil
|
||
|
}
|
||
|
|
||
|
type BleveSearcher struct {
|
||
|
idx bleve.Index
|
||
|
}
|
||
|
|
||
|
func (b BleveSearcher) Search(ctx context.Context, req ports.IndexSearchRequest) (resp dto.SearchResponse, err error) {
|
||
|
var searchQuery query.Query
|
||
|
if req.ExactSearch {
|
||
|
searchQuery = bleve.NewTermQuery(req.Query)
|
||
|
} else {
|
||
|
searchQuery = bleve.NewFuzzyQuery(req.Query)
|
||
|
}
|
||
|
|
||
|
maxHits := req.MaxHits
|
||
|
if maxHits == 0 {
|
||
|
maxHits = 10
|
||
|
}
|
||
|
|
||
|
searchRequest := bleve.NewSearchRequestOptions(searchQuery, maxHits, 0, false)
|
||
|
searchRequest.Fields = slices.Clone(searchFields)
|
||
|
searchRequest.Highlight = bleve.NewHighlight()
|
||
|
|
||
|
result, err := b.idx.Search(searchRequest)
|
||
|
if err != nil {
|
||
|
return resp, err
|
||
|
}
|
||
|
|
||
|
for _, hit := range result.Hits {
|
||
|
var page pageToIndex
|
||
|
if err := mapstructure.Decode(hit.Fields, &page); err != nil {
|
||
|
return resp, err
|
||
|
}
|
||
|
|
||
|
searchHit := dto.SearchHit{
|
||
|
ObjectID: hit.ID,
|
||
|
MainTitle: page.MainTitle,
|
||
|
PageTitle: page.PageTitle,
|
||
|
Url: page.Url,
|
||
|
Breadcrumbs: page.Breadcrumbs,
|
||
|
Snippet: dto.SnippetResult{
|
||
|
Content: dto.SnippetContent{
|
||
|
Value: page.Content,
|
||
|
MatchLevel: dto.MatchLevelFull,
|
||
|
},
|
||
|
},
|
||
|
Highlight: make(map[string]dto.Match),
|
||
|
}
|
||
|
|
||
|
if contentFragment, ok := hit.Fragments["content"]; ok && len(contentFragment) > 0 {
|
||
|
searchHit.Snippet.Content.Value = contentFragment[0]
|
||
|
}
|
||
|
|
||
|
for _, field := range searchFields {
|
||
|
fieldValue := hit.Fields[field].(string)
|
||
|
if locationMap, ok := hit.Locations[field]; ok {
|
||
|
fieldMatch := dto.Match{
|
||
|
MatchLevel: dto.MatchLevelFull,
|
||
|
Value: hit.Fragments[field][0],
|
||
|
MatchedWords: make([]string, 0),
|
||
|
}
|
||
|
for _, locations := range locationMap {
|
||
|
for _, location := range locations {
|
||
|
matchedWord := fieldValue[location.Start:location.End]
|
||
|
if pos, found := slices.BinarySearch(fieldMatch.MatchedWords, matchedWord); found {
|
||
|
continue
|
||
|
} else if pos == len(fieldMatch.MatchedWords) {
|
||
|
fieldMatch.MatchedWords = append(fieldMatch.MatchedWords, matchedWord)
|
||
|
} else {
|
||
|
fieldMatch.MatchedWords = append(fieldMatch.MatchedWords[:pos], append([]string{matchedWord}, fieldMatch.MatchedWords[pos:]...)...)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
searchHit.Highlight[field] = fieldMatch
|
||
|
} else {
|
||
|
searchHit.Highlight[field] = dto.Match{
|
||
|
Value: fieldValue,
|
||
|
MatchLevel: dto.MatchLevelNone,
|
||
|
MatchedWords: make([]string, 0),
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
resp.Hits = append(resp.Hits, searchHit)
|
||
|
}
|
||
|
|
||
|
return resp, nil
|
||
|
}
|
||
|
|
||
|
func (b BleveSearcher) Close() error {
|
||
|
return b.idx.Close()
|
||
|
}
|