searcherside/core/services/bleve_indexer.go

97 lines
2.4 KiB
Go
Raw Normal View History

2024-06-06 20:08:51 +00:00
package services
import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/mapping"
"code.icb4dc0.de/prskr/searcherside/core/ports"
"code.icb4dc0.de/prskr/searcherside/infrastructure/archive"
2024-06-06 20:08:51 +00:00
)
// Compile-time assertion that *BleveIndexer satisfies ports.Indexer.
var _ ports.Indexer = (*BleveIndexer)(nil)
// BleveIndexer creates Bleve indexes on the local filesystem.
type BleveIndexer struct {
	// DataDirectory is the parent directory under which each
	// "<hash>.bleve" index directory is created.
	DataDirectory string
}
// IngestIndex builds a Bleve index from the zip archive at request.FilePath and
// stores it in DataDirectory under "<request.Hash>.bleve".
//
// The data directory is created if it is missing. If a directory already exists
// at the target path it is returned unchanged and the archive is not read.
// Otherwise a new index is created and every reader handed over by
// archive.WalkZipFile is decoded as a JSON page and indexed under its ObjectID.
//
// The result always carries Type ports.IndexTypeBleve; Path is set as soon as
// the data directory exists. Once index creation has started, any failure
// (including cancellation of ctx, which is checked before each entry) closes
// and removes the half-built index, so that a retry does not mistake a partial
// index directory for a finished one.
func (b BleveIndexer) IngestIndex(ctx context.Context, request ports.IngestIndexRequest) (res ports.IngestIndexResult, err error) {
	res.Type = ports.IndexTypeBleve

	if mkErr := os.MkdirAll(b.DataDirectory, 0o750); mkErr != nil {
		return res, fmt.Errorf("failed to create data directory: %w", mkErr)
	}

	res.Path = filepath.Join(b.DataDirectory, fmt.Sprintf("%s.bleve", request.Hash))

	// An existing directory is treated as an already ingested index.
	if info, statErr := os.Stat(res.Path); statErr == nil && info.IsDir() {
		return res, nil
	}

	indexMapping := bleve.NewIndexMapping()
	indexMapping.AddDocumentMapping("page", pageDocumentMapping())

	index, err := bleve.New(res.Path, indexMapping)
	if err != nil {
		return res, err
	}

	// From here on the index directory exists on disk. The deferred cleanup
	// inspects the named result err, so every failing return below triggers it.
	closed := false
	defer func() {
		if err == nil {
			return
		}
		// Best-effort cleanup: the error already being returned is the one the
		// caller needs to see, so secondary cleanup errors are dropped.
		if !closed {
			_ = index.Close()
		}
		_ = os.RemoveAll(res.Path)
	}()

	indexErr := archive.WalkZipFile(request.FilePath, func(_ string, r io.Reader) error {
		// Stop early when the caller has given up on the request.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}

		var page pageToIndex
		if decodeErr := json.NewDecoder(r).Decode(&page); decodeErr != nil {
			return fmt.Errorf("failed to decode page: %w", decodeErr)
		}

		return index.Index(page.ObjectID, page)
	})
	if indexErr != nil {
		return res, fmt.Errorf("failed to index pages: %w", indexErr)
	}

	// Mark as closed before calling Close so the deferred cleanup never closes twice.
	closed = true
	if closeErr := index.Close(); closeErr != nil {
		return res, fmt.Errorf("failed to close index: %w", closeErr)
	}

	return res, nil
}
// Compile-time assertion that *pageToIndex satisfies mapping.Classifier.
var _ mapping.Classifier = (*pageToIndex)(nil)
// pageToIndex is the JSON shape of a single page document; values of this type
// are decoded from JSON and handed to the index.
type pageToIndex struct {
	ObjectID        string `json:"objectID"`
	MainTitle       string `json:"mainTitle"`
	PageTitle       string `json:"pageTitle"`
	Url             string `json:"url"`
	Headings        string `json:"headings"`
	Content         string `json:"content"`
	MetaDescription string `json:"metaDescription"`
	Breadcrumbs     string `json:"breadcrumbs"`
}

// Type returns the document type name, which is "page" for every value.
func (pageToIndex) Type() string {
	const docType = "page"
	return docType
}
// pageDocumentMapping returns the document mapping that IngestIndex registers
// for "page" documents. The paths objectID, url and breadcrumbs each receive an
// explicit field mapping with Type "text" and Store set to true; no other field
// of the mapping is set here, and no other path is mapped explicitly.
func pageDocumentMapping() *mapping.DocumentMapping {
	// The same *FieldMapping value is attached to all three paths.
	stored := &mapping.FieldMapping{Type: "text", Store: true}

	dm := mapping.NewDocumentMapping()
	for _, path := range []string{"objectID", "url", "breadcrumbs"} {
		dm.AddFieldMappingsAt(path, stored)
	}

	return dm
}