97 lines
2.4 KiB
Go
97 lines
2.4 KiB
Go
|
package services
|
||
|
|
||
|
import (
|
||
|
"context"
|
||
|
"encoding/json"
|
||
|
"fmt"
|
||
|
"io"
|
||
|
"os"
|
||
|
"path/filepath"
|
||
|
|
||
|
"github.com/blevesearch/bleve/v2"
|
||
|
"github.com/blevesearch/bleve/v2/mapping"
|
||
|
|
||
|
"code.icb4dc0.de/prskr/searcherside/core/ports"
|
||
|
"code.icb4dc0.de/prskr/searcherside/internal/archive"
|
||
|
)
|
||
|
|
||
|
// Compile-time assertion that *BleveIndexer satisfies ports.Indexer.
var _ ports.Indexer = (*BleveIndexer)(nil)
|
||
|
|
||
|
// BleveIndexer builds Bleve indexes on the local file system.
type BleveIndexer struct {
	// DataDirectory is the directory below which the per-hash index
	// directories are created.
	DataDirectory string
}
|
||
|
|
||
|
func (b BleveIndexer) IngestIndex(ctx context.Context, request ports.IngestIndexRequest) (res ports.IngestIndexResult, err error) {
|
||
|
res.Type = ports.IndexTypeBleve
|
||
|
|
||
|
indexMapping := bleve.NewIndexMapping()
|
||
|
indexMapping.AddDocumentMapping("page", pageDocumentMapping())
|
||
|
|
||
|
if err := os.MkdirAll(b.DataDirectory, 0o750); err != nil {
|
||
|
return res, fmt.Errorf("failed to create data directory: %w", err)
|
||
|
}
|
||
|
|
||
|
indexDirName := fmt.Sprintf("%s.bleve", request.Hash)
|
||
|
res.Path = filepath.Join(b.DataDirectory, indexDirName)
|
||
|
|
||
|
if info, err := os.Stat(res.Path); err == nil && info.IsDir() {
|
||
|
return res, nil
|
||
|
}
|
||
|
|
||
|
index, err := bleve.New(res.Path, indexMapping)
|
||
|
if err != nil {
|
||
|
return res, err
|
||
|
}
|
||
|
|
||
|
indexErr := archive.WalkZipFile(request.FilePath, func(name string, r io.Reader) error {
|
||
|
var page pageToIndex
|
||
|
if err := json.NewDecoder(r).Decode(&page); err != nil {
|
||
|
return fmt.Errorf("failed to decode page: %w", err)
|
||
|
}
|
||
|
|
||
|
return index.Index(page.ObjectID, page)
|
||
|
})
|
||
|
|
||
|
if indexErr != nil {
|
||
|
return res, fmt.Errorf("failed to index pages: %w", indexErr)
|
||
|
}
|
||
|
|
||
|
if closeErr := index.Close(); closeErr != nil {
|
||
|
return res, fmt.Errorf("failed to close index: %w", closeErr)
|
||
|
}
|
||
|
|
||
|
return res, nil
|
||
|
}
|
||
|
|
||
|
// Compile-time assertion that *pageToIndex satisfies mapping.Classifier.
var _ mapping.Classifier = (*pageToIndex)(nil)
|
||
|
|
||
|
// pageToIndex is a single page document as it is decoded from JSON before
// being handed to the index. All fields are plain strings keyed by the JSON
// names given in the struct tags.
type pageToIndex struct {
	ObjectID        string `json:"objectID"`
	MainTitle       string `json:"mainTitle"`
	PageTitle       string `json:"pageTitle"`
	Url             string `json:"url"`
	Headings        string `json:"headings"`
	Content         string `json:"content"`
	MetaDescription string `json:"metaDescription"`
	Breadcrumbs     string `json:"breadcrumbs"`
}

// Type returns the document type name of a page, which is always "page".
// The value does not depend on the page's contents.
func (pageToIndex) Type() string {
	return "page"
}
|
||
|
|
||
|
func pageDocumentMapping() *mapping.DocumentMapping {
|
||
|
docMapping := mapping.NewDocumentMapping()
|
||
|
|
||
|
storeOnlyMapping := &mapping.FieldMapping{
|
||
|
Type: "text",
|
||
|
Store: true,
|
||
|
}
|
||
|
|
||
|
docMapping.AddFieldMappingsAt("objectID", storeOnlyMapping)
|
||
|
docMapping.AddFieldMappingsAt("url", storeOnlyMapping)
|
||
|
docMapping.AddFieldMappingsAt("breadcrumbs", storeOnlyMapping)
|
||
|
|
||
|
return docMapping
|
||
|
}
|