feat: initial commit

This commit is contained in:
Peter 2024-06-06 22:08:51 +02:00
parent d43271dfd1
commit 0bc68d34e7
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: F56BED6903BC5E37
27 changed files with 1652 additions and 0 deletions

View file

@ -0,0 +1,33 @@
name: Go build
on:
push:
branches:
- main
tags:
- "*"
pull_request:
branches:
- main
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: '0'
lfs: 'true'
fetch-tags: 'true'
- name: Setup Go 1.22.x
uses: actions/setup-go@v5
with:
go-version: '1.22.x'
- name: golangci-lint
uses: golangci/golangci-lint-action@v6
- name: Run tests
run: |
go install gotest.tools/gotestsum@latest
gotestsum --junitfile out/results.xml --format pkgname-and-test-fails -- -race -shuffle=on ./...

8
.gitignore vendored
View file

@ -18,6 +18,14 @@
# Dependency directories (remove the comment below to include it)
# vendor/
# Testdata
*.zip
# Go workspace file
go.work
data/
# IDE configs
.idea/
.vscode/

3
config.yaml Normal file
View file

@ -0,0 +1,3 @@
server:
http:
readHeaderTimeout: 15s

38
core/dto/search.go Normal file
View file

@ -0,0 +1,38 @@
package dto
// MatchLevel describes how well a value matched the search query.
type MatchLevel string

// Match levels emitted in search responses.
const (
	MatchLevelNone MatchLevel = "none"
	MatchLevelFull MatchLevel = "full"
)

type (
	// Match is the highlight information reported for a single field of a hit.
	Match struct {
		Value      MatchLevelValue `json:"value"`
		MatchLevel MatchLevel      `json:"matchLevel"`
		// FullyHighlighted is omitted from the JSON output when nil.
		FullyHighlighted *bool    `json:"fullyHighlighted,omitempty"`
		MatchedWords     []string `json:"matchedWords"`
	}

	// MatchLevelValue is the textual value carried by a Match.
	MatchLevelValue = string

	// SnippetContent is the content excerpt of a hit together with its match level.
	SnippetContent struct {
		Value      string     `json:"value"`
		MatchLevel MatchLevel `json:"matchLevel"`
	}

	// SnippetResult wraps the snippet of a hit; serialized as "_snippetResult".
	SnippetResult struct {
		Content SnippetContent `json:"content"`
	}

	// SearchHit is a single search result.
	SearchHit struct {
		ObjectID    string        `json:"objectID"`
		MainTitle   string        `json:"mainTitle"`
		PageTitle   string        `json:"pageTitle"`
		Url         string        `json:"url"`
		Breadcrumbs string        `json:"breadcrumbs"`
		Snippet     SnippetResult `json:"_snippetResult"`
		// Highlight maps a field name to its highlight information.
		Highlight map[string]Match `json:"_highlightResult"`
	}

	// SearchResponse is the top-level payload returned for a search.
	SearchResponse struct {
		Hits []SearchHit `json:"hits"`
	}
)

93
core/ports/indices.go Normal file
View file

@ -0,0 +1,93 @@
package ports
import (
"bytes"
"context"
"encoding"
"encoding/base64"
"fmt"
"code.icb4dc0.de/prskr/searcherside/core/dto"
)
// IndexType names the search backend an index was built with.
type IndexType string

// IndexTypeBleve marks indices created with Bleve.
const IndexTypeBleve IndexType = "bleve"

// String returns the index type as a plain string.
func (t IndexType) String() string {
	plain := string(t)
	return plain
}
var (
_ encoding.TextMarshaler = (*IndexKey)(nil)
_ encoding.TextUnmarshaler = (*IndexKey)(nil)
)
// IndexKey identifies an index by the module and instance it belongs to.
type IndexKey struct {
	Module   string
	Instance string
}

// MarshalText encodes the key as the standard base64 form of
// "Module:Instance". It never returns an error.
//
// The separator is not escaped: a ':' inside Module cannot be told apart from
// the separator when decoding, so everything after the first ':' is treated
// as the instance by UnmarshalText.
func (i *IndexKey) MarshalText() (text []byte, err error) {
	raw := i.Module + ":" + i.Instance
	text = make([]byte, base64.StdEncoding.EncodedLen(len(raw)))
	base64.StdEncoding.Encode(text, []byte(raw))
	return text, nil
}

// UnmarshalText decodes a key produced by MarshalText.
//
// The decoded text is split at the first ':' only, so instances that contain
// a ':' themselves round-trip instead of being rejected. It returns an error
// when text is not valid base64 or contains no separator at all; the receiver
// is left untouched in both cases.
func (i *IndexKey) UnmarshalText(text []byte) error {
	out := make([]byte, base64.StdEncoding.DecodedLen(len(text)))
	n, err := base64.StdEncoding.Decode(out, text)
	if err != nil {
		return err
	}

	module, instance, found := bytes.Cut(out[:n], []byte(":"))
	if !found {
		return fmt.Errorf("expected to split into module and instance but found no separator")
	}

	i.Module = string(module)
	i.Instance = string(instance)
	return nil
}
type (
	// IngestIndexRequest describes an uploaded archive that should be turned
	// into a searchable index.
	IngestIndexRequest struct {
		// Module and Instance form the IndexKey the new index is registered under.
		Module   string
		Instance string
		// Hash identifies the uploaded content; it is used to name the index on disk.
		Hash string
		// FilePath is the location of the uploaded archive.
		FilePath string
	}

	// IngestIndexResult reports what kind of index was produced and where it lives.
	IngestIndexResult struct {
		Type IndexType
		Path string
	}

	// IndexSearchRequest carries the parameters of a single search.
	IndexSearchRequest struct {
		// ExactSearch selects an exact term match instead of a fuzzy one.
		ExactSearch bool
		// MaxHits limits the number of returned hits.
		MaxHits int
		// Query is the text to search for.
		Query string
	}

	// ArchiveIndexRequest describes an index that should be archived.
	ArchiveIndexRequest struct {
		Path string
		Hash string
		Type IndexType
	}
)
type (
	// IndexCurator manages the known indices: it ingests new ones and hands
	// out searchers for existing ones.
	IndexCurator interface {
		// Ingest builds and registers the index described by request.
		Ingest(ctx context.Context, request IngestIndexRequest) error
		// Searcher returns a Searcher for the index registered under key.
		Searcher(key IndexKey) (Searcher, error)
	}

	// Indexer turns an uploaded archive into an index.
	Indexer interface {
		IngestIndex(ctx context.Context, request IngestIndexRequest) (IngestIndexResult, error)
	}

	// Searcher executes search requests against a single index.
	Searcher interface {
		Search(ctx context.Context, req IndexSearchRequest) (dto.SearchResponse, error)
	}

	// Archiver creates an archive of an index.
	Archiver interface {
		ArchiveIndex(request ArchiveIndexRequest) error
	}
)

11
core/ports/os.go Normal file
View file

@ -0,0 +1,11 @@
package ports
import "io"
// CWD is a distinct string type; judging by its name it carries the current
// working directory (presumably so it can be injected by type — confirm
// against the wiring code).
type CWD string

// Value returns the wrapped string.
func (dir CWD) Value() string {
	plain := string(dir)
	return plain
}

// STDOUT is an io.Writer declared as its own type; by its name it stands for
// the standard output stream.
type STDOUT io.Writer

View file

@ -0,0 +1,96 @@
package services
import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/mapping"
"code.icb4dc0.de/prskr/searcherside/core/ports"
"code.icb4dc0.de/prskr/searcherside/internal/archive"
)
var _ ports.Indexer = (*BleveIndexer)(nil)
// BleveIndexer builds Bleve indices on disk.
type BleveIndexer struct {
	// DataDirectory is the directory below which one "<hash>.bleve" index
	// directory is created per ingested archive.
	DataDirectory string
}

// IngestIndex creates a Bleve index at "<DataDirectory>/<request.Hash>.bleve"
// from the JSON pages contained in the zip archive at request.FilePath.
//
// If the index directory already exists the archive is considered ingested
// and the existing location is returned without touching it.
//
// When indexing fails, the index is closed and the partially written index
// directory is removed. Without that cleanup a retry with the same hash would
// hit the "already exists" shortcut above and report a broken index as valid.
//
// The returned result always carries the index type and, once computed, the
// index path, even when an error is returned.
func (b BleveIndexer) IngestIndex(ctx context.Context, request ports.IngestIndexRequest) (res ports.IngestIndexResult, err error) {
	res.Type = ports.IndexTypeBleve

	if err := os.MkdirAll(b.DataDirectory, 0o750); err != nil {
		return res, fmt.Errorf("failed to create data directory: %w", err)
	}

	indexDirName := fmt.Sprintf("%s.bleve", request.Hash)
	res.Path = filepath.Join(b.DataDirectory, indexDirName)

	if info, err := os.Stat(res.Path); err == nil && info.IsDir() {
		return res, nil
	}

	indexMapping := bleve.NewIndexMapping()
	indexMapping.AddDocumentMapping("page", pageDocumentMapping())

	index, err := bleve.New(res.Path, indexMapping)
	if err != nil {
		return res, err
	}

	indexErr := archive.WalkZipFile(request.FilePath, func(name string, r io.Reader) error {
		var page pageToIndex
		if err := json.NewDecoder(r).Decode(&page); err != nil {
			return fmt.Errorf("failed to decode page: %w", err)
		}

		return index.Index(page.ObjectID, page)
	})
	if indexErr != nil {
		err = fmt.Errorf("failed to index pages: %w", indexErr)
		// Release the index before deleting its files.
		if closeErr := index.Close(); closeErr != nil {
			err = fmt.Errorf("%w; failed to close index: %w", err, closeErr)
		}
		if removeErr := os.RemoveAll(res.Path); removeErr != nil {
			err = fmt.Errorf("%w; failed to remove partial index: %w", err, removeErr)
		}
		return res, err
	}

	if closeErr := index.Close(); closeErr != nil {
		return res, fmt.Errorf("failed to close index: %w", closeErr)
	}

	return res, nil
}
var _ mapping.Classifier = (*pageToIndex)(nil)
// pageToIndex is the JSON document stored in the uploaded archive, one per
// page, and the document handed to the index.
type pageToIndex struct {
	ObjectID        string `json:"objectID"`
	MainTitle       string `json:"mainTitle"`
	PageTitle       string `json:"pageTitle"`
	Url             string `json:"url"`
	Headings        string `json:"headings"`
	Content         string `json:"content"`
	MetaDescription string `json:"metaDescription"`
	Breadcrumbs     string `json:"breadcrumbs"`
}

// pageDocumentType is the document type every pageToIndex reports.
const pageDocumentType = "page"

// Type reports the document type used to select the document mapping.
func (p pageToIndex) Type() string {
	return pageDocumentType
}
// pageDocumentMapping builds the document mapping registered for "page"
// documents. The fields objectID, url and breadcrumbs share one text field
// mapping that has Store enabled and leaves every other option at its zero
// value.
func pageDocumentMapping() *mapping.DocumentMapping {
	storeOnly := &mapping.FieldMapping{
		Store: true,
		Type:  "text",
	}

	pageMapping := mapping.NewDocumentMapping()
	for _, field := range []string{"objectID", "url", "breadcrumbs"} {
		pageMapping.AddFieldMappingsAt(field, storeOnly)
	}

	return pageMapping
}

View file

@ -0,0 +1,125 @@
package services
import (
"context"
"slices"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/search/query"
"github.com/mitchellh/mapstructure"
"code.icb4dc0.de/prskr/searcherside/core/dto"
"code.icb4dc0.de/prskr/searcherside/core/ports"
)
// Compile-time check that BleveSearcher satisfies ports.Searcher.
var _ ports.Searcher = (*BleveSearcher)(nil)

// searchFields lists the document fields that are requested with every search
// and reported in the highlight result of each hit.
var searchFields = []string{
	"mainTitle",
	"pageTitle",
	"url",
	"headings",
	"content",
	"metaDescription",
	"breadcrumbs",
}
// BleveSearcher answers search requests from a single opened Bleve index.
type BleveSearcher struct {
	idx bleve.Index
}

// NewBleveSearcher opens the Bleve index stored at indexPath and wraps it in a
// BleveSearcher. The error of bleve.Open is returned unchanged.
func NewBleveSearcher(indexPath string) (*BleveSearcher, error) {
	opened, openErr := bleve.Open(indexPath)
	if openErr == nil {
		return &BleveSearcher{idx: opened}, nil
	}

	return nil, openErr
}
// Search runs req against the index and converts the hits into the response
// DTO.
//
// An exact search uses a term query, everything else a fuzzy query. A MaxHits
// of 0 falls back to 10. The search is executed with ctx so that it can be
// abandoned when the caller cancels.
//
// For every hit each entry of searchFields gets a highlight entry: fields with
// match locations report the first highlight fragment and the sorted, unique
// matched words; all other fields report their stored value with match level
// "none". Missing or non-string field values are treated as empty strings, and
// locations that do not fit the stored value are skipped, so an unexpected
// document shape degrades the highlight instead of panicking.
//
// The returned Hits slice is never nil, so it serializes as a JSON array even
// when nothing matched.
func (b BleveSearcher) Search(ctx context.Context, req ports.IndexSearchRequest) (resp dto.SearchResponse, err error) {
	var searchQuery query.Query
	if req.ExactSearch {
		searchQuery = bleve.NewTermQuery(req.Query)
	} else {
		searchQuery = bleve.NewFuzzyQuery(req.Query)
	}

	maxHits := req.MaxHits
	if maxHits == 0 {
		maxHits = 10
	}

	searchRequest := bleve.NewSearchRequestOptions(searchQuery, maxHits, 0, false)
	searchRequest.Fields = slices.Clone(searchFields)
	searchRequest.Highlight = bleve.NewHighlight()

	result, err := b.idx.SearchInContext(ctx, searchRequest)
	if err != nil {
		return resp, err
	}

	resp.Hits = make([]dto.SearchHit, 0, len(result.Hits))

	for _, hit := range result.Hits {
		var page pageToIndex
		if err := mapstructure.Decode(hit.Fields, &page); err != nil {
			return resp, err
		}

		searchHit := dto.SearchHit{
			ObjectID:    hit.ID,
			MainTitle:   page.MainTitle,
			PageTitle:   page.PageTitle,
			Url:         page.Url,
			Breadcrumbs: page.Breadcrumbs,
			Snippet: dto.SnippetResult{
				Content: dto.SnippetContent{
					Value:      page.Content,
					MatchLevel: dto.MatchLevelFull,
				},
			},
			Highlight: make(map[string]dto.Match, len(searchFields)),
		}

		if contentFragment, ok := hit.Fragments["content"]; ok && len(contentFragment) > 0 {
			searchHit.Snippet.Content.Value = contentFragment[0]
		}

		for _, field := range searchFields {
			// The field may be absent from the hit or hold a non-string value;
			// fall back to the empty string instead of panicking on the assertion.
			fieldValue, _ := hit.Fields[field].(string)

			locationMap, matched := hit.Locations[field]
			if !matched {
				searchHit.Highlight[field] = dto.Match{
					Value:        fieldValue,
					MatchLevel:   dto.MatchLevelNone,
					MatchedWords: make([]string, 0),
				}
				continue
			}

			fieldMatch := dto.Match{
				MatchLevel:   dto.MatchLevelFull,
				Value:        fieldValue,
				MatchedWords: make([]string, 0),
			}

			// Locations can exist without a highlight fragment; keep the plain
			// value in that case.
			if fragments := hit.Fragments[field]; len(fragments) > 0 {
				fieldMatch.Value = fragments[0]
			}

			for _, locations := range locationMap {
				for _, location := range locations {
					if location == nil || location.Start > location.End || location.End > uint64(len(fieldValue)) {
						continue
					}

					matchedWord := fieldValue[location.Start:location.End]
					// MatchedWords is kept sorted so duplicates are found by binary search.
					if pos, found := slices.BinarySearch(fieldMatch.MatchedWords, matchedWord); !found {
						fieldMatch.MatchedWords = slices.Insert(fieldMatch.MatchedWords, pos, matchedWord)
					}
				}
			}

			searchHit.Highlight[field] = fieldMatch
		}

		resp.Hits = append(resp.Hits, searchHit)
	}

	return resp, nil
}

// Close closes the underlying index.
func (b BleveSearcher) Close() error {
	return b.idx.Close()
}

View file

@ -0,0 +1,216 @@
package services
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"maps"
"os"
"sync"
"code.icb4dc0.de/prskr/searcherside/core/ports"
)
var _ ports.IndexCurator = (*FileIndexCurator)(nil)
// FileIndexCurator keeps track of the ingested indices and persists that
// bookkeeping as JSON in the file at FilePath.
type FileIndexCurator struct {
	// lock guards state and searchers.
	lock sync.RWMutex
	// FilePath is the location of the persisted state file.
	FilePath string
	// Indexer builds new indices.
	Indexer ports.Indexer
	// Archiver archives freshly built indices.
	Archiver ports.Archiver
	// state records the current index hash per key and the result per hash.
	state indexState
	// searchers caches opened searchers by index hash.
	searchers map[string]ports.Searcher
}

// NewFileIndexCurator creates a curator backed by the state file at filePath
// and loads previously persisted state from it. A load error is returned
// instead of a curator.
func NewFileIndexCurator(
	filePath string,
	indexer ports.Indexer,
	archiver ports.Archiver,
) (*FileIndexCurator, error) {
	created := new(FileIndexCurator)
	created.FilePath = filePath
	created.Indexer = indexer
	created.Archiver = archiver
	created.searchers = make(map[string]ports.Searcher)

	loadErr := created.load()
	if loadErr != nil {
		return nil, loadErr
	}

	return created, nil
}
// Ingest builds the index described by request, archives it, records it as
// the current index for the request's module/instance and persists the
// updated state. The write lock is held for the whole operation. The first
// failing step aborts the ingest and its error is returned as is.
func (f *FileIndexCurator) Ingest(ctx context.Context, request ports.IngestIndexRequest) error {
	f.lock.Lock()
	defer f.lock.Unlock()

	indexed, err := f.Indexer.IngestIndex(ctx, request)
	if err != nil {
		return err
	}

	toArchive := ports.ArchiveIndexRequest{
		Path: indexed.Path,
		Hash: request.Hash,
		Type: indexed.Type,
	}
	if err := f.Archiver.ArchiveIndex(toArchive); err != nil {
		return err
	}

	f.state.Add(
		ports.IndexKey{Module: request.Module, Instance: request.Instance},
		request.Hash,
		indexed,
	)

	return f.snapshot()
}
// Searcher returns the searcher for the index currently registered under key,
// opening and caching it on first use.
//
// Cached searchers are looked up under the read lock. Opening a searcher and
// storing it in the cache happens under the write lock, because the cache is
// a plain map and must not be written while other goroutines read it. Only
// successfully opened searchers are cached; a failed open is reported to the
// caller and retried on the next call instead of being served from the cache
// as a nil searcher.
//
// It returns an error when no index is known for key, when the recorded hash
// has no index result, when the index type has no searcher implementation, or
// when opening the index fails.
func (f *FileIndexCurator) Searcher(key ports.IndexKey) (searcher ports.Searcher, err error) {
	// Fast path: read-only lookup of an already opened searcher.
	f.lock.RLock()
	idxHash, known := f.state.Current[key]
	cached, hit := f.searchers[idxHash]
	f.lock.RUnlock()

	if !known {
		return nil, fmt.Errorf("no known index for key %s/%s", key.Module, key.Instance)
	}
	if hit {
		return cached, nil
	}

	f.lock.Lock()
	defer f.lock.Unlock()

	// The state may have changed while no lock was held; look everything up again.
	idxHash, known = f.state.Current[key]
	if !known {
		return nil, fmt.Errorf("no known index for key %s/%s", key.Module, key.Instance)
	}
	if cached, hit = f.searchers[idxHash]; hit {
		return cached, nil
	}

	idxResult, ok := f.state.Indices[idxHash]
	if !ok {
		return nil, fmt.Errorf("no index result for last indexed hash: %s", idxHash)
	}

	switch idxResult.Type {
	case ports.IndexTypeBleve:
		opened, openErr := NewBleveSearcher(idxResult.Path)
		if openErr != nil {
			return nil, openErr
		}

		// A curator built as a struct literal has no cache map yet.
		if f.searchers == nil {
			f.searchers = make(map[string]ports.Searcher)
		}
		f.searchers[idxHash] = opened

		return opened, nil
	default:
		return nil, fmt.Errorf("no searcher for index type: %s", idxResult.Type)
	}
}
// Close closes every cached searcher that implements io.Closer and returns
// the joined close errors, or nil when none failed.
func (f *FileIndexCurator) Close() error {
	var closeErrs []error

	for _, cached := range f.searchers {
		closer, closable := cached.(io.Closer)
		if !closable {
			continue
		}
		closeErrs = append(closeErrs, closer.Close())
	}

	return errors.Join(closeErrs...)
}
// snapshot persists the current state as JSON to FilePath.
//
// The state is first written to "<FilePath>.tmp" and then renamed over the
// real file. Writing in place would truncate the previous state before the
// new one is complete, so an interrupted write would leave a corrupt or empty
// state file behind. On any failure the temporary file is removed and the
// previous state file is left as it was.
func (f *FileIndexCurator) snapshot() (err error) {
	tmpPath := f.FilePath + ".tmp"

	stateFile, err := os.Create(tmpPath)
	if err != nil {
		return err
	}

	defer func() {
		if err != nil {
			err = errors.Join(err, os.Remove(tmpPath))
		}
	}()

	if err = json.NewEncoder(stateFile).Encode(f.state); err != nil {
		return errors.Join(err, stateFile.Close())
	}

	// Close before renaming so that all data has been handed to the OS.
	if err = stateFile.Close(); err != nil {
		return err
	}

	return os.Rename(tmpPath, f.FilePath)
}
// load reads the persisted state from FilePath into f.state.
//
// A missing state file is not an error: the curator simply starts without
// any known index. The same applies to an empty state file, for which the
// decoder reports io.EOF. Any other open or decode error is returned.
func (f *FileIndexCurator) load() error {
	stateFile, err := os.Open(f.FilePath)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return nil
		}

		return err
	}

	defer func() {
		// The file was only read; a close error cannot lose data.
		_ = stateFile.Close()
	}()

	if err := json.NewDecoder(stateFile).Decode(&f.state); err != nil && !errors.Is(err, io.EOF) {
		return fmt.Errorf("failed to decode index state %q: %w", f.FilePath, err)
	}

	return nil
}
var (
_ json.Marshaler = (*indexState)(nil)
_ json.Unmarshaler = (*indexState)(nil)
)
// indexState is the bookkeeping that gets persisted to the state file.
type indexState struct {
	// Current maps an index key to the hash of its most recently added index.
	Current map[ports.IndexKey]string
	// Indices maps an index hash to the result of ingesting it.
	Indices map[string]ports.IngestIndexResult
}

// MarshalJSON serializes the state as an object with the members "Current"
// and "Indices". The keys of Current are written in their MarshalText form,
// because a struct cannot be used as a JSON object key directly.
func (s indexState) MarshalJSON() ([]byte, error) {
	type wireState struct {
		Current map[string]string
		Indices map[string]ports.IngestIndexResult
	}

	wire := wireState{
		Current: make(map[string]string),
		Indices: maps.Clone(s.Indices),
	}

	for key, hash := range s.Current {
		encodedKey, err := key.MarshalText()
		if err != nil {
			return nil, err
		}
		wire.Current[string(encodedKey)] = hash
	}

	return json.Marshal(wire)
}
// UnmarshalJSON restores the state from the form written by MarshalJSON.
// Current is reset to an empty map before decoding starts; Indices is replaced
// once the JSON has been parsed, and the keys of Current are then decoded one
// by one with IndexKey.UnmarshalText. The first key that fails to decode
// aborts the operation with its error.
func (s *indexState) UnmarshalJSON(bytes []byte) error {
	var wire struct {
		Current map[string]string
		Indices map[string]ports.IngestIndexResult
	}

	s.Current = make(map[ports.IndexKey]string)

	parseErr := json.Unmarshal(bytes, &wire)
	if parseErr != nil {
		return parseErr
	}

	s.Indices = maps.Clone(wire.Indices)

	for encodedKey, hash := range wire.Current {
		var key ports.IndexKey
		keyErr := key.UnmarshalText([]byte(encodedKey))
		if keyErr != nil {
			return keyErr
		}
		s.Current[key] = hash
	}

	return nil
}
// Add records result under hash and makes hash the current index for key.
// Both maps are created on first use, so the zero indexState is usable.
func (s *indexState) Add(key ports.IndexKey, hash string, result ports.IngestIndexResult) {
	if s.Indices == nil {
		s.Indices = map[string]ports.IngestIndexResult{}
	}
	if s.Current == nil {
		s.Current = map[ports.IndexKey]string{}
	}

	s.Indices[hash] = result
	s.Current[key] = hash
}

View file

@ -0,0 +1,51 @@
package services
import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/klauspost/compress/zstd"
"code.icb4dc0.de/prskr/searcherside/core/ports"
"code.icb4dc0.de/prskr/searcherside/internal/archive"
)
var _ ports.Archiver = (*TarZSTIndexArchiver)(nil)
// TarZSTIndexArchiver archives index directories as zstd-compressed tarballs.
type TarZSTIndexArchiver struct {
	// DataDirectory holds the index directories and receives the archives.
	DataDirectory string
}

// ArchiveIndex writes the index directory at req.Path to
// "<DataDirectory>/<req.Hash>.<req.Type>.tar.zst".
//
// The tar stream is written through the zstd encoder. Previously it was
// written to the file directly, so the encoder never saw any data and the
// ".tar.zst" file contained an uncompressed tarball.
//
// The deferred calls run in reverse order: the encoder is closed first, which
// flushes the remaining compressed data, and the file is closed afterwards.
// Close errors of both are joined into the returned error.
func (a TarZSTIndexArchiver) ArchiveIndex(req ports.ArchiveIndexRequest) (err error) {
	archiveFile, err := os.Create(filepath.Join(a.DataDirectory, strings.Join([]string{req.Hash, req.Type.String(), "tar.zst"}, ".")))
	if err != nil {
		return fmt.Errorf("failed to create archive file: %w", err)
	}

	defer func() {
		err = errors.Join(err, archiveFile.Close())
	}()

	encoder, err := zstd.NewWriter(archiveFile, zstd.WithEncoderLevel(zstd.SpeedBestCompression))
	if err != nil {
		return fmt.Errorf("failed to create zst encoder: %w", err)
	}

	defer func() {
		err = errors.Join(err, encoder.Close())
	}()

	relativePath, err := filepath.Rel(a.DataDirectory, req.Path)
	if err != nil {
		return fmt.Errorf("failed to get relative path: %w", err)
	}

	// NOTE(review): assumes archive.TarDirectory accepts any io.Writer as its
	// destination — confirm against internal/archive.
	return archive.TarDirectory(
		os.DirFS(a.DataDirectory),
		filepath.ToSlash(relativePath),
		encoder,
	)
}

60
go.mod Normal file
View file

@ -0,0 +1,60 @@
module code.icb4dc0.de/prskr/searcherside
go 1.22
toolchain go1.22.4
require (
github.com/alecthomas/kong v0.9.0
github.com/blevesearch/bleve/v2 v2.4.0
github.com/go-chi/chi/v5 v5.0.12
github.com/go-chi/cors v1.2.1
github.com/go-chi/jwtauth/v5 v5.3.1
github.com/klauspost/compress v1.17.8
github.com/lestrrat-go/jwx/v2 v2.0.21
github.com/mitchellh/mapstructure v1.5.0
gopkg.in/yaml.v3 v3.0.1
)
require (
github.com/RoaringBitmap/roaring v1.9.4 // indirect
github.com/bits-and-blooms/bitset v1.13.0 // indirect
github.com/blevesearch/bleve_index_api v1.1.9 // indirect
github.com/blevesearch/geo v0.1.20 // indirect
github.com/blevesearch/go-faiss v1.0.16 // indirect
github.com/blevesearch/go-porterstemmer v1.0.3 // indirect
github.com/blevesearch/gtreap v0.1.1 // indirect
github.com/blevesearch/mmap-go v1.0.4 // indirect
github.com/blevesearch/scorch_segment_api/v2 v2.2.14 // indirect
github.com/blevesearch/segment v0.9.1 // indirect
github.com/blevesearch/snowballstem v0.9.0 // indirect
github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect
github.com/blevesearch/vellum v1.0.10 // indirect
github.com/blevesearch/zapx/v11 v11.3.10 // indirect
github.com/blevesearch/zapx/v12 v12.3.10 // indirect
github.com/blevesearch/zapx/v13 v13.3.10 // indirect
github.com/blevesearch/zapx/v14 v14.3.10 // indirect
github.com/blevesearch/zapx/v15 v15.3.13 // indirect
github.com/blevesearch/zapx/v16 v16.1.1 // indirect
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 // indirect
github.com/goccy/go-json v0.10.3 // indirect
github.com/golang/geo v0.0.0-20230421003525-6adc56603217 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kr/pretty v0.1.0 // indirect
github.com/lestrrat-go/blackmagic v1.0.2 // indirect
github.com/lestrrat-go/httpcc v1.0.1 // indirect
github.com/lestrrat-go/httprc v1.0.5 // indirect
github.com/lestrrat-go/iter v1.0.2 // indirect
github.com/lestrrat-go/option v1.0.1 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/mschoch/smat v0.2.0 // indirect
github.com/segmentio/asm v1.2.0 // indirect
go.etcd.io/bbolt v1.3.10 // indirect
golang.org/x/crypto v0.24.0 // indirect
golang.org/x/sys v0.21.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
)

131
go.sum Normal file
View file

@ -0,0 +1,131 @@
github.com/RoaringBitmap/roaring v1.9.4 h1:yhEIoH4YezLYT04s1nHehNO64EKFTop/wBhxv2QzDdQ=
github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90=
github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU=
github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
github.com/alecthomas/kong v0.9.0 h1:G5diXxc85KvoV2f0ZRVuMsi45IrBgx9zDNGNj165aPA=
github.com/alecthomas/kong v0.9.0/go.mod h1:Y47y5gKfHp1hDc7CH7OeXgLIpp+Q2m1Ni0L5s3bI8Os=
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bits-and-blooms/bitset v1.13.0 h1:bAQ9OPNFYbGHV6Nez0tmNI0RiEu7/hxlYJRUA0wFAVE=
github.com/bits-and-blooms/bitset v1.13.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/blevesearch/bleve/v2 v2.4.0 h1:2xyg+Wv60CFHYccXc+moGxbL+8QKT/dZK09AewHgKsg=
github.com/blevesearch/bleve/v2 v2.4.0/go.mod h1:IhQHoFAbHgWKYavb9rQgQEJJVMuY99cKdQ0wPpst2aY=
github.com/blevesearch/bleve_index_api v1.1.9 h1:Cpq0Lp3As0Gfk3+PmcoNDRKeI50C5yuFNpj0YlN/bOE=
github.com/blevesearch/bleve_index_api v1.1.9/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM=
github.com/blevesearch/geo v0.1.20/go.mod h1:DVG2QjwHNMFmjo+ZgzrIq2sfCh6rIHzy9d9d0B59I6w=
github.com/blevesearch/go-faiss v1.0.16 h1:lfzXzzjO1mAf15MRiRY5yz6KVGr02CyRrr7m0z70Ih8=
github.com/blevesearch/go-faiss v1.0.16/go.mod h1:jrxHrbl42X/RnDPI+wBoZU8joxxuRwedrxqswQ3xfU8=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y=
github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk=
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
github.com/blevesearch/scorch_segment_api/v2 v2.2.14 h1:fgMLMpGWR7u2TdRm7XSZVWhPvMAcdYHh25Lq1fQ6Fjo=
github.com/blevesearch/scorch_segment_api/v2 v2.2.14/go.mod h1:B7+a7vfpY4NsjuTkpv/eY7RZ91Xr90VaJzT2t7upZN8=
github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU=
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A=
github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ=
github.com/blevesearch/vellum v1.0.10 h1:HGPJDT2bTva12hrHepVT3rOyIKFFF4t7Gf6yMxyMIPI=
github.com/blevesearch/vellum v1.0.10/go.mod h1:ul1oT0FhSMDIExNjIxHqJoGpVrBpKCdgDQNxfqgJt7k=
github.com/blevesearch/zapx/v11 v11.3.10 h1:hvjgj9tZ9DeIqBCxKhi70TtSZYMdcFn7gDb71Xo/fvk=
github.com/blevesearch/zapx/v11 v11.3.10/go.mod h1:0+gW+FaE48fNxoVtMY5ugtNHHof/PxCqh7CnhYdnMzQ=
github.com/blevesearch/zapx/v12 v12.3.10 h1:yHfj3vXLSYmmsBleJFROXuO08mS3L1qDCdDK81jDl8s=
github.com/blevesearch/zapx/v12 v12.3.10/go.mod h1:0yeZg6JhaGxITlsS5co73aqPtM04+ycnI6D1v0mhbCs=
github.com/blevesearch/zapx/v13 v13.3.10 h1:0KY9tuxg06rXxOZHg3DwPJBjniSlqEgVpxIqMGahDE8=
github.com/blevesearch/zapx/v13 v13.3.10/go.mod h1:w2wjSDQ/WBVeEIvP0fvMJZAzDwqwIEzVPnCPrz93yAk=
github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz77pSwwKU=
github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
github.com/blevesearch/zapx/v15 v15.3.13 h1:6EkfaZiPlAxqXz0neniq35my6S48QI94W/wyhnpDHHQ=
github.com/blevesearch/zapx/v15 v15.3.13/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg=
github.com/blevesearch/zapx/v16 v16.1.1 h1:k+fDKs4ylqqw+X1PopzoxMbDdwgMOaXSbRCo0jnfR2Q=
github.com/blevesearch/zapx/v16 v16.1.1/go.mod h1:Zmq22YL64zvplIjUftIsYX3pV085F0wff8zukUZUww4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 h1:rpfIENRNNilwHwZeG5+P150SMrnNEcHYvcCuK6dPZSg=
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0=
github.com/go-chi/chi/v5 v5.0.12 h1:9euLV5sTrTNTRUU9POmDUvfxyj6LAABLUcEWO+JJb4s=
github.com/go-chi/chi/v5 v5.0.12/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
github.com/go-chi/cors v1.2.1 h1:xEC8UT3Rlp2QuWNEr4Fs/c2EAGVKBwy/1vHx3bppil4=
github.com/go-chi/cors v1.2.1/go.mod h1:sSbTewc+6wYHBBCW7ytsFSn836hqM7JxpglAy2Vzc58=
github.com/go-chi/jwtauth/v5 v5.3.1 h1:1ePWrjVctvp1tyBq5b/2ER8Th/+RbYc7x4qNsc5rh5A=
github.com/go-chi/jwtauth/v5 v5.3.1/go.mod h1:6Fl2RRmWXs3tJYE1IQGX81FsPoGqDwq9c15j52R5q80=
github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
github.com/golang/geo v0.0.0-20230421003525-6adc56603217 h1:HKlyj6in2JV6wVkmQ4XmG/EIm+SCYlPZ+V4GWit7Z+I=
github.com/golang/geo v0.0.0-20230421003525-6adc56603217/go.mod h1:8wI0hitZ3a1IxZfeH3/5I97CI8i5cLGsYe7xNhQGs9U=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU=
github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/lestrrat-go/blackmagic v1.0.2 h1:Cg2gVSc9h7sz9NOByczrbUvLopQmXrfFx//N+AkAr5k=
github.com/lestrrat-go/blackmagic v1.0.2/go.mod h1:UrEqBzIR2U6CnzVyUtfM6oZNMt/7O7Vohk2J0OGSAtU=
github.com/lestrrat-go/httpcc v1.0.1 h1:ydWCStUeJLkpYyjLDHihupbn2tYmZ7m22BGkcvZZrIE=
github.com/lestrrat-go/httpcc v1.0.1/go.mod h1:qiltp3Mt56+55GPVCbTdM9MlqhvzyuL6W/NMDA8vA5E=
github.com/lestrrat-go/httprc v1.0.5 h1:bsTfiH8xaKOJPrg1R+E3iE/AWZr/x0Phj9PBTG/OLUk=
github.com/lestrrat-go/httprc v1.0.5/go.mod h1:mwwz3JMTPBjHUkkDv/IGJ39aALInZLrhBp0X7KGUZlo=
github.com/lestrrat-go/iter v1.0.2 h1:gMXo1q4c2pHmC3dn8LzRhJfP1ceCbgSiT9lUydIzltI=
github.com/lestrrat-go/iter v1.0.2/go.mod h1:Momfcq3AnRlRjI5b5O8/G5/BvpzrhoFTZcn06fEOPt4=
github.com/lestrrat-go/jwx/v2 v2.0.21 h1:jAPKupy4uHgrHFEdjVjNkUgoBKtVDgrQPB/h55FHrR0=
github.com/lestrrat-go/jwx/v2 v2.0.21/go.mod h1:09mLW8zto6bWL9GbwnqAli+ArLf+5M33QLQPDggkUWM=
github.com/lestrrat-go/option v1.0.1 h1:oAzP2fvZGQKWkvHa1/SAcFolBEca1oN+mQ7eooNBEYU=
github.com/lestrrat-go/option v1.0.1/go.mod h1:5ZHFbivi4xwXxhxY9XHDe2FHo6/Z7WWmtT7T5nBBp3I=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys=
github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
go.etcd.io/bbolt v1.3.10 h1:+BqfJTcCzTItrop8mq/lbzL8wSGtj94UO/3U31shqG0=
go.etcd.io/bbolt v1.3.10/go.mod h1:bK3UQLPJZly7IlNmV7uVHJDxfe5aK9Ll93e/74Y9oEQ=
golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View file

@ -0,0 +1,94 @@
package v1
import (
"crypto/sha256"
"encoding/hex"
"io"
"mime/multipart"
"net/http"
"os"
"github.com/go-chi/chi/v5"
"code.icb4dc0.de/prskr/searcherside/core/ports"
"code.icb4dc0.de/prskr/searcherside/internal/logging"
)
// IndexHandler serves the index upload endpoint.
type IndexHandler struct {
	// MaxMemoryBytes is handed to ParseMultipartForm as the in-memory limit.
	MaxMemoryBytes int64
	// Indexer ingests the uploaded archives.
	Indexer ports.IndexCurator
}

// IngestIndex accepts a multipart upload containing exactly one file, stores
// it in a temporary file while computing its SHA-256 hash, and starts the
// ingest in the background. It answers 202 Accepted as soon as the upload has
// been stored; ingest failures are only logged.
//
// Everything the background goroutine needs (URL parameters, hash, file path)
// is captured before the goroutine starts, because the request and its routing
// state must not be used after the handler has returned. The temporary file
// is removed once the ingest finished and on every error path after it was
// created.
func (h IndexHandler) IngestIndex(writer http.ResponseWriter, req *http.Request) {
	ctx := req.Context()
	logger := logging.GetLogger(ctx)

	if err := req.ParseMultipartForm(h.MaxMemoryBytes); err != nil {
		logger.WarnContext(ctx, "Failed to parse multipart form", logging.Error(err))
		http.Error(writer, "Failed to parse multipart form", http.StatusInternalServerError)
		return
	}

	if len(req.MultipartForm.File) != 1 {
		http.Error(writer, "Only a single file can be uploaded", http.StatusBadRequest)
		return
	}

	// The map holds exactly one entry at this point.
	var indexFile *multipart.FileHeader
	for _, files := range req.MultipartForm.File {
		if len(files) != 1 {
			http.Error(writer, "Only a single file can be uploaded", http.StatusBadRequest)
			return
		}
		indexFile = files[0]
	}

	stream, err := indexFile.Open()
	if err != nil {
		logger.ErrorContext(ctx, "Failed to open index file", logging.Error(err))
		http.Error(writer, "Failed to open index file", http.StatusBadRequest)
		return
	}

	indexTempFile, err := os.CreateTemp(os.TempDir(), "searcherside-index-*")
	if err != nil {
		if closeErr := stream.Close(); closeErr != nil {
			logger.ErrorContext(ctx, "Failed to close index file", logging.Error(closeErr))
		}
		logger.ErrorContext(ctx, "Failed to create temporary index file", logging.Error(err))
		http.Error(writer, "Failed to create temporary index file", http.StatusInternalServerError)
		return
	}

	tempPath := indexTempFile.Name()
	removeTempFile := func() {
		if removeErr := os.Remove(tempPath); removeErr != nil {
			logger.WarnContext(ctx, "Failed to remove temporary index file", logging.Error(removeErr))
		}
	}

	hash := sha256.New()
	_, copyErr := io.Copy(io.MultiWriter(hash, indexTempFile), stream)

	if err := stream.Close(); err != nil {
		logger.ErrorContext(ctx, "Failed to close index file", logging.Error(err))
	}

	if copyErr != nil {
		if err := indexTempFile.Close(); err != nil {
			logger.ErrorContext(ctx, "Failed to close temporary index file", logging.Error(err))
		}
		removeTempFile()
		logger.ErrorContext(ctx, "Failed to copy index file", logging.Error(copyErr))
		http.Error(writer, "Failed to copy index file", http.StatusBadRequest)
		return
	}

	if err := indexTempFile.Close(); err != nil {
		removeTempFile()
		logger.ErrorContext(ctx, "Failed to close temporary index file", logging.Error(err))
		http.Error(writer, "Failed to close temporary index file", http.StatusInternalServerError)
		return
	}

	ingestRequest := ports.IngestIndexRequest{
		FilePath: tempPath,
		Hash:     hex.EncodeToString(hash.Sum(nil)),
		Module:   chi.URLParam(req, "module"),
		Instance: chi.URLParam(req, "instance"),
	}

	// NOTE(review): ctx is the request context and is cancelled once this
	// handler returns. That is harmless as long as the ingest pipeline ignores
	// cancellation, but a detached context (context.WithoutCancel) should be
	// passed here as soon as it does not.
	go func() {
		defer removeTempFile()

		if indexErr := h.Indexer.Ingest(ctx, ingestRequest); indexErr != nil {
			logger.ErrorContext(ctx, "Failed to ingest index", logging.Error(indexErr))
		}
	}()

	writer.WriteHeader(http.StatusAccepted)
}

34
handlers/api/v1/routes.go Normal file
View file

@ -0,0 +1,34 @@
package v1
import (
"net/http"
"github.com/go-chi/chi/v5"
"github.com/go-chi/cors"
"github.com/go-chi/jwtauth/v5"
"github.com/lestrrat-go/jwx/v2/jwa"
)
// Mount registers the v1 API routes on r.
//
// The index upload route is placed behind JWT verification with an HS256 key
// derived from authSecret; tokens are read from the request header. The
// preview search route is public and wrapped in a CORS handler that allows
// GET requests from any origin.
func Mount(
	r chi.Router,
	authSecret []byte,
	indexHandler IndexHandler,
	searchHandler SearchHandler,
) {
	// Routes requiring authentication
	r.Group(func(protected chi.Router) {
		tokenAuth := jwtauth.New(jwa.HS256.String(), authSecret, nil)

		protected.Use(jwtauth.Verify(tokenAuth, jwtauth.TokenFromHeader))
		protected.Use(jwtauth.Authenticator(tokenAuth))

		protected.Put("/index/{module}/{instance}", indexHandler.IngestIndex)
	})

	// Routes requiring CORS
	r.Group(func(public chi.Router) {
		corsOptions := cors.Options{
			AllowedOrigins: []string{"*"},
			AllowedMethods: []string{http.MethodGet},
		}

		public.Use(cors.Handler(corsOptions))
		public.Get("/preview-search/{module}/{instance}", searchHandler.PreviewSearch)
	})
}

View file

@ -0,0 +1,70 @@
package v1
import (
"encoding/json"
"log/slog"
"net/http"
"strconv"
"github.com/go-chi/chi/v5"
"code.icb4dc0.de/prskr/searcherside/core/ports"
"code.icb4dc0.de/prskr/searcherside/internal/logging"
)
// SearchHandler serves search requests over HTTP.
type SearchHandler struct {
// Curator provides the searcher for a given module/instance index key.
Curator ports.IndexCurator
}
// PreviewSearch runs a search against the index identified by the "module"
// and "instance" URL parameters and writes the result as JSON.
//
// The search parameters are taken from the query string by searchRequestFrom.
// A failure to obtain the searcher, to execute the search, or to encode the
// result is logged and answered with 500 Internal Server Error; in each case
// the handler stops instead of continuing with a partial state.
func (h SearchHandler) PreviewSearch(writer http.ResponseWriter, request *http.Request) {
	logger := logging.GetLogger(request.Context())
	idxKey := ports.IndexKey{
		Module:   chi.URLParam(request, "module"),
		Instance: chi.URLParam(request, "instance"),
	}

	logger.Info("Get searcher for index", slog.String("module", idxKey.Module), slog.String("instance", idxKey.Instance))

	searcher, err := h.Curator.Searcher(idxKey)
	if err != nil {
		logger.Error("Error getting searcher", logging.Error(err))
		// NOTE(review): if the curator reports "unknown index" with a
		// distinguishable error, 404 would be the better answer - confirm.
		http.Error(writer, "Failed to get searcher", http.StatusInternalServerError)
		return
	}

	result, err := searcher.Search(request.Context(), searchRequestFrom(request))
	if err != nil {
		logger.Error("Failed to search", logging.Error(err))
		http.Error(writer, "Failed to search", http.StatusInternalServerError)
		return
	}

	writer.Header().Set("Content-Type", "application/json")

	// The first body write implicitly sends 200 OK, so no explicit
	// WriteHeader call is needed (calling it afterwards would be superfluous).
	if err := json.NewEncoder(writer).Encode(result); err != nil {
		logger.Error("Failed to encode search result", logging.Error(err))
		http.Error(writer, "Failed to encode search result", http.StatusInternalServerError)
	}
}
// searchRequestFrom builds the index search request from the URL query
// parameters of req.
//
// Recognized parameters:
//   - query: the search query, passed through unchanged
//   - isExactSearch: boolean, defaults to false
//   - maxHits: integer, defaults to 25
//
// Missing or unparsable optional parameters keep their defaults.
func searchRequestFrom(req *http.Request) ports.IndexSearchRequest {
	params := req.URL.Query()

	// An absent parameter yields "", which fails to parse and therefore
	// leaves the default in place - the same outcome as skipping it.
	exact := false
	if parsed, err := strconv.ParseBool(params.Get("isExactSearch")); err == nil {
		exact = parsed
	}

	limit := 25
	if parsed, err := strconv.Atoi(params.Get("maxHits")); err == nil {
		limit = parsed
	}

	return ports.IndexSearchRequest{
		ExactSearch: exact,
		MaxHits:     limit,
		Query:       params.Get("query"),
	}
}

112
handlers/cli/server.go Normal file
View file

@ -0,0 +1,112 @@
package cli
import (
"context"
"crypto/rand"
"errors"
"fmt"
"log/slog"
"net"
"net/http"
"path/filepath"
"time"
"github.com/go-chi/chi/v5"
"code.icb4dc0.de/prskr/searcherside/core/services"
v1 "code.icb4dc0.de/prskr/searcherside/handlers/api/v1"
"code.icb4dc0.de/prskr/searcherside/infrastructure/api"
"code.icb4dc0.de/prskr/searcherside/internal/flags"
"code.icb4dc0.de/prskr/searcherside/internal/logging"
)
const (
// jwtSecretLength is the number of bytes of a generated JWT secret; it is
// also the length below which the token command warns about a short secret.
jwtSecretLength = 64
)
// ServerHandler is the CLI command that runs the HTTP server.
// Its fields are populated from flags and environment variables as described
// by the struct tags.
type ServerHandler struct {
// ListenAddress is the address the HTTP server listens on.
ListenAddress string `env:"LISTEN_ADDRESS" name:"listen-address" short:"a" help:"Listen address" default:":3000"`
// DataDirectory is the directory handed to the index curator, indexer and archiver.
DataDirectory string `env:"DATA_DIRECTORY" name:"data-directory" short:"d" help:"Data directory" default:"${CWD}/data"`
// Config holds the HTTP server settings (flag prefix "http.").
Config struct {
ReadHeaderTimeout time.Duration `env:"HTTP_READ_HEADER_TIMEOUT" name:"read-header-timeout" help:"Read header timeout" default:"5s"`
ShutDownTimeout time.Duration `env:"HTTP_SHUTDOWN_TIMEOUT" name:"shutdown-timeout" help:"Shutdown timeout" default:"5s"`
ParseMaxMemoryBytes int64 `env:"HTTP_PARSE_MAX_MEMORY_BYTES" name:"parse-max-memory-bytes" help:"Parse max memory bytes" default:"33554432"`
} `embed:"" prefix:"http."`
// Auth holds the authentication settings (flag prefix "auth.").
Auth struct {
// JwtSecret is the hex-encoded JWT secret; when empty, a random one is generated at startup.
JwtSecret flags.HexString `env:"AUTH_JWT_SECRET" name:"jwt-secret" help:"JWT secret"`
} `embed:"" prefix:"auth."`
}
// Run starts the HTTP server and blocks until ctx is cancelled or the server
// fails to serve.
//
// It wires the file-based index curator, resolves the JWT secret, mounts the
// v1 API under /api/v1 and serves on h.ListenAddress. When ctx is cancelled
// the server is shut down gracefully within h.Config.ShutDownTimeout.
//
// Run returns an error when the index curator cannot be created, the JWT
// secret cannot be obtained, or the listener fails (for example because the
// address is already in use). Shutdown errors are logged but not returned.
func (h *ServerHandler) Run(ctx context.Context, logger *slog.Logger) error {
	indexCurator, err := services.NewFileIndexCurator(
		filepath.Join(h.DataDirectory, "searcherside.json"),
		services.BleveIndexer{DataDirectory: h.DataDirectory},
		services.TarZSTIndexArchiver{DataDirectory: h.DataDirectory},
	)
	if err != nil {
		logger.Error("Failed to create index curator", logging.Error(err))
		return err
	}

	secret, err := h.jwtSecret()
	if err != nil {
		return err
	}

	r := chi.NewRouter()
	r.Use(api.LoggingMiddleware)

	r.Route("/api/v1", func(r chi.Router) {
		indexHandler := v1.IndexHandler{
			MaxMemoryBytes: h.Config.ParseMaxMemoryBytes,
			Indexer:        indexCurator,
		}

		searchHandler := v1.SearchHandler{
			Curator: indexCurator,
		}

		v1.Mount(r, secret, indexHandler, searchHandler)
	})

	srv := http.Server{
		Addr:              h.ListenAddress,
		Handler:           r,
		ReadHeaderTimeout: h.Config.ReadHeaderTimeout,
		// Every request context carries the logger so handlers can retrieve it.
		BaseContext: func(listener net.Listener) context.Context {
			return logging.ContextWithLogger(ctx, logger)
		},
	}

	logger.Info("Starting server", slog.String("address", h.ListenAddress))

	// serveErr reports a listener failure back to Run; without it a failed
	// ListenAndServe would leave Run waiting on ctx with no server running.
	serveErr := make(chan error, 1)
	go func() {
		defer close(serveErr)
		if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
			serveErr <- err
		}
	}()

	select {
	case err := <-serveErr:
		if err != nil {
			logger.Error("Failed to start server", logging.Error(err))
			return fmt.Errorf("serving on %q: %w", h.ListenAddress, err)
		}
		return nil
	case <-ctx.Done():
	}

	logger.Info("Shutting down server")
	// ctx is already cancelled here, so the shutdown deadline needs a fresh context.
	shutdownCtx, cancel := context.WithTimeout(context.Background(), h.Config.ShutDownTimeout)
	defer cancel()
	if err := srv.Shutdown(shutdownCtx); err != nil {
		logger.Error("Failed to shutdown server", logging.Error(err))
	}

	return nil
}
// jwtSecret returns the configured JWT secret. When none is configured it
// fills h.Auth.JwtSecret with jwtSecretLength random bytes from crypto/rand
// and returns that instead.
func (h *ServerHandler) jwtSecret() ([]byte, error) {
	if len(h.Auth.JwtSecret) > 0 {
		return h.Auth.JwtSecret, nil
	}

	h.Auth.JwtSecret = make([]byte, jwtSecretLength)
	read, err := rand.Read(h.Auth.JwtSecret)
	switch {
	case err != nil:
		return nil, err
	case read != jwtSecretLength:
		return nil, fmt.Errorf("expected to read %d random bytes but got %d", jwtSecretLength, read)
	}

	return h.Auth.JwtSecret, nil
}

71
handlers/cli/token.go Normal file
View file

@ -0,0 +1,71 @@
package cli
import (
"errors"
"fmt"
"log/slog"
"time"
"github.com/lestrrat-go/jwx/v2/jwa"
"github.com/lestrrat-go/jwx/v2/jwt"
"code.icb4dc0.de/prskr/searcherside/core/ports"
"code.icb4dc0.de/prskr/searcherside/internal/flags"
)
var (
// ErrJwtSecretRequired is returned by TokenHandler.Run when no secret was provided.
ErrJwtSecretRequired = errors.New("JWT secret is required")
)
// TokenHandler is the CLI command that generates a signed JWT.
type TokenHandler struct {
// Token groups the token settings (flag prefix "token.").
Token struct {
// Secret is the hex-encoded key the token is signed with.
Secret flags.HexString `name:"secret" help:"JWT secret"`
// Lifetime is added to the current time to compute the expiration claim.
Lifetime time.Duration `name:"lifetime" help:"JWT lifetime" default:"24h"`
// Subject is written to the subject claim.
Subject string `name:"subject" help:"JWT subject" default:"${WHOAMI=nobody}"`
// Claims are additional key/value claims set on the token.
Claims []flags.TokenClaim `name:"claims" help:"JWT claims"`
} `embed:"" prefix:"token."`
}
// Run builds a JWT from the configured claims, signs it with HS256 using the
// configured secret and prints the compact token followed by a newline to
// stdout.
//
// Custom claims are applied first; the subject, not-before (current UTC time)
// and expiration (current UTC time plus the lifetime) claims are set
// afterwards and therefore win over custom claims with the same keys.
// It returns ErrJwtSecretRequired when the secret is empty and only warns
// when the secret is shorter than jwtSecretLength.
func (h *TokenHandler) Run(stdout ports.STDOUT, logger *slog.Logger) error {
	ts := time.Now().UTC()

	secretLength := len(h.Token.Secret)
	if secretLength == 0 {
		return ErrJwtSecretRequired
	}
	if secretLength < jwtSecretLength {
		logger.Warn(
			"The secret does not have the recommended length",
			slog.Int("actual_length", secretLength),
			slog.Int("recommended_length", jwtSecretLength),
		)
	}

	token := jwt.New()
	for _, custom := range h.Token.Claims {
		if err := token.Set(custom.Key, custom.Value); err != nil {
			return err
		}
	}

	// Standard claims, applied in this order after the custom ones.
	standard := []struct {
		key   string
		value any
	}{
		{jwt.SubjectKey, h.Token.Subject},
		{jwt.NotBeforeKey, ts},
		{jwt.ExpirationKey, ts.Add(h.Token.Lifetime)},
	}
	for _, claim := range standard {
		if err := token.Set(claim.key, claim.value); err != nil {
			return err
		}
	}

	signed, err := jwt.Sign(token, jwt.WithKey(jwa.HS256, h.Token.Secret.Raw()))
	if err != nil {
		return err
	}

	_, err = fmt.Fprintln(stdout, string(signed))
	return err
}

View file

@ -0,0 +1,38 @@
package api
import (
"encoding/hex"
"hash/fnv"
"log/slog"
"net/http"
"strconv"
"time"
"code.icb4dc0.de/prskr/searcherside/internal/logging"
)
// LoggingMiddleware wraps next so that every request is logged before and
// after it is handled.
//
// The logger found in the request context is extended with a request ID, the
// HTTP method and the URL path, and the extended logger is stored in the
// context passed on to next.
func LoggingMiddleware(next http.Handler) http.Handler {
	handle := func(writer http.ResponseWriter, request *http.Request) {
		ctx := request.Context()
		scoped := logging.GetLogger(ctx).With(
			slog.String("http.requestId", requestId(request)),
			slog.String("http.method", request.Method),
			slog.String("http.path", request.URL.Path),
		)

		scoped.Info("Handling incoming request")
		next.ServeHTTP(writer, request.WithContext(logging.ContextWithLogger(ctx, scoped)))
		scoped.Info("Handled incoming request")
	}

	return http.HandlerFunc(handle)
}
// requestId derives an identifier for req by feeding the method, request URI,
// remote address, user agent and the current UTC time in nanoseconds into a
// 64-bit FNV-1 hash. The result is the hex-encoded 8-byte digest.
func requestId(req *http.Request) string {
	digest := fnv.New64()

	parts := [...]string{
		req.Method,
		req.RequestURI,
		req.RemoteAddr,
		req.UserAgent(),
		strconv.FormatInt(time.Now().UTC().UnixNano(), 10),
	}
	for _, part := range parts {
		// hash.Hash.Write never returns an error.
		_, _ = digest.Write([]byte(part))
	}

	return hex.EncodeToString(digest.Sum(nil))
}

View file

@ -0,0 +1,116 @@
package config
import (
"errors"
"fmt"
"io"
"strings"
"unicode"
"github.com/alecthomas/kong"
"gopkg.in/yaml.v3"
)
// KeyFormatterChain applies several KeyFormatters one after another.
type KeyFormatterChain []KeyFormatter

// Replace passes in through every formatter of the chain in order, feeding
// each formatter the output of the previous one. An empty chain returns in
// unchanged.
func (c KeyFormatterChain) Replace(in string) string {
	formatted := in
	for i := 0; i < len(c); i++ {
		formatted = c[i].Replace(formatted)
	}
	return formatted
}

// KeyFormatter rewrites a single configuration key segment.
type KeyFormatter interface {
	// Replace returns the rewritten form of in.
	Replace(in string) string
}
// KebabToPascalCase removes every '-' from its input and upper-cases the
// character that follows it, e.g. "read-header-timeout" becomes
// "readHeaderTimeout". The first character is left unchanged and a trailing
// '-' is dropped.
//
// The input is processed rune by rune so that multi-byte UTF-8 characters are
// preserved instead of being split into individual bytes.
var KebabToPascalCase KeyFormatter = KeyFormatterFunc(func(in string) string {
	var out strings.Builder
	out.Grow(len(in))

	// upperNext is set after a '-' and consumed by the following rune.
	upperNext := false
	for _, r := range in {
		switch {
		case upperNext:
			out.WriteRune(unicode.ToUpper(r))
			upperNext = false
		case r == '-':
			upperNext = true
		default:
			out.WriteRune(r)
		}
	}

	return out.String()
})
// Yaml configures how flag names are mapped onto keys of a YAML config file.
type Yaml struct {
// KeySeparator splits a flag name into path segments; "." is used when empty.
KeySeparator string
// KeyFormatter, when set, is applied to every path segment before the lookup.
KeyFormatter KeyFormatter
}
// Loader decodes the YAML document read from r and returns a kong resolver
// backed by it. An empty document (io.EOF) yields a resolver over an empty
// config; any other decoding error is returned.
//
// For each flag the resolver splits the flag name at the key separator,
// prefixes it with every alias and finally the name of the flag's parent
// node, normalizes the segments and returns the first non-nil value found.
// Flags whose parent is the application node (or has no node) are not
// resolved.
func (y Yaml) Loader(r io.Reader) (kong.Resolver, error) {
	values := make(map[string]any)
	if err := yaml.NewDecoder(r).Decode(&values); err != nil && !errors.Is(err, io.EOF) {
		return nil, fmt.Errorf("YAML config decoding error: %w", err)
	}

	resolve := func(_ *kong.Context, parent *kong.Path, flag *kong.Flag) (any, error) {
		node := parent.Node()
		if node == nil || node.Type == kong.ApplicationNode {
			return nil, nil
		}

		flagPath := strings.Split(flag.Name, y.separator())
		for _, prefix := range append(node.Aliases, node.Name) {
			candidate := append([]string{prefix}, flagPath...)
			if val := lookup(values, y.normalize(candidate)); val != nil {
				return val, nil
			}
		}

		return nil, nil
	}

	return kong.ResolverFunc(resolve), nil
}
// normalize applies the configured KeyFormatter to every segment of path.
// The slice is modified in place and returned; without a formatter it is
// returned untouched.
func (y Yaml) normalize(path []string) []string {
	if y.KeyFormatter == nil {
		return path
	}
	for i, segment := range path {
		path[i] = y.KeyFormatter.Replace(segment)
	}
	return path
}
// separator returns the configured key separator, falling back to "." when
// none is set.
func (y Yaml) separator() string {
	if sep := y.KeySeparator; sep != "" {
		return sep
	}
	return "."
}
// lookup walks config along path and returns the value stored at its end.
//
// An empty path returns config itself. nil is returned when a key is missing
// or when an intermediate value is not a map[string]any.
func lookup(config map[string]any, path []string) any {
	if len(path) == 0 {
		return config
	}

	current := config
	last := len(path) - 1
	// Descend through every segment except the final one; each must be a nested map.
	for _, key := range path[:last] {
		nested, isMap := current[key].(map[string]any)
		if !isMap {
			return nil
		}
		current = nested
	}

	return current[path[last]]
}
// KeyFormatterFunc adapts a plain function to the KeyFormatter interface.
type KeyFormatterFunc func(in string) string
// Replace calls f with in and returns its result.
func (f KeyFormatterFunc) Replace(in string) string {
return f(in)
}

68
internal/app.go Normal file
View file

@ -0,0 +1,68 @@
package internal
import (
	"context"
	"log/slog"
	"os"
	"os/signal"
	"os/user"
	"syscall"

	"github.com/alecthomas/kong"

	"code.icb4dc0.de/prskr/searcherside/core/ports"
	clih "code.icb4dc0.de/prskr/searcherside/handlers/cli"
	"code.icb4dc0.de/prskr/searcherside/infrastructure/config"
)
// App is the root of the command line interface: global logging options plus
// the available sub-commands.
type App struct {
// Logging holds the logging settings (flag prefix "logging.").
Logging struct {
// Level is the minimum level of records written by the default logger.
Level slog.Level `env:"LOG_LEVEL" help:"Log level" default:"warn"`
} `embed:"" prefix:"logging."`
// Serve starts the HTTP server.
Serve clih.ServerHandler `cmd:"" name:"serve" help:"Start the server" aliases:"server"`
// Token generates a JWT.
Token clih.TokenHandler `cmd:"" name:"token" help:"Generate a token"`
}
// Execute parses the command line and runs the selected command.
//
// It binds the working directory, stdout and a signal-aware context for the
// commands, loads configuration from ./config.yaml,
// /etc/searcherside/config.yaml and ~/.searcherside.yaml, and exposes the
// CWD and WHOAMI variables to flag defaults.
//
// The context passed to the commands is cancelled on SIGINT or SIGTERM.
// Execute returns an error when the working directory or the current user
// cannot be determined, or when the command itself fails.
func (a *App) Execute() error {
	wd, err := os.Getwd()
	if err != nil {
		return err
	}

	yamlLoader := config.Yaml{KeyFormatter: config.KebabToPascalCase}

	// Named currentUser so the os/user package is not shadowed.
	currentUser, err := user.Current()
	if err != nil {
		return err
	}

	// SIGKILL (os.Kill) cannot be caught, so it is not listed here; SIGTERM
	// is the signal service managers send to request a graceful stop.
	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
	defer stop()

	cliCtx := kong.Parse(
		a,
		kong.Name("searcherside"),
		kong.Description("SearcherSide"),
		kong.Bind(ports.CWD(wd)),
		kong.BindTo(os.Stdout, (*ports.STDOUT)(nil)),
		kong.BindTo(ctx, (*context.Context)(nil)),
		kong.Configuration(yamlLoader.Loader, "./config.yaml", "/etc/searcherside/config.yaml", "~/.searcherside.yaml"),
		kong.Vars{
			"CWD":    wd,
			"WHOAMI": currentUser.Username,
		},
	)

	return cliCtx.Run()
}
// AfterApply creates a JSON logger writing to stderr at the configured level,
// installs it as the slog default and binds it on kongCtx for the commands.
// It always returns nil.
func (a *App) AfterApply(kongCtx *kong.Context) error {
	handler := slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: a.Logging.Level})
	logger := slog.New(handler)

	slog.SetDefault(logger)
	kongCtx.Bind(logger)

	return nil
}

48
internal/archive/tar.go Normal file
View file

@ -0,0 +1,48 @@
package archive
import (
"archive/tar"
"errors"
"io"
"io/fs"
"path/filepath"
)
// TarDirectory writes everything below root in src as a tar stream to writer.
//
// Every entry visited by fs.WalkDir gets a header whose name is its
// slash-separated path within src; the content of regular files follows the
// header. The tar writer is closed before returning and its close error is
// joined with any error from the walk.
//
// An error reported by the walk itself (for example a root that does not
// exist) is returned instead of being ignored.
func TarDirectory(src fs.FS, root string, writer io.Writer) (err error) {
	tw := tar.NewWriter(writer)
	defer func() {
		err = errors.Join(err, tw.Close())
	}()

	return fs.WalkDir(src, root, func(path string, d fs.DirEntry, walkErr error) error {
		// On a walk error d may be nil, so it must be checked before d is used.
		if walkErr != nil {
			return walkErr
		}

		fileInfo, err := d.Info()
		if err != nil {
			return err
		}

		// NOTE(review): the second argument is the link target for symlinks;
		// the original passes the entry path here - confirm symlinks are not expected.
		header, err := tar.FileInfoHeader(fileInfo, path)
		if err != nil {
			return err
		}

		header.Name = filepath.ToSlash(path)

		if err := tw.WriteHeader(header); err != nil {
			return err
		}

		// Only regular files carry content; other entries have a zero-size header.
		if !fileInfo.Mode().IsRegular() {
			return nil
		}

		f, err := src.Open(path)
		if err != nil {
			return err
		}

		_, copyErr := io.Copy(tw, f)
		// Close the file right away so no descriptor leaks while walking.
		return errors.Join(copyErr, f.Close())
	})
}

33
internal/archive/zip.go Normal file
View file

@ -0,0 +1,33 @@
package archive
import (
"archive/zip"
"errors"
"io"
)
// WalkZipFile opens the zip archive at filePath and calls fn once per entry,
// in archive order, with the entry name and a reader for its content.
//
// Iteration stops at the first error: a failure to open an entry, an error
// returned by fn (joined with the error from closing the entry), or a failure
// to close an entry. The archive is closed before returning and its close
// error is joined with the result.
func WalkZipFile(filePath string, fn func(name string, r io.Reader) error) (err error) {
	zr, err := zip.OpenReader(filePath)
	if err != nil {
		return err
	}
	defer func() {
		err = errors.Join(err, zr.Close())
	}()

	for i := range zr.File {
		entry := zr.File[i]

		content, openErr := entry.Open()
		if openErr != nil {
			return openErr
		}

		if visitErr := fn(entry.Name, content); visitErr != nil {
			return errors.Join(visitErr, content.Close())
		}

		if closeErr := content.Close(); closeErr != nil {
			return closeErr
		}
	}

	return nil
}

31
internal/flags/claims.go Normal file
View file

@ -0,0 +1,31 @@
package flags
import (
"fmt"
"strings"
"github.com/alecthomas/kong"
)
// Compile-time check that *TokenClaim implements kong.MapperValue.
var _ kong.MapperValue = (*TokenClaim)(nil)
// TokenClaim is a single key/value claim given on the command line as "key=value".
type TokenClaim struct {
Key, Value string
}
// Decode reads the next value from the scanner and splits it at the first
// '=' into Key and Value.
//
// Only the first '=' separates key and value, so values may themselves
// contain '=' (for example base64 padding or URLs with a query string).
// An error is returned when the value cannot be read or contains no '='.
func (t *TokenClaim) Decode(ctx *kong.DecodeContext) error {
	token, err := ctx.Scan.PopValue("claim")
	if err != nil {
		return err
	}

	raw := token.String()
	key, value, found := strings.Cut(raw, "=")
	if !found {
		return fmt.Errorf("cannot split into key value pair: %s", raw)
	}

	t.Key = key
	t.Value = value

	return nil
}

View file

@ -0,0 +1,28 @@
package flags
import (
"encoding/hex"
"github.com/alecthomas/kong"
)
// HexString holds bytes that are given on the command line in hexadecimal notation.
type HexString []byte
// Raw returns the decoded bytes.
func (h *HexString) Raw() []byte {
return *h
}
// Decode reads the next value from the scanner, decodes it as hexadecimal and
// stores the resulting bytes in h. It returns an error when the value cannot
// be read or is not valid hex; h is left unchanged in that case.
func (h *HexString) Decode(ctx *kong.DecodeContext) error {
	token, err := ctx.Scan.PopValue("hex")
	if err != nil {
		return err
	}

	var decoded []byte
	if decoded, err = hex.DecodeString(token.String()); err != nil {
		return err
	}

	*h = decoded
	return nil
}

View file

@ -0,0 +1,20 @@
package logging
import (
"context"
"log/slog"
)
// loggerContextKey is the private type of the context key under which the
// logger is stored. A dedicated type keeps the key from colliding with keys
// of other packages, which a bare struct{} value would not.
type loggerContextKey struct{}

// loggerKey is the context key for the request-scoped logger.
var loggerKey loggerContextKey

// ContextWithLogger returns a copy of ctx that carries logger.
func ContextWithLogger(ctx context.Context, logger *slog.Logger) context.Context {
	return context.WithValue(ctx, loggerKey, logger)
}

// GetLogger returns the logger stored in ctx by ContextWithLogger.
// When ctx carries no logger (or a nil one) it returns slog.Default().
func GetLogger(ctx context.Context) *slog.Logger {
	// The comma-ok form is required: a plain assertion panics when the
	// context holds no value for the key.
	if contextLogger, ok := ctx.Value(loggerKey).(*slog.Logger); ok && contextLogger != nil {
		return contextLogger
	}
	return slog.Default()
}

View file

@ -0,0 +1,7 @@
package logging
import "log/slog"
// Error returns a string attribute with the key "err" holding the message of
// err. err must not be nil.
func Error(err error) slog.Attr {
	return slog.Attr{
		Key:   "err",
		Value: slog.StringValue(err.Error()),
	}
}

17
main.go Normal file
View file

@ -0,0 +1,17 @@
package main
import (
"fmt"
"os"
"code.icb4dc0.de/prskr/searcherside/internal"
)
// main runs the application and exits with status 1 when it fails, after
// printing the error to stderr.
func main() {
	var app internal.App
	if err := app.Execute(); err != nil {
		// Terminate the message with a newline so the shell prompt does not
		// end up on the same line as the error.
		_, _ = fmt.Fprintf(os.Stderr, "Failed to execute app: %v\n", err)
		os.Exit(1)
	}
}