Skip to content

Commit 4fd658e

Browse files
committed
Add a flag to the reindex command to force a full reindex
That can be helpful when the search service configuration has changed, e.g. by enabling TIKA. Previously, files that had already been indexed were not indexed again and thus were not part of the fulltext index. Fixes opencloud-eu#2285 Fixes opencloud-eu#2578
1 parent b9ed197 commit 4fd658e

10 files changed

Lines changed: 112 additions & 73 deletions

File tree

protogen/gen/opencloud/services/search/v0/search.pb.go

Lines changed: 68 additions & 58 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

protogen/gen/opencloud/services/search/v0/search.swagger.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,9 @@
329329
},
330330
"userId": {
331331
"type": "string"
332+
},
333+
"forceReindex": {
334+
"type": "boolean"
332335
}
333336
}
334337
},

protogen/proto/opencloud/services/search/v0/search.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ message SearchIndexResponse {
104104
message IndexSpaceRequest {
105105
string space_id = 1;
106106
string user_id = 2;
107+
bool force_reindex = 3;
107108
}
108109

109110
message IndexSpaceResponse {

services/antivirus/pkg/scanners/mocks/scanner.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

services/search/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,13 @@ It can also be used to re-index all spaces:
118118
opencloud search index --all-spaces
119119
```
120120

121+
Please note that a reindex only picks up new files. Files that have already been indexed are not indexed again, even if the configuration or the whole extractor has been changed. To force a full reindex, use the `--force-reindex` flag:
122+
123+
124+
```shell
125+
opencloud search index --all-spaces --force-reindex
126+
```
127+
121128
## Metrics
122129

123130
The search service exposes the following prometheus metrics at `<debug_endpoint>/metrics` (as configured using the `SEARCH_DEBUG_ADDR` env var):

services/search/pkg/command/index.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ func Index(cfg *config.Config) *cobra.Command {
2929
RunE: func(cmd *cobra.Command, args []string) error {
3030
allSpacesFlag, _ := cmd.Flags().GetBool("all-spaces")
3131
spaceFlag, _ := cmd.Flags().GetString("space")
32+
forceReindexFlag, _ := cmd.Flags().GetBool("force-reindex")
3233
if spaceFlag == "" && !allSpacesFlag {
3334
return errors.New("either --space or --all-spaces is required")
3435
}
@@ -48,7 +49,8 @@ func Index(cfg *config.Config) *cobra.Command {
4849

4950
c := searchsvc.NewSearchProviderService("eu.opencloud.api.search", grpcClient)
5051
_, err = c.IndexSpace(context.Background(), &searchsvc.IndexSpaceRequest{
51-
SpaceId: spaceFlag,
52+
SpaceId: spaceFlag,
53+
ForceReindex: forceReindexFlag,
5254
}, func(opts *client.CallOptions) { opts.RequestTimeout = 10 * time.Minute })
5355
if err != nil {
5456
fmt.Println("failed to index space: " + err.Error())
@@ -68,6 +70,11 @@ func Index(cfg *config.Config) *cobra.Command {
6870
false,
6971
"index all spaces instead. This or --space is required.",
7072
)
73+
indexCmd.Flags().Bool(
74+
"force-reindex",
75+
false,
76+
"force a rescan of all files, even if they are already indexed. This will make the indexing process much slower, but ensures that the index is up-to-date using the current search service configuration.",
77+
)
7178

7279
return indexCmd
7380
}

services/search/pkg/search/mocks/searcher.go

Lines changed: 14 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

services/search/pkg/search/service.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ const (
4646
type Searcher interface {
4747
Search(ctx context.Context, req *searchsvc.SearchRequest) (*searchsvc.SearchResponse, error)
4848

49-
IndexSpace(rID *provider.StorageSpaceId) error
49+
IndexSpace(rID *provider.StorageSpaceId, forceRescan bool) error
5050
PurgeDeleted(spaceID *provider.StorageSpaceId) error
5151

5252
TrashItem(rID *provider.ResourceId)
@@ -443,7 +443,7 @@ func (s *Service) searchIndex(ctx context.Context, req *searchsvc.SearchRequest,
443443
}
444444

445445
// IndexSpace (re)indexes all resources of a given space.
446-
func (s *Service) IndexSpace(spaceID *provider.StorageSpaceId) error {
446+
func (s *Service) IndexSpace(spaceID *provider.StorageSpaceId, forceRescan bool) error {
447447
ownerCtx, err := getAuthContext(s.serviceAccountID, s.gatewaySelector, s.serviceAccountSecret, s.logger)
448448
if err != nil {
449449
return err
@@ -501,6 +501,11 @@ func (s *Service) IndexSpace(spaceID *provider.StorageSpaceId) error {
501501
}
502502
s.logger.Debug().Str("path", ref.Path).Msg("Walking tree")
503503

504+
if forceRescan {
505+
s.doUpsertItem(ref, batch)
506+
return nil
507+
}
508+
504509
searchRes, err := s.engine.Search(ownerCtx, &searchsvc.SearchIndexRequest{
505510
Query: "id:" + storagespace.FormatResourceID(info.Id) + ` mtime>=` + utils.TSToTime(info.Mtime).Format(time.RFC3339Nano),
506511
})

services/search/pkg/service/event/service.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func New(ctx context.Context, stream raw.Stream, logger log.Logger, tp trace.Tra
7474
}
7575

7676
svc.indexSpaceDebouncer = NewSpaceDebouncer(time.Duration(debounceDuration)*time.Millisecond, 30*time.Second, func(id *provider.StorageSpaceId) {
77-
if err := svc.index.IndexSpace(id); err != nil {
77+
if err := svc.index.IndexSpace(id, false); err != nil {
7878
svc.log.Error().Err(err).Interface("spaceID", id).Msg("error while indexing a space")
7979
}
8080
}, svc.log)

services/search/pkg/service/grpc/v0/service.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ func (s Service) Search(ctx context.Context, in *searchsvc.SearchRequest, out *s
121121
// IndexSpace (re)indexes all resources of a given space.
122122
func (s Service) IndexSpace(_ context.Context, in *searchsvc.IndexSpaceRequest, _ *searchsvc.IndexSpaceResponse) error {
123123
if in.GetSpaceId() != "" {
124-
return s.searcher.IndexSpace(&provider.StorageSpaceId{OpaqueId: in.GetSpaceId()})
124+
return s.searcher.IndexSpace(&provider.StorageSpaceId{OpaqueId: in.GetSpaceId()}, in.GetForceReindex())
125125
}
126126

127127
// index all spaces instead
@@ -145,7 +145,7 @@ func (s Service) IndexSpace(_ context.Context, in *searchsvc.IndexSpaceRequest,
145145
}
146146

147147
for _, space := range resp.GetStorageSpaces() {
148-
if err := s.searcher.IndexSpace(space.GetId()); err != nil {
148+
if err := s.searcher.IndexSpace(space.GetId(), in.GetForceReindex()); err != nil {
149149
return err
150150
}
151151
}

0 commit comments

Comments
 (0)