Skip to content

Commit b59d21d

Browse files
committed
Merge branch 'feature-dedupe-serialize-index' into temp-feature-all-1.22
2 parents e0d0c62 + ef97a17 commit b59d21d

File tree

4 files changed

+32
-70
lines changed

4 files changed

+32
-70
lines changed

backend/btrixcloud/colls.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ async def add_collection(self, org: Organization, coll_in: CollIn):
149149

150150
slug = coll_in.slug or slug_from_name(coll_in.name)
151151

152-
dedupeIndex = DedupeIndexStats() if coll_in.hasDedupeIndex else None
152+
indexStats = DedupeIndexStats() if coll_in.hasDedupeIndex else None
153153

154154
coll = Collection(
155155
id=coll_id,
@@ -163,14 +163,11 @@ async def add_collection(self, org: Organization, coll_in: CollIn):
163163
access=coll_in.access,
164164
defaultThumbnailName=coll_in.defaultThumbnailName,
165165
allowPublicDownload=coll_in.allowPublicDownload,
166-
dedupeIndex=dedupeIndex,
166+
indexStats=indexStats,
167167
)
168168
try:
169169
await self.collections.insert_one(coll.to_dict())
170170
await self.clear_org_previous_slugs_matching_slug(slug, org)
171-
# create collection index
172-
# if coll.dedupeIndex:
173-
# await self.crawl_manager.create_coll_index(coll)
174171

175172
if crawl_ids:
176173
await self.crawl_ops.add_to_collection(crawl_ids, coll_id, org)
@@ -214,11 +211,11 @@ async def update_collection(
214211

215212
query["modified"] = dt_now()
216213

217-
if update.hasDedupeIndex and not coll.dedupeIndex:
218-
query["dedupeIndex"] = DedupeIndexStats().dict()
214+
if update.hasDedupeIndex is True and not coll.indexStats:
215+
query["indexStats"] = DedupeIndexStats().dict()
219216
await self.update_coll_index(coll, org.id)
220217

221-
elif not update.hasDedupeIndex and coll.dedupeIndex:
218+
elif update.hasDedupeIndex is False and coll.indexStats:
222219
await self.delete_coll_index(coll, org)
223220

224221
db_update = {"$set": query}
@@ -541,7 +538,7 @@ async def list_collections(
541538
match_query["name"] = {"$regex": regex_pattern, "$options": "i"}
542539

543540
if has_dedupe_index is not None:
544-
match_query["dedupeIndex"] = {"$ne" if has_dedupe_index else "$eq": None}
541+
match_query["indexStats"] = {"$ne" if has_dedupe_index else "$eq": None}
545542

546543
if public_colls_out:
547544
match_query["access"] = CollAccessType.PUBLIC
@@ -688,6 +685,11 @@ async def get_collection_crawl_ids(
688685

689686
async def update_coll_index(self, coll: Collection, oid: UUID, is_purge=False):
690687
"""create index import job"""
688+
689+
# skip the index update when the collection has no crawls, unless purging removed crawls
690+
if not is_purge and not coll.crawlCount:
691+
return
692+
691693
crawler_image = self.crawl_ops.crawl_configs.get_channel_crawler_image(
692694
self.dedupe_importer_channel
693695
)
@@ -710,7 +712,7 @@ async def delete_coll_index(self, coll: Collection, org: Organization):
710712
"""delete coll dedupe index, if possible"""
711713

712714
# if index is not idle, can't delete it yet
713-
if coll.dedupeIndex and coll.indexState != "idle":
715+
if coll.indexStats and coll.indexState != "idle":
714716
raise HTTPException(status_code=400, detail="dedupe_index_is_in_use")
715717

716718
if coll.indexFile:
@@ -768,7 +770,7 @@ async def update_dedupe_index_stats(
768770
"""update dedupe index stats for specified collection"""
769771
self.collections.find_one_and_update(
770772
{"_id": coll_id},
771-
{"$set": {"dedupeIndex": stats.dict() if stats else None}},
773+
{"$set": {"indexStats": stats.dict() if stats else None}},
772774
)
773775

774776
async def update_dedupe_index_info(
@@ -785,7 +787,7 @@ async def update_dedupe_index_info(
785787
query["indexFile"] = index_file.model_dump()
786788

787789
res = self.collections.find_one_and_update(
788-
{"_id": coll_id, "dedupeIndex": {"$ne": None}},
790+
{"_id": coll_id, "indexStats": {"$ne": None}},
789791
{"$set": query},
790792
)
791793
return res is not None
@@ -883,7 +885,7 @@ async def update_collection_dates(
883885
latest_ts = None
884886

885887
# if update_index is set, update the dedupe index if it exists
886-
if update_index and coll.dedupeIndex:
888+
if update_index and coll.indexStats:
887889
await self.update_coll_index(coll, oid)
888890

889891
match_query = {
@@ -952,7 +954,7 @@ async def add_successful_crawl_to_collections(
952954
async def purge_dedupe_index(self, coll_id: UUID, org: Organization):
953955
"""purge dedupe index on collection, raise exception if no index or not ready"""
954956
coll = await self.get_collection(coll_id, org.id)
955-
if not coll.dedupeIndex:
957+
if not coll.indexStats:
956958
raise HTTPException(status_code=400, detail="no_dedupe_index_on_collection")
957959

958960
if coll.indexState not in ("ready", "idle"):

backend/btrixcloud/models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1668,7 +1668,7 @@ class Collection(BaseMongoModel):
16681668
indexFile: Optional[DedupeIndexFile] = None
16691669
indexState: Optional[TYPE_DEDUPE_INDEX_STATES] = None
16701670

1671-
dedupeIndex: Optional[DedupeIndexStats] = None
1671+
indexStats: Optional[DedupeIndexStats] = None
16721672

16731673

16741674
# ============================================================================
@@ -1736,7 +1736,7 @@ class CollOut(BaseMongoModel):
17361736
indexLastSavedAt: Optional[datetime] = None
17371737
indexState: Optional[TYPE_DEDUPE_INDEX_STATES] = None
17381738

1739-
dedupeIndex: Optional[DedupeIndexStats] = None
1739+
indexStats: Optional[DedupeIndexStats] = None
17401740

17411741

17421742
# ============================================================================

backend/btrixcloud/operator/collindexes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ async def check_redis_saved(
391391
finished_at = None
392392
finished_at_str = ""
393393
try:
394-
finished_at_str = redis_pod["status"]["initContainerStatuses"][1][
394+
finished_at_str = redis_pod["status"]["initContainerStatuses"][0][
395395
"state"
396396
]["terminated"]["finishedAt"]
397397
# pylint: disable=bare-except

chart/app-templates/redis.yaml

Lines changed: 13 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -141,56 +141,6 @@ spec:
141141
- "res=$(redis-cli ping); [[ $res = 'PONG' ]]"
142142

143143
initContainers:
144-
{% if load_dump %}
145-
- name: rclone-load
146-
image: rclone/rclone:latest
147-
148-
args: ["-vv", "copyto", "--checksum", "remote:{{ remote_file_path }}", "/data/{{ local_file }}"]
149-
150-
volumeMounts:
151-
- name: redis-data
152-
mountPath: /data
153-
154-
env:
155-
- name: RCLONE_CONFIG_REMOTE_TYPE
156-
value: "s3"
157-
158-
- name: RCLONE_CONFIG_REMOTE_ACCESS_KEY_ID
159-
valueFrom:
160-
secretKeyRef:
161-
name: "{{ storage_secret_name }}"
162-
key: STORE_ACCESS_KEY
163-
164-
- name: RCLONE_CONFIG_REMOTE_SECRET_ACCESS_KEY
165-
valueFrom:
166-
secretKeyRef:
167-
name: "{{ storage_secret_name }}"
168-
key: STORE_SECRET_KEY
169-
170-
- name: RCLONE_CONFIG_REMOTE_REGION
171-
valueFrom:
172-
secretKeyRef:
173-
name: "{{ storage_secret_name }}"
174-
key: STORE_REGION
175-
176-
- name: RCLONE_CONFIG_REMOTE_PROVIDER
177-
valueFrom:
178-
secretKeyRef:
179-
name: "{{ storage_secret_name }}"
180-
key: STORE_S3_PROVIDER
181-
182-
- name: RCLONE_CONFIG_REMOTE_ENDPOINT
183-
value: "{{ storage_endpoint }}"
184-
185-
resources:
186-
limits:
187-
memory: "200Mi"
188-
189-
requests:
190-
memory: "200Mi"
191-
cpu: "50m"
192-
{% endif %}
193-
194144
{% if save_dump %}
195145
- name: rclone-save
196146
image: rclone/rclone:latest
@@ -210,11 +160,11 @@ spec:
210160
exec:
211161
command: ["touch", "/tmp/done"]
212162

213-
volumeMounts:
163+
volumeMounts: &rclone_volumes
214164
- name: redis-data
215165
mountPath: /data
216166

217-
env:
167+
env: &rclone_env
218168
- name: RCLONE_CONFIG_REMOTE_TYPE
219169
value: "s3"
220170

@@ -245,14 +195,24 @@ spec:
245195
- name: RCLONE_CONFIG_REMOTE_ENDPOINT
246196
value: "{{ storage_endpoint }}"
247197

248-
resources:
198+
resources: &rclone_resources
249199
limits:
250200
memory: "200Mi"
251201

252202
requests:
253203
memory: "200Mi"
254204
cpu: "50m"
255205

206+
{% if load_dump %}
207+
- name: rclone-load
208+
image: rclone/rclone:latest
209+
210+
args: ["-vv", "copyto", "--checksum", "remote:{{ remote_file_path }}", "/data/{{ local_file }}"]
211+
212+
volumeMounts: *rclone_volumes
213+
env: *rclone_env
214+
resources: *rclone_resources
215+
{% endif %}
256216
{% endif %}
257217

258218
{% endif %}

0 commit comments

Comments
 (0)