Skip to content

Commit 8d32123

Browse files
authored
Merge pull request #72 from matt-in-a-hat/fix-reindex-all-job-batching-logic
fix AlgoliaReindexAllJob to batch into steps
2 parents c584e2a + ef1a478 commit 8d32123

File tree

1 file changed

+86
-72
lines changed

1 file changed

+86
-72
lines changed

src/Jobs/AlgoliaReindexAllJob.php

+86-72
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
namespace Wilr\Silverstripe\Algolia\Jobs;
44

5-
use Exception;
65
use SilverStripe\Core\Config\Configurable;
76
use SilverStripe\Core\Injector\Injector;
87
use SilverStripe\ORM\DataObject;
8+
use stdClass;
99
use Symbiote\QueuedJobs\Services\AbstractQueuedJob;
1010
use Symbiote\QueuedJobs\Services\QueuedJob;
1111
use Throwable;
@@ -20,8 +20,6 @@ class AlgoliaReindexAllJob extends AbstractQueuedJob implements QueuedJob
2020
{
2121
use Configurable;
2222

23-
public $indexData = [];
24-
2523
/**
2624
* An optional array of default filters to apply when doing the reindex
2725
* e.g. when indexing Page subclasses you may wish to exclude expired pages.
@@ -59,104 +57,120 @@ public function setup()
5957
$this->totalSteps = 0;
6058
$this->currentStep = 0;
6159

60+
$indexData = [];
61+
6262
$filters = $this->config()->get('reindexing_default_filters');
63+
$batchSize = $task->config()->get('batch_size');
64+
$batching = $this->config()->get('use_batching');
6365

6466
// find all classes we have to index and add them to the indexData map
6567
// in groups of batch size. This setup operation does the heavy lifting
6668
// and process simply handles one batch at a time.
67-
foreach ($algoliaService->indexes as $index) {
69+
foreach ($algoliaService->indexes as $indexName => $index) {
6870
$classes = (isset($index['includeClasses'])) ? $index['includeClasses'] : null;
6971
$indexFilters = (isset($index['includeFilters'])) ? $index['includeFilters'] : null;
7072

7173
if ($classes) {
72-
foreach ($classes as $candidate) {
73-
$filter = (isset($filters[$candidate])) ? $filters[$candidate] : '';
74-
$count = 0;
75-
76-
foreach ($task->getItems($candidate, $filter, $indexFilters)->column('ID') as $id) {
77-
$count++;
78-
79-
if (!isset($this->indexData[$candidate])) {
80-
$this->indexData[$candidate] = [];
74+
foreach ($classes as $class) {
75+
$filter = (isset($filters[$class])) ? $filters[$class] : '';
76+
$ids = $task->getItems($class, $filter, $indexFilters)->column('ID');
77+
if (count($ids)) {
78+
if ($batching && $batchSize > 1) {
79+
foreach (array_chunk($ids, $batchSize) as $chunk) {
80+
$indexData[] = [
81+
'indexName' => $indexName,
82+
'class' => $class,
83+
'ids' => $chunk,
84+
];
85+
}
86+
} else {
87+
foreach ($ids as $id) {
88+
$indexData[] = [
89+
'indexName' => $indexName,
90+
'class' => $class,
91+
'id' => $id,
92+
];
93+
}
8194
}
82-
83-
$this->indexData[$candidate][] = $id;
84-
$this->totalSteps++;
95+
$this->addMessage('[' . $indexName . '] Indexing ' . count($ids) . ' ' . $class . ' instances with filters: ' . ($filter ?: '(none)'));
96+
} else {
97+
$this->addMessage('[' . $indexName . '] 0 ' . $class . ' instances to index with filters: ' . ($filter ?: '(none) - skipping.'));
8598
}
86-
87-
$this->addMessage('Indexing ' . $count . ' ' . $candidate . ' instances with filters ' . $filter);
8899
}
89100
}
90101
}
102+
$this->totalSteps += count($indexData);
103+
// Store in jobData to get written to the job descriptor in DB
104+
if (!$this->jobData) {
105+
$this->jobData = new stdClass();
106+
}
107+
$this->jobData->IndexData = $indexData;
91108
}
92109

93110
/**
94-
* Index data is in groups of 20.
111+
* Index data is an array of steps to process. Each step looks like this with batching:
112+
* [
113+
* 'indexName' => string,
114+
* 'class' => string,
115+
* 'ids' => array of int,
116+
* ]
117+
* or this without batching:
118+
* [
119+
* 'indexName' => string,
120+
* 'class' => string,
121+
* 'id' => int,
122+
* ]
123+
* We process one step / batch / id per call.
95124
*/
96125
public function process()
97126
{
98-
$remainingChildren = $this->indexData;
99-
100-
if (!$remainingChildren || empty($remainingChildren)) {
127+
if ($this->currentStep >= $this->totalSteps) {
101128
$this->isComplete = true;
102129
$this->addMessage('Done!');
103-
130+
return;
131+
}
132+
$indexData = isset($this->jobData->IndexData) ? $this->jobData->IndexData : null;
133+
if (!isset($indexData[$this->currentStep])) {
134+
$this->isComplete = true;
135+
$this->addMessage('Somehow we ran out of job data before all steps were processed. So we will assume we are done!');
136+
$this->addMessage('Dumping out the jop data for debug purposes: ' . json_encode($indexData));
104137
return;
105138
}
106139

107-
$algoliaService = Injector::inst()->create(AlgoliaService::class);
108-
$task = new AlgoliaReindex();
109-
110-
$batchSize = $task->config()->get('batch_size');
111-
$batching = $this->config()->get('use_batching');
112-
113-
foreach ($remainingChildren as $class => $ids) {
114-
foreach ($algoliaService->indexes as $indexName => $index) {
115-
$classes = (isset($index['includeClasses'])) ? $index['includeClasses'] : [];
116-
117-
if (!in_array($class, $classes)) {
118-
continue;
119-
}
120-
121-
$take = array_slice($ids, 0, $batchSize);
122-
$this->indexData[$class] = array_slice($ids, $batchSize);
123-
124-
if (!empty($take)) {
125-
$this->currentStep += count($take);
126-
$errors = [];
127-
128-
try {
129-
if ($batching) {
130-
if ($task->indexItems($indexName, DataObject::get($class)->filter('ID', $take), false)) {
131-
$this->addMessage('Successfully indexing ' . $class . ' [' . implode(', ', $take) . ']');
132-
} else {
133-
$this->addMessage('Error indexing ' . $class . ' [' . implode(', ', $take) . ']');
134-
}
135-
} else {
136-
$items = DataObject::get($class)->filter('ID', $take);
137-
138-
foreach ($items as $item) {
139-
if ($task->indexItem($item)) {
140-
$this->addMessage('Successfully indexed ' . $class . ' [' . $item->ID . ']');
141-
} else {
142-
$this->addMessage('Error indexing ' . $class . ' [' . $item->ID . ']');
143-
}
144-
}
145-
}
146-
147-
$errors = $task->getErrors();
148-
} catch (Throwable $e) {
149-
$errors[] = $e->getMessage();
150-
}
151-
152-
if (!empty($errors)) {
153-
$this->addMessage(implode(', ', $errors));
154-
$task->clearErrors();
140+
$stepData = $indexData[$this->currentStep];
141+
$class = $stepData['class'];
142+
143+
try {
144+
$task = new AlgoliaReindex();
145+
146+
if (isset($stepData['ids'])) {
147+
$summary = $task->indexItems($stepData['indexName'], DataObject::get($class)->filter('ID', $stepData['ids']), false);
148+
$this->addMessage($summary);
149+
} else {
150+
$item = DataObject::get($class)->byID($stepData['id']);
151+
if ($item) {
152+
if (min($item->invokeWithExtensions('canIndexInAlgolia')) === false) {
153+
$this->addMessage('Skipped indexing ' . $class . ' ' . $item->ID);
154+
} else if ($task->indexItem($item)) {
155+
$this->addMessage('Successfully indexed ' . $class . ' ' . $item->ID);
156+
} else {
157+
$this->addMessage('Error indexing ' . $class . ' ' . $item->ID);
155158
}
156159
} else {
157-
unset($this->indexData[$class]);
160+
$this->addMessage('Error indexing ' . $class . ' ' . $stepData['id'] . ' - failed to load item from DB');
158161
}
159162
}
163+
164+
$errors = $task->getErrors();
165+
} catch (Throwable $e) {
166+
$errors[] = $e->getMessage();
167+
}
168+
169+
if (!empty($errors)) {
170+
$this->addMessage(implode(', ', $errors));
171+
$task->clearErrors();
160172
}
173+
174+
$this->currentStep++;
161175
}
162176
}

0 commit comments

Comments
 (0)