-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.js
More file actions
161 lines (145 loc) · 5.73 KB
/
index.js
File metadata and controls
161 lines (145 loc) · 5.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
// Logging bootstrap. pino-debug should be required before any module that
// uses `debug`, so debug() output is intercepted and routed through pino.
const pinoDebug = require('pino-debug');
const pino = require('pino');
// NOTE(review): pino.pretty() is the legacy pretty-printing API; newer pino
// releases moved it to the separate `pino-pretty` package — confirm the
// pinned pino version still ships it.
const pretty = pino.pretty();
pretty.pipe(process.stdout);
// LEVEL env var overrides the default 'info' log level.
const log = pino({level: process.env.LEVEL || 'info', name: 'm2e', safe: true}, pretty);
pinoDebug(log, {});
// Visual separator + timestamp so repeated runs are easy to spot in the log.
console.log('\n\n\n=============');
console.log('=============');
console.log(new Date());
console.log('=============');
console.log('=============');
// CLI usage: node index.js <config.js> [resync]
const config = process.argv[2];
const resync = process.argv[3] === 'resync';
if (!config) {
log.error('Please give a config js file');
// top-level `return` is legal in a CommonJS module and aborts the script
return;
}
// Converts a camelCase name to dash-case: a dash is inserted between any
// letter and a following uppercase letter, then the whole string is
// lowercased (e.g. 'userProfile' -> 'user-profile'). Used to derive
// Elasticsearch index names from collection names.
const camelCaseToDash = str => {
  let out = '';
  for (let i = 0; i < str.length; i++) {
    out += str[i];
    const next = str[i + 1];
    if (next !== undefined && /[a-zA-Z]/.test(str[i]) && /[A-Z]/.test(next)) out += '-';
  }
  return out.toLowerCase();
};
// Loads a config module. Absolute paths are required as-is; anything else
// is resolved relative to this file. A failed load is logged and yields an
// empty object, which the caller treats as an invalid config.
const requireFile = file => {
  const target = file.indexOf('/') === 0 ? file : './' + file;
  try {
    return require(target);
  }
  catch (e) {
    log.error(e);
    return {};
  }
};
const {MongoClient} = require('mongodb');
const elasticsearch = require('elasticsearch');
// The config module is expected to export `mongo` (url, db, collections,
// optional limit) and `es` (optional syncDataIndex). requireFile returns {}
// on load failure, which the check below rejects as invalid.
const {mongo, es} = requireFile(config);
if (!mongo || !es) {
log.error('Please give a valid config js file');
return;
}
// Page size for each mongo fetch.
// NOTE(review): `||` means an explicit `limit: 0` silently falls back to 1000.
const limit = mongo.limit || 1000;
// Elasticsearch index holding per-collection sync watermarks (createdAt/updatedAt).
const esIndex = es.syncDataIndex || 'm2e-synced-till';
// Returns a shallow copy of the document with the mongo-internal `_id` and
// `version` fields removed. The caller's object is never mutated.
const deleteId = ({_id, version, ...rest}) => rest;
/**
 * Default document transform: renames keys that start with an underscore
 * ('_foo' -> 'foo'), since Elasticsearch reserves leading-underscore field
 * names. On a name collision the renamed key gets a numeric suffix
 * ('foo_1', then 'foo_1_2', ...). Works on a shallow copy, so the caller's
 * object is never mutated. Used when a collection config supplies no
 * custom `transform`.
 */
const transform = ({...doc}) => {
  Object.keys(doc).forEach(function(k) {
    if (k.indexOf('_') === 0) {
      let newK = k.substr(1), c = 1;
      // Own-property check: the previous truthy test (`while (doc[newK])`)
      // silently overwrote existing keys holding falsy values (0, '', false, null).
      while (Object.prototype.hasOwnProperty.call(doc, newK)) newK = newK + '_' + c++;
      doc[newK] = doc[k];
      delete doc[k];
    }
  });
  return doc;
};
// Module-level state shared across recursive sync() calls: `cnt` counts
// pagination passes for the collection currently being synced, and
// `collectionIndExists` caches whether the target index already existed
// (checked only on the first pass). Because of this shared state,
// collections must be processed strictly sequentially.
let cnt = 1, collectionIndExists = false;
/**
 * Syncs one mongo collection into Elasticsearch, `limit` docs per pass,
 * recursing until the collection is drained. Two phases:
 *   1. new docs — bulk `create`, paged from a persisted createdAt watermark;
 *   2. updated docs — bulk `update`, paged from an updatedAt watermark;
 *      skipped entirely when no watermark doc exists yet (first ever run).
 * Watermarks are stored in the `esIndex` index, one doc per collection name.
 * Relies on the module-level `cnt` / `collectionIndExists` state above.
 */
async function sync(db, ec, collection) {
const c = collection, cKey = c.createdAtKey || 'createdAt', index = camelCaseToDash(c.index || c.name), type = c.type || c.name;
// check if collection exists in elastic - only first time. If it does not exist, we will not do any update operations
if (cnt === 1) collectionIndExists = await ec.indices.exists({index});
// Watermark doc (one per collection) lives in `esIndex`; absent on the first ever run.
const statusDocExists = await ec.indices.exists({index: esIndex}) && await ec.exists({index: esIndex, type: esIndex, id: c.name});
let statusDoc = statusDocExists ? await ec.get({index: esIndex, type: esIndex, id: c.name}) : null;
let till = statusDoc ? statusDoc._source : {};
// new docs - start from 7 days before to ensure everything has been synced
// NOTE(review): despite the comment above, the query starts exactly at the
// stored watermark (no 7-day lookback) — confirm which is intended.
let query = cnt === 1 && resync ? {} : till.createdAt ? {[cKey]: {$gte: new Date(till.createdAt)}} : {};
log.debug('query', query);
let docs = await db.collection(c.name).find(query).limit(limit).toArray();
// If this page's last doc sits within 1s of the watermark, the page was
// presumably already indexed on the previous pass — skip ahead one page.
if (docs.length && cnt > 1 && Math.abs(docs[docs.length - 1][cKey].getTime() - new Date(till.createdAt).getTime()) <= 1000) {
log.debug('skipping', limit);
docs = await db.collection(c.name).find(query).skip(limit).limit(limit).toArray();
}
// Total matches decides whether another recursive pass is needed.
// NOTE(review): the 2nd argument to count() is an options object; `{_id: 1}`
// is not a valid count option and appears to be ignored by the driver.
let totalDocs = await db.collection(c.name).count(query, {_id: 1});
log.debug('number of docs', docs.length);
if (docs.length) {
log.info('');
log.info('');
log.info('--------------------');
log.info('Fetching ' + c.name + '. Iteration: ' + cnt + '. Docs: ' + docs.length);
// Bulk body: alternating `create` action (mongo _id reused as ES _id) and
// transformed source (with _id/version stripped).
const body = docs.reduce((x, d) => ([...x, {create: {_id: d._id}}, {...(c.transform || transform)(deleteId(d))}]), []);
// Last element of `body` is the final source doc, i.e. the newest createdAt on this page.
let createdAt = body[body.length - 1][cKey];
// if (Math.abs(createdAt.getTime() - new Date(till.createdAt).getTime()) < 86400000 && cnt > 1) createdAt = new Date(new Date(till.updatedAt).setHours(24));
log.debug('bulk insert');
// NOTE(review): creates are bulked under the hard-coded type 't', while the
// update phase below bulks under `type` (c.type || c.name). With typed ES
// indices the update phase cannot find docs created here — confirm which
// type is correct.
await ec.bulk({index, type: 't', body});
log.debug('update timestamp in the elastic\'s syncDataIndex', createdAt);
// Advance the createdAt watermark; when the target index was newly created,
// also move updatedAt forward so the update phase starts from the same point.
await ec.update({index: esIndex, type: esIndex, id: c.name, body: {
script: {
inline: 'ctx._source.createdAt = params.createdAt; if (!params.collectionIndExists) ctx._source.updatedAt = params.createdAt',
params: {createdAt, collectionIndExists}
},
upsert: {createdAt, updatedAt: createdAt}
}});
// More matching docs than one page: recurse for the next page.
if (totalDocs > limit) {
cnt++;
log.debug('.... more data');
await sync(db, ec, collection);
return;
}
}
else cnt = 1;
// updated docs
// First ever run: no watermark doc yet, so there is nothing to diff updates against.
if (!statusDocExists) return;
log.debug('..... Updating');
const uKey = c.updatedAtKey || 'updatedAt';
// Re-read the watermark — the create phase above may have just advanced it.
statusDoc = await ec.get({index: esIndex, type: esIndex, id: c.name});
till = statusDoc._source;
query = till.updatedAt ? {[uKey]: {$gte: new Date(till.updatedAt)}} : {};
docs = await db.collection(c.name).find(query).limit(limit).toArray();
// Same 1-second-overlap page-skip heuristic as in the create phase.
if (docs.length && cnt > 1 && Math.abs(docs[docs.length - 1][uKey].getTime() - new Date(till.updatedAt).getTime()) <= 1000) {
log.debug('skipping', limit);
docs = await db.collection(c.name).find(query).skip(limit).limit(limit).toArray();
}
totalDocs = await db.collection(c.name).count(query, {_id: 1});
log.debug('query', query);
log.debug('number of docs', docs.length);
if (docs.length) {
log.info('Updating ' + c.name + '. Iteration: ' + cnt + '. Docs: ' + docs.length);
// Partial updates: `doc` merge keyed by the mongo _id.
const body = docs.reduce((x, d) => ([...x, {update: {_id: d._id}}, {doc: deleteId((c.transform || transform)(d))}]), []);
let updatedAt = docs[docs.length - 1][uKey];
// if (Math.abs(updatedAt.getTime() - new Date(till.updatedAt).getTime()) < 86400000 && cnt > 1) updatedAt = new Date(new Date(till.updatedAt).setHours(24));
log.debug('bulk insert');
await ec.bulk({index, type, body});
log.debug('update timestamp in the elastic\'s syncDataIndex', updatedAt);
await ec.update({index: esIndex, type: esIndex, id: c.name, body: {
script: {
inline: 'ctx._source.updatedAt = params.updatedAt',
params: {updatedAt}
}
}});
// NOTE(review): sync() declares only 3 parameters — the 4th argument is
// ignored, but the `cnt++` side effect still advances the page counter
// before the recursive call.
if (totalDocs > limit) await sync(db, ec, collection, cnt++);
}
else cnt = 1;
}
/**
 * Entry point: connects to MongoDB and Elasticsearch, then syncs each
 * configured collection sequentially (order matters — sync() keeps its
 * pagination state in the module-level `cnt`). A failure in one collection
 * is logged and does not stop the others.
 */
async function main() {
  const mc = await MongoClient.connect(mongo.url);
  // Honour an `es.host` from the config file; fall back to the previous
  // hard-coded default for backward compatibility.
  const ec = new elasticsearch.Client({host: es.host || 'localhost:9200', requestTimeout: 180000});
  try {
    const db = mc.db(mongo.db);
    for (let i = 0; i < mongo.collections.length; i++) {
      try {
        await sync(db, ec, mongo.collections[i]);
      }
      catch (e) {
        // keep going: one failing collection must not block the rest
        log.error(e);
      }
    }
    log.info('Finished!');
  }
  finally {
    // always release the mongo connection, even on an unexpected failure
    await mc.close();
  }
}
// Handle rejections (e.g. a failed connect) instead of leaving the
// top-level promise floating.
main().catch(e => log.error(e));