-
-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathcollections_and_documents.rb
285 lines (251 loc) · 7.29 KB
/
collections_and_documents.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
# frozen_string_literal: true
##
# These examples walk you through all the operations you can do on a collection and a document
# Search is specifically covered in another file in the examples folder
require_relative 'client_initialization'
##
# Create a collection
# The schema is a plain Hash: the collection name, its typed fields, and the default sorting field
schema = {
  'name' => 'companies',
  'fields' => [
    { 'name' => 'company_name', 'type' => 'string' },
    { 'name' => 'num_employees', 'type' => 'int32' },
    { 'name' => 'country', 'type' => 'string', 'facet' => true }
  ],
  'default_sorting_field' => 'num_employees'
}

# Keep the example re-runnable: drop any `companies` collection left behind by an earlier run
begin
  @typesense.collections['companies'].delete
rescue Typesense::Error::ObjectNotFound
  # The collection does not exist yet, so there is nothing to clean up
end

ap @typesense.collections.create(schema)

# {
#                      "name" => "companies",
#                    "fields" => [
#     [0] {
#         "name" => "company_name",
#         "type" => "string"
#     },
#     [1] {
#         "name" => "num_employees",
#         "type" => "int32"
#     },
#     [2] {
#          "name" => "country",
#          "type" => "string",
#         "facet" => true
#     }
#   ],
#     "default_sorting_field" => "num_employees"
# }
##
# Retrieve a collection
# Typesense clusters are eventually consistent: wait a few hundred ms so the collection created
# above exists on all nodes before reading it back
sleep 0.5
companies_collection = @typesense.collections['companies']
ap companies_collection.retrieve

# {
#     "default_sorting_field" => "num_employees",
#                    "fields" => [
#         [0] {
#             "facet" => false,
#              "name" => "company_name",
#              "type" => "string"
#         },
#         [1] {
#             "facet" => false,
#              "name" => "num_employees",
#              "type" => "int32"
#         },
#         [2] {
#             "facet" => true,
#              "name" => "country",
#              "type" => "string"
#         }
#     ],
#                      "name" => "companies",
#             "num_documents" => 0
# }

##
# Retrieve all collections
ap @typesense.collections.retrieve

# [
#     [0] {
#         "default_sorting_field" => "num_employees",
#                        "fields" => [
#             [0] {
#                 "facet" => false,
#                  "name" => "company_name",
#                  "type" => "string"
#             },
#             [1] {
#                 "facet" => false,
#                  "name" => "num_employees",
#                  "type" => "int32"
#             },
#             [2] {
#                 "facet" => true,
#                  "name" => "country",
#                  "type" => "string"
#             }
#         ],
#                          "name" => "companies",
#                 "num_documents" => 0
#     }
# ]
##
# Truncate a collection
# Truncation operates on the documents of an existing collection, so it has to run while
# `companies` still exists, i.e. before the deletion example below.
# Truncation returns the number of documents deleted
truncate_result = @typesense.collections['companies'].documents.truncate
ap truncate_result

# {
#     "num_deleted" => 125
# }
# (Sample output from a populated collection; this script has not indexed any documents yet at this point.)

##
# Delete a collection
# Deletion returns the schema of the collection after deletion
deleted_collection = @typesense.collections['companies'].delete
ap deleted_collection

# {
#     "default_sorting_field" => "num_employees",
#                    "fields" => [
#         [0] {
#             "facet" => false,
#              "name" => "company_name",
#              "type" => "string"
#         },
#         [1] {
#             "facet" => false,
#              "name" => "num_employees",
#              "type" => "int32"
#         },
#         [2] {
#             "facet" => true,
#              "name" => "country",
#              "type" => "string"
#         }
#     ],
#                      "name" => "companies",
#             "num_documents" => 0
# }

# Let's create the collection again for use in our remaining examples
@typesense.collections.create(schema)
##
# Create (index) a document
new_company = {
  'id' => '124',
  'company_name' => 'Stark Industries',
  'num_employees' => 5215,
  'country' => 'USA'
}
company_documents = @typesense.collections['companies'].documents
indexed_document = company_documents.create(new_company)
ap indexed_document

# {
#      "company_name" => "Stark Industries",
#           "country" => "USA",
#                "id" => "124",
#     "num_employees" => 5215
# }

# You can also upsert a document, which will update the document if it already exists
# or create a new one if it doesn't exist.
# Here the document returned by the create call above is sent back as the upsert payload.
ap company_documents.upsert(indexed_document)
##
# Retrieve a document
# Typesense clusters are eventually consistent: wait a few hundred ms so the document
# exists on all nodes before reading it back
sleep 0.5
stark_industries = @typesense.collections['companies'].documents['124']
ap stark_industries.retrieve

# {
#      "company_name" => "Stark Industries",
#           "country" => "USA",
#                "id" => "124",
#     "num_employees" => 5215
# }

##
# Update a document. Unlike upsert, update will error out if the doc doesn't already exist.
ap stark_industries.update('num_employees' => 5500)

# {
#                "id" => "124",
#     "num_employees" => 5500
# }

# This should error out, since document 145 doesn't exist
# document = @typesense.collections['companies'].documents['145'].update(
#   'num_employees' => 5500
# )
# ap document

##
# Delete a document
# Deleting a document, returns the document after deletion
ap stark_industries.delete

# {
#      "company_name" => "Stark Industries",
#           "country" => "USA",
#                "id" => "124",
#     "num_employees" => 5500
# }
# Let's bulk create two documents again for use in our remaining examples
documents = [
  { 'id' => '124', 'company_name' => 'Stark Industries', 'num_employees' => 5215, 'country' => 'USA' },
  { 'id' => '125', 'company_name' => 'Acme Corp', 'num_employees' => 1002, 'country' => 'France' }
]
ap @typesense.collections['companies'].documents.import(documents)

## If you already have documents in JSONL format, you can also pass it directly to #import,
## to avoid the JSON parsing overhead:
# @typesense.collections['companies'].documents.import(documents_in_jsonl_format)

## You can bulk upsert documents, by adding an upsert action option to #import
# The same array is re-sent with one extra document appended to it
documents.push(
  { 'id' => '126', 'company_name' => 'Stark Industries 2', 'num_employees' => 200, 'country' => 'USA' }
)
ap @typesense.collections['companies'].documents.import(documents, action: :upsert)

## You can bulk update documents, by adding an update action option to #import
# `action: update` will throw an error if the document doesn't already exist
# The first entry appended below will error out, since id: 1200 doesn't exist
documents.push(
  { 'id' => '1200', 'country' => 'USA' },
  { 'id' => '126', 'num_employees' => 300 }
)
ap @typesense.collections['companies'].documents.import(documents, action: :update)

## You can also bulk delete documents, using filter_by fields:
ap @typesense.collections['companies'].documents.delete(filter_by: 'num_employees:>100')
##
# Export all documents in a collection in JSON Lines format
# We use JSON Lines format for performance reasons. You can choose to parse selected lines as needed,
# by splitting on \n.
# Typesense clusters are eventually consistent: wait a few hundred ms so the preceding writes
# reach all nodes before reading
sleep 0.5
ap @typesense.collections['companies'].documents.export

# "{\"company_name\":\"Stark Industries\",\"country\":\"USA\",\"id\":\"124\",\"num_employees\":5215}\n{\"company_name\":\"Acme Corp\",\"country\":\"France\",\"id\":\"125\",\"num_employees\":1002}"
# NOTE(review): the bulk delete earlier in this script filters on 'num_employees:>100', which appears to
# match every document indexed here, so the export is likely empty at this point. The sample above
# illustrates the shape of a non-empty export - confirm against a live run.

##
# Cleanup
# Drop the collection
@typesense.collections['companies'].delete