Flatgeobuf #881

Merged: 29 commits, merged on Jun 27, 2024
Commits (all by bchapuis)
1f71d24  Add support for flatgeobuf (Jun 17, 2024)
9cfaf56  Improve implementation (Jun 18, 2024)
98d85e5  Improve implementation (Jun 19, 2024)
55adcfd  Improve naming and fix tests (Jun 24, 2024)
34a82af  Minor refactoring (Jun 24, 2024)
e31d384  Fix dependencies (Jun 24, 2024)
42839a2  Use IllegalArgumentException (Jun 25, 2024)
783b7b6  Refactor the code (Jun 25, 2024)
a5cdcaf  Make picocli and jackson insensitive with enums (Jun 25, 2024)
078e075  Add channel based api (Jun 25, 2024)
28ae026  Improve API (Jun 25, 2024)
e78868b  Use the flatgeobuf module in the data table (Jun 26, 2024)
8b3c563  Add flatgeobuf license and copyright (Jun 26, 2024)
ba92d29  Fix issues detected by spotless (Jun 26, 2024)
8972762  Suppress all warnings in generated code (Jun 26, 2024)
f55d900  Fix issues detected by spotless (Jun 26, 2024)
9fa0bcb  Format code (Jun 26, 2024)
3e8810a  Make some classes internal (Jun 26, 2024)
3d403f1  Fix sonar issues (Jun 26, 2024)
e3df88d  Fix sonar issues (Jun 26, 2024)
a9ab750  Fix sonar issues (Jun 26, 2024)
d4f391c  Fix sonar issues (Jun 26, 2024)
ef725f6  Replace classes with records (Jun 26, 2024)
52e92f7  Fix sonar issues (Jun 26, 2024)
e8cd6f8  Remove type parametrization (Jun 26, 2024)
3d3e06e  Fix codeql issues (Jun 26, 2024)
6143478  Revert change (deprecated method) (Jun 26, 2024)
28df190  Skip bytes with a small buffer (Jun 27, 2024)
8b4e8af  Add javadoc and clean (Jun 27, 2024)
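
The commits above replace the org.wololo flatgeobuf dependency with an in-house baremaps-flatgeobuf module built around a channel-based reader and writer. As a rough sketch of the reading side, based only on the calls that appear in the diffs below (FlatGeoBufReader, readHeader, skipIndex, readFeature); the feature type and exception signatures are assumptions, hence the use of var:

import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import org.apache.baremaps.flatgeobuf.FlatGeoBufReader;

public class ReadSketch {
  public static void main(String[] args) throws Exception {
    Path path = Path.of("countries.fgb"); // hypothetical input file
    try (var reader = new FlatGeoBufReader(FileChannel.open(path, StandardOpenOption.READ))) {
      var header = reader.readHeader(); // magic bytes and header come first
      reader.skipIndex();               // the packed R-tree sits between header and features
      for (long i = 0; i < header.featuresCount(); i++) {
        var feature = reader.readFeature(); // features are read sequentially
        // process the feature here
      }
    }
  }
}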
1 change: 0 additions & 1 deletion baremaps-cli/src/license/override.properties

@@ -142,7 +142,6 @@ org.postgresql--postgresql--42.7.2=BSD 2-Clause License
 org.reactivestreams--reactive-streams--1.0.4=MIT License
 org.roaringbitmap--RoaringBitmap--1.0.1=Apache License 2.0
 org.slf4j--slf4j-api--2.0.12=MIT License
-org.wololo--flatgeobuf--3.26.2=ISC License
 org.xerial--sqlite-jdbc--3.45.1.0=The Apache Software License, Version 2.0
 org.xerial.thirdparty--nestedvm--1.0=Apache License 2.0
 org.yaml--snakeyaml--2.2=Apache License 2.0
Baremaps.java

@@ -68,9 +68,12 @@ public static void main(String... args) {
     }

     // Execute the command
-    CommandLine cmd = new CommandLine(new Baremaps()).setUsageHelpLongOptionsMaxWidth(30)
+    CommandLine commandLine = new CommandLine(new Baremaps())
+        .setCaseInsensitiveEnumValuesAllowed(true)
+        .setUsageHelpLongOptionsMaxWidth(30)
         .addMixin("options", new Options());
-    cmd.execute(args);
+
+    commandLine.execute(args);
   }

   @Override
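
The setCaseInsensitiveEnumValuesAllowed(true) call is a stock picocli setting: enum-typed options then match their values regardless of case, which is what the "Make picocli and jackson insensitive with enums" commit refers to. A minimal self-contained sketch; the Format enum and the --format option are hypothetical and only illustrate the behavior:

import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;

@Command(name = "demo")
class Demo implements Runnable {

  enum Format { FLATGEOBUF, GEOPARQUET } // hypothetical enum, for illustration only

  @Option(names = "--format")
  Format format;

  @Override
  public void run() {
    System.out.println(format); // prints FLATGEOBUF
  }

  public static void main(String[] args) {
    new CommandLine(new Demo())
        .setCaseInsensitiveEnumValuesAllowed(true) // lets "flatgeobuf" match FLATGEOBUF
        .execute("--format", "flatgeobuf");
  }
}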
8 changes: 4 additions & 4 deletions baremaps-core/pom.xml

@@ -82,6 +82,10 @@ limitations under the License.
       <groupId>org.apache.baremaps</groupId>
       <artifactId>baremaps-data</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.baremaps</groupId>
+      <artifactId>baremaps-flatgeobuf</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.apache.baremaps</groupId>
       <artifactId>baremaps-geoparquet</artifactId>
@@ -142,10 +146,6 @@ limitations under the License.
       <groupId>org.postgresql</groupId>
       <artifactId>postgresql</artifactId>
     </dependency>
-    <dependency>
-      <groupId>org.wololo</groupId>
-      <artifactId>flatgeobuf</artifactId>
-    </dependency>
     <dependency>
       <groupId>org.xerial</groupId>
       <artifactId>sqlite-jdbc</artifactId>
FlatGeoBufDataTable.java (package org.apache.baremaps.storage.flatgeobuf)

@@ -17,7 +17,6 @@

 package org.apache.baremaps.storage.flatgeobuf;

-import com.google.flatbuffers.FlatBufferBuilder;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
@@ -26,18 +25,14 @@
 import java.nio.file.StandardOpenOption;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
+import java.util.Objects;
 import org.apache.baremaps.data.collection.DataCollection;
-import org.apache.baremaps.data.storage.DataRow;
-import org.apache.baremaps.data.storage.DataSchema;
-import org.apache.baremaps.data.storage.DataStoreException;
-import org.apache.baremaps.data.storage.DataTable;
-import org.locationtech.jts.geom.*;
-import org.wololo.flatgeobuf.Constants;
-import org.wololo.flatgeobuf.GeometryConversions;
-import org.wololo.flatgeobuf.HeaderMeta;
-import org.wololo.flatgeobuf.PackedRTree;
-import org.wololo.flatgeobuf.generated.Feature;
-import org.wololo.flatgeobuf.generated.GeometryType;
+import org.apache.baremaps.data.storage.*;
+import org.apache.baremaps.data.storage.DataColumn.Type;
+import org.apache.baremaps.flatgeobuf.FlatGeoBuf;
+import org.apache.baremaps.flatgeobuf.FlatGeoBufReader;
+import org.apache.baremaps.flatgeobuf.FlatGeoBufWriter;
+import org.apache.baremaps.flatgeobuf.PackedRTree;

 /**
  * A {@link DataTable} that stores rows in a flatgeobuf file.
@@ -54,8 +49,18 @@ public class FlatGeoBufDataTable implements DataTable {
    * @param file the path to the flatgeobuf file
    */
   public FlatGeoBufDataTable(Path file) {
-    this.file = file;
-    this.schema = readSchema(file);
+    this(file, readSchema(file));
+  }
+
+  /**
+   * Constructs a table from a flatgeobuf file and a schema (used for writing).
+   *
+   * @param file the path to the flatgeobuf file
+   * @param schema the schema of the table
+   */
+  public FlatGeoBufDataTable(Path file, DataSchema schema) {
+    this.file = file;
+    this.schema = schema;
   }

   /**
@@ -65,27 +70,15 @@
    * @return the schema of the table
    */
   private static DataSchema readSchema(Path file) {
-    try (var channel = FileChannel.open(file, StandardOpenOption.READ)) {
+    try (var reader = new FlatGeoBufReader(FileChannel.open(file, StandardOpenOption.READ))) {
       // try to read the schema from the file
-      var buffer = ByteBuffer.allocate(1 << 20).order(ByteOrder.LITTLE_ENDIAN);
-      HeaderMeta headerMeta = readHeaderMeta(channel, buffer);
-      return FlatGeoBufTypeConversion.asSchema(headerMeta);
+      var header = reader.readHeader();
+      return FlatGeoBufTypeConversion.asSchema(header);
     } catch (IOException e) {
       return null;
     }
   }

-  /**
-   * Constructs a table from a flatgeobuf file and a schema (used for writing).
-   *
-   * @param file the path to the flatgeobuf file
-   * @param schema the schema of the table
-   */
-  public FlatGeoBufDataTable(Path file, DataSchema schema) {
-    this.file = file;
-    this.schema = schema;
-  }
-
   /**
    * {@inheritDoc}
    */
@@ -100,22 +93,13 @@ public DataSchema schema() {
   @Override
   public Iterator<DataRow> iterator() {
     try {
-      var channel = FileChannel.open(file, StandardOpenOption.READ);
+      var reader = new FlatGeoBufReader(FileChannel.open(file, StandardOpenOption.READ));

-      var buffer = ByteBuffer.allocate(1 << 20).order(ByteOrder.LITTLE_ENDIAN);
-      HeaderMeta headerMeta = readHeaderMeta(channel, buffer);
-      channel.position(headerMeta.offset);
-
-      // skip the index
-      long indexOffset = headerMeta.offset;
-      long indexSize =
-          PackedRTree.calcSize((int) headerMeta.featuresCount, headerMeta.indexNodeSize);
-      channel.position(indexOffset + indexSize);
-
-      buffer.clear();
+      var header = reader.readHeader();
+      reader.skipIndex();

       // create the feature stream
-      return new RowIterator(channel, headerMeta, schema, buffer);
+      return new RowIterator(reader, header, schema);
     } catch (IOException e) {
       throw new DataStoreException(e);
     }
@@ -134,30 +118,14 @@ public void clear() {
    */
   @Override
   public long size() {
-    try (var channel = FileChannel.open(file, StandardOpenOption.READ)) {
-      var buffer = ByteBuffer.allocate(1 << 20).order(ByteOrder.LITTLE_ENDIAN);
-      HeaderMeta headerMeta = readHeaderMeta(channel, buffer);
-      return headerMeta.featuresCount;
+    try (var reader = new FlatGeoBufReader(FileChannel.open(file, StandardOpenOption.READ))) {
+      FlatGeoBuf.Header header = reader.readHeader();
+      return header.featuresCount();
     } catch (IOException e) {
       throw new DataStoreException(e);
     }
   }

-  /**
-   * Reads the header meta from a channel.
-   *
-   * @param channel the channel to read from
-   * @param buffer the buffer to use
-   * @return the header meta
-   * @throws IOException if an error occurs while reading the header meta
-   */
-  private static HeaderMeta readHeaderMeta(SeekableByteChannel channel, ByteBuffer buffer)
-      throws IOException {
-    channel.read(buffer);
-    buffer.flip();
-    return HeaderMeta.read(buffer);
-  }
-
   /**
    * Writes a collection of rows to a flatgeobuf file.
    *
@@ -166,68 +134,45 @@ private static HeaderMeta readHeaderMeta(SeekableByteChannel channel, ByteBuffer
    */
   public void write(DataCollection<DataRow> features) throws IOException {
     try (
-        var channel = FileChannel.open(file, StandardOpenOption.CREATE, StandardOpenOption.WRITE);
-        var outputStream = Channels.newOutputStream(channel)) {
-      outputStream.write(Constants.MAGIC_BYTES);
-
-      var bufferBuilder = new FlatBufferBuilder();
-
-      var headerMeta = new HeaderMeta();
-      headerMeta.geometryType = GeometryType.Unknown;
-      headerMeta.indexNodeSize = 16;
-      headerMeta.srid = 3857;
-      headerMeta.featuresCount = features.size();
-      headerMeta.name = schema.name();
-      headerMeta.columns = FlatGeoBufTypeConversion.asColumns(schema.columns());
-      HeaderMeta.write(headerMeta, outputStream, bufferBuilder);
+        var writer = new FlatGeoBufWriter(
+            FileChannel.open(file, StandardOpenOption.CREATE, StandardOpenOption.WRITE))) {
+
+      schema.columns().stream()
+          .filter(c -> c.cardinality() == DataColumn.Cardinality.REQUIRED)
+          .forEach(c -> {
+            if (Objects.requireNonNull(c.type()) == Type.BINARY) {
+              throw new UnsupportedOperationException();
+            }
+          });
+
+      var header = new FlatGeoBuf.Header(
+          schema.name(),
+          null,
+          FlatGeoBuf.GeometryType.UNKNOWN,
+          false,
+          false,
+          false,
+          false,
+          FlatGeoBufTypeConversion.asColumns(schema.columns()),
+          features.size(),
+          2,
+          null,
+          null,
+          null,
+          null);
+
+      writer.writeHeader(header);

       var indexSize =
-          (int) PackedRTree.calcSize((int) headerMeta.featuresCount, headerMeta.indexNodeSize);
+          (int) PackedRTree.calcSize((int) header.featuresCount(), header.indexNodeSize());

-      for (int i = 0; i < indexSize; i++) {
-        outputStream.write(0);
-      }
+      writer.writeIndexBuffer(ByteBuffer.allocate(indexSize).order(ByteOrder.LITTLE_ENDIAN));

       var iterator = features.iterator();
       while (iterator.hasNext()) {
-        var featureBuilder = new FlatBufferBuilder(4096);
-
         var row = iterator.next();
-
-        var propertiesBuffer = ByteBuffer.allocate(1 << 20).order(ByteOrder.LITTLE_ENDIAN);
-        var properties = row.values().stream()
-            .filter(v -> !(v instanceof Geometry))
-            .toList();
-        for (int i = 0; i < properties.size(); i++) {
-          var column = headerMeta.columns.get(i);
-          var value = properties.get(i);
-          propertiesBuffer.putShort((short) i);
-          FlatGeoBufTypeConversion.writeValue(propertiesBuffer, column, value);
-        }
-        if (propertiesBuffer.position() > 0) {
-          propertiesBuffer.flip();
-        }
-        var propertiesOffset = org.wololo.flatgeobuf.generated.Feature
-            .createPropertiesVector(featureBuilder, propertiesBuffer);
-
-        var geometry = row.values().stream()
-            .filter(v -> v instanceof Geometry)
-            .map(Geometry.class::cast)
-            .findFirst();
-
-        var geometryOffset = geometry.isPresent()
-            ? GeometryConversions.serialize(featureBuilder, geometry.get(), headerMeta.geometryType)
-            : 0;
-
-        var featureOffset =
-            org.wololo.flatgeobuf.generated.Feature.createFeature(featureBuilder, geometryOffset,
-                propertiesOffset, 0);
-        featureBuilder.finishSizePrefixed(featureOffset);
-
-        ByteBuffer data = featureBuilder.dataBuffer();
-        while (data.hasRemaining()) {
-          channel.write(data);
-        }
+        var feature = FlatGeoBufTypeConversion.asFeature(row);
+        writer.writeFeature(feature);
       }
     }
   }
@@ -237,38 +182,36 @@ public void write(DataCollection<DataRow> features) throws IOException {
    */
   public static class RowIterator implements Iterator<DataRow> {

-    private final HeaderMeta headerMeta;
+    private final FlatGeoBuf.Header header;

     private final DataSchema schema;

-    private final SeekableByteChannel channel;
-
-    private final ByteBuffer buffer;
+    private final FlatGeoBufReader reader;

     private long cursor = 0;

     /**
      * Constructs a row iterator.
      *
-     * @param channel the channel to read from
-     * @param headerMeta the header meta
+     * @param reader the channel to read from
+     * @param header the header of the file
      * @param schema the schema of the table
-     * @param buffer the buffer to use
      */
-    public RowIterator(SeekableByteChannel channel, HeaderMeta headerMeta,
-        DataSchema schema, ByteBuffer buffer) {
-      this.channel = channel;
-      this.headerMeta = headerMeta;
+    public RowIterator(
+        FlatGeoBufReader reader,
+        FlatGeoBuf.Header header,
+        DataSchema schema) {
+      this.reader = reader;
+      this.header = header;
       this.schema = schema;
-      this.buffer = buffer;
     }

     /**
      * {@inheritDoc}
      */
     @Override
     public boolean hasNext() {
-      return cursor < headerMeta.featuresCount;
+      return cursor < header.featuresCount();
     }

     /**
@@ -277,19 +220,9 @@ public boolean hasNext() {
     @Override
     public DataRow next() {
       try {
-        channel.read(buffer);
-        buffer.flip();
-
-        var featureSize = buffer.getInt();
-        var row =
-            FlatGeoBufTypeConversion.asRow(headerMeta, schema, Feature.getRootAsFeature(buffer));
-
-        buffer.position(Integer.BYTES + featureSize);
-        buffer.compact();
-
+        var feature = reader.readFeature();
         cursor++;
-
-        return row;
+        return FlatGeoBufTypeConversion.asRow(schema, feature);
       } catch (IOException e) {
         throw new NoSuchElementException(e);
       }
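
Taken together, the reworked FlatGeoBufDataTable reads the schema from the file header and streams rows instead of materializing them. A usage sketch based only on the constructors and methods visible in this diff; the file path is hypothetical, and iterating the table with for-each assumes DataTable is Iterable, which its iterator() override suggests:

import java.nio.file.Path;
import org.apache.baremaps.storage.flatgeobuf.FlatGeoBufDataTable;

public class TableSketch {
  public static void main(String[] args) {
    Path path = Path.of("data.fgb"); // hypothetical file
    var table = new FlatGeoBufDataTable(path); // schema is read from the header
    System.out.println("features: " + table.size()); // featuresCount from the header
    for (var row : table) {
      // process the row's values (properties and geometry)
    }
  }
}

Writing goes through the second constructor: a call like new FlatGeoBufDataTable(path, schema).write(rows) follows the write path above, emitting the header, a zeroed index buffer sized by PackedRTree.calcSize(featuresCount, indexNodeSize), and then the features in sequence.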