Skip to content

Commit 5e17635

Browse files
authored
GEOMESA-3575 FSDS - Move xz2 filter function into filter module (#3561)
* Allow for push-down predicate evaluation in Accumulo/HBase
1 parent ec15176 commit 5e17635

7 files changed

Lines changed: 167 additions & 70 deletions

File tree

geomesa-filter/src/main/resources/META-INF/services/org.geotools.api.filter.expression.Function

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ org.locationtech.geomesa.filter.function.DateToLong
55
org.locationtech.geomesa.filter.function.FastProperty
66
org.locationtech.geomesa.filter.function.MurmurHashFunction
77
org.locationtech.geomesa.filter.function.ProxyIdFunction
8+
org.locationtech.geomesa.filter.function.XZ2Function
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/***********************************************************************
2+
* Copyright (c) 2013-2025 General Atomics Integrated Intelligence, Inc.
3+
* All rights reserved. This program and the accompanying materials
4+
* are made available under the terms of the Apache License, Version 2.0
5+
* which accompanies this distribution and is available at
6+
* https://www.apache.org/licenses/LICENSE-2.0
7+
***********************************************************************/
8+
9+
package org.locationtech.geomesa.filter.function
10+
11+
import org.geotools.api.feature.simple.SimpleFeature
12+
import org.geotools.api.filter.expression.{Expression, ExpressionVisitor}
13+
import org.geotools.filter.FunctionExpressionImpl
14+
import org.geotools.filter.capability.FunctionNameImpl
15+
import org.geotools.filter.capability.FunctionNameImpl.parameter
16+
import org.locationtech.geomesa.curve.XZ2SFC
17+
import org.locationtech.geomesa.filter.function.XZ2Function.GetDefaultGeometry
18+
import org.locationtech.jts.geom.Geometry
19+
20+
/**
21+
* Function to calculate an XZ2 hex-encoded value
22+
*/
23+
class XZ2Function extends FunctionExpressionImpl(XZ2Function.FunctionName) {
24+
25+
private var expression: Expression = _
26+
27+
override def setParameters(params: java.util.List[Expression]): Unit = {
28+
super.setParameters(params)
29+
if (params.isEmpty) {
30+
expression = GetDefaultGeometry
31+
} else {
32+
expression = getExpression(0)
33+
}
34+
}
35+
36+
override def evaluate(o: AnyRef): AnyRef = {
37+
if (o == null) {
38+
return null
39+
}
40+
val value = expression.evaluate(o, classOf[Geometry])
41+
if (value == null) {
42+
return null
43+
}
44+
val env = value.getEnvelopeInternal
45+
if (env.isNull) {
46+
return null
47+
}
48+
XZ2SFC.hexEncode(XZ2SFC.index(env.getMinX, env.getMinY, env.getMaxX, env.getMaxY))
49+
}
50+
}
51+
52+
object XZ2Function {
53+
54+
val FunctionName = new FunctionNameImpl("xz2", classOf[String], parameter("geom", classOf[String], 0, 1))
55+
56+
private object GetDefaultGeometry extends Expression {
57+
58+
override def evaluate(obj: Any): Geometry = obj match {
59+
case sf: SimpleFeature => sf.getDefaultGeometry.asInstanceOf[Geometry]
60+
case _ => null
61+
}
62+
63+
override def evaluate[T](obj: Any, context: Class[T]): T = evaluate(obj).asInstanceOf[T] // only called by our code, above
64+
65+
override def accept(visitor: ExpressionVisitor, extraData: Any): AnyRef = throw new UnsupportedOperationException()
66+
}
67+
}

geomesa-fs/geomesa-fs-storage/geomesa-fs-storage-core/src/main/resources/META-INF/services/org.geotools.api.filter.expression.Function

Lines changed: 0 additions & 1 deletion
This file was deleted.

geomesa-fs/geomesa-fs-storage/geomesa-fs-storage-core/src/main/scala/org/locationtech/geomesa/fs/storage/core/StorageMetadata.scala

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,6 @@ object StorageMetadata {
221221

222222
private val sfc = XZ2SFC
223223
private val factory = new GeometryFactory()
224-
private val hexFormat = HexFormat.of()
225224

226225
override val getAlias: String = "xz2"
227226

@@ -235,11 +234,11 @@ object StorageMetadata {
235234
if (env.isNull) {
236235
throw new NullPointerException("Geometry has a null envelope")
237236
}
238-
toHex(sfc.index(env.getMinX, env.getMinY, env.getMaxX, env.getMaxY))
237+
sfc.hexEncode(sfc.index(env.getMinX, env.getMinY, env.getMaxX, env.getMaxY))
239238
}
240239

241240
override def decode(value: String): Geometry = {
242-
val (xmin, ymin, xmax, ymax) = sfc.invert(fromHex(value))
241+
val (xmin, ymin, xmax, ymax) = sfc.invert(sfc.hexDecode(value))
243242
val ring = Array(
244243
new Coordinate(xmin, ymin),
245244
new Coordinate(xmin, ymax),
@@ -257,11 +256,6 @@ object StorageMetadata {
257256
* @param maxRanges a rough upper limit on the number of ranges to generate
258257
*/
259258
def ranges(queries: Seq[(Double, Double, Double, Double)], maxRanges: Option[Int] = None): Seq[(String, String)] =
260-
sfc.ranges(queries, maxRanges).map(r => toHex(r.lower) -> toHex(r.upper))
261-
262-
// our z values use 25 bits, so we only need 7 digits to hex encode the full value
263-
// we bit-shift by 3 to move dead bits to the end for better prefix matching
264-
private def toHex(z: Long): String = hexFormat.toHexDigits(z << 3, 7)
265-
private def fromHex(hex: String): Long = HexFormat.fromHexDigitsToLong(hex) >>> 3
259+
sfc.ranges(queries, maxRanges).map(r => sfc.hexEncode(r.lower) -> sfc.hexEncode(r.upper))
266260
}
267261
}

geomesa-fs/geomesa-fs-storage/geomesa-fs-storage-core/src/main/scala/org/locationtech/geomesa/fs/storage/core/schemes/XZ2Scheme.scala

Lines changed: 6 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,14 @@ package schemes
1111

1212
import org.geotools.api.feature.simple.SimpleFeature
1313
import org.geotools.api.filter.Filter
14-
import org.geotools.api.filter.expression.{Expression, ExpressionVisitor}
15-
import org.geotools.filter.FunctionExpressionImpl
16-
import org.geotools.filter.capability.FunctionNameImpl
17-
import org.geotools.filter.capability.FunctionNameImpl.parameter
1814
import org.locationtech.geomesa.curve.XZ2SFC
1915
import org.locationtech.geomesa.filter.FilterHelper
20-
import org.locationtech.geomesa.fs.storage.core.StorageMetadata.XZ2Encoder
16+
import org.locationtech.geomesa.filter.function.XZ2Function
2117
import org.locationtech.geomesa.fs.storage.core.schemes.SpatialScheme.SpatialPartitionSchemeFactory
2218
import org.locationtech.geomesa.fs.storage.core.schemes.XZ2Scheme.incrementHex
2319
import org.locationtech.geomesa.utils.geotools.GeometryUtils
2420
import org.locationtech.jts.geom.Geometry
2521

26-
import java.util.HexFormat
2722
import scala.annotation.tailrec
2823

2924
/**
@@ -42,18 +37,16 @@ import scala.annotation.tailrec
4237
case class XZ2Scheme(attribute: String, index: Int, bits: Int) extends PartitionScheme {
4338

4439
import FilterHelper.ff
45-
import XZ2Scheme.FullHexDigits
4640

4741
require(bits % 4 == 0, s"Bit precision must be a multiple of 4, but received $bits")
4842

4943
private val xz2 = XZ2SFC
50-
private val hexFormat = HexFormat.of()
5144

5245
// partition level derived from bits parameter
5346
// each level adds 2 bits (4 quadrants)
5447
private val partitionLevel = (bits / 2).toShort
5548
// number of hex digits used to represent our z value - bits = (xz2.g - partitionLevel) * 2, then divide by 4 to get hex
56-
private val digits = FullHexDigits - ((xz2.g - partitionLevel) / 2)
49+
private val digits = xz2.hexDigits - ((xz2.g - partitionLevel) / 2)
5750

5851
lazy private val wholeWorldRanges = Some(generateRanges(Seq((-180, -90, 180, 90))))
5952

@@ -68,9 +61,9 @@ case class XZ2Scheme(attribute: String, index: Int, bits: Int) extends Partition
6861
override def getCoveringFilter(partition: PartitionKey): Filter = {
6962
// TODO maybe we can improve this with *some* kind of bbox?
7063
val zPrefix = partition.value
71-
val lower = zPrefix.padTo(FullHexDigits, '0')
72-
val upper = zPrefix.padTo(FullHexDigits, 'f')
73-
ff.between(ff.function(XZ2Scheme.FunctionName.getName), ff.literal(lower), ff.literal(upper))
64+
val lower = zPrefix.padTo(xz2.hexDigits, '0')
65+
val upper = zPrefix.padTo(xz2.hexDigits, 'f')
66+
ff.between(ff.function(XZ2Function.FunctionName.getName), ff.literal(lower), ff.literal(upper))
7467
}
7568

7669
override def getRangesForFilter(filter: Filter): Option[Seq[PartitionRange]] = {
@@ -104,16 +97,11 @@ case class XZ2Scheme(attribute: String, index: Int, bits: Int) extends Partition
10497
}
10598

10699
// truncates a full-resolution index to a partition group ID
107-
private def truncateToPartition(fullIndex: Long): String = hexFormat.toHexDigits(fullIndex << 3, FullHexDigits).take(digits)
100+
private def truncateToPartition(fullIndex: Long): String = xz2.hexEncode(fullIndex).take(digits)
108101
}
109102

110103
object XZ2Scheme extends SpatialPartitionSchemeFactory[Geometry]("xz2") {
111104

112-
// number of digits required to index a "full" (g == 12) xz value, which is 25 bits
113-
private val FullHexDigits = 7
114-
115-
val FunctionName = new FunctionNameImpl("xz2", classOf[String], parameter("geom", classOf[String], 0, 1))
116-
117105
override def buildPartitionScheme(bits: Int, geom: String, geomIndex: Int): PartitionScheme =
118106
XZ2Scheme(geom, geomIndex, bits)
119107

@@ -130,44 +118,4 @@ object XZ2Scheme extends SpatialPartitionSchemeFactory[Geometry]("xz2") {
130118
incrementHex(hex.substring(0, pos) + '0' + hex.substring(pos + 1), pos - 1)
131119
}
132120
}
133-
134-
/**
135-
* Function to calculate an XZ2 hex-encoded value
136-
*/
137-
class XZ2Function extends FunctionExpressionImpl(FunctionName) {
138-
139-
private var expression: Expression = _
140-
141-
override def setParameters(params: java.util.List[Expression]): Unit = {
142-
super.setParameters(params)
143-
if (params.isEmpty) {
144-
expression = GetDefaultGeometry
145-
} else {
146-
expression = getExpression(0)
147-
}
148-
}
149-
150-
override def evaluate(o: AnyRef): AnyRef = {
151-
if (o == null) {
152-
return null
153-
}
154-
val value = expression.evaluate(o, classOf[Geometry])
155-
if (value == null) {
156-
return null
157-
}
158-
XZ2Encoder.encode(value)
159-
}
160-
}
161-
162-
private object GetDefaultGeometry extends Expression {
163-
164-
override def evaluate(obj: Any): Geometry = obj match {
165-
case sf: SimpleFeature => sf.getDefaultGeometry.asInstanceOf[Geometry]
166-
case _ => null
167-
}
168-
169-
override def evaluate[T](obj: Any, context: Class[T]): T = evaluate(obj).asInstanceOf[T] // only called by our code, above
170-
171-
override def accept(visitor: ExpressionVisitor, extraData: Any): AnyRef = throw new UnsupportedOperationException()
172-
}
173121
}

geomesa-fs/geomesa-fs-storage/geomesa-fs-storage-core/src/test/scala/org/locationtech/geomesa/fs/storage/core/schemes/XZ2SchemeTest.scala

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import java.util.Random
1818
class XZ2SchemeTest extends SpecificationWithJUnit {
1919

2020
val sft = SimpleFeatureTypes.createType("test", "*geom:Point:srid=4326")
21+
val sftPoly = SimpleFeatureTypes.createType("test", "*geom:Polygon:srid=4326")
2122

2223
"XZ2Scheme" should {
2324

@@ -39,13 +40,13 @@ class XZ2SchemeTest extends SpecificationWithJUnit {
3940
ps.getPartition(ScalaSimpleFeature.create(sft, "1", "POINT (-75 38)")).value mustEqual "66f2db"
4041
}
4142

42-
"calculate covering filters" in {
43+
"calculate covering filters for points" in {
4344
val r = new Random(77)
4445
val features = Seq.tabulate(100) { i =>
4546
ScalaSimpleFeature.create(sft, s"$i", s"POINT (${r.nextDouble(-180, 180)} ${r.nextDouble(-90, 90)})")
4647
}
4748

48-
foreach(Seq(4, 8, 16)) { bits =>
49+
foreach(Seq(4, 8, 12, 16)) { bits =>
4950
val ps = PartitionSchemeFactory.load(sft, s"xz2:bits=$bits")
5051
ps must beAnInstanceOf[XZ2Scheme]
5152
foreach(features.groupBy(ps.getPartition)) { case (partition, group) =>
@@ -54,5 +55,64 @@ class XZ2SchemeTest extends SpecificationWithJUnit {
5455
}
5556
}
5657
}
58+
59+
"calculate covering filters for polygons" in {
60+
val r = new Random(42)
61+
62+
// generate polygons of various sizes
63+
val features = Seq.tabulate(100) { i =>
64+
val centerX = r.nextDouble(-180, 180)
65+
val centerY = r.nextDouble(-90, 90)
66+
67+
// vary the size from small (0.1 degrees) to large (45 degrees)
68+
val size = r.nextDouble(0.1, 45.0) / 2
69+
70+
val xmin = math.max(-180, centerX - size)
71+
val ymin = math.max(-90, centerY - size)
72+
val xmax = math.min(180, centerX + size)
73+
val ymax = math.min(90, centerY + size)
74+
75+
ScalaSimpleFeature.create(sftPoly, s"$i", s"POLYGON (($xmin $ymin, $xmax $ymin, $xmax $ymax, $xmin $ymax, $xmin $ymin))")
76+
}
77+
78+
foreach(Seq(4, 8, 12, 16)) { bits =>
79+
val ps = PartitionSchemeFactory.load(sftPoly, s"xz2:bits=$bits")
80+
ps must beAnInstanceOf[XZ2Scheme]
81+
foreach(features.groupBy(ps.getPartition)) { case (partition, group) =>
82+
val filter = ps.getCoveringFilter(partition)
83+
features.filter(filter.evaluate) must containTheSameElementsAs(group)
84+
}
85+
}
86+
}
87+
88+
"calculate covering filters for mixed geometry sizes" in {
89+
val r = new Random(123)
90+
91+
// mix of very small and very large polygons
92+
val features = Seq.tabulate(50) { i =>
93+
val centerX = r.nextDouble(-180, 180)
94+
val centerY = r.nextDouble(-90, 90)
95+
96+
// alternate between tiny and huge polygons
97+
val size = (if (i % 2 == 0) { r.nextDouble(0.01, 0.1) } else { r.nextDouble(30.0, 90.0) }) / 2
98+
99+
val xmin = math.max(-180, centerX - size)
100+
val ymin = math.max(-90, centerY - size)
101+
val xmax = math.min(180, centerX + size)
102+
val ymax = math.min(90, centerY + size)
103+
104+
ScalaSimpleFeature.create(sftPoly, s"$i", s"POLYGON (($xmin $ymin, $xmax $ymin, $xmax $ymax, $xmin $ymax, $xmin $ymin))")
105+
}
106+
107+
foreach(Seq(4, 8, 12, 16)) { bits =>
108+
val ps = PartitionSchemeFactory.load(sftPoly, s"xz2:bits=$bits")
109+
ps must beAnInstanceOf[XZ2Scheme]
110+
foreach(features.groupBy(ps.getPartition)) { case (partition, group) =>
111+
val filter = ps.getCoveringFilter(partition)
112+
val matched = features.filter(filter.evaluate)
113+
matched must containTheSameElementsAs(group)
114+
}
115+
}
116+
}
57117
}
58118
}

geomesa-z3/src/main/scala/org/locationtech/geomesa/curve/XZ2SFC.scala

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ package org.locationtech.geomesa.curve
1111
import org.locationtech.geomesa.curve.XZ2SFC.{QueryWindow, XElement}
1212
import org.locationtech.geomesa.zorder.sfcurve.IndexRange
1313

14+
import java.util.HexFormat
1415
import scala.collection.mutable.ArrayBuffer
1516

1617
/**
@@ -25,6 +26,8 @@ class XZ2SFC(val g: Short, val xBounds: (Double, Double), val yBounds: (Double,
2526

2627
// TODO see what the max value of g can be where we can use Ints instead of Longs and possibly refactor to use Ints
2728

29+
private val hexFormat = HexFormat.of()
30+
2831
private val xLo = xBounds._1
2932
private val xHi = xBounds._2
3033
private val yLo = yBounds._1
@@ -33,6 +36,31 @@ class XZ2SFC(val g: Short, val xBounds: (Double, Double), val yBounds: (Double,
3336
private val xSize = xHi - xLo
3437
private val ySize = yHi - yLo
3538

39+
// upper bound on the z values produces by this curve (for g values <= 30)
40+
val maxZValue: Long = (math.pow(4, g + 1).toLong - 1) / 3
41+
42+
// number of bits we bit-shift hex values, in order to move the significant digits to the left and allow prefix matching
43+
private val hexBitOffset = java.lang.Long.numberOfLeadingZeros(maxZValue) % 4
44+
45+
// number of digits used in hex encoding function
46+
val hexDigits: Int = ((64 - java.lang.Long.numberOfLeadingZeros(maxZValue)) / 4) + (if (hexBitOffset == 0) { 0 } else { 1 })
47+
48+
/**
49+
* Encodes a z value into hex, bit-shifting left as necessary so that prefix matching works
50+
*
51+
* @param z z value
52+
* @return hex-encoded string
53+
*/
54+
def hexEncode(z: Long): String = hexFormat.toHexDigits(z << hexBitOffset, hexDigits)
55+
56+
/**
57+
* Decodes a previously encoded z value from hex
58+
*
59+
* @param hex hex string from `hexEncode`
60+
* @return original z value
61+
*/
62+
def hexDecode(hex: String): Long = HexFormat.fromHexDigitsToLong(hex) >>> hexBitOffset
63+
3664
/**
3765
* Index a polygon by its bounding box
3866
*

0 commit comments

Comments
 (0)