Skip to content

Commit a097d16

Browse files
uros-db authored and Yicong-Huang committed
[SPARK-53957][GEO][SQL] Support GEOGRAPHY and GEOMETRY in the SpatialReferenceSystemMapper
### What changes were proposed in this pull request?
Extend the `SpatialReferenceSystemMapper` class to offer SRID <-> CRS mappings for both `GeographyType` and `GeometryType`. The `SpatialReferenceSystemMapper` class was introduced originally as part of: apache#52491.

### Why are the changes needed?
Avoid manual checks for GEOGRAPHY type, and use the centralized SRS mapping logic. Also, this PR will make it easier to support additional SRID/CRS values for spatial types soon.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Added new tests to:
- `SpatialReferenceSystemMapperSuite`

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#52667 from uros-db/geo-srsMapper.

Authored-by: Uros Bojanic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
1 parent 5035217 commit a097d16

File tree

11 files changed

+271
-116
lines changed

11 files changed

+271
-116
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.internal.types;
19+
20+
/**
21+
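* Class for providing SRS mappings for cartesian spatial reference systems. No SRS is filtered out here: any SRID / string ID known to the cache is accepted, geographic or not.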
22+
*/
23+
public class CartesianSpatialReferenceSystemMapper extends SpatialReferenceSystemMapper {
24+
// Returns the string ID corresponding to the input SRID. If not supported, returns `null`.
25+
public static String getStringId(int srid) {
26+
SpatialReferenceSystemInformation srsInfo = srsCache.getSrsInfo(srid);
27+
return srsInfo != null ? srsInfo.stringId() : null;
28+
}
29+
30+
// Returns the SRID corresponding to the input string ID. If not supported, returns `null`.
31+
public static Integer getSrid(String stringId) {
32+
SpatialReferenceSystemInformation srsInfo = srsCache.getSrsInfo(stringId);
33+
return srsInfo != null ? srsInfo.srid() : null;
34+
}
35+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.internal.types;
19+
20+
/**
21+
* Class for providing SRS mappings for geographic spatial reference systems.
22+
*/
23+
public class GeographicSpatialReferenceSystemMapper extends SpatialReferenceSystemMapper {
24+
// Returns the string ID corresponding to the input SRID. If the SRID is unknown or its SRS is not geographic, returns `null`.
25+
public static String getStringId(int srid) {
26+
SpatialReferenceSystemInformation srsInfo = srsCache.getSrsInfo(srid);
27+
return srsInfo != null && srsInfo.isGeographic() ? srsInfo.stringId() : null;
28+
}
29+
30+
// Returns the SRID corresponding to the input string ID. If the string ID is unknown or its SRS is not geographic, returns `null`.
31+
public static Integer getSrid(String stringId) {
32+
SpatialReferenceSystemInformation srsInfo = srsCache.getSrsInfo(stringId);
33+
return srsInfo != null && srsInfo.isGeographic() ? srsInfo.srid() : null;
34+
}
35+
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.internal.types;
19+
20+
import java.util.HashMap;
21+
import java.util.List;
22+
23+
/**
24+
* Class for maintaining the mappings between supported SRID/CRS values and the corresponding SRS.
25+
*/
26+
public class SpatialReferenceSystemCache {
27+
28+
// Private constructor to prevent external instantiation of this singleton class.
29+
private SpatialReferenceSystemCache() {
30+
populateSpatialReferenceSystemInformationMapping();
31+
}
32+
33+
// The singleton `instance` is created lazily, meaning that it is not instantiated until the
34+
// `getInstance()` method is called for the first time. Note that this solution is thread-safe.
35+
private static volatile SpatialReferenceSystemCache instance = null;
36+
37+
// The `getInstance` method uses double-checked locking to ensure efficient and safe instance
38+
// creation. The singleton instance is created only once, even in a multithreaded environment.
39+
public static SpatialReferenceSystemCache getInstance() {
40+
if (instance == null) {
41+
synchronized (SpatialReferenceSystemCache.class) {
42+
if (instance == null) {
43+
instance = new SpatialReferenceSystemCache();
44+
}
45+
}
46+
}
47+
return instance;
48+
}
49+
50+
// Hash map for defining the mappings from the integer SRID value to the full SRS information.
51+
private final HashMap<Integer, SpatialReferenceSystemInformation> sridToSrs =
52+
new HashMap<>();
53+
54+
// Hash map for defining the mappings from the string ID value to the full SRS information.
55+
private final HashMap<String, SpatialReferenceSystemInformation> stringIdToSrs =
56+
new HashMap<>();
57+
58+
// Helper method for building the SRID-to-SRS and stringID-to-SRS mappings.
59+
private void populateSpatialReferenceSystemInformationMapping() {
60+
// Currently, we only support a limited set of SRID / CRS values. However, we will soon extend
61+
// this to support all the SRIDs supported by relevant authorities and libraries. The SRS list
62+
// below will be updated accordingly, and the maps will be populated with more complete data.
63+
List<SpatialReferenceSystemInformation> srsInformationList = List.of(
64+
new SpatialReferenceSystemInformation(0, "SRID:0", false),
65+
new SpatialReferenceSystemInformation(3857, "EPSG:3857", false),
66+
new SpatialReferenceSystemInformation(4326, "OGC:CRS84", true)
67+
);
68+
// Populate the mappings using the same SRS information objects, avoiding any duplication.
69+
for (SpatialReferenceSystemInformation srsInformation: srsInformationList) {
70+
sridToSrs.put(srsInformation.srid(), srsInformation);
71+
stringIdToSrs.put(srsInformation.stringId(), srsInformation);
72+
}
73+
}
74+
75+
// Returns the SRS corresponding to the input SRID. If not supported, returns `null`.
76+
public SpatialReferenceSystemInformation getSrsInfo(int srid) {
77+
return sridToSrs.getOrDefault(srid, null);
78+
}
79+
80+
// Returns the SRS corresponding to the input string ID. If not supported, returns `null`.
81+
public SpatialReferenceSystemInformation getSrsInfo(String stringId) {
82+
return stringIdToSrs.getOrDefault(stringId, null);
83+
}
84+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.internal.types;
19+
20+
/**
21+
* Class for maintaining information about a spatial reference system (SRS).
22+
*/
23+
public record SpatialReferenceSystemInformation(
24+
// Field storing the spatial reference identifier (SRID) value of this SRS.
25+
int srid,
26+
// Field storing the string ID of the corresponding coordinate reference system (CRS).
27+
String stringId,
28+
// Field indicating whether the spatial reference system (SRS) is geographic or not.
29+
boolean isGeographic
30+
) {}

sql/api/src/main/scala/org/apache/spark/sql/internal/types/SpatialReferenceSystemMapper.java

Lines changed: 5 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -17,59 +17,10 @@
1717

1818
package org.apache.spark.sql.internal.types;
1919

20-
import java.util.HashMap;
21-
22-
/*
23-
* Class for maintaining mappings between supported SRID values and the string ID of the
24-
* corresponding CRS.
20+
/**
21+
* Abstract base class for SRID <-> CRS string ID mappers; it holds the shared SRS cache used by its subclasses.
2522
*/
26-
public class SpatialReferenceSystemMapper {
27-
28-
// We implement this class as a singleton (we disallow construction).
29-
private SpatialReferenceSystemMapper() {}
30-
31-
private static final SpatialReferenceSystemMapper Instance = new SpatialReferenceSystemMapper();
32-
33-
// Returns the unique instance of this class.
34-
public static SpatialReferenceSystemMapper get() {
35-
return Instance;
36-
}
37-
38-
// Hash maps defining the mappings to/from SRID and string ID for a CRS.
39-
private static final HashMap<Integer, String> sridToStringId = buildSridToStringIdMap();
40-
private static final HashMap<String, Integer> stringIdToSrid = buildStringIdToSridMap();
41-
42-
// Returns the string ID corresponding to the input SRID. If the input SRID is not supported,
43-
// `null` is returned.
44-
public String getStringId(int srid) {
45-
return sridToStringId.get(srid);
46-
}
47-
48-
// Returns the SRID corresponding to the input string ID. If the input string ID is not
49-
// supported, `null` is returned.
50-
public Integer getSrid(String stringId) {
51-
return stringIdToSrid.get(stringId);
52-
}
53-
54-
// Currently, we only support a limited set of SRID / CRS mappings. However, we will soon extend
55-
// this to support all the SRIDs supported by relevant authorities and libraries. The methods
56-
// below will be updated accordingly, in order to populate the mappings with more complete data.
57-
58-
// Helper method for building the SRID-to-string-ID mapping.
59-
private static HashMap<Integer, String> buildSridToStringIdMap() {
60-
HashMap<Integer, String> map = new HashMap<>();
61-
map.put(0, "SRID:0"); // Unspecified
62-
map.put(3857, "EPSG:3857"); // Web Mercator
63-
map.put(4326, "OGC:CRS84"); // WGS84
64-
return map;
65-
}
66-
67-
// Helper method for building the string-ID-to-SRID mapping.
68-
private static HashMap<String, Integer> buildStringIdToSridMap() {
69-
HashMap<String, Integer> map = new HashMap<>();
70-
map.put("SRID:0", 0); // Unspecified
71-
map.put("EPSG:3857", 3857); // Web Mercator
72-
map.put("OGC:CRS84", 4326); // WGS84
73-
return map;
74-
}
23+
public abstract class SpatialReferenceSystemMapper {
24+
protected static final SpatialReferenceSystemCache srsCache =
25+
SpatialReferenceSystemCache.getInstance();
7526
}

sql/api/src/main/scala/org/apache/spark/sql/types/GeographyType.scala

Lines changed: 9 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import org.json4s.JsonAST.{JString, JValue}
2121

2222
import org.apache.spark.SparkIllegalArgumentException
2323
import org.apache.spark.annotation.Experimental
24+
import org.apache.spark.sql.internal.types.GeographicSpatialReferenceSystemMapper
2425

2526
/**
2627
* The data type representing GEOGRAPHY values which are spatial objects, as defined in the Open
@@ -160,8 +161,8 @@ object GeographyType extends SpatialType {
160161
* Constructors for GeographyType.
161162
*/
162163
def apply(srid: Int): GeographyType = {
163-
if (!isValidSrid(srid)) {
164-
// Limited geographic SRID values are allowed.
164+
val crs = GeographicSpatialReferenceSystemMapper.getStringId(srid)
165+
if (crs == null) {
165166
throw new SparkIllegalArgumentException(
166167
errorClass = "ST_INVALID_SRID_VALUE",
167168
messageParameters = Map("srid" -> srid.toString))
@@ -193,35 +194,9 @@ object GeographyType extends SpatialType {
193194
}
194195

195196
def apply(crs: String, algorithm: EdgeInterpolationAlgorithm): GeographyType = {
196-
if (!isValidCrs(crs)) {
197-
// Limited geographic CRS values are allowed.
198-
throw new SparkIllegalArgumentException(
199-
errorClass = "ST_INVALID_CRS_VALUE",
200-
messageParameters = Map("crs" -> crs))
201-
}
202197
new GeographyType(crs, algorithm)
203198
}
204199

205-
/**
206-
* Helper method to validate the CRS value. Limited geographic CRS values are allowed.
207-
*/
208-
private def isValidCrs(crs: String): Boolean = {
209-
// Currently, we only support "OGC:CRS84" / "EPSG:4326" / "SRID:ANY".
210-
// In the future, we may support others.
211-
crs.equalsIgnoreCase(GEOGRAPHY_DEFAULT_CRS) ||
212-
crs.equalsIgnoreCase(GEOGRAPHY_DEFAULT_EPSG_CRS) ||
213-
crs.equalsIgnoreCase(MIXED_CRS)
214-
}
215-
216-
/**
217-
* Helper method to validate the SRID value. Only geographic SRID values are allowed.
218-
*/
219-
220-
private def isValidSrid(srid: Int): Boolean = {
221-
// Currently, we only support 4326. In the future, we may support others.
222-
srid == GEOGRAPHY_DEFAULT_SRID
223-
}
224-
225200
override private[sql] def defaultConcreteType: DataType = GEOGRAPHY_MIXED_TYPE
226201

227202
override private[sql] def acceptsType(other: DataType): Boolean =
@@ -235,17 +210,17 @@ object GeographyType extends SpatialType {
235210
private[types] def toSrid(crs: String): Int = {
236211
// The special value "SRID:ANY" is used to represent mixed SRID values.
237212
if (crs.equalsIgnoreCase(GeographyType.MIXED_CRS)) {
238-
GeographyType.MIXED_SRID
213+
return GeographyType.MIXED_SRID
239214
}
240-
// As for other valid CRS values, we currently offer limited support.
241-
else if (crs.equalsIgnoreCase(GeographyType.GEOGRAPHY_DEFAULT_CRS) ||
242-
crs.equalsIgnoreCase(GeographyType.GEOGRAPHY_DEFAULT_EPSG_CRS)) {
243-
GeographyType.GEOGRAPHY_DEFAULT_SRID
244-
} else {
215+
// For all other CRS values, we look up the corresponding SRID (exact, case-sensitive match on the string ID).
216+
val srid = GeographicSpatialReferenceSystemMapper.getSrid(crs)
217+
if (srid == null) {
218+
// If the CRS value is not recognized, we throw an exception.
245219
throw new SparkIllegalArgumentException(
246220
errorClass = "ST_INVALID_CRS_VALUE",
247221
messageParameters = Map("crs" -> crs))
248222
}
223+
srid
249224
}
250225
}
251226

sql/api/src/main/scala/org/apache/spark/sql/types/GeometryType.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import org.json4s.JsonAST.{JString, JValue}
2121

2222
import org.apache.spark.SparkIllegalArgumentException
2323
import org.apache.spark.annotation.Experimental
24-
import org.apache.spark.sql.internal.types.SpatialReferenceSystemMapper
24+
import org.apache.spark.sql.internal.types.CartesianSpatialReferenceSystemMapper
2525

2626
/**
2727
* The data type representing GEOMETRY values which are spatial objects, as defined in the Open
@@ -153,7 +153,7 @@ object GeometryType extends SpatialType {
153153
* Constructors for GeometryType.
154154
*/
155155
def apply(srid: Int): GeometryType = {
156-
val crs = SpatialReferenceSystemMapper.get().getStringId(srid)
156+
val crs = CartesianSpatialReferenceSystemMapper.getStringId(srid)
157157
if (crs == null) {
158158
throw new SparkIllegalArgumentException(
159159
errorClass = "ST_INVALID_SRID_VALUE",
@@ -191,7 +191,7 @@ object GeometryType extends SpatialType {
191191
return GeometryType.MIXED_SRID
192192
}
193193
// For all other CRS values, we need to look up the corresponding SRID.
194-
val srid = SpatialReferenceSystemMapper.get().getSrid(crs)
194+
val srid = CartesianSpatialReferenceSystemMapper.getSrid(crs)
195195
if (srid == null) {
196196
// If the CRS value is not recognized, we throw an exception.
197197
throw new SparkIllegalArgumentException(

0 commit comments

Comments
 (0)