2
2
3
3
from __future__ import annotations
4
4
5
+ import collections
5
6
import functools
6
7
import os
7
8
import timeit
8
9
from collections .abc import Callable
9
10
from pathlib import Path
11
+ from tempfile import TemporaryFile as TempF
10
12
from typing import Union
11
13
12
- import shapefile as shp
14
+ import shapefile
13
15
14
16
# For shapefiles from https://github.com/JamesParrott/PyShp_test_shapefile
15
17
DEFAULT_PYSHP_TEST_REPO = (
def benchmark(
    name: str,
    run_count: int,
    func: Callable,
    col_widths: tuple,
    compare_to: float | None = None,
) -> float:
    """Time ``func`` over ``run_count`` runs and print one padded table row.

    A ``Running...`` placeholder is shown while timing is in progress, then
    backspaced over so the measured time overwrites it in place.

    Returns the total time taken, in seconds.

    ``compare_to`` is accepted but not used in this code path — presumably
    reserved for relative-performance output; confirm against callers.
    """
    placeholder = "Running..."
    # Right-align the benchmark name into the first column; flush so the
    # placeholder is visible while timeit is still running.
    print(f"{name:>{col_widths[0]}} | {placeholder}", end="", flush=True)
    time_taken = timeit.timeit(func, number=run_count)
    # Backspace over the placeholder so the result replaces it.
    print("\b" * len(placeholder), end="")
    time_suffix = " s"
    # Shrink the numeric field so value + suffix together fill column two.
    numeric_width = col_widths[1] - len(time_suffix)
    print(f"{time_taken:{numeric_width}.3g}{time_suffix}")
    return time_taken
49
# Cross-benchmark caches, keyed by shapefile path.  The reader benchmarks
# populate these (each file's field definitions and its ShapeRecords); the
# writer benchmarks replay them, so readers must run before writers.
fields = {}
shapeRecords = collections.defaultdict(list)
47
53
def open_shapefile_with_PyShp(target: Union[str, os.PathLike]):
    """Read every shape/record pair from ``target`` with PyShp.

    Side effect: caches the file's field list in ``fields`` and its
    ShapeRecords in ``shapeRecords`` so the writer benchmarks can
    replay them later.
    """
    with shapefile.Reader(target) as reader:
        fields[target] = reader.fields
        shapeRecords[target].extend(reader.iterShapeRecords())
58
+
59
+
60
def write_shapefile_with_PyShp(target: Union[str, os.PathLike]):
    """Write the cached fields and shape/record pairs for ``target``.

    Output goes to throwaway temporary .shp/.dbf/.shx files that are
    discarded on exit.  Requires open_shapefile_with_PyShp to have been
    run on ``target`` first, to populate ``fields`` / ``shapeRecords``.
    """
    with TempF("wb") as shp, TempF("wb") as dbf, TempF("wb") as shx:
        with shapefile.Writer(shp=shp, dbf=dbf, shx=shx) as writer:  # type: ignore [arg-type]
            for field_info in fields[target]:
                writer.field(*field_info)
            for pair in shapeRecords[target]:
                writer.shape(pair.shape)
                writer.record(*pair.record)
51
68
52
69
53
- READER_TESTS = {
70
+ SHAPEFILES = {
54
71
"Blockgroups" : blockgroups_file ,
55
72
"Edit" : edit_file ,
56
73
"Merge" : merge_file ,
@@ -60,24 +77,47 @@ def open_shapefile_with_PyShp(target: Union[str, os.PathLike]):
60
77
}
61
78
62
79
63
- def run (run_count : int ) -> None :
64
- col_width = (21 , 10 )
80
# Load files to avoid one off delays that only affect first disk seek
for file_path in SHAPEFILES.values():
    file_path.read_bytes()

# One pre-bound benchmark() call per test shapefile; run() later supplies
# run_count and col_widths to each partial.
reader_benchmarks = [
    functools.partial(
        benchmark,
        name=f"Read {test_name}",
        func=functools.partial(open_shapefile_with_PyShp, target=target),
    )
    for test_name, target in SHAPEFILES.items()
]

# Require fields and shapeRecords to first have been populated
# from data from previouly running the reader_benchmarks
writer_benchmarks = [
    functools.partial(
        benchmark,
        name=f"Write {test_name}",
        func=functools.partial(write_shapefile_with_PyShp, target=target),
    )
    for test_name, target in SHAPEFILES.items()
]
103
+
104
+
105
def run(run_count: int, benchmarks: list[Callable[[], None]]) -> None:
    """Print the results-table header, then execute each benchmark.

    Each entry in ``benchmarks`` is a functools.partial around benchmark()
    with ``name`` and ``func`` already bound; ``run_count`` and the shared
    column widths are injected here.
    """
    col_widths = (22, 10)
    # col_head[2] is not printed in this code path — presumably reserved
    # for a relative-performance column; confirm before removing.
    col_head = ("parser", "exec time", "performance (more is better)")
    print(f"Running benchmarks {run_count} times:")
    print("-" * col_widths[0] + "---" + "-" * col_widths[1])
    print(f"{col_head[0]:>{col_widths[0]}} | {col_head[1]:>{col_widths[1]}}")
    print("-" * col_widths[0] + "-+-" + "-" * col_widths[1])
    # Fix: loop variable renamed from `benchmark`, which shadowed the
    # module-level benchmark() function these partials wrap.
    for bench in benchmarks:
        bench(  # type: ignore [call-arg]
            run_count=run_count,
            col_widths=col_widths,
        )
80
117
81
118
82
119
if __name__ == "__main__":
    # Reader suite must go first: it fills the caches the writers replay.
    for heading, suite in (
        ("Reader tests:", reader_benchmarks),
        ("\n\nWriter tests:", writer_benchmarks),
    ):
        print(heading)
        run(1, suite)  # type: ignore [arg-type]
0 commit comments