-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathutils.jl
307 lines (240 loc) · 10.4 KB
/
utils.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
"""
$(TYPEDSIGNATURES)
Parse the problem instance found in `model_filepath`, defined in the UAI model
format, and return it as a `UAIModel`. Table entries are parsed as `factor_eltype`.
The UAI file formats are defined in:
https://uaicompetition.github.io/uci-2022/file-formats/
"""
function read_model_file(model_filepath::AbstractString; factor_eltype = Float64)::UAIModel
    # Load the whole UAI file as one string; splitting it into lines and parsing
    # them is delegated to `read_model_from_string`.
    contents = read(model_filepath, String)
    return read_model_from_string(contents; factor_eltype = factor_eltype)
end
"""
    read_model_from_string(str::AbstractString; factor_eltype = Float64)

Parse a problem instance given as the text `str` of a UAI model file and return
it as a `UAIModel`.

Blank lines are ignored. Of the remaining lines, the first one (network type) is
skipped; the following ones hold the number of variables, the variable
cardinalities, the number of factors, one scope line per factor and, finally,
the factor tables. Variable indices are converted from 0-based to 1-based, each
scope is sorted in ascending order and its table is permuted accordingly. Table
entries are parsed as `factor_eltype`.

Throws an `ArgumentError` if the number of tables found differs from the
declared number of factors.
"""
function read_model_from_string(str::AbstractString; factor_eltype = Float64)::UAIModel
    # Drop lines without visible content. Whitespace-only lines (such as the "\r"
    # left over from CRLF line endings) have to go as well, because the header
    # below is addressed by line position.
    lines = filter(line -> !all(isspace, line), split(str, "\n"))

    # lines[1] holds the network type, which is not needed here
    nvars = parse(Int, lines[2])
    cards = parse.(Int, split(lines[3]))
    ntables = parse(Int, lines[4])

    # One scope line per factor: "<nvars in scope> <var> <var> ..."
    scopes = map(lines[5:(5 + ntables - 1)]) do line
        entries = parse.(Int, split(line))
        # drop the leading count, convert to 1-based indices and reverse the
        # order (least significant variable first)
        reverse(entries[2:end] .+ 1)
    end

    # The tables may be spread over any number of lines, so flatten the rest of
    # the file into a single stream of whitespace-separated tokens.
    tokens = split(join(lines[(5 + ntables):end], ' '))

    # Each table is encoded as "<nelements> <value> <value> ..."
    flat_tables = Vector{factor_eltype}[]
    i = 1
    while i <= length(tokens)
        nelements = parse(Int, tokens[i])
        push!(flat_tables, parse.(factor_eltype, tokens[(i + 1):(i + nelements)]))
        i += nelements + 1
    end
    length(flat_tables) == ntables || throw(
        ArgumentError("expected $ntables factor tables, but found $(length(flat_tables))"),
    )

    # Give each flat table the shape dictated by the cardinalities of its scope
    tables = map(flat_tables, scopes) do table, scope
        reshape(table, Tuple(cards[scope]))
    end

    # Sort scope vars in ascending order and permute table dims accordingly
    scopes_sorted = map(sort, scopes)
    tables_sorted = map(tables, scopes_sorted, scopes) do table, sorted, scope
        permutedims(table, indexin(sorted, scope))
    end

    # Wrap the tables with their corresponding scopes in an array of Factor type
    factors = [
        Factor{factor_eltype, length(scope)}(Tuple(scope), table) for
        (scope, table) in zip(scopes_sorted, tables_sorted)
    ]
    return UAIModel(nvars, cards, factors)
end
"""
$(TYPEDSIGNATURES)
Return the observed variables and values in `evidence_filepath`. If the passed
file path is an empty string, return empty vectors.
The UAI file formats are defined in:
https://uaicompetition.github.io/uci-2022/file-formats/
"""
function read_evidence_file(evidence_filepath::AbstractString)
    isempty(evidence_filepath) && return Int64[], Int64[] # no evidence

    # Only lines with visible content matter: a trailing blank line must not be
    # mistaken for the evidence line, and a file without content means there is
    # no evidence at all.
    content_lines = filter(line -> !all(isspace, line), readlines(evidence_filepath))
    isempty(content_lines) && return Int64[], Int64[]

    # The evidence is taken from the last line, laid out as
    # "<nobsvars> <var> <val> <var> <val> ..."
    fields = parse.(Int, split(last(content_lines)))
    nobsvars, rest = fields[1], fields[2:end]
    iseven(length(rest)) || throw(
        ArgumentError("evidence line must hold (variable, value) pairs, got $(length(rest)) entries"),
    )

    # Column k of `observations` is the k-th (variable, value) pair
    observations = reshape(rest, 2, :)
    # Convert to 1-based indexing
    obsvars = observations[1, :] .+ 1
    obsvals = observations[2, :]
    nobsvars == length(obsvars) || throw(
        ArgumentError("evidence line declares $nobsvars observed variables, but lists $(length(obsvars))"),
    )
    return obsvars, obsvals
end
"""
$(TYPEDSIGNATURES)
Return the query variables in `query_filepath`. If the passed file path is an
empty string, return an empty vector.
The UAI file formats are defined in:
https://uaicompetition.github.io/uci-2022/file-formats/
"""
function read_query_file(query_filepath::AbstractString)
    isempty(query_filepath) && return Int64[]

    # The query is taken from the first line with visible content, so leading
    # blank lines are skipped; a file without content yields no query variables.
    lines = readlines(query_filepath)
    k = findfirst(line -> !all(isspace, line), lines)
    k === nothing && return Int64[]

    # Line layout: "<nqueryvars> <var> <var> ..."
    fields = parse.(Int, split(lines[k]))
    nqueryvars = fields[1]
    # Convert to 1-based indexing
    queryvars = fields[2:end] .+ 1
    nqueryvars == length(queryvars) || throw(
        ArgumentError("query line declares $nqueryvars variables, but lists $(length(queryvars))"),
    )
    return queryvars
end
"""
$(TYPEDSIGNATURES)
Parse the solution marginals of all variables from the UAI MAR solution file.
The order of the variables is the same as in the model definition.
The UAI file formats are defined in:
https://uaicompetition.github.io/uci-2022/file-formats/
"""
function parse_mar_solution_file(rawlines::Vector{String}; factor_eltype = Float64)
    # Everything of interest sits on the second line; its first token is dropped
    # and the remaining ones are parsed as `factor_eltype`.
    tokens = split(rawlines[2])
    numbers = parse.(factor_eltype, tokens[2:end])

    # `numbers` is a sequence of records "<count> <value> ... <value>": read the
    # count, take that many values as one marginal, then jump to the next record.
    marginals = Vector{factor_eltype}[]
    pos = 1
    while pos <= length(numbers)
        count = convert(Int, numbers[pos])
        push!(marginals, numbers[(pos + 1):(pos + count)])
        pos += count + 1
    end
    return marginals
end
"""
$(TYPEDSIGNATURES)
Parse a tree decomposition instance described in the PACE format.
The PACE file format is defined in:
https://pacechallenge.org/2017/treewidth/
"""
function read_td_file(td_filepath::AbstractString)
    # Keep only the lines that carry data: comment lines (starting with "c") and
    # blank lines are dropped, since the bags and edges below are located by
    # line position.
    lines = filter(readlines(td_filepath)) do line
        !startswith(line, "c") && !all(isspace, line)
    end
    isempty(lines) && throw(ArgumentError("no solution line found in $td_filepath"))

    # Extract number of bags, treewidth+1 and number of vertices from the
    # solution line, whose first two tokens are skipped
    header = split(lines[1])
    nbags, treewidth, nvertices = parse.(Int, header[3:5])
    length(lines) >= nbags + 1 || throw(
        ArgumentError("expected $nbags bag lines, but only $(length(lines) - 1) lines follow the solution line"),
    )

    # Parse bags and store them in a vector of vectors; the first two tokens of
    # each bag line are skipped and the rest are the bag's contents
    bags = map(lines[2:(nbags + 1)]) do line
        parse.(Int, split(line)[3:end])
    end

    # Parse edges and store them in a vector of vectors
    edges = map(lines[(nbags + 2):end]) do line
        parse.(Int, split(line))
    end
    length(edges) == nbags - 1 || throw(
        ArgumentError("expected $(nbags - 1) edges for $nbags bags, but found $(length(edges))"),
    )
    return nbags, treewidth, nvertices, bags, edges
end
# Apply `content` to every element of `x` by broadcasting and hand the result,
# together with `x` itself, to `asarray`.
# NOTE(review): presumably `asarray` re-wraps the broadcast result so that the
# output keeps the array shape of `x` — confirm against its definition.
function broadcasted_content(x)
    return asarray(content.(x), x)
end
"""
$TYPEDEF
Specify the UAI models from the artifacts.
It can be used as the input of [`read_model`](@ref).
### Fields
$TYPEDFIELDS
"""
struct ArtifactProblemSpec
# Path of the artifact; `problem_from_artifact` fills it with the result of
# `get_artifact_path`.
artifact_path::String
# Task name; the readers in this file use it as the subdirectory of
# `artifact_path` holding the problem files (and `read_solution` also as the
# extension of the solution file).
task::String
# Name of the problem set; together with `problem_id` it forms the file name
# "<problem_set>_<problem_id>.uai".
problem_set::String
# Numeric id of the problem within its problem set.
problem_id::Int
end
"""
$TYPEDSIGNATURES
Get artifact from artifact name, task name, problem set name and problem id.
"""
function problem_from_artifact(artifact_name::String, task::String, problem_set::String, problem_id::Int)
    # Resolve the artifact name to its path and bundle it with the identifiers
    # that select one problem inside the artifact.
    return ArtifactProblemSpec(get_artifact_path(artifact_name), task, problem_set, problem_id)
end
"""
$TYPEDSIGNATURES
Read a UAI model from an artifact.
"""
function read_model(problem::ArtifactProblemSpec; eltype=Float64)
    # Model files are stored as "<artifact_path>/<task>/<problem_set>_<problem_id>.uai"
    filename = string(problem.problem_set, "_", problem.problem_id, ".uai")
    model_filepath = joinpath(problem.artifact_path, problem.task, filename)
    return read_model_file(model_filepath; factor_eltype = eltype)
end
"""
$(TYPEDSIGNATURES)
Return the solution in the artifact.
The UAI file formats are defined in:
https://uaicompetition.github.io/uci-2022/file-formats/
"""
function read_solution(problem::ArtifactProblemSpec; factor_eltype=Float64)
    task = problem.task
    # Solution files sit next to the model and carry the task name as extension:
    # "<artifact_path>/<task>/<problem_set>_<problem_id>.uai.<task>"
    filename = string(problem.problem_set, "_", problem.problem_id, ".uai.", task)
    rawlines = readlines(joinpath(problem.artifact_path, task, filename))

    if task in ("MAR", "MAR2")
        return parse_mar_solution_file(rawlines; factor_eltype = factor_eltype)
    elseif task in ("MAP", "MMAP")
        # The last line holds the assignment; its first token is dropped and the
        # rest is returned as a vector of integers
        tokens = split(last(rawlines))
        return parse.(Int, tokens[2:end])
    elseif task == "PR"
        # The last line holds a single floating point number
        return parse(Float64, last(rawlines))
    end
    # Any other task name yields `nothing`
    return nothing
end
"""
$TYPEDSIGNATURES
"""
function read_evidence(problem::ArtifactProblemSpec)
    # Evidence files are stored as "<artifact_path>/<task>/<problem_set>_<problem_id>.uai.evid"
    filename = string(problem.problem_set, "_", problem.problem_id, ".uai.evid")
    obsvars, obsvals = read_evidence_file(joinpath(problem.artifact_path, problem.task, filename))
    # Map each observed variable to its observed value
    return Dict(zip(obsvars, obsvals))
end
"""
$TYPEDSIGNATURES
"""
function read_queryvars(problem::ArtifactProblemSpec)
    # Query files are stored as "<artifact_path>/<task>/<problem_set>_<problem_id>.uai.query"
    filename = string(problem.problem_set, "_", problem.problem_id, ".uai.query")
    return read_query_file(joinpath(problem.artifact_path, problem.task, filename))
end
end
"""
$TYPEDSIGNATURES
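Helper function that collects the problems found in the artifact `artifact_name`.
The result is a nested dictionary indexed by task name, then by problem set name,
then by problem id, whose values are the corresponding `ArtifactProblemSpec`s.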
"""
function dataset_from_artifact(artifact_name::AbstractString)
    # `get_artifact_path` only accepts a `String`, so convert explicitly to
    # honour the `AbstractString` signature of this function
    artifact_path = get_artifact_path(String(artifact_name))
    tasks = ["PR", "MAR", "MAR2", "MAP", "MMAP"]
    problems = Dict{String, Dict{String, Dict{Int, ArtifactProblemSpec}}}()
    # File names look like "<problem_set>_<problem_id>.uai". The problem set name
    # starts with a letter or underscore; the greedy second group leaves the last
    # "_<digits>" to be captured as the problem id.
    regex = r"^([a-zA-Z_][a-zA-Z0-9_]+)_(\d+)\.uai$"
    for task in tasks
        problems_task = Dict{String, Dict{Int, ArtifactProblemSpec}}()
        problems[task] = problems_task
        for filename in readdir(joinpath(artifact_path, task); sort = false)
            m = match(regex, filename)
            m === nothing && continue # not a model file (e.g. evidence or solution)
            problem_set, problem_id = String(m[1]), parse(Int, m[2])
            # Create the per-set dictionary on first use
            set = get!(problems_task, problem_set) do
                Dict{Int, ArtifactProblemSpec}()
            end
            # Keep an already registered spec, otherwise register a new one
            get!(set, problem_id) do
                ArtifactProblemSpec(artifact_path, task, problem_set, problem_id)
            end
        end
    end
    return problems
end
function get_artifact_path(artifact_name::String)
    # The artifacts are declared in the package's own Artifacts.toml
    toml_file = pkgdir(TensorInference, "Artifacts.toml")
    # Make sure the artifact is installed before asking for its location
    Pkg.ensure_artifact_installed(artifact_name, toml_file)
    # The artifact path is looked up through the artifact's hash
    return Pkg.Artifacts.artifact_path(Pkg.Artifacts.artifact_hash(artifact_name, toml_file))
end