-
Notifications
You must be signed in to change notification settings - Fork 48
Expand file tree
/
Copy pathcreateModels.R
More file actions
1126 lines (923 loc) · 45.5 KB
/
createModels.R
File metadata and controls
1126 lines (923 loc) · 45.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# TODO: enforce use of #iterator syntax in init section for vars with length > 1
# TODO: Check array tags used in the init and body sections for validity.
# TODO: Make sure that classify tags accurately interprets all tags and errors if uninterpretable tag.
# TODO: Allow for conditional tags to use a list, such as [[nclass#class == 5]]
#note that there's a bit of trickery in interpreting list tags
#they varnames are stored as only the prefix in the initCollection (no #iterator)
#and they are referenced in the body as var#iterator
#At this point, doesn't enforce proper use of iterator with a list
#setwd("C:/Users/Michael Hallquist/Documents/Automation_Sandbox")
#createModels("C:/Users/Michael Hallquist/Documents/Automation_Sandbox/LSPD Covariate Template.txt")
#system.time(createModels("C:/Users/Michael Hallquist/Documents/Automation_Sandbox/LSPD Template.txt"))
#createModels("C:/Users/Michael Hallquist/Documents/Automation_Sandbox/LSPD Template New Init.txt")
#createModels("C:/Users/Michael Hallquist/Documents/Automation_Sandbox/L2 Multimodel Template No iter.txt")
#need to sort out why is.na is working for lookupValue in replaceBodyTags
#in particular, why isn't the current value carrying over from the previous looping iteration?
#SOME THOUGHTS RE DOCUMENTATION
#foreach tags may only be with respect to an iterator... could not have some random foreach var
#' Split a data frame into a list by rows
#'
#' Takes a data frame and returns a list with an element for each row of the data frame.
#' This is an internal function.
#'
#' @param df An object inheriting from class \code{data.frame}
#'
#' @return A list where each element is a one row data frame
#' @keywords internal
#' @examples
#' # small example using built in data
#' MplusAutomation:::splitDFByRow(mtcars)
splitDFByRow <- function(df) {
stopifnot(inherits(df, "data.frame"))
lapply(seq.int(nrow(df)), function(i) df[i, ])
}
#' Classifies Tags
#'
#' Accepts a vector of tags to be classified as well as the iterators.
#' Tags are classified as \sQuote{iterator}, \sQuote{array}, \sQuote{conditional}, or
#' \sQuote{simple}. This is an internal function.
#'
#' @param tagVector A vector of tags to be classified
#' @param iteratorsVector a vector of the iterators to correctly classify tags
#' @return A character vector the same length as the vectors to be tagged
#' @keywords internal
classifyTags <- function(tagVector, iteratorsVector) {
#accepts a vector of tags to be classified
#also needs a vector of the iterators to correctly classify tags
#returns a vector of tag types
#creates an empty character vector of the same length as tagVector (each element defaults to "")
tagType <- vector(mode="character", length=length(tagVector))
#default to missing for tag type (replaced below)
#tagData$tagType <- NA_character_
# named list of the regexs to match for
# the names of each elements are used later to classify tags
RegEx <- list(
iterator = paste0("\\[\\[\\s*(", paste(iteratorsVector, collapse="|"), ")\\s*\\]\\]"),
array = paste0("\\[\\[\\s*\\b([\\w\\.]+)#(", paste(iteratorsVector, collapse="|"), ")\\b\\s*\\]\\]"),
#optional forward slash for closing tags
#could the alternation syntax be problematic if variable names overlaps
#(e.g., x matching xy)? Use word boundaries?
#any reason to limit this to iterators?!
conditional = paste0("\\[\\[\\s*/*(", paste(iteratorsVector, collapse="|"), ")\\s*[!><=]+\\s*\\d+\\s*\\]\\]"),
#simple tags -- not wrt iterators, not conditional
#use negative lookahead to skip tags that are iterators
simple = paste0("\\[\\[\\s*(?!", paste(iteratorsVector, collapse="|"), ")[\\w+\\.]+\\s*\\]\\]"))
Positions <- lapply(RegEx, grep, x = tagVector, perl = TRUE)
# assert no duplicates, i.e., tag cannot match multiples classes
stopifnot(!any(duplicated(unlist(Positions))))
for (n in names(Positions)) {
tagType[Positions[[n]]] <- n
}
return(tagType)
}
#' Get Initial Tags
#'
#' An internal function
#'
#' @param initCollection A list?
#' @return The initMatches
#' @keywords internal
getInitTags <- function(initCollection) {
initMatches <- c()
for (i in 1:length(initCollection)) {
if (storage.mode(initCollection[[i]]) == "character") {
matches <- friendlyGregexpr("\\[\\[\\s*[\\s\\w=><!#/]+\\s*\\]\\]", initCollection[[i]], perl=T)
#if there are matches for this item, add its position in the list pos
#the idea is that the list has elements and the elements can be vectors
#thus, a match may occur for initCollection[[5]][3] if the fifth element of the list is a vector
#and the match is the third element.
if (!is.null(matches)) matches$listpos <- i
initMatches <- rbind(initMatches, matches)
}
}
#successfully creates a data.frame of the sort below.
# element start end tag listpos
#1 1 1 11 [[classes]] 14
#2 1 19 38 [[groupnames#group]] 14
#3 1 40 63 [[outcomenames#outcome]] 14
#4 1 65 84 [[modelnames#model]] 14
#5 1 85 112 [[zeroclassnames#zeroclass]] 14
#6 1 6 29 [[outcomenames#outcome]] 15
#7 1 31 50 [[groupnames#group]] 15
#8 1 73 92 [[modelnames#model]] 15
#9 1 1 9 [[hello]] 17
#10 2 1 10 [[hello2]] 17
#classify tags in terms of simple, array, iterator, conditional, foreach
if (!is.null(initMatches) && nrow(initMatches) > 0) {
initMatches$tagType <- classifyTags(initMatches$tag, initCollection$iterators)
#chop off the [[ ]] portion of the tags, along with any leading or trailing space
#this makes it easier to use the sub function to update current values
initMatches$tag <- sapply(initMatches$tag, function(tag) {
return(sub("\\[\\[\\s*([\\s\\w=><!#/]+)\\s*\\]\\]", "\\1", tag, perl=TRUE))
})
}
#return empty data frame if no matches
if (is.null(initMatches)) return(data.frame())
else return(initMatches)
}
#' Parses tags in the body section
#'
#' Parses tags in the body section (character vector) and
#' init collection (list of vars defined in the init section).
#' This is an internal function.
#'
#' @param bodySection The body
#' @param initCollection The initial collection
#' @return A list with three elements, where each list represents the location,
#' start character, end character, tag type, etc. of each tag.
#' * `initTags`: initMatches
#' * `bodyTags`: bodyMatches
#' * `bodyText`: bodySection
#' @keywords internal
parseTags <- function(bodySection, initCollection) {
#first handle init tags
initMatches <- getInitTags(initCollection)
initMatches$currentValue <- NA_character_
bodyTagRegex <- "\\[\\[\\s*[\\s\\w=><!#/]+\\s*\\]\\]"
bodyMatches <- friendlyGregexpr(bodyTagRegex, bodySection, perl=TRUE)
if (is.null(bodyMatches)) stop("No tags found in body section of template file.")
bodyMatches$tagType <- classifyTags(bodyMatches$tag, initCollection$iterators)
#okay, now every tag is categorized
#the notion here is to substitute in the running value for a given variable
#then we'll do a mass substitute for each model
bodyMatches$currentValue <- NA_character_
#chop off the [[ ]] portion of the tags, along with any leading or trailing space
bodyMatches$tag <- sapply(bodyMatches$tag, function(tag) {
return(sub("\\[\\[\\s*([\\s\\w=><!#/]+)\\s*\\]\\]", "\\1", tag, perl=TRUE))
})
#return a three-element list with constituent data frames for init and body tags.
return(list(initTags=initMatches, bodyTags=bodyMatches, bodyText=bodySection))
}
#' Create Mplus Input Files from Template
#'
#' The \code{createModels} function processes a single Mplus template file and creates a group of related
#' model input files. Definitions and examples for the template language are provided in the MplusAutomation
#' vignette and are not duplicated here at the moment. See this documentation: \code{vignette("Vignette", package="MplusAutomation")}
#'
#' @param templatefile The filename (absolute or relative path) of an Mplus template file to be processed. Example \dQuote{C:/MplusTemplate.txt}
#' @return No value is returned by this function. It is solely used to process an Mplus template file.
#' @author Michael Hallquist
#' @keywords interface
#' @export
#' @examples
#' \dontrun{
#' createModels("L2 Multimodel Template No iter.txt")
#' }
createModels <- function(templatefile) {
# should probably have the function cd to wherever the template file is located (if given as abs path)
# todo: allow for direct runs?
if (!file.exists(templatefile)) stop("Template file not found.")
readfile <- scan(templatefile, what="character", sep="\n", strip.white=FALSE, blank.lines.skip=FALSE, quiet=TRUE)
# divide into init versus body
startinit <- grep("[[init]]", readfile, fixed=T)
endinit <- grep("[[/init]]", readfile, fixed=T)
if (length(startinit) != 1 || length(endinit) != 1) {
stop("Unable to find init section in template file.")
}
# extract init section
initSection <- readfile[(startinit+1):(endinit-1)]
# extract body section
bodySection <- readfile[(endinit+1):length(readfile)]
# convert the init text into a list object containing parsed init instructions
initCollection <- processInit(initSection)
templateTags <- parseTags(bodySection, initCollection)
# lookup values for simple tags, which won't vary by iterator
templateTags <- lookupSimpleTags(templateTags, initCollection)
# kick off the recursive replace
if (length(initCollection$iterators) > 0) {
recurseReplace(templateTags, initCollection)
}
}
#' Simple tag lookup
#'
#' The purpose of this function is to set the currentValue column
#' for the bodyTags and initTags data.frames for simple tags only.
#' Most values will be replaced at the bottom level of recursion,
#' but simple tags do not change over iterations, so can be set one time.
#'
#' @param templateTags The template tags
#' @param initCollection The initial collection
#' @return A tag.
#' @keywords internal
lookupSimpleTags <- function(templateTags, initCollection) {
# #locate simple tags in body
# simpleBodyPositions <- which(templateTags$bodyTags$tagType=="simple")
#
# #replace tag with value
# templateTags$bodyTags$currentValue[simpleBodyPositions] <- sapply(templateTags$bodyTags$tag[simpleBodyPositions],
# function(value) {
# currentValue <- eval(parse(text=paste("initCollection$", value, sep="")))
# if (regexpr("\\[\\[\\s*[\\s\\w=><!#/]+\\s*\\]\\]", currentValue, perl=TRUE) > 0) {
# #The replacement tag itself contains additional tags.
# #Thus, not a simple replacement. This replacement needs to be deferred until
# #we have iterated to the bottom of the tree and have all needed information
# #set a deferred value to be replace later
# currentValue <- "..deferred.."
# }
# return(currentValue)
# })
#locate simple tags in init
simpleInitPositions <- which(templateTags$initTags$tagType=="simple")
templateTags$initTags$currentValue[simpleInitPositions] <- sapply(
templateTags$initTags$tag[simpleInitPositions],
function(value) {
return(eval(parse(text=paste0("initCollection$", value))))
})
return(templateTags)
}
#' Updates current values
#'
#' Body tags currentValues are substituted at the bottom-most level
#' after init collection is finalized (recursively process any nested tags)
#'
#' @param templateTags The template tags
#' @param initCollection Initial collection
#' @return Updated current value or the original if no match.
#' @keywords internal
updateCurrentValues <- function(templateTags, initCollection) {
#Better idea: only updateCurrentValues for init tags collection
#And only update init collection for the respective iterator
#Only need to update values for a given iterator....
#The issue is that values for a given iterator shouldn't change when another iterator is active
#need to replace array and iterator tags for this iterator
#locate iterator tags in init
initIteratorPositions <- which(
templateTags$initTags$tagType=="iterator" &
templateTags$initTags$tag == initCollection$curIteratorName)
#set the current value to the position in the looping process for this iterator
templateTags$initTags$currentValue[initIteratorPositions] <- initCollection$curItPos[initCollection$curIteratorDepth]
#allow for iterator lookups here... just to an is.na check in the replaceBodyTags
#locate iterator tags in body
bodyIteratorPositions <- which(
templateTags$bodyTags$tagType == "iterator" &
templateTags$bodyTags$tag == initCollection$curIteratorName)
templateTags$bodyTags$currentValue[bodyIteratorPositions] <- initCollection$curItPos[initCollection$curIteratorDepth]
# Next, handle array tags
# figure out the iterator for each array tag and only select
# those that are relevant to the current iterator
initArrayPositions <- which(templateTags$initTags$tagType=="array")
# only update values if any array tags are found
# (generates an error otherwise because of weird format from splitter_a
if (length(initArrayPositions) > 0) {
divideByRow <- splitDFByRow(templateTags$initTags[initArrayPositions,])
#for each element of the list, check for a match with this iterator and return the value of interest
#if the array tag is not for this iterator, return the current value unchanged
templateTags$initTags$currentValue[initArrayPositions] <- unlist(sapply(divideByRow,
function(row) {
split <- strsplit(row$tag, split="#", fixed=TRUE)[[1]]
if (length(split) != 2) stop("array tag missing iterator: ", row$tag)
if (split[2] == initCollection$curIteratorName) {
currentValue <- eval(parse(text =
paste0("initCollection$", split[1], "[",
initCollection$curItPos[initCollection$curIteratorDepth], "]")))
if (is.null(currentValue)) {
stop("When replacing tag: ", row$tag, ", could not find corresponding value.")
}
return(currentValue)
} else {
# return unchanged current value if not this iterator
return(row$currentValue)
}
}))
}
# for now, we don't use any current values for body tags collection (handled at bottom)
# #conduct same process for body tags: locate array tags and update values for this iterator
# bodyArrayPositions <- which(templateTags$bodyTags$tagType=="array")
#
# #use plyr's splitter_a function to divide dataset by row (builds a big list)
# divideByRow <- splitter_a(templateTags$bodyTags[bodyArrayPositions,], 1)
#
# #for each element of the list, check for a match with this iterator and return the value of interest
# templateTags$bodyTags$currentValue[bodyArrayPositions] <- unlist(sapply(divideByRow,
# function(row) {
# split <- strsplit(row$tag, split="#", fixed=TRUE)[[1]]
# if (length(split) != 2) stop("array tag missing iterator: ", row$tag)
#
# if (split[2] == initCollection$curIteratorName) {
# currentValue <- eval(parse(text=paste("initCollection$", split[1], "[", initCollection$curItPos[initCollection$curIteratorDepth], "]", sep="")))
# if (regexpr("\\[\\[\\s*[\\s\\w=><!#/]+\\s*\\]\\]", currentValue, perl=TRUE) > 0) {
# #The replacement tag itself contains additional tags.
# #Thus, not a simple replacement. This replacement needs to be deferred until
# #we have iterated to the bottom of the tree and have all needed information
# #set a deferred value to be replace later
# currentValue <- "..deferred.."
# }
# if (is.null(currentValue)) stop("When replacing tag: ", row$tag, ", could not find corresponding value.")
# return(currentValue)
# }
# else return(row$currentValue) #return unchanged current value if not this iterator
# }))
return(templateTags)
}
#' Recursive replace
#'
#' To do: fill in some details
#'
#' @param templateTags The template tags
#' @param initCollection The list of all arguments parsed from the init section
#' @param curiterator An integer that tracks of the depth of recursion through the iterators. Defaults to 1.
#' @return Does not look like it returns anything
#' @keywords internal
recurseReplace <- function(templateTags, initCollection, curiterator=1L) {
#bodySection is the character vector representing each line of the body section
#bodyTags is a data.frame documenting the location and type of all tags in bodySection
#initTags is a data.frame documenting the location and type of all tags in initCollection
if (!is.list(initCollection)) {
stop("Argument list passed to recurseReplace is not a list")
}
# check that curiterator is indeed a whole number
stopifnot(curiterator %% 1 == 0)
thisIterator <- initCollection$iterators[curiterator]
#set the current iterator for the collection (used by replaceTags)
initCollection$curIteratorName <- thisIterator
initCollection$curIteratorDepth <- curiterator
#would it work better to use a named array here?
#like curItVals <- c(1, 3, 5, 2) for iterators a, b, c, d
#then names(curItVals) <- c("a", "b", "c", "d")
for (i in initCollection[[thisIterator]]) {
#set the current position within this iterator for use in replace tags
#create a vector of iterator positions for use in replaceTags
#initCollection$curItPos[curiterator] <- i
#add the iterator name to the vector of iterator positions
#this has the same effect as above (appending as it recurses), but allows for name-based lookup
initCollection$curItPos[thisIterator] <- i
#print(paste("current iterator is:", thisIterator, ", position:", as.character(i)))
#process foreach commands
#For now, take this out
#bodySection <- processForEachTags(bodySection, initCollection)
#update the current values for this iterator and this iteration
#this applies for every iterator and iteration, not just processing
#at the deepest level. The function only updates array and iterator
#tags that match this iterator, thus minimizing redundant work.
#the latest is that only init collection tags will be updated
#then body tags are replaced at the bottom level after init collection is finalized
templateTags <- updateCurrentValues(templateTags, initCollection)
if (curiterator < length(initCollection$iterators)) {
#if not at deepest level, recurse to the next level by adding 1 to the iterator
#NOTE to self: consider adding a "foreachReplacements" collection to templateTags
#that contains the expansions of these tags (appended per iteration)
#this avoids having to think about reparsing the tags based on new code created by foreach
recurseReplace(templateTags, initCollection, curiterator = curiterator+1)
} else {
#we have reached the bottom of the iteration tree
#simple, array, and iterator tags should be up to date in the templateTags collection
#first delete conditional tags from the body section, reduce subsequent processing burden
#need to return templateTags collection from processConditionalTags (including bodyText)
#need to use a copy of templateTags to avoid it affecting subsequent loop iterations
finalTemplateTags <- processConditionalTags(templateTags, initCollection)
#the body section to write is stored in the templateTags collection
toWrite <- finalTemplateTags$bodyText
#create a separate initCollection with the appropriate values substituted.
finalInitCollection <- replaceInitTags(finalTemplateTags$initTags, initCollection)
#finalize init collection values (in cases of nested tags)
#wades through init collection for any remaining tags and replaces them
finalInitCollection <- finalizeInitCollection(finalInitCollection)
#update bodySection with tag values from finalized init tags
toWrite <- replaceBodyTags(toWrite, finalTemplateTags$bodyTags, finalInitCollection)
filename <- finalInitCollection$filename
cat(paste("writing file: ", filename, "\n", sep=""))
curdir <- getwd()
#figure out the output directory
outputDir <- finalInitCollection$outputDirectory
if (!file.exists(outputDir)) {
dir.create(outputDir, recursive=TRUE)
}
setwd(outputDir)
#make sure that no line is more than 90 chars
toWrite <- unlist(lapply(toWrite, function(line) {
if (nchar(line) > 90) {
strwrap(line, width=85, exdent=5)
} else {
line
}
}))
writeLines(toWrite, con = filename, sep = "\n")
setwd(curdir)
}
}
}
#' Replace Init Tags
#'
#' To do: fill in some details
#'
#' @param initTags Init tags
#' @param initCollection The list of all arguments parsed from the init section
#' @return Returns updated initCollection
#' @keywords internal
replaceInitTags <- function(initTags, initCollection) {
targetRows <- which(initTags$tagType %in% c("simple", "iterator", "array"))
targetTags <- initTags[targetRows, ]
targetTags$rownumber <- 1:nrow(targetTags)
#going to re-use this chunk in finalizeSubstitutions, so functionalize...
#consider the looping replacement here
for (i in 1:nrow(targetTags)) {
row <- targetTags[i, ]
stringToChange <- initCollection[[row$listpos]][row$element]
if(row$start > 1) {
preTag <- substr(stringToChange, 1, row$start - 1)
} else {
preTag <- ""
}
if (row$end < nchar(stringToChange)) {
postTag <- substr(stringToChange, row$end+1, nchar(stringToChange))
} else {
postTag <- ""
}
initCollection[[row$listpos]][row$element] <- paste0(preTag, row$currentValue, postTag)
subsequentRows <- which(
targetTags$rownumber > i &
targetTags$listpos == row$listpos &
targetTags$element == row$element)
if (length(subsequentRows > 0)) {
#need to offset subsequent start/stops by the difference
#between the tag and replacement lengths
diffLength <- nchar(row$currentValue) - (row$end - row$start + 1)
#update rows in targetTags that have additional tags on the same row
#need to offset by the diffLength
targetTags[subsequentRows,"start"] <- targetTags[subsequentRows,"start"] + diffLength
targetTags[subsequentRows,"end"] <- targetTags[subsequentRows,"end"] + diffLength
}
}
#refresh the initTags collection with the replaced values
#need to dump the rownumber to align the data.frames
# (templateTags doesn't have a rownumber field)
targetTags$rownumber <- NULL
initTags[targetRows, ] <- targetTags
#return(initTags)
#browser()
return(initCollection)
}
#' Replace Body Tags
#'
#' To do: fill in some details
#'
#' @param bodySection character vector of body section of Mplus syntax
#' @param bodyTags collection of tags used inside of the template body
#' @param initCollection The list of all arguments parsed from the init section
#' @return Returns updated bodySection
#' @keywords internal
replaceBodyTags <- function(bodySection, bodyTags, initCollection) {
if (length(bodySection) <= 0) stop("Empty body section")
#need to ponder issues where a replaced tag still contains another tag
#hmm, actually seems futile to do a replacement in the init section
#these are already set by update values.... won't affect the body section
# so we need to finalize the tag substitutions...
# the idea is that we need to convert all tags to literals in the initCollection
# once this is done, then we replace all deferred tags in the body section
#don't update current values if initcollection value contains any tag
#if so, replace at the last minute (check this in Init)
#set a "deferred" status in currentValue if replacement contains tags
targetTags <- with(bodyTags, bodyTags[tagType %in% c("simple", "iterator", "array"), ])
targetTags$rownumber <- 1:nrow(targetTags)
#print(targetTags)
#stop("test")
#could improve this by replacing identical tags at once
#like ddply by the tag
for (i in 1:nrow(targetTags)) {
row <- targetTags[i, ]
stringToChange <- bodySection[row$element]
if (row$start > 1) {
preTag <- substr(stringToChange, 1, row$start-1)
} else {
preTag <- ""
}
if (row$end < nchar(stringToChange)) {
postTag <- substr(stringToChange, row$end+1, nchar(stringToChange))
} else {
postTag <- ""
}
#lookup value as needed
if (is.na(row$currentValue)) {
row$currentValue <- lookupValue(row$tag, row$tagType, initCollection)
}
#row$currentValue <- lookupValue(row$tag, row$tagType, initCollection)
bodySection[row$element] <- paste0(preTag, row$currentValue, postTag)
#need to offset subsequent start/stops by the difference between the tag and replacement lengths
diffLength <- nchar(row$currentValue) - (row$end - row$start + 1)
subsequentRows <- which(
targetTags$rownumber > i &
targetTags$element == row$element)
if (length(subsequentRows > 0)) {
#update rows in targetTags that have additional tags on the same row
#need to offset by the diffLength
targetTags[subsequentRows,"start"] <- targetTags[subsequentRows,"start"] + diffLength
targetTags[subsequentRows,"end"] <- targetTags[subsequentRows,"end"] + diffLength
}
}
return(bodySection)
}
#' Lookup values
#'
#' To do: fill in some details
#'
#' @param tag name of tag for which we want to know the current value
#' @param tagType type of tag (simple, array, etc.) for the tag to lookup
#' @param initCollection The list of all arguments parsed from the init section
#' @return Current value
#' @keywords internal
lookupValue <- function(tag, tagType, initCollection) {
#redundant with finalize code... re-use
if (missing(tag)) stop("No tag provided")
if (missing(tagType)) stop("No tag type provided")
if (tagType == "simple") {
return(eval(parse(text=paste0("initCollection$", tag))))
}
else if (tagType == "array") {
split <- strsplit(tag, split="#", fixed=TRUE)[[1]]
if (length(split) != 2) stop("array tag missing iterator: ", row$tag)
#find where in the iterator depth this iterator lies
#iteratorPosition <- grep(paste("\\b", split[2], "\\b", sep=""), initCollection$iterators, perl=T)
#use named array look-up
iteratorPosition <- initCollection$curItPos[split[2]]
#note that the padding performed by processInit should handle non-contiguous iteratorPosition values here.
currentValue <- eval(parse(text=paste0("initCollection$", split[1], "[", iteratorPosition, "]")))
if (is.null(currentValue)) {
stop("When replacing tag: ", row$tag, ", could not find corresponding value.")
}
return(currentValue)
}
}
#' Finalize Init Collection
#'
#' this function should handle initTags that still contain tags
#' once the initCollection is finalized, then process the deferred body tags
#' the notion is that the substitutions will be handled in an inefficient manner -- using lots
#' of regular expression parsing, not using the matched tags data.frame
#'
#' we only need to handle simple and array tags
#' iterators should always be integers
#' foreach and conditional are not relevant
#'
#' iterate over init tags until no tags are left
#' here, the init collection should already have had most of its tags substituted by
#' replaceInitTags above.
#'
#' @param initCollection The list of all arguments parsed from the init section
#' @return Finalized initCollection
#' @keywords internal
finalizeInitCollection <- function(initCollection) {
tagsRemain <- TRUE
numIterations <- 1
while(tagsRemain) {
initTags <- getInitTags(initCollection)
if (nrow(initTags) == 0) break #if no tags found, then substitution complete
#update: iterator tags can be nested within other tag types and not updated until here.
initTags <- with(initTags, initTags[tagType %in% c("simple", "iterator", "array"),])
if (nrow(initTags) == 0) break #some tags, but none of the simple or array variety, which we want to replace
divideByRow <- splitDFByRow(initTags)
#for each element of the list, check for a match with this iterator and return the value of interest
initTags$currentValue <- unlist(sapply(divideByRow,
function(row) {
if (row$tagType == "simple") {
return(eval(parse(text=paste0("initCollection$", row$tag))))
}
else if (row$tagType == "iterator") {
#an iterator tag was nested
return(initCollection$curItPos[row$tag])
}
else if (row$tagType == "array") {
split <- strsplit(row$tag, split="#", fixed=TRUE)[[1]]
if (length(split) != 2) stop("array tag missing iterator: ", row$tag)
#find where in the iterator depth this iterator lies
#iteratorPosition <- grep(paste("\\b", split[2], "\\b", sep=""), initCollection$iterators, perl=T)
#use named array look-up
iteratorPosition <- initCollection$curItPos[split[2]]
currentValue <- eval(parse(text=paste0("initCollection$", split[1], "[", iteratorPosition, "]")))
if (is.null(currentValue)) {
stop("When replacing tag: ", row$tag, ", could not find corresponding value.")
}
return(currentValue)
}
}
))
#now we have a list of curent values for any init tags
#and we want to update the init collection with their values... just as with above.
initCollection <- replaceInitTags(initTags, initCollection)
numIterations <- numIterations + 1
if (numIterations > 20) stop("While replacing tags in init section, looped over variables 20 times without completing substitutions.\n Check for circular definitions within init section.")
}
#browser()
return(initCollection)
}
#' Evaluate Conditional
#'
#' Note that at thie point the comparator must be a number (not another variable).
#'
#' @param tag A tag
#' @param initCollection The list of all arguments parsed from the init section
#' @return A boolean value indicating whether the conditional is true
#' @keywords internal
evaluateConditional <- function(tag, initCollection) {
#evaluate whether tag is true
#first divide up into name, operator, and value
regexp <- "(\\w+)\\s*([!><=]+)\\s*(\\w+)"
conditional <- unlist(strapply(tag, regexp, c))
if (length(conditional) < 3) {
stop("Error in conditional tag: does not contain variable, operator, and value. Tag = ", tag)
}
#convert simple equals to logical equals
if (conditional[2] == "=") conditional[2] <- "=="
#obsolete b/c using named array
#iteratorPosition <- grep(paste("\\b", conditional[1], "\\b", sep=""), initCollection$iterators, perl=T)
#return a boolean value indicating whether the conditional is true
return(eval(parse(text=paste0("initCollection$curItPos[conditional[1]]", conditional[2], conditional[3]))))
}
#' Clip String
#'
#' To do: add any details.
#'
#' @param string A string to be clipped
#' @param start The character position to start at
#' @param end The character position to end at
#' @return A string from start to end
#' @keywords internal
clipString <- function(string, start, end) {
#if the string is shorter than the length of the clip, then nothing remains
if (nchar(string) <= end-start+1) return("")
if(start > 1) preString <- substr(string, 1, start-1)
else preString <- ""
if(end < nchar(string)) postString <- substr(string, end+1, nchar(string))
else postString <- ""
return(paste0(preString, postString))
}
#' Process Conditional Tags
#'
#' To do: add details.
#'
#' @param templateTags A template tag
#' @param initCollection The list of all arguments parsed from the init section
#' @return Processed templateTags
#' @keywords internal
processConditionalTags <- function(templateTags, initCollection) {
#require(gsubfn) #moving to import strategy
#find all conditional tags in the body section and remove them from the templateTags and bodyText pieces...
conditionalTagIndices <- which(templateTags$bodyTags$tagType=="conditional")
#return templateTags unharmed if there are no conditional tags (creates error below otherwise)
if (length(conditionalTagIndices) == 0) return(templateTags)
openClose <- ifelse(substr(templateTags$bodyTags$tag[conditionalTagIndices], 1, 1)=="/", "close", "open")
allOpen <- conditionalTagIndices[openClose=="open"]
bodyTagsToDrop <- c()
bodyLinesToDrop <- c()
for (i in allOpen) {
#should be able to decide whether to skip an iteration if the affected lines are already in bodyLinesToDrop
thisTag <- templateTags$bodyTags$tag[i]
#evaluate truth of conditional
conditionalTrue <- evaluateConditional(thisTag, initCollection)
#only look for closing tags after the opening and accept the first exact match
close <- conditionalTagIndices[
templateTags$bodyTags$tag[conditionalTagIndices] == paste0("/", thisTag) &
templateTags$bodyTags$element[conditionalTagIndices] >= templateTags$bodyTags$element[i]][1]
sameLine <- FALSE
#in case of same line match, check to make sure close follows opening on that line
#the conditions above could match when a closing tag precedes opening tag on the same line
if (templateTags$bodyTags$element[close]==templateTags$bodyTags$element[i]) {
sameLine <- TRUE
close <- conditionalTagIndices[
openClose == "close" &
templateTags$bodyTags$tag[conditionalTagIndices] == paste0("/", thisTag) &
templateTags$bodyTags$element[conditionalTagIndices] == templateTags$bodyTags$element[i] &
templateTags$bodyTags$start[conditionalTagIndices] > templateTags$bodyTags$end[i]][1]
if (!close > 0) stop("Could not find closing tag for conditional:", thisTag)
}
#skip this iteration if the opening and closing tags in question are already in the drop pile
#these lines (and the lines between, if necessary) will already be dropped, so don't process
if (templateTags$bodyTags$element[i] %in% bodyLinesToDrop &&
templateTags$bodyTags$element[close] %in% bodyLinesToDrop) next
#first check for tags to drop from the bodyTags collection (don't want these parsed later)
if (conditionalTrue) {
#only remove starting and ending tags
bodyTagsToDrop <- c(bodyTagsToDrop, i, close)
} else {
#if conditional false, then remove all tags between conditional tags
#first, dump all lines in the bodyTags section that fall between elements
bodyTagsToDrop <- c(bodyTagsToDrop, i:close)
#conditional is not true
#so dump the tags and all space between
#really, the only difference here from the calculation below is that
#bodyLinesToDrop should encompass the space between opening and closing
#and the clips below should dump the rest of the line when multiple tags on same line
#no need to rewrite code for clipping out tags
#don't clip the tag lines themselves because this is handled below (whole line goes if nchar <= 0)
#print(bodyLinesToDrop)
#browser()
#only drop lines between matching open/close tags if not on the same line
#otherwise, the clipping code below handles everything correctly
#if on the same line, then element + 1:close - 1 will lead to something like 58:56, which is bad
if (!sameLine) {
bodyLinesToDrop <- c(bodyLinesToDrop,
(templateTags$bodyTags$element[i]+1):(templateTags$bodyTags$element[close]-1))
}
}
#then dump lines from the syntax section itself
#handle same line issues, then delete whole lines between tags
#as with replaceTags substitution, need to handle situation where tag is on line with other stuff
#thus, need to update bodyTags collection, too to reflect new start/stop positions
#when the conditional is true, just remove the tags and leave the syntax
#dump the opening tag on the line
#if the conditional is true, just use the last pos of the opening tag for the clip
if (conditionalTrue) endPos <- templateTags$bodyTags$end[i]
#want to clip the rest of the line
else if (!conditionalTrue && sameLine == FALSE) endPos <- nchar(templateTags$bodyText[templateTags$bodyTags$element[i]])
#just clip anything between open tag and first element of close tag (close tag itself handled by code below)
else if (!conditionalTrue && sameLine == TRUE) endPos <- templateTags$bodyTags$start[close] - 1
templateTags$bodyText[templateTags$bodyTags$element[i]] <- clipString(
templateTags$bodyText[templateTags$bodyTags$element[i]],
templateTags$bodyTags$start[i], endPos)
if (nchar(trimSpace(templateTags$bodyText[templateTags$bodyTags$element[i]])) <= 0) {
#no characters remain, so dump line
bodyLinesToDrop <- c(bodyLinesToDrop, templateTags$bodyTags$element[i])
} else {
#if there is other text on this line, it may contain tags that need to be adjusted given the clip
subsequentTags <- which(
templateTags$bodyTags$element == templateTags$bodyTags$element[i] &
templateTags$bodyTags$start > endPos)
if (length(subsequentTags > 0)) {
#calculate length of opening tag
openLength <- endPos - templateTags$bodyTags$start[i] + 1
templateTags$bodyTags[subsequentTags,"start"] <- templateTags$bodyTags[subsequentTags,"start"] - openLength
templateTags$bodyTags[subsequentTags,"end"] <- templateTags$bodyTags[subsequentTags,"end"] - openLength
#print("openlength")
#browser()
}
}
#okay, we've handled issues related to the opening tag, now handle closing tag
#for the closing tag, just need to clip the tag itself (spacing handled above)
templateTags$bodyText[templateTags$bodyTags$element[close]] <- clipString(
templateTags$bodyText[templateTags$bodyTags$element[close]],
templateTags$bodyTags$start[close],
templateTags$bodyTags$end[close])
if (nchar(trimSpace(templateTags$bodyText[templateTags$bodyTags$element[close]])) <= 0) {
#no characters remain, so dump line
bodyLinesToDrop <- c(bodyLinesToDrop, templateTags$bodyTags$element[close])
} else {
#only look for additional tags if nchar > 0
#redundant code with above... must be a way to consolidate
#if there is other text on then end line, it may contain tags that need to be adjusted given the clip
subsequentTags <- which(
templateTags$bodyTags$element == templateTags$bodyTags$element[close] &
templateTags$bodyTags$start > templateTags$bodyTags$end[close])
if (length(subsequentTags > 0)) {
closeLength <- templateTags$bodyTags$end[close] - templateTags$bodyTags$start[close] + 1
templateTags$bodyTags[subsequentTags,"start"] <- templateTags$bodyTags[subsequentTags,"start"] - closeLength
templateTags$bodyTags[subsequentTags,"end"] <- templateTags$bodyTags[subsequentTags,"end"] - closeLength
#print("closelength")
#browser()
}
}
}
#print(bodyLinesToDrop)
#print(bodyTagsToDrop)
#drop all bad body lines
#only keep unique bodyTagsToDrop (and sort for clarity in debugging)
#hard to imagine that bodyTagsToDrop could be NULL at this point (given the return when no conditional tags above)
#but if it were NULL, the bodyTags collection would be dumped by the NULL*-1 evaluation
if (!is.null(bodyTagsToDrop)) {
bodyTagsToDrop <- sort(unique(bodyTagsToDrop))
templateTags$bodyTags <- templateTags$bodyTags[bodyTagsToDrop*-1, ]
}
#need to check whether bodyLinesToDrop is NULL. If it is, then we must not attempt the subset
#(it will delete the whole character vector)
if (!is.null(bodyLinesToDrop)) {
#only retain unique bodyLinesToDrop (in theory handled by the "next" code above, but good to be safe)
bodyLinesToDrop <- sort(unique(bodyLinesToDrop))
templateTags$bodyText <- templateTags$bodyText[bodyLinesToDrop*-1]
#need to move up the line markers in the bodyTags collection based on the lines dropped
templateTags$bodyTags <- ddply(templateTags$bodyTags, "element", function(subDF) {
numMoveUp <- length(which(bodyLinesToDrop < subDF$element[1]))
subDF$element <- subDF$element - numMoveUp
return(subDF)
})
}
return(templateTags)
}
#' Process the Init Section
#'
#' To do: add details.
#'
#' @param initsection The list of all arguments parsed from the init section
#' @return arglist
#' @importFrom gsubfn strapply
#' @keywords internal
processInit <- function(initsection) {
#combine multi-line statements by searching for semi-colon
assignments <- grep("^\\s*.+\\s*=", initsection, perl=TRUE)
#check for valid variable names
valid <- grep("^\\s*[A-Za-z\\.]+[\\w\\.#]*\\s*=", initsection[assignments], perl=TRUE)
if (length(valid) < length(assignments)) {
badvars <- initsection[assignments[which(!1:length(assignments) %in% valid)]]
stop(paste(c("Invalid variable definitions in init section.",
"Variables must begin with a letter or a period.",
"Variables may contain only the following characters: letters, numbers, underscores, periods, and a single pound sign for list variables.",
"Problematic variable(s):", badvars), collapse="\n "))
}
#preallocate vector of strings to process
argstoprocess <- vector("character", length(assignments))
#loop through each line containing an assignment
for (i in 1:length(assignments)) {
argstoprocess[i] = initsection[assignments[i]]
#if line does not terminate in semicolon, then read subsequent lines until semicolon found