From 8057dbc3f837c06c07388a3969f51c91be53c801 Mon Sep 17 00:00:00 2001
From: kbenoit <kbenoit@lse.ac.uk>
Date: Tue, 26 Aug 2025 11:44:38 +0800
Subject: [PATCH 1/5] Fix #732 by de-duplicating duplicated JSON

---
 R/chat-structured.R                   | 37 ++++++++++---
 R/parallel-chat.R                     |  5 +-
 tests/testthat/test-chat-structured.R | 76 +++++++++++++++++++++++++++
 3 files changed, 110 insertions(+), 8 deletions(-)

diff --git a/R/chat-structured.R b/R/chat-structured.R
index 4ef25a1f..acf8bd4c 100644
--- a/R/chat-structured.R
+++ b/R/chat-structured.R
@@ -1,13 +1,38 @@
-extract_data <- function(turn, type, convert = TRUE, needs_wrapper = FALSE) {
+extract_data <- function(turn, type, convert = TRUE, needs_wrapper = FALSE, prompt_index = NULL) {
   is_json <- map_lgl(turn@contents, S7_inherits, ContentJson)
   n <- sum(is_json)
-  if (n != 1) {
-    cli::cli_abort("Data extraction failed: {n} data results recieved.")
+  
+  if (n == 0) {
+    cli::cli_abort("Data extraction failed: 0 data results received.")
+  } else if (n == 1) {
+    # Normal case - exactly 1 JSON object
+    json <- turn@contents[[which(is_json)]]
+    out <- json@value
+  } else if (n == 2) {
+    # Check if the two JSON objects are identical (duplicate case)
+    json_indices <- which(is_json)
+    json1 <- turn@contents[[json_indices[1]]]
+    json2 <- turn@contents[[json_indices[2]]]
+    
+    val1 <- json1@value
+    val2 <- json2@value
+    
+    if (identical(val1, val2)) {
+      # Duplicate case - use the first one
+      index_msg <- if (!is.null(prompt_index)) paste0(" (prompt ", prompt_index, ")") else ""
+      warning("Found duplicate JSON responses, using the first one", index_msg, ".", call. = FALSE, immediate. = TRUE)
+      out <- val1
+    } else {
+      # Different JSON objects - use the last one (likely the final response)
+      index_msg <- if (!is.null(prompt_index)) paste0(" (prompt ", prompt_index, ")") else ""
+      warning("Found multiple different JSON responses, using the last one", index_msg, ".", call. = FALSE, immediate. = TRUE)
+      out <- val2
+    }
+  } else {
+    # More than 2 JSON objects - this is unexpected
+    cli::cli_abort("Data extraction failed: {n} data results received. Expected 1 or 2.")
   }
 
-  json <- turn@contents[[which(is_json)]]
-  out <- json@value
-
   if (needs_wrapper) {
     out <- out$wrapper
     type <- type@properties[[1]]
diff --git a/R/parallel-chat.R b/R/parallel-chat.R
index d77b77e0..35ba2164 100644
--- a/R/parallel-chat.R
+++ b/R/parallel-chat.R
@@ -170,12 +170,13 @@ multi_convert <- function(
 ) {
   needs_wrapper <- type_needs_wrapper(type, provider)
 
-  rows <- map(turns, \(turn) {
+  rows <- imap(turns, \(turn, idx) {
     extract_data(
       turn = turn,
       type = wrap_type_if_needed(type, needs_wrapper),
       convert = FALSE,
-      needs_wrapper = needs_wrapper
+      needs_wrapper = needs_wrapper,
+      prompt_index = idx
     )
   })
 
diff --git a/tests/testthat/test-chat-structured.R b/tests/testthat/test-chat-structured.R
index 95889ea5..1c3c30dc 100644
--- a/tests/testthat/test-chat-structured.R
+++ b/tests/testthat/test-chat-structured.R
@@ -24,6 +24,82 @@ test_that("can extract data when wrapper is used", {
   expect_equal(extract_data(turn, type, needs_wrapper = TRUE), list(x = 1))
 })
 
+test_that("handles duplicate identical JSON responses", {
+  # This test covers the Bedrock duplicate JSON issue
+  json_data <- list(name = "John", age = 25)
+  turn <- Turn("assistant", list(
+    ContentJson(json_data),
+    ContentJson(json_data)  # Identical duplicate
+  ))
+  type <- type_object(name = type_string(), age = type_integer())
+  
+  # Should warn about duplicates and use the first one
+  expect_warning(
+    result <- extract_data(turn, type),
+    "Found duplicate JSON responses, using the first one"
+  )
+  expect_equal(result, list(name = "John", age = 25))
+})
+
+test_that("handles duplicate identical JSON responses with prompt index", {
+  # Test that prompt index is included in warning message
+  json_data <- list(score = 42)
+  turn <- Turn("assistant", list(
+    ContentJson(json_data),
+    ContentJson(json_data)
+  ))
+  type <- type_object(score = type_integer())
+  
+  expect_warning(
+    extract_data(turn, type, prompt_index = 3),
+    "Found duplicate JSON responses, using the first one \\(prompt 3\\)"
+  )
+})
+
+test_that("handles different JSON responses", {
+  # This test covers the case where two different JSON objects are returned
+  turn <- Turn("assistant", list(
+    ContentJson(list(name = "John", age = 25)),
+    ContentJson(list(name = "Jane", age = 30))  # Different data
+  ))
+  type <- type_object(name = type_string(), age = type_integer())
+  
+  # Should warn about multiple responses and use the last one
+  expect_warning(
+    result <- extract_data(turn, type),
+    "Found multiple different JSON responses, using the last one"
+  )
+  expect_equal(result, list(name = "Jane", age = 30))
+})
+
+test_that("handles different JSON responses with prompt index", {
+  turn <- Turn("assistant", list(
+    ContentJson(list(value = 1)),
+    ContentJson(list(value = 2))
+  ))
+  type <- type_object(value = type_integer())
+  
+  expect_warning(
+    extract_data(turn, type, prompt_index = 5),
+    "Found multiple different JSON responses, using the last one \\(prompt 5\\)"
+  )
+})
+
+test_that("errors on more than 2 JSON responses", {
+  # Should error if there are more than 2 JSON objects
+  turn <- Turn("assistant", list(
+    ContentJson(list(x = 1)),
+    ContentJson(list(x = 2)),
+    ContentJson(list(x = 3))
+  ))
+  type <- type_object(x = type_integer())
+  
+  expect_error(
+    extract_data(turn, type),
+    "Data extraction failed: 3 data results received. Expected 1 or 2."
+  )
+})
+
 # Type coercion ---------------------------------------------------------------
 
 test_that("optional base types (scalars) stay as NULL", {

From 66d719f1626efd48cd28982a9e367b3a4b32023e Mon Sep 17 00:00:00 2001
From: Kenneth Benoit <kbenoit@lse.ac.uk>
Date: Tue, 26 Aug 2025 12:01:05 +0800
Subject: [PATCH 2/5] Apply suggestions from code review

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 R/chat-structured.R                   | 34 ++++++++++++---
 tests/testthat/test-chat-structured.R | 62 ++++++++++++++++-----------
 2 files changed, 63 insertions(+), 33 deletions(-)

diff --git a/R/chat-structured.R b/R/chat-structured.R
index acf8bd4c..12e08f86 100644
--- a/R/chat-structured.R
+++ b/R/chat-structured.R
@@ -13,24 +13,44 @@ extract_data <- function(turn, type, convert = TRUE, needs_wrapper = FALSE, prom
     json_indices <- which(is_json)
     json1 <- turn@contents[[json_indices[1]]]
     json2 <- turn@contents[[json_indices[2]]]
-    
     val1 <- json1@value
     val2 <- json2@value
-    
     if (identical(val1, val2)) {
       # Duplicate case - use the first one
-      index_msg <- if (!is.null(prompt_index)) paste0(" (prompt ", prompt_index, ")") else ""
-      warning("Found duplicate JSON responses, using the first one", index_msg, ".", call. = FALSE, immediate. = TRUE)
+      index_msg <- if (!is.null(prompt_index)) {
+        paste0(" (prompt ", prompt_index, ")")
+      } else {
+        ""
+      }
+      warning(
+        "Found duplicate JSON responses, using the first one",
+        index_msg,
+        ".",
+        call. = FALSE,
+        immediate. = TRUE
+      )
       out <- val1
     } else {
       # Different JSON objects - use the last one (likely the final response)
-      index_msg <- if (!is.null(prompt_index)) paste0(" (prompt ", prompt_index, ")") else ""
-      warning("Found multiple different JSON responses, using the last one", index_msg, ".", call. = FALSE, immediate. = TRUE)
+      index_msg <- if (!is.null(prompt_index)) {
+        paste0(" (prompt ", prompt_index, ")")
+      } else {
+        ""
+      }
+      warning(
+        "Found multiple different JSON responses, using the last one",
+        index_msg,
+        ".",
+        call. = FALSE,
+        immediate. = TRUE
+      )
       out <- val2
     }
   } else {
     # More than 2 JSON objects - this is unexpected
-    cli::cli_abort("Data extraction failed: {n} data results received. Expected 1 or 2.")
+    cli::cli_abort(
+      "Data extraction failed: {n} data results received. Expected 1 or 2."
+    )
   }
 
   if (needs_wrapper) {
diff --git a/tests/testthat/test-chat-structured.R b/tests/testthat/test-chat-structured.R
index 1c3c30dc..607986a6 100644
--- a/tests/testthat/test-chat-structured.R
+++ b/tests/testthat/test-chat-structured.R
@@ -27,12 +27,14 @@ test_that("can extract data when wrapper is used", {
 test_that("handles duplicate identical JSON responses", {
   # This test covers the Bedrock duplicate JSON issue
   json_data <- list(name = "John", age = 25)
-  turn <- Turn("assistant", list(
-    ContentJson(json_data),
-    ContentJson(json_data)  # Identical duplicate
-  ))
+  turn <- Turn(
+    "assistant",
+    list(
+      ContentJson(json_data),
+      ContentJson(json_data) # Identical duplicate
+    )
+  )
   type <- type_object(name = type_string(), age = type_integer())
-  
   # Should warn about duplicates and use the first one
   expect_warning(
     result <- extract_data(turn, type),
@@ -44,12 +46,14 @@ test_that("handles duplicate identical JSON responses", {
 test_that("handles duplicate identical JSON responses with prompt index", {
   # Test that prompt index is included in warning message
   json_data <- list(score = 42)
-  turn <- Turn("assistant", list(
-    ContentJson(json_data),
-    ContentJson(json_data)
-  ))
+  turn <- Turn(
+    "assistant",
+    list(
+      ContentJson(json_data),
+      ContentJson(json_data)
+    )
+  )
   type <- type_object(score = type_integer())
-  
   expect_warning(
     extract_data(turn, type, prompt_index = 3),
     "Found duplicate JSON responses, using the first one \\(prompt 3\\)"
@@ -58,12 +62,14 @@ test_that("handles duplicate identical JSON responses with prompt index", {
 
 test_that("handles different JSON responses", {
   # This test covers the case where two different JSON objects are returned
-  turn <- Turn("assistant", list(
-    ContentJson(list(name = "John", age = 25)),
-    ContentJson(list(name = "Jane", age = 30))  # Different data
-  ))
+  turn <- Turn(
+    "assistant",
+    list(
+      ContentJson(list(name = "John", age = 25)),
+      ContentJson(list(name = "Jane", age = 30)) # Different data
+    )
+  )
   type <- type_object(name = type_string(), age = type_integer())
-  
   # Should warn about multiple responses and use the last one
   expect_warning(
     result <- extract_data(turn, type),
@@ -73,12 +79,14 @@ test_that("handles different JSON responses", {
 })
 
 test_that("handles different JSON responses with prompt index", {
-  turn <- Turn("assistant", list(
-    ContentJson(list(value = 1)),
-    ContentJson(list(value = 2))
-  ))
+  turn <- Turn(
+    "assistant",
+    list(
+      ContentJson(list(value = 1)),
+      ContentJson(list(value = 2))
+    )
+  )
   type <- type_object(value = type_integer())
-  
   expect_warning(
     extract_data(turn, type, prompt_index = 5),
     "Found multiple different JSON responses, using the last one \\(prompt 5\\)"
@@ -87,13 +95,15 @@ test_that("handles different JSON responses with prompt index", {
 
 test_that("errors on more than 2 JSON responses", {
   # Should error if there are more than 2 JSON objects
-  turn <- Turn("assistant", list(
-    ContentJson(list(x = 1)),
-    ContentJson(list(x = 2)),
-    ContentJson(list(x = 3))
-  ))
+  turn <- Turn(
+    "assistant",
+    list(
+      ContentJson(list(x = 1)),
+      ContentJson(list(x = 2)),
+      ContentJson(list(x = 3))
+    )
+  )
   type <- type_object(x = type_integer())
-  
   expect_error(
     extract_data(turn, type),
     "Data extraction failed: 3 data results received. Expected 1 or 2."

From df839716130bf4a0e0fec39fdb72ff8d57f06018 Mon Sep 17 00:00:00 2001
From: Kenneth Benoit <kbenoit@lse.ac.uk>
Date: Tue, 26 Aug 2025 14:31:18 +0800
Subject: [PATCH 3/5] Use direct tests instead of snapshot

---
 tests/testthat/_snaps/chat-structured.md | 8 --------
 tests/testthat/test-chat-structured.R    | 6 +++++-
 2 files changed, 5 insertions(+), 9 deletions(-)
 delete mode 100644 tests/testthat/_snaps/chat-structured.md

diff --git a/tests/testthat/_snaps/chat-structured.md b/tests/testthat/_snaps/chat-structured.md
deleted file mode 100644
index 6812f46f..00000000
--- a/tests/testthat/_snaps/chat-structured.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# useful error if no ContentJson
-
-    Code
-      extract_data(turn)
-    Condition
-      Error in `extract_data()`:
-      ! Data extraction failed: 0 data results recieved.
-
diff --git a/tests/testthat/test-chat-structured.R b/tests/testthat/test-chat-structured.R
index 607986a6..5fa16ca6 100644
--- a/tests/testthat/test-chat-structured.R
+++ b/tests/testthat/test-chat-structured.R
@@ -2,7 +2,11 @@
 
 test_that("useful error if no ContentJson", {
   turn <- Turn("assistant", list(ContentText("Hello")))
-  expect_snapshot(extract_data(turn), error = TRUE)
+  expect_error(
+    extract_data(turn),
+    "Data extraction failed: 0 data results received.",
+    fixed = TRUE
+  )
 })
 
 test_that("can extract data from ContentJson", {

From 6f6dbc2bacb32dcc5f6f7956a7f5198397d3cc31 Mon Sep 17 00:00:00 2001
From: Kenneth Benoit <kbenoit@lse.ac.uk>
Date: Tue, 26 Aug 2025 14:49:16 +0800
Subject: [PATCH 4/5] Remove a blank line to avoid reviewdog pedantry

---
 R/chat-structured.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/chat-structured.R b/R/chat-structured.R
index 12e08f86..24e39de6 100644
--- a/R/chat-structured.R
+++ b/R/chat-structured.R
@@ -1,7 +1,6 @@
 extract_data <- function(turn, type, convert = TRUE, needs_wrapper = FALSE, prompt_index = NULL) {
   is_json <- map_lgl(turn@contents, S7_inherits, ContentJson)
   n <- sum(is_json)
-  
   if (n == 0) {
     cli::cli_abort("Data extraction failed: 0 data results received.")
   } else if (n == 1) {

From 21572bc6eddb6143771fb49970c01d86a720e1b8 Mon Sep 17 00:00:00 2001
From: Kenneth Benoit <kbenoit@lse.ac.uk>
Date: Tue, 26 Aug 2025 15:40:37 +0800
Subject: [PATCH 5/5] Accept formatting suggestion, under duress

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 R/chat-structured.R | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/R/chat-structured.R b/R/chat-structured.R
index 24e39de6..0c240a84 100644
--- a/R/chat-structured.R
+++ b/R/chat-structured.R
@@ -1,4 +1,10 @@
-extract_data <- function(turn, type, convert = TRUE, needs_wrapper = FALSE, prompt_index = NULL) {
+extract_data <- function(
+  turn,
+  type,
+  convert = TRUE,
+  needs_wrapper = FALSE,
+  prompt_index = NULL
+) {
   is_json <- map_lgl(turn@contents, S7_inherits, ContentJson)
   n <- sum(is_json)
   if (n == 0) {