diff --git a/lib/xlsxir/parse_workbook.ex b/lib/xlsxir/parse_workbook.ex index c620350..38fe336 100644 --- a/lib/xlsxir/parse_workbook.ex +++ b/lib/xlsxir/parse_workbook.ex @@ -4,44 +4,23 @@ defmodule Xlsxir.ParseWorkbook do """ @doc """ - sheets has multiple sheet map which consists of name, sheet_id and rid + Sheet tags come in the order they appear in the spreadsheet. """ - defstruct sheets: [], tid: nil + defstruct sheet_number: 1, tid: nil def sax_event_handler(:startDocument, _state) do %__MODULE__{tid: GenServer.call(Xlsxir.StateManager, :new_table)} end def sax_event_handler({:startElement, _, 'sheet', _, xml_attrs}, state) do - sheet = - Enum.reduce(xml_attrs, %{name: nil, sheet_id: nil, rid: nil}, fn attr, sheet -> - case attr do - {:attribute, 'name', _, _, name} -> - %{sheet | name: name |> to_string} - - {:attribute, 'sheetId', _, _, sheet_id} -> - {sheet_id, _} = sheet_id |> to_string |> Integer.parse() - %{sheet | sheet_id: sheet_id} - - {:attribute, 'id', _, _, rid} -> - "rId" <> rid = rid |> to_string - {rid, _} = Integer.parse(rid) - %{sheet | rid: rid} - - _ -> - sheet - end + sheet_name = + Enum.find_value(xml_attrs, fn + {:attribute, 'name', _, _, name} -> to_string(name) + _ -> nil end) - %__MODULE__{state | sheets: [sheet | state.sheets]} - end - - def sax_event_handler(:endDocument, %__MODULE__{tid: tid} = state) do - Enum.map(state.sheets, fn %{sheet_id: sheet_id, name: name} -> - :ets.insert(tid, {sheet_id, name}) - end) - - state + :ets.insert(state.tid, {state.sheet_number, sheet_name}) + %__MODULE__{state | sheet_number: state.sheet_number + 1} end def sax_event_handler(_, state), do: state diff --git a/mix.lock b/mix.lock index 435e078..5c8fcd9 100644 --- a/mix.lock +++ b/mix.lock @@ -1,8 +1,8 @@ %{ - "earmark": {:hex, :earmark, "1.3.1", "73812f447f7a42358d3ba79283cfa3075a7580a3a2ed457616d6517ac3738cb9", [:mix], [], "hexpm"}, - "erlsom": {:hex, :erlsom, "1.5.0", "c5a5cdd0ee0e8dca62bcc4b13ff08da24fdefc16ccd8b25282a2fda2ba1be24a", [:rebar3], [], "hexpm"}, - "ex_doc": {:hex, :ex_doc, "0.19.3", "3c7b0f02851f5fc13b040e8e925051452e41248f685e40250d7e40b07b9f8c10", [:mix], [{:earmark, "~> 1.2", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.10", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"}, - "makeup": {:hex, :makeup, "0.8.0", "9cf32aea71c7fe0a4b2e9246c2c4978f9070257e5c9ce6d4a28ec450a839b55f", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"}, - "makeup_elixir": {:hex, :makeup_elixir, "0.13.0", "be7a477997dcac2e48a9d695ec730b2d22418292675c75aa2d34ba0909dcdeda", [:mix], [{:makeup, "~> 0.8", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm"}, - "nimble_parsec": {:hex, :nimble_parsec, "0.5.0", "90e2eca3d0266e5c53f8fbe0079694740b9c91b6747f2b7e3c5d21966bba8300", [:mix], [], "hexpm"}, + "earmark": {:hex, :earmark, "1.3.1", "73812f447f7a42358d3ba79283cfa3075a7580a3a2ed457616d6517ac3738cb9", [:mix], [], "hexpm", "000aaeff08919e95e7aea13e4af7b2b9734577b3e6a7c50ee31ee88cab6ec4fb"}, + "erlsom": {:hex, :erlsom, "1.5.0", "c5a5cdd0ee0e8dca62bcc4b13ff08da24fdefc16ccd8b25282a2fda2ba1be24a", [:rebar3], [], "hexpm", "55a9dbf9cfa77fcfc108bd8e2c4f9f784dea228a8f4b06ea10b684944946955a"}, + "ex_doc": {:hex, :ex_doc, "0.19.3", "3c7b0f02851f5fc13b040e8e925051452e41248f685e40250d7e40b07b9f8c10", [:mix], [{:earmark, "~> 1.2", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.10", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm", "0e11d67e662142fc3945b0ee410c73c8c956717fbeae4ad954b418747c734973"}, + "makeup": {:hex, :makeup, "0.8.0", "9cf32aea71c7fe0a4b2e9246c2c4978f9070257e5c9ce6d4a28ec450a839b55f", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5fbc8e549aa9afeea2847c0769e3970537ed302f93a23ac612602e805d9d1e7f"}, + "makeup_elixir": {:hex, :makeup_elixir, "0.13.0", "be7a477997dcac2e48a9d695ec730b2d22418292675c75aa2d34ba0909dcdeda", [:mix], [{:makeup, "~> 0.8", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "adf0218695e22caeda2820eaba703fa46c91820d53813a2223413da3ef4ba515"}, + "nimble_parsec": {:hex, :nimble_parsec, "0.5.0", "90e2eca3d0266e5c53f8fbe0079694740b9c91b6747f2b7e3c5d21966bba8300", [:mix], [], "hexpm", "5c040b8469c1ff1b10093d3186e2e10dbe483cd73d79ec017993fb3985b8a9b3"}, } diff --git a/test/test_data/swapped-sheet-order.xlsx b/test/test_data/swapped-sheet-order.xlsx new file mode 100644 index 0000000..a694433 Binary files /dev/null and b/test/test_data/swapped-sheet-order.xlsx differ diff --git a/test/xlsxir_test.exs b/test/xlsxir_test.exs index 3090e76..ef20177 100644 --- a/test/xlsxir_test.exs +++ b/test/xlsxir_test.exs @@ -157,4 +157,29 @@ defmodule XlsxirTest do assert map["B2"] == "pre 2008" assert map["B3"] == "https://msdn.microsoft.com/en-us/library/office/gg278314.aspx" end + + test "swapped order of sheets still correctly identifies sheet names" do + # The order of tags in the workbook.xml will match the "sheet(index+1).xml" file names + # of the worksheets. (see also %Xlsxir.XmlFile{name: "sheet#.xml"} + # Previously the framework relied on the rid or sheetId values to identify the order + # sheetId especially is unreliable for this because its numbers are created like a + # sequential id in a database and moving sheets or deleting sheets can lead to gaps + # and non-sequential values. + [{:ok, pid_sheet_2}, {:ok, pid_sheet_1}] = + multi_extract("./test/test_data/swapped-sheet-order.xlsx") + + on_exit(fn -> + close(pid_sheet_1) + close(pid_sheet_2) + end) + + map_sheet_2 = get_map(pid_sheet_2) + map_sheet_1 = get_map(pid_sheet_1) + + assert map_sheet_2["A1"] == "SHEET2CELL" + assert map_sheet_1["A1"] == "SHEET1CELL" + + assert get_info(pid_sheet_2, :name) == "Sheet2" + assert get_info(pid_sheet_1, :name) == "Sheet1" + end end diff --git a/xlsxir.patch b/xlsxir.patch new file mode 100644 index 0000000..7a07b7f --- /dev/null +++ b/xlsxir.patch @@ -0,0 +1,53 @@ +diff --git a/lib/xlsxir/parse_workbook.ex b/lib/xlsxir/parse_workbook.ex +index c620350..233b8fb 100644 +--- a/lib/xlsxir/parse_workbook.ex ++++ b/lib/xlsxir/parse_workbook.ex +@@ -37,8 +37,11 @@ defmodule Xlsxir.ParseWorkbook do + end + + def sax_event_handler(:endDocument, %__MODULE__{tid: tid} = state) do +- Enum.map(state.sheets, fn %{sheet_id: sheet_id, name: name} -> +- :ets.insert(tid, {sheet_id, name}) ++ state.sheets ++ |> Enum.reverse() ++ |> Enum.with_index(1) ++ |> Enum.each(fn {%{name: name}, index} -> ++ :ets.insert(tid, {index, name}) + end) + + state +diff --git a/test/test_data/swapped-sheet-order.xlsx b/test/test_data/swapped-sheet-order.xlsx +new file mode 100644 +index 0000000..a694433 +Binary files /dev/null and b/test/test_data/swapped-sheet-order.xlsx differ +diff --git a/test/xlsxir_test.exs b/test/xlsxir_test.exs +index 3090e76..69eaacf 100644 +--- a/test/xlsxir_test.exs ++++ b/test/xlsxir_test.exs +@@ -157,4 +157,26 @@ defmodule XlsxirTest do + assert map["B2"] == "pre 2008" + assert map["B3"] == "https://msdn.microsoft.com/en-us/library/office/gg278314.aspx" + end ++ ++ test "swapped order of sheets still correctly identifies sheet names" do ++ # The order of tags in the workbook.xml will match the "sheet(index+1).xml" file names ++ # of the worksheets. (see also %Xlsxir.XmlFile{name: "sheet#.xml"} ++ # Previously the framework relied on the rid or sheetId values to identify the order ++ # sheetId especially is unreliable for this because its numbers are created like a ++ # sequential id in a database and moving sheets or deleting sheets can lead to gaps ++ # and non-sequential values. ++ [{:ok, pid_sheet_2}, {:ok, pid_sheet_1}] = multi_extract("./test/test_data/swapped-sheet-order.xlsx") ++ on_exit(fn -> ++ close(pid_sheet_1) ++ close(pid_sheet_2) ++ end) ++ map_sheet_2 = get_map(pid_sheet_2) ++ map_sheet_1 = get_map(pid_sheet_1) ++ ++ assert map_sheet_2["A1"] == "SHEET2CELL" ++ assert map_sheet_1["A1"] == "SHEET1CELL" ++ ++ assert get_info(pid_sheet_2, :name) == "Sheet2" ++ assert get_info(pid_sheet_1, :name) == "Sheet1" ++ end + end