Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 8 additions & 29 deletions lib/xlsxir/parse_workbook.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,44 +4,23 @@ defmodule Xlsxir.ParseWorkbook do
"""

@doc """
sheets has multiple sheet map which consists of name, sheet_id and rid
Sheet tags come in the order they appear in the spreadsheet.
"""
defstruct sheets: [], tid: nil
defstruct sheet_number: 1, tid: nil

def sax_event_handler(:startDocument, _state) do
%__MODULE__{tid: GenServer.call(Xlsxir.StateManager, :new_table)}
end

def sax_event_handler({:startElement, _, 'sheet', _, xml_attrs}, state) do
sheet =
Enum.reduce(xml_attrs, %{name: nil, sheet_id: nil, rid: nil}, fn attr, sheet ->
case attr do
{:attribute, 'name', _, _, name} ->
%{sheet | name: name |> to_string}

{:attribute, 'sheetId', _, _, sheet_id} ->
{sheet_id, _} = sheet_id |> to_string |> Integer.parse()
%{sheet | sheet_id: sheet_id}

{:attribute, 'id', _, _, rid} ->
"rId" <> rid = rid |> to_string
{rid, _} = Integer.parse(rid)
%{sheet | rid: rid}

_ ->
sheet
end
sheet_name =
Enum.find_value(xml_attrs, fn
{:attribute, 'name', _, _, name} -> to_string(name)
_ -> nil
end)

%__MODULE__{state | sheets: [sheet | state.sheets]}
end

def sax_event_handler(:endDocument, %__MODULE__{tid: tid} = state) do
Enum.map(state.sheets, fn %{sheet_id: sheet_id, name: name} ->
:ets.insert(tid, {sheet_id, name})
end)

state
:ets.insert(state.tid, {state.sheet_number, sheet_name})
%__MODULE__{state | sheet_number: state.sheet_number + 1}
end

def sax_event_handler(_, state), do: state
Expand Down
12 changes: 6 additions & 6 deletions mix.lock
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
%{
"earmark": {:hex, :earmark, "1.3.1", "73812f447f7a42358d3ba79283cfa3075a7580a3a2ed457616d6517ac3738cb9", [:mix], [], "hexpm"},
"erlsom": {:hex, :erlsom, "1.5.0", "c5a5cdd0ee0e8dca62bcc4b13ff08da24fdefc16ccd8b25282a2fda2ba1be24a", [:rebar3], [], "hexpm"},
"ex_doc": {:hex, :ex_doc, "0.19.3", "3c7b0f02851f5fc13b040e8e925051452e41248f685e40250d7e40b07b9f8c10", [:mix], [{:earmark, "~> 1.2", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.10", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"},
"makeup": {:hex, :makeup, "0.8.0", "9cf32aea71c7fe0a4b2e9246c2c4978f9070257e5c9ce6d4a28ec450a839b55f", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"},
"makeup_elixir": {:hex, :makeup_elixir, "0.13.0", "be7a477997dcac2e48a9d695ec730b2d22418292675c75aa2d34ba0909dcdeda", [:mix], [{:makeup, "~> 0.8", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm"},
"nimble_parsec": {:hex, :nimble_parsec, "0.5.0", "90e2eca3d0266e5c53f8fbe0079694740b9c91b6747f2b7e3c5d21966bba8300", [:mix], [], "hexpm"},
"earmark": {:hex, :earmark, "1.3.1", "73812f447f7a42358d3ba79283cfa3075a7580a3a2ed457616d6517ac3738cb9", [:mix], [], "hexpm", "000aaeff08919e95e7aea13e4af7b2b9734577b3e6a7c50ee31ee88cab6ec4fb"},
"erlsom": {:hex, :erlsom, "1.5.0", "c5a5cdd0ee0e8dca62bcc4b13ff08da24fdefc16ccd8b25282a2fda2ba1be24a", [:rebar3], [], "hexpm", "55a9dbf9cfa77fcfc108bd8e2c4f9f784dea228a8f4b06ea10b684944946955a"},
"ex_doc": {:hex, :ex_doc, "0.19.3", "3c7b0f02851f5fc13b040e8e925051452e41248f685e40250d7e40b07b9f8c10", [:mix], [{:earmark, "~> 1.2", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.10", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm", "0e11d67e662142fc3945b0ee410c73c8c956717fbeae4ad954b418747c734973"},
"makeup": {:hex, :makeup, "0.8.0", "9cf32aea71c7fe0a4b2e9246c2c4978f9070257e5c9ce6d4a28ec450a839b55f", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5fbc8e549aa9afeea2847c0769e3970537ed302f93a23ac612602e805d9d1e7f"},
"makeup_elixir": {:hex, :makeup_elixir, "0.13.0", "be7a477997dcac2e48a9d695ec730b2d22418292675c75aa2d34ba0909dcdeda", [:mix], [{:makeup, "~> 0.8", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "adf0218695e22caeda2820eaba703fa46c91820d53813a2223413da3ef4ba515"},
"nimble_parsec": {:hex, :nimble_parsec, "0.5.0", "90e2eca3d0266e5c53f8fbe0079694740b9c91b6747f2b7e3c5d21966bba8300", [:mix], [], "hexpm", "5c040b8469c1ff1b10093d3186e2e10dbe483cd73d79ec017993fb3985b8a9b3"},
}
Binary file added test/test_data/swapped-sheet-order.xlsx
Binary file not shown.
25 changes: 25 additions & 0 deletions test/xlsxir_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,29 @@ defmodule XlsxirTest do
assert map["B2"] == "pre 2008"
assert map["B3"] == "https://msdn.microsoft.com/en-us/library/office/gg278314.aspx"
end

test "swapped order of sheets still correctly identifies sheet names" do
# The order of <sheet> tags in the workbook.xml will match the "sheet(index+1).xml" file names
# of the worksheets. (see also %Xlsxir.XmlFile{name: "sheet#.xml"}
# Previously the framework relied on the rid or sheetId values to identify the order
# sheetId especially is unreliable for this because its numbers are created like a
# sequential id in a database and moving sheets or deleting sheets can lead to gaps
# and non-sequential values.
[{:ok, pid_sheet_2}, {:ok, pid_sheet_1}] =
multi_extract("./test/test_data/swapped-sheet-order.xlsx")

on_exit(fn ->
close(pid_sheet_1)
close(pid_sheet_2)
end)

map_sheet_2 = get_map(pid_sheet_2)
map_sheet_1 = get_map(pid_sheet_1)

assert map_sheet_2["A1"] == "SHEET2CELL"
assert map_sheet_1["A1"] == "SHEET1CELL"

assert get_info(pid_sheet_2, :name) == "Sheet2"
assert get_info(pid_sheet_1, :name) == "Sheet1"
end
end
53 changes: 53 additions & 0 deletions xlsxir.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
diff --git a/lib/xlsxir/parse_workbook.ex b/lib/xlsxir/parse_workbook.ex
index c620350..233b8fb 100644
--- a/lib/xlsxir/parse_workbook.ex
+++ b/lib/xlsxir/parse_workbook.ex
@@ -37,8 +37,11 @@ defmodule Xlsxir.ParseWorkbook do
end

def sax_event_handler(:endDocument, %__MODULE__{tid: tid} = state) do
- Enum.map(state.sheets, fn %{sheet_id: sheet_id, name: name} ->
- :ets.insert(tid, {sheet_id, name})
+ state.sheets
+ |> Enum.reverse()
+ |> Enum.with_index(1)
+ |> Enum.each(fn {%{name: name}, index} ->
+ :ets.insert(tid, {index, name})
end)

state
diff --git a/test/test_data/swapped-sheet-order.xlsx b/test/test_data/swapped-sheet-order.xlsx
new file mode 100644
index 0000000..a694433
Binary files /dev/null and b/test/test_data/swapped-sheet-order.xlsx differ
diff --git a/test/xlsxir_test.exs b/test/xlsxir_test.exs
index 3090e76..69eaacf 100644
--- a/test/xlsxir_test.exs
+++ b/test/xlsxir_test.exs
@@ -157,4 +157,26 @@ defmodule XlsxirTest do
assert map["B2"] == "pre 2008"
assert map["B3"] == "https://msdn.microsoft.com/en-us/library/office/gg278314.aspx"
end
+
+ test "swapped order of sheets still correctly identifies sheet names" do
+ # The order of <sheet> tags in the workbook.xml will match the "sheet(index+1).xml" file names
+ # of the worksheets. (see also %Xlsxir.XmlFile{name: "sheet#.xml"}
+ # Previously the framework relied on the rid or sheetId values to identify the order
+ # sheetId especially is unreliable for this because its numbers are created like a
+ # sequential id in a database and moving sheets or deleting sheets can lead to gaps
+ # and non-sequential values.
+ [{:ok, pid_sheet_2}, {:ok, pid_sheet_1}] = multi_extract("./test/test_data/swapped-sheet-order.xlsx")
+ on_exit(fn ->
+ close(pid_sheet_1)
+ close(pid_sheet_2)
+ end)
+ map_sheet_2 = get_map(pid_sheet_2)
+ map_sheet_1 = get_map(pid_sheet_1)
+
+ assert map_sheet_2["A1"] == "SHEET2CELL"
+ assert map_sheet_1["A1"] == "SHEET1CELL"
+
+ assert get_info(pid_sheet_2, :name) == "Sheet2"
+ assert get_info(pid_sheet_1, :name) == "Sheet1"
+ end
end