Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 5 additions & 11 deletions lib/elixir/lib/kernel.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3813,13 +3813,6 @@ defmodule Kernel do
{_, doc} when doc_attr? ->
do_at_escape(name, doc)

%{__struct__: Regex, source: source, opts: opts} = regex ->
# TODO: Automatically deal with exported regexes
case :erlang.system_info(:otp_release) < [?2, ?8] do
true -> do_at_escape(name, regex)
false -> quote(do: Regex.compile!(unquote(source), unquote(opts)))
end

value ->
do_at_escape(name, value)
end
Expand Down Expand Up @@ -6646,13 +6639,14 @@ defmodule Kernel do
end

defp compile_regex(binary_or_tuple, options) do
# TODO: Remove this when we require Erlang/OTP 28+
case is_binary(binary_or_tuple) and :erlang.system_info(:otp_release) < [?2, ?8] do
bin_opts = :binary.list_to_bin(options)

case is_binary(binary_or_tuple) do
true ->
Macro.escape(Regex.compile!(binary_or_tuple, :binary.list_to_bin(options)))
Macro.escape(Regex.compile!(binary_or_tuple, bin_opts))

false ->
quote(do: Regex.compile!(unquote(binary_or_tuple), unquote(:binary.list_to_bin(options))))
quote(do: Regex.compile!(unquote(binary_or_tuple), unquote(bin_opts)))
end
end

Expand Down
60 changes: 60 additions & 0 deletions lib/elixir/lib/macro.ex
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,66 @@ defmodule Macro do
`escape/2` is used to escape *values* (either directly passed or variable
bound), while `quote/2` produces syntax trees for
expressions.

## Dealing with references and other runtime values

Macros work at compile-time and therefore `Macro.escape/1` can only escape values
that are valid during compilation, such as numbers, atoms, tuples, maps, binaries,
etc.

However, you may have values at compile-time which cannot be escaped, such as
`reference`s and `pid`s, since the process or memory address they point to will
no longer exist once compilation completes. Attempting to escape said values will
raise an exception. This is a common issue when working with NIFs.

Luckily, Elixir v1.19 introduces a mechanism that allows those values to be escaped,
as long as they are encapsulated by a struct within a module that defines the
`__escape__/1` function. This is possible as long as the reference has a natural
text or binary representation that can be serialized during compilation.

Let's imagine we have the following struct:

defmodule WrapperStruct do
defstruct [:ref]

def new(...), do: %WrapperStruct{ref: ...}

# efficiently dump to / load from binaries
def dump_to_binary(%WrapperStruct{ref: ref}), do: ...
def load_from_binary(binary), do: %WrapperStruct{ref: ...}
end

Such a struct could not be used in module attributes or escaped with `Macro.escape/2`:

defmodule Foo do
@my_struct WrapperStruct.new(...)
def my_struct, do: @my_struct
end

** (ArgumentError) cannot inject attribute @my_struct into function/macro because cannot escape #Reference<...>

To address this, structs can re-define how they should be escaped by defining a custom
`__escape__/1` function which returns the AST. In our example:

defmodule WrapperStruct do
# ...

def __escape__(struct) do
# dump to a binary representation at compile-time
binary = dump_to_binary(struct)
quote do
# load from the binary representation at runtime
WrapperStruct.load_from_binary(unquote(Macro.escape(binary)))
end
end
end

Now, our example above will be expanded as:

def my_struct, do: WrapperStruct.load_from_binary(<<...>>)

When implementing `__escape__/1`, you must ensure that the quoted expression
will evaluate to a struct that represents the one given as argument.
"""
@spec escape(term, escape_opts) :: t()
def escape(expr, opts \\ []) do
Expand Down
41 changes: 41 additions & 0 deletions lib/elixir/lib/regex.ex
Original file line number Diff line number Diff line change
Expand Up @@ -1000,4 +1000,45 @@ defmodule Regex do

defp translate_options(<<>>, acc), do: acc
defp translate_options(t, _acc), do: {:error, t}

@doc false
def __escape__(%{__struct__: Regex} = regex) do
# OTP 28.0 introduced refs in patterns, which can't be used in AST anymore
# OTP 28.1 introduced :re.import/1 which allows us to work with pre-compiled binaries again

pattern_ast =
cond do
# TODO: Remove this when we require Erlang/OTP 28+
# Before OTP 28.0, patterns did not contain any refs and could be safely be escaped
:erlang.system_info(:otp_release) < [?2, ?8] ->
Macro.escape(regex.re_pattern)

# OTP 28.1+ introduced the ability to export and import regexes from compiled binaries
Code.ensure_loaded?(:re) and function_exported?(:re, :import, 1) ->
{:ok, exported} = :re.compile(regex.source, [:export] ++ regex.opts)

quote do
:re.import(unquote(Macro.escape(exported)))
end

# TODO: Remove this when we require Erlang/OTP 28.1+
# OTP 28.0 works in degraded mode performance-wise, we need to recompile from the source
true ->
quote do
{:ok, pattern} =
:re.compile(unquote(Macro.escape(regex.source)), unquote(Macro.escape(regex.opts)))

pattern
end
end
Comment on lines +1025 to +1033
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was wondering. Perhaps we could/should warn here, so that people notice and pro-actively bump to OTP28.1?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, warning sounds good to me!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hum I tried e1907ea but it seems to cause some bootstrapping issues.

Not too familiar with IO.warn_once, maybe will give up for now 😅

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So let's warn on boot in elixir.erl. Check if version is >= 28 and the import function is not available.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to work, but would break too many integration tests in the CI?
d1536b4

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sabiwara I think for CI we will run on 28.1. So perhaps we add the warning in a separate PR, which we will merge once we move CI to 28.1. WDYT?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding the warning separately sounds good! 💜


quote do
%{
__struct__: unquote(Regex),
re_pattern: unquote(pattern_ast),
source: unquote(Macro.escape(regex.source)),
opts: unquote(Macro.escape(regex.opts))
}
end
end
end
18 changes: 15 additions & 3 deletions lib/elixir/src/elixir_quote.erl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
%% SPDX-FileCopyrightText: 2012 Plataformatec

-module(elixir_quote).

-feature(maybe_expr, enable).

-export([escape/3, linify/3, linify_with_context_counter/3, build/7, quote/2, has_unquotes/1, fun_to_quoted/1]).
-export([dot/5, tail_list/3, list/2, validate_runtime/2, shallow_validate_ast/1]). %% Quote callbacks

Expand Down Expand Up @@ -169,8 +172,17 @@ do_escape(BitString, _) when is_bitstring(BitString) ->
end;

do_escape(Map, Q) when is_map(Map) ->
TT = [escape_map_key_value(K, V, Map, Q) || {K, V} <- lists:sort(maps:to_list(Map))],
{'%{}', [], TT};
maybe
#{'__struct__' := Module} ?= Map,
true ?= is_atom(Module),
{module, Module} ?= code:ensure_loaded(Module),
true ?= erlang:function_exported(Module, '__escape__', 1),
Module:'__escape__'(Map)
else
_ ->
TT = [escape_map_key_value(K, V, Map, Q) || {K, V} <- lists:sort(maps:to_list(Map))],
{'%{}', [], TT}
end;
Comment on lines +175 to +185
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: We can finally use maybe 🎉
It fits perfectly here.

Since we're requiring OTP26, there is no runtime flag anymore, just the module one.


do_escape([], _) ->
[];
Expand Down Expand Up @@ -216,7 +228,7 @@ escape_map_key_value(K, V, Map, Q) ->
"(it must be defined within a function instead). ", (bad_escape_hint())/binary>>);
true ->
{do_quote(K, Q), do_quote(V, Q)}
end.
end.

find_tuple_ref(Tuple, Index) when Index > tuple_size(Tuple) -> nil;
find_tuple_ref(Tuple, Index) ->
Expand Down
21 changes: 18 additions & 3 deletions lib/elixir/test/elixir/macro_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,26 @@ defmodule MacroTest do
assert Macro.escape({:quote, [], [[do: :foo]]}) == {:{}, [], [:quote, [], [[do: :foo]]]}
end

test "inspects container when a reference cannot be escaped" do
assert_raise ArgumentError, ~r"~r/foo/ contains a reference", fn ->
Macro.escape(%{~r/foo/ | re_pattern: {:re_pattern, 0, 0, 0, make_ref()}})
test "escape container when a reference cannot be escaped" do
assert_raise ArgumentError, ~r"contains a reference", fn ->
Macro.escape(%{re_pattern: {:re_pattern, 0, 0, 0, make_ref()}})
end
end

@tag :re_import
test "escape regex will remove references and replace it by a call to :re.import/1" do
assert {
:%{},
[],
[
__struct__: Regex,
re_pattern:
{{:., [], [:re, :import]}, [], [{:{}, [], [:re_exported_pattern | _]}]},
source: "foo",
opts: []
]
} = Macro.escape(~r/foo/)
end
end

describe "expand_once/2" do
Expand Down
11 changes: 10 additions & 1 deletion lib/elixir/test/elixir/test_helper.exs
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,20 @@ cover_exclude =
[]
end

# OTP 28.1+
re_import_exclude =
if Code.ensure_loaded?(:re) and function_exported?(:re, :import, 1) do
[]
else
[:re_import]
end

ExUnit.start(
trace: !!System.get_env("TRACE"),
exclude:
epmd_exclude ++
os_exclude ++ line_exclude ++ distributed_exclude ++ source_exclude ++ cover_exclude,
os_exclude ++
line_exclude ++ distributed_exclude ++ source_exclude ++ cover_exclude ++ re_import_exclude,
include: line_include,
assert_receive_timeout: String.to_integer(System.get_env("ELIXIR_ASSERT_TIMEOUT", "300"))
)