Skip to content

Commit 1f58aa7

Browse files
committed
Fixes to encoding/transcoding for ractors.
Not all ractor-related encoding issues were fixed by 1afc07e. I found more by running my test-all branch with 3 ractors for each test.
1 parent e409371 commit 1f58aa7

File tree

5 files changed

+114
-16
lines changed

5 files changed

+114
-16
lines changed

encoding.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,16 @@ enc_registered(struct enc_table *enc_table, const char *name)
459459
return -1;
460460
}
461461

462+
int
463+
rb_enc_registered(const char *name)
464+
{
465+
int idx;
466+
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
467+
idx = enc_registered(enc_table, name);
468+
}
469+
return idx;
470+
}
471+
462472
void
463473
rb_encdb_declare(const char *name)
464474
{
@@ -1600,8 +1610,10 @@ enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const cha
16001610
/* Already set */
16011611
overridden = TRUE;
16021612

1613+
int index = 0;
16031614
if (!NIL_P(encoding)) {
16041615
enc_check_encoding(encoding); // loads it if necessary. Needs to be done outside of VM lock.
1616+
index = rb_enc_to_index(rb_to_encoding(encoding));
16051617
}
16061618

16071619
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
@@ -1619,7 +1631,7 @@ enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const cha
16191631
(st_data_t)UNSPECIFIED_ENCODING);
16201632
}
16211633
else {
1622-
def->index = rb_enc_to_index(rb_to_encoding(encoding));
1634+
def->index = index;
16231635
def->enc = 0;
16241636
enc_alias_internal(enc_table, name, def->index);
16251637
}

include/ruby/internal/encoding/encoding.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1018,6 +1018,12 @@ void rb_enc_set_default_internal(VALUE encoding);
10181018
*/
10191019
VALUE rb_locale_charmap(VALUE klass);
10201020

1021+
/**
 * Queries whether an encoding is registered under the given name.
 *
 * @param[in]  name       Name of the encoding to look up.
 * @retval     -1         No encoding is registered under `name`.
 * @retval     otherwise  Index (>= 0) of the registered encoding.
 * @note       The result is an index, not a boolean: -1 is truthy and a
 *             valid index may be 0, so compare against 0 explicitly.
 */
1025+
int rb_enc_registered(const char *name);
1026+
10211027
RBIMPL_SYMBOL_EXPORT_END()
10221028

10231029
/** @cond INTERNAL_MACRO */

test/ruby/test_encoding.rb

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,4 +157,23 @@ def test_ractor_lazy_load_encoding_concurrently
157157
assert rs.empty?
158158
end;
159159
end
160+
161+
# Starts 7 ractors that each assign Encoding.default_external = "us-ascii"
# (a String, not an Encoding object), then drains them one at a time with
# Ractor.select until none are left. Warnings are switched off with
# `$-w = nil` before the assignments. There is no value assertion beyond
# the list being empty; the test presumably passes as long as no ractor
# raises or crashes.
def test_ractor_set_default_external_string
162+
assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}")
163+
begin;
164+
$-w = nil
165+
rs = []
166+
7.times do |i|
167+
rs << Ractor.new(i) do |i|
168+
Encoding.default_external = "us-ascii"
169+
end
170+
end
171+
172+
while rs.any?
173+
r, _obj = Ractor.select(*rs)
174+
rs.delete(r)
175+
end
176+
assert rs.empty?
177+
end;
178+
end
160179
end

test/ruby/test_transcode.rb

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2361,6 +2361,52 @@ def test_ractor_lazy_load_encoding_random
23612361
end;
23622362
end
23632363

2364+
# Starts 7 ractors that each call Encoding::Converter.asciicompat_encoding
# 20,000 times, alternating between the name "ISO-2022-JP" and the Encoding
# object found for that name, then drains the ractors with Ractor.select.
# The return values are not checked; the test presumably passes as long as
# no ractor raises or crashes.
def test_ractor_asciicompat_encoding_exists
2365+
assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}")
2366+
begin;
2367+
rs = []
2368+
7.times do
2369+
rs << Ractor.new do
2370+
string = "ISO-2022-JP"
2371+
encoding = Encoding.find(string)
2372+
20_000.times do
2373+
Encoding::Converter.asciicompat_encoding(string)
2374+
Encoding::Converter.asciicompat_encoding(encoding)
2375+
end
2376+
end
2377+
end
2378+
2379+
while rs.any?
2380+
r, _obj = Ractor.select(*rs)
2381+
rs.delete(r)
2382+
end
2383+
assert rs.empty?
2384+
end;
2385+
end
2386+
2387+
# Starts 7 ractors that each call Encoding::Converter.asciicompat_encoding
# 500 times with the frozen string "I" and raise inside the ractor if any
# call returns something other than nil. The ractors are then drained with
# Ractor.select until none are left.
def test_ractor_asciicompat_encoding_doesnt_exist
2388+
assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}")
2389+
begin;
2390+
rs = []
2391+
NO_EXIST = "I".freeze
2392+
7.times do
2393+
rs << Ractor.new do
2394+
500.times do
2395+
if (val = Encoding::Converter.asciicompat_encoding(NO_EXIST)) != nil
2396+
raise "Got #{val}, expected nil"
2397+
end
2398+
end
2399+
end
2400+
end
2401+
2402+
while rs.any?
2403+
r, _obj = Ractor.select(*rs)
2404+
rs.delete(r)
2405+
end
2406+
assert rs.empty?
2407+
end;
2408+
end
2409+
23642410
private
23652411

23662412
def assert_conversion_both_ways_utf8(utf8, raw, encoding)

transcode.c

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1826,7 +1826,9 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
18261826
st_table *table2;
18271827
struct asciicompat_encoding_t data = {0};
18281828

1829-
RB_VM_LOCKING() {
1829+
unsigned int lev;
1830+
RB_VM_LOCK_ENTER_LEV(&lev);
1831+
{
18301832
if (st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v)) {
18311833
table2 = (st_table *)v;
18321834
/*
@@ -1839,12 +1841,24 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
18391841
if (table2->num_entries == 1) {
18401842
data.ascii_incompat_name = ascii_incompat_name;
18411843
data.ascii_compat_name = NULL;
1842-
st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data);
1844+
if (rb_multi_ractor_p()) {
1845+
/*
1846+
* We need to unlock in case `load_transcoder_entry` actually loads the encoding
1847+
* and table2 could be inserted into when we unlock.
1848+
*/
1849+
st_table *dup_table2 = st_copy(table2);
1850+
RB_VM_LOCK_LEAVE_LEV(&lev);
1851+
st_foreach(dup_table2, asciicompat_encoding_i, (st_data_t)&data);
1852+
st_free_table(dup_table2);
1853+
RB_VM_LOCK_ENTER_LEV(&lev);
1854+
} else {
1855+
st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data);
1856+
}
18431857
}
18441858

18451859
}
1846-
18471860
}
1861+
RB_VM_LOCK_LEAVE_LEV(&lev);
18481862

18491863
return data.ascii_compat_name; // can be NULL
18501864
}
@@ -2989,10 +3003,15 @@ static rb_encoding *
29893003
make_encoding(const char *name)
29903004
{
29913005
rb_encoding *enc;
2992-
RB_VM_LOCKING() {
2993-
enc = rb_enc_find(name);
2994-
if (!enc)
2995-
enc = make_dummy_encoding(name);
3006+
enc = rb_enc_find(name);
3007+
if (!enc) {
3008+
RB_VM_LOCKING() {
3009+
if (rb_enc_registered(name)) {
3010+
enc = NULL;
3011+
} else {
3012+
enc = make_dummy_encoding(name);
3013+
}
3014+
}
29963015
}
29973016
return enc;
29983017
}
@@ -3029,14 +3048,10 @@ econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
30293048
VALUE enc = Qnil;
30303049

30313050
enc_arg(&arg, &arg_name, &arg_enc);
3032-
3033-
RB_VM_LOCKING() {
3034-
result_name = rb_econv_asciicompat_encoding(arg_name);
3035-
3036-
if (result_name) {
3037-
result_enc = make_encoding(result_name);
3038-
enc = rb_enc_from_encoding(result_enc);
3039-
}
3051+
result_name = rb_econv_asciicompat_encoding(arg_name);
3052+
if (result_name) {
3053+
result_enc = make_encoding(result_name);
3054+
enc = rb_enc_from_encoding(result_enc);
30403055
}
30413056
return enc;
30423057
}

0 commit comments

Comments
 (0)