Skip to content

Commit aab69d4

Browse files
committed
fix: do not parse URL path slashes as bare IPA
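Bare IPA uses /phoneme/ delimiters, which overlap with URL paths, so path segments like /foo/ were tokenized as IPA and the plain-text and SSML output dropped their slashes. Now an opening slash starts bare IPA only at the start of the input or when the preceding character is not a host/path character (a letter, a digit, or one of the unquoted URL specials ':/.-_~?#[]@!+,;%=()&'), so real bare IPA still works after whitespace or punctuation outside that set, such as quotes. Add regression tests for plain text and Alexa SSML. Made-with: Cursor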
1 parent 9f2625f commit aab69d4

2 files changed

Lines changed: 38 additions & 2 deletions

File tree

src/SpeechMarkdownGrammar.ts

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,22 @@ export function speechMarkdownGrammar(myna: any): any {
269269
this.shortSubValue,
270270
'}',
271271
).ast;
272-
this.bareIpa = m.seq('/', this.shortIpaValue, '/').ast;
272+
// Same characters as this.url / audio unquoted URLs (keep in sync with urlSpecialChar).
273+
const urlUnquotedSpecials = ':/.-_~?#[]@!+,;%=()&';
274+
// Bare IPA uses /phoneme/. The same delimiter appears in URL paths; only
275+
// treat /.../ as IPA when '/' is not immediately after a host/path character
276+
// (a letter, digit, or unquoted URL special). For example, a '/' directly after
277+
// "foo+" or "https://example.com" does not open bare IPA.
278+
const bareIpaMayOpenHere = m.predicate((p: any) => {
279+
if (p.index === 0) {
280+
return true;
281+
}
282+
const prev = p.input.charAt(p.index - 1);
283+
const urlInterior =
284+
/[A-Za-z0-9]/.test(prev) || urlUnquotedSpecials.includes(prev);
285+
return !urlInterior;
286+
});
287+
this.bareIpa = m.seq(bareIpaMayOpenHere, '/', this.shortIpaValue, '/').ast;
273288

274289
const percentChange = ['+', m.hyphen, m.digit, '%'];
275290

@@ -315,7 +330,7 @@ export function speechMarkdownGrammar(myna: any): any {
315330
).ast;
316331

317332
// Audio
318-
this.urlSpecialChar = m.char(':/.-_~?#[]@!+,;%=()&');
333+
this.urlSpecialChar = m.char(urlUnquotedSpecials);
319334
this.url = m.choice(m.digit, m.letter, this.urlSpecialChar).oneOrMore.ast;
320335
this.audio = m.seq(
321336
'!',

tests/ipa-short.spec.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,4 +75,25 @@ describe('ipa-short', () => {
7575
`;
7676
expect(text).toBe(expectedText);
7777
});
78+
79+
test('plain text keeps URL path slashes (does not treat as bare IPA)', () => {
80+
const input = 'see https://example.com/foo/bar';
81+
expect(speech.toText(input, {})).toBe(input);
82+
expect(speech.toText('https://a.com/x/y', {})).toBe('https://a.com/x/y');
83+
expect(speech.toText('see example.com/foo/bar', {})).toBe(
84+
'see example.com/foo/bar',
85+
);
86+
// '+' and other unquoted URL specials before '/' must not open bare IPA.
87+
expect(speech.toText('https://x.com/seg+/y', {})).toBe(
88+
'https://x.com/seg+/y',
89+
);
90+
});
91+
92+
test('SSML keeps URL path slashes (does not treat as bare IPA)', () => {
93+
const ssml = speech.toSSML('see https://example.com/foo/bar', {
94+
platform: 'amazon-alexa',
95+
});
96+
expect(ssml).toContain('https://example.com/foo/bar');
97+
expect(ssml).not.toContain('<phoneme');
98+
});
7899
});

0 commit comments

Comments
 (0)