Skip to content

Commit 1dd823f

Browse files
philmcmahon authored and pietrop committed
Add amazontranscribe speaker detection support (#131)
* Add speaker grouping for amazontranscribe adaptor
* Add tests for speaker grouping.
* Respect the mighty linter.
* Use let not var.
1 parent 5959cfa commit 1dd823f

File tree

5 files changed

+12810
-5
lines changed

5 files changed

+12810
-5
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/**
 * Groups a flat list of words into consecutive runs sharing the same
 * `speaker_label`.
 *
 * A new group is opened every time the label differs from the label of the
 * previous word, so a speaker who talks again later gets a second group rather
 * than being merged into an earlier one. Input order is preserved.
 *
 * @param {Array<Object>} words - words, each carrying a `speaker_label` property.
 * @returns {Array<{speaker: *, words: Array<Object>}>} one entry per run, where
 *   `speaker` is the run's `speaker_label` and `words` the original word objects.
 */
export const groupWordsBySpeakerLabel = (words) => {
  const groupedWords = [];

  words.forEach((word) => {
    const openGroup = groupedWords[groupedWords.length - 1];

    // Compare against the group that is actually open instead of a '' sentinel:
    // with a sentinel, a first word whose label is '' would try to push into a
    // group that does not exist yet and throw.
    if (openGroup !== undefined && openGroup.speaker === word.speaker_label) {
      openGroup.words.push(word);
    } else {
      // start new speaker block
      groupedWords.push({
        speaker: word.speaker_label,
        words: [ word ]
      });
    }
  });

  return groupedWords;
};
18+
19+
/**
 * Resolves the display label of the speaker for a single word.
 *
 * The word is attributed to the first segment whose time range fully contains
 * the word's own range (bounds inclusive). Times are read with `parseFloat`
 * from the `start_time` / `end_time` properties of both the word and segments.
 *
 * @param {Object} word - item with `start_time` and `end_time`.
 * @param {Array<Object>} segments - segments with `start_time`, `end_time`
 *   and `speaker_label`.
 * @returns {string} `Speaker <label without the 'spk_' prefix>`, or
 *   `'Speaker UKN'` when no segment contains the word.
 */
export const findSpeakerForWord = (word, segments) => {
  const wordStart = parseFloat(word.start_time);
  const wordEnd = parseFloat(word.end_time);

  // A word without usable times parses to NaN, fails both comparisons and so
  // falls through to the unknown-speaker label.
  const containsWord = (segment) =>
    parseFloat(segment.start_time) <= wordStart && parseFloat(segment.end_time) >= wordEnd;

  const enclosingSegment = segments.find(containsWord);
  if (enclosingSegment === undefined) {
    return 'Speaker UKN';
  }

  const speakerId = enclosingSegment.speaker_label.replace('spk_', '');

  return `Speaker ${ speakerId }`;
};
31+
32+
/**
 * Returns a new list of words, each annotated with a `speaker_label` computed
 * by `findSpeakerForWord`.
 *
 * @param {Array<Object>} words - word items to annotate.
 * @param {Array<Object>} segments - speaker segments handed to `findSpeakerForWord`.
 * @returns {Array<Object>} shallow copies of the words with `speaker_label` set.
 */
const addSpeakerLabelToWords = (words, segments) => {
  // Copy into a fresh object: Object.assign mutates its first argument, and the
  // words belong to the caller (e.g. a parsed transcript that may be reused).
  return words.map((w) => Object.assign({}, w, { 'speaker_label': findSpeakerForWord(w, segments) }));
};
35+
36+
/**
 * Labels every word with its speaker and then groups consecutive words that
 * share a label.
 *
 * @param {Array<Object>} words - word items to label and group.
 * @param {Object} speakerLabels - object whose `segments` property holds the
 *   speaker segments used for labelling.
 * @returns {Array<Object>} the result of `groupWordsBySpeakerLabel` applied to
 *   the labelled words.
 */
export const groupWordsBySpeaker = (words, speakerLabels) =>
  groupWordsBySpeakerLabel(addSpeakerLabelToWords(words, speakerLabels.segments));
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import amazonTodayInFocusTranscript from './sample/todayinfocus.sample.json';
2+
import wordsWithSpeakers from './sample/todayinfocuswords.sample.json';
3+
4+
import { groupWordsBySpeakerLabel, findSpeakerForWord, groupWordsBySpeaker } from './group-words-by-speakers';
5+
6+
const words = amazonTodayInFocusTranscript.results.items;
7+
const speakerLabels = amazonTodayInFocusTranscript.results.speaker_labels;
8+
9+
describe('groupWordsBySpeakerLabel', () => {
  it('Should group speakers correctly', () => {
    // Expectations below pin the first two groups produced from the
    // pre-labelled sample fixture.
    const [ firstGroup, secondGroup ] = groupWordsBySpeakerLabel(wordsWithSpeakers);

    expect(firstGroup.speaker).toBe('spk_0');
    expect(firstGroup.words.length).toBe(1);

    expect(secondGroup.speaker).toBe('spk_1');
    expect(secondGroup.words.length).toBe(2);
  });
});
20+
21+
describe('findSpeakerForWord', () => {
  it('Should find correct speaker', () => {
    // A single pronunciation item, checked against the sample's segments.
    const sampleWord = {
      'start_time': '8.65',
      'end_time': '8.98',
      'alternatives': [
        {
          'confidence': '0.9999',
          'content': 'one'
        }
      ],
      'type': 'pronunciation'
    };

    const speaker = findSpeakerForWord(sampleWord, speakerLabels.segments);

    expect(speaker).toBe('Speaker 0');
  });
});
40+
41+
describe('groupWordsBySpeaker', () => {
  /**
   * Coarse end-to-end check: only the total number of groups is asserted;
   * the detailed behaviour is left to the unit tests of the individual helpers.
   */
  it('Should return expected number of groups', () => {
    const groupCount = groupWordsBySpeaker(words, speakerLabels).length;

    expect(groupCount).toBe(173);
  });
});

src/lib/Util/adapters/amazon-transcribe/index.js

+24-5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
*/
55

66
import generateEntitiesRanges from '../generate-entities-ranges/index.js';
7+
import { groupWordsBySpeaker } from './group-words-by-speakers';
78

89
export const stripLeadingSpace = word => {
910
return word.replace(/^\s/, '');
@@ -88,7 +89,7 @@ const groupWordsInParagraphs = words => {
8889
words: [],
8990
text: []
9091
};
91-
words.forEach((word, index) => {
92+
words.forEach((word) => {
9293
const content = getBestAlternativeForWord(word).content;
9394
const normalizedWord = normalizeWord(word);
9495
if (/[.?!]/.test(content)) {
@@ -106,19 +107,37 @@ const groupWordsInParagraphs = words => {
106107
return results;
107108
};
108109

110+
/**
 * Builds one paragraph per speaker group returned by `groupWordsBySpeaker`.
 *
 * @param {Array<Object>} words - word items to group.
 * @param {Object} speakerLabels - speaker label data forwarded to `groupWordsBySpeaker`.
 * @returns {Array<{words: Array, text: Array, speaker: *}>} for each group:
 *   `words` is every word passed through `normalizeWord`, `text` is the
 *   `content` of each word's best alternative, and `speaker` is the group's
 *   speaker value, passed on unchanged.
 */
const groupSpeakerWordsInParagraphs = (words, speakerLabels) => {
  const toParagraph = ({ words: spokenWords, speaker }) => ({
    words: spokenWords.map(normalizeWord),
    text: spokenWords.map((spokenWord) => getBestAlternativeForWord(spokenWord).content),
    speaker
  });

  return groupWordsBySpeaker(words, speakerLabels).map(toParagraph);
};
121+
109122
const amazonTranscribeToDraft = amazonTranscribeJson => {
110123
const results = [];
111124
const tmpWords = amazonTranscribeJson.results.items;
125+
const speakerLabels = amazonTranscribeJson.results.speaker_labels;
112126
const wordsWithRemappedPunctuation = mapPunctuationItemsToWords(tmpWords);
113-
const wordsByParagraphs = groupWordsInParagraphs(
114-
wordsWithRemappedPunctuation
115-
);
127+
const speakerSegmentation = typeof(speakerLabels) != 'undefined';
128+
129+
const wordsByParagraphs = speakerSegmentation ?
130+
groupSpeakerWordsInParagraphs(wordsWithRemappedPunctuation, speakerLabels) :
131+
groupWordsInParagraphs(
132+
wordsWithRemappedPunctuation
133+
);
134+
116135
wordsByParagraphs.forEach((paragraph, i) => {
117136
const draftJsContentBlockParagraph = {
118137
text: paragraph.text.join(' '),
119138
type: 'paragraph',
120139
data: {
121-
speaker: `TBC ${ i }`,
140+
speaker: paragraph.speaker ? `Speaker ${ paragraph.speaker }` : `TBC ${ i }`,
122141
words: paragraph.words,
123142
start: parseFloat(paragraph.words[0].start)
124143
},

0 commit comments

Comments
 (0)