Skip to content
This repository was archived by the owner on Jun 3, 2019. It is now read-only.

Commit 7ff7a50

Browse files
🔒 Safely modify shared resources & Explicit count (#143)
1 parent b8b3b09 commit 7ff7a50

File tree

5 files changed

+41
-6
lines changed

5 files changed

+41
-6
lines changed

cleansio/censor/censor.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
""" Censors audio chunks by muting explicit sections """
22

3+
from multiprocessing import Lock
34
from pydub import AudioSegment
45
from speech import Timestamp, Transcribe
56

67
class Censor():
78
""" Superclass of CensorFile and CensorRealtime """
9+
lock = Lock()
10+
explicit_count = 0
11+
muted_timestamps = []
12+
813
def __init__(self, explicits):
914
super().__init__()
1015
self.explicits = explicits
@@ -23,12 +28,16 @@ def __mute_explicits(self, file_path, audio_segment, timestamps):
2328
""" Go through each word, if its an explicit, mute the duration """
2429
muted = False
2530
for stamp in timestamps:
26-
if stamp['word'].lower() in self.explicits: # Explicit found, mute
31+
if stamp['word'] in self.explicits: # Explicit found, mute
2732
audio_segment = self.__mute_explicit(audio_segment, stamp)
2833
muted = True
34+
chunk_index = int(file_path.split('-')[-1].split('.')[0])
35+
self.__explicit_count(stamp, chunk_index * 5000)
2936
if muted:
30-
# Overwrite the chunk with the mute(s)
37+
Censor.lock.acquire()
38+
# Overwrite the chunk with the mute(s) safely
3139
audio_segment.export(file_path, format='wav')
40+
Censor.lock.release()
3241

3342
@classmethod
3443
def __mute_explicit(cls, audio_segment, timestamp):
@@ -51,3 +60,29 @@ def __get_lyrics(cls, file_path, audio_segment):
5160
@classmethod
5261
def __get_timestamps(cls, lyrics):
5362
return Timestamp(lyrics).timestamps
63+
64+
@classmethod
def __explicit_count(cls, stamp, chunk_offset):
    """ Count the number of explicits safely

    Adds chunk_offset to the stamp's 'start' and 'end' values in place.
    Then, while holding Censor.lock, the stamp is appended to
    Censor.muted_timestamps and Censor.explicit_count is incremented,
    unless an already recorded stamp is considered a duplicate of it. """
    stamp['start'] += chunk_offset
    stamp['end'] += chunk_offset
    # The context manager releases the lock even if the comparison raises
    # (e.g. a stamp with a missing key); a bare acquire()/release() pair
    # would leave the lock held and block every later caller.
    with Censor.lock:
        already_counted = any(
            cls.__duplicate_stamp(mut, stamp)
            for mut in Censor.muted_timestamps)
        if not already_counted:
            Censor.explicit_count += 1
            Censor.muted_timestamps.append(stamp)
79+
80+
@classmethod
def __duplicate_stamp(cls, stamp1, stamp2):
    """ Two timestamps are treated as the same one when they carry the same
    word and both their 'start' and their 'end' values differ by less
    than 201 """
    same_word = stamp1['word'] == stamp2['word']
    # Checked in the order word -> start -> end, stopping at the first
    # mismatch.
    return bool(same_word) and all(
        abs(stamp1[edge] - stamp2[edge]) < 201
        for edge in ('start', 'end'))

cleansio/censor/censor_file.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ def __censor_chunk(self, async_iter):
4141
return self.censor_audio_chunk(chunk_file_path)
4242

4343
def __create_clean_file(self, clean_file):
44+
print('Cleansio found {1}{0}{2} explicit(s)!'.format(
45+
Censor.explicit_count, Fore.GREEN, Fore.RESET))
4446
clean_file.export(self.location, format=self.encoding)
4547
print(Fore.CYAN + 'Successfully created clean file, it\'s located at:')
4648
print(Fore.YELLOW + self.location)

cleansio/speech/timestamp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def __parse_timestamps(self):
2222
timestamps = []
2323
for word in self.lyrics:
2424
timestamps.append({
25-
'word': word.word,
25+
'word': word.word.lower(),
2626
'start': gcs_time_to_ms(word.start_time),
2727
'end': gcs_time_to_ms(word.end_time)
2828
})

cleansio/utils/cleanup.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
""" Cleans up temporary files after the program runs """
22

3-
# environ - To read the environment variables which we use for communication
4-
# remove - To remove the temporary files
53
from atexit import register
64
from os import environ, remove
75
from signal import signal, SIGABRT, SIGILL, SIGINT, SIGSEGV, SIGTERM

tests/censor/test_censor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def test_censor():
2020
file_path = __get_file('/../data/testing.wav')
2121
audio_segment = AudioSegment.from_file(file_path)
2222
# Duplicate the audio file and begin muting the new file
23-
file_path_duplicate = __get_file('/../data//testing-censored.wav')
23+
file_path_duplicate = __get_file('/../data/testing-censored-0.wav')
2424
duplicate_file = audio_segment.export(file_path_duplicate, format = 'wav')
2525
audio_segment_duplicate = AudioSegment.from_file(file_path_duplicate)
2626
# Test that the explicits were successfully removed

0 commit comments

Comments
 (0)