11""" Censors audio chunks by muting explicit sections """
22
3+ from multiprocessing import Lock
34from pydub import AudioSegment
45from speech import Timestamp , Transcribe
56
67class Censor ():
78 """ Superclass of CensorFile and CensorRealtime """
9+ lock = Lock ()
10+ explicit_count = 0
11+ muted_timestamps = []
12+
813 def __init__ (self , explicits ):
914 super ().__init__ ()
1015 self .explicits = explicits
@@ -23,12 +28,16 @@ def __mute_explicits(self, file_path, audio_segment, timestamps):
2328 """ Go through each word, if its an explicit, mute the duration """
2429 muted = False
2530 for stamp in timestamps :
26- if stamp ['word' ]. lower () in self .explicits : # Explicit found, mute
31+ if stamp ['word' ] in self .explicits : # Explicit found, mute
2732 audio_segment = self .__mute_explicit (audio_segment , stamp )
2833 muted = True
34+ chunk_index = int (file_path .split ('-' )[- 1 ].split ('.' )[0 ])
35+ self .__explicit_count (stamp , chunk_index * 5000 )
2936 if muted :
30- # Overwrite the chunk with the mute(s)
37+ Censor .lock .acquire ()
38+ # Overwrite the chunk with the mute(s) safely
3139 audio_segment .export (file_path , format = 'wav' )
40+ Censor .lock .release ()
3241
3342 @classmethod
3443 def __mute_explicit (cls , audio_segment , timestamp ):
@@ -51,3 +60,29 @@ def __get_lyrics(cls, file_path, audio_segment):
@classmethod
def __get_timestamps(cls, lyrics):
    """ Wrap the lyrics in a Timestamp and hand back its timestamps """
    stamper = Timestamp(lyrics)
    return stamper.timestamps
63+
64+ @classmethod
65+ def __explicit_count (cls , stamp , chunk_offset ):
66+ """ Count the number of explicits safely """
67+ stamp ['start' ] += chunk_offset
68+ stamp ['end' ] += chunk_offset
69+ new_stamp = True
70+ Censor .lock .acquire ()
71+ for mut in Censor .muted_timestamps :
72+ if cls .__duplicate_stamp (mut , stamp ):
73+ new_stamp = False
74+ break
75+ if new_stamp or not Censor .muted_timestamps :
76+ Censor .explicit_count += 1
77+ Censor .muted_timestamps .append (stamp )
78+ Censor .lock .release ()
79+
80+ @classmethod
81+ def __duplicate_stamp (cls , stamp1 , stamp2 ):
82+ """ If 2 timestamps are the same word and start and at relatively the
83+ same time, then assume they're the same timestamp """
84+ if stamp1 ['word' ] == stamp2 ['word' ] and \
85+ abs (stamp1 ['start' ] - stamp2 ['start' ]) < 201 and \
86+ abs (stamp1 ['end' ] - stamp2 ['end' ]) < 201 :
87+ return True
88+ return False
0 commit comments