forked from RavinMaddHatter/YoutubePrepTools
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathopenai_translator.py
More file actions
63 lines (58 loc) · 2.59 KB
/
openai_translator.py
File metadata and controls
63 lines (58 loc) · 2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import subprocess
import tempfile
import os
from faster_whisper import WhisperModel
from os.path import splitext
class translator:
    """Transcribe audio/video files to plain text with a faster-whisper model.

    Progress is reported by putting ``{"percent": ..., "state": ...}`` dicts
    on the queue handed to the constructor.
    """

    def __init__(self, transcribe_queue, model):
        """Load the Whisper model, reporting progress on *transcribe_queue*.

        Args:
            transcribe_queue: Any object with a ``put(item)`` method; it
                receives the status dicts emitted by this class.
            model: Forwarded unchanged as the first argument of
                ``WhisperModel`` (presumably a model size name or a path --
                confirm against the faster-whisper documentation).
        """
        self.status_queue = transcribe_queue
        self.status_queue.put({"percent": 0.0, "state": "Loading Model"})
        # CPU inference with int8 compute is hard-coded here.
        self.model = WhisperModel(model, device="cpu", compute_type="int8")
        self.status_queue.put({"percent": 25, "state": "Model Ready"})

    @staticmethod
    def _remove_quietly(path):
        """Delete *path*, ignoring the case where it is already gone."""
        try:
            os.unlink(path)
        except FileNotFoundError:
            pass

    def _preprocess_audio(self, fileName: str, denoise: bool = False) -> str:
        """Extract and clean the audio track of *fileName* with ffmpeg.

        The audio is converted to mono, 16 kHz WAV with a high-pass filter
        and loudness normalisation; an FFT denoiser is inserted between the
        two when *denoise* is true.

        Args:
            fileName: Path of the media file to read.
            denoise: Whether to add the ``afftdn`` denoise filter.

        Returns:
            Path of a temporary ``.wav`` file. The caller owns it and is
            responsible for deleting it.

        Raises:
            subprocess.CalledProcessError: ffmpeg exited with an error.
            OSError: ffmpeg could not be started (e.g. it is not installed).
        """
        # delete=False + close(): we only need a unique path that ffmpeg can
        # open by name and overwrite (hence "-y" below).
        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        tmp.close()

        # highpass kills rumble; loudnorm normalizes volume; afftdn denoises
        # (optional) and must run before loudnorm.
        filters = ["highpass=f=80", "loudnorm=I=-16:TP=-1.5:LRA=11"]
        if denoise:
            filters.insert(1, "afftdn=nf=-25")

        cmd = [
            "ffmpeg", "-y", "-i", fileName,
            "-vn", "-ac", "1", "-ar", "16000",
            "-af", ",".join(filters),
            tmp.name,
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True)
        except BaseException:
            # The caller never learns the temp path when we raise, so it
            # cannot clean up -- remove the file here to avoid leaking it.
            self._remove_quietly(tmp.name)
            raise
        return tmp.name

    def transcribeFile(
        self, fileName: str, preprocess: bool = False, denoise: bool = False
    ) -> str:
        """Transcribe a single file and return the text (no file write).

        Args:
            fileName: Path of the media file to transcribe.
            preprocess: Run the ffmpeg clean-up pass first. This is
                best-effort: if it fails, the original file is transcribed
                instead and the failure is printed.
            denoise: Forwarded to the preprocessing step.

        Returns:
            The stripped text of all non-blank segments, joined by single
            spaces.
        """
        target = fileName
        temp_audio = None
        if preprocess:
            try:
                temp_audio = self._preprocess_audio(fileName, denoise=denoise)
                target = temp_audio
            except Exception as e:
                # Deliberately broad: preprocessing is optional, so any
                # failure falls back to the untouched input.
                print(f"Preprocessing failed, using original: {e}")

        try:
            segments, _info = self.model.transcribe(
                target,
                beam_size=5,
                condition_on_previous_text=False,
                vad_filter=True,
            )
            # Consume the segments inside the try block so the temp file
            # still exists while they are produced (faster-whisper documents
            # the segments as a lazy generator).
            pieces = (segment.text.strip() for segment in segments)
            # Skip blank segments so they do not yield doubled spaces.
            return " ".join(piece for piece in pieces if piece)
        finally:
            if temp_audio is not None:
                self._remove_quietly(temp_audio)

    def audioToText(
        self, fileName: str, preprocess: bool = False, denoise: bool = False
    ) -> None:
        """Transcribe *fileName* and write the text next to it as ``.txt``.

        The output path is *fileName* with its extension replaced by
        ``.txt``. Status dicts are put on the status queue along the way,
        ending with ``{"percent": 100, "state": "done"}``.

        Args:
            fileName: Path of the media file to transcribe.
            preprocess: Forwarded to ``transcribeFile``.
            denoise: Forwarded to ``transcribeFile``.
        """
        print("starting transcription")
        # Both updates are sent up-front because transcribeFile performs the
        # preparation and the transcription in a single call.
        self.status_queue.put({"percent": 20, "state": "Preparing audio..."})
        self.status_queue.put({"percent": 33, "state": "Transcribing..."})
        text = self.transcribeFile(fileName, preprocess=preprocess, denoise=denoise)
        name, _extension = splitext(fileName)
        self.status_queue.put({"percent": 95, "state": "Transcription Complete"})
        # Explicit UTF-8: transcripts may contain characters the platform's
        # default encoding cannot represent.
        with open(name + ".txt", "w", encoding="utf-8") as text_file:
            text_file.write(text)
        self.status_queue.put({"percent": 100, "state": "done"})