Whisper: audio to text (SRT subtitles)
Result Demo
1
00:00:00,000 --> 00:00:03,500
那我們這次的目標呢
2
00:00:03,500 --> 00:00:07,299
是希望就是點了那個button之後啊
3
00:00:07,299 --> 00:00:10,500
網頁上的資訊可以有更動
Python Code
import whisper
# Path of the media file that will be transcribed.
file_path = "/Users/peggy/Documents/youtube影片/11-軟體開發紀錄/(剪輯)react-3.mp4"
# The triple-quoted block below is a bare string literal, so nothing inside
# it is executed. It holds a disabled "plain mode" variant: load the "base"
# model, transcribe with language="zh", print the text, and (itself
# commented out) write the text to result.txt.
'''
# 一般模式
model = whisper.load_model("base")
result = model.transcribe(file_path, fp16=False, language="zh")
print(result["text"])
# 寫進檔案
# Write the text portion of the transcription result to a file named result.txt
# with open("result.txt", "w") as file:
# file.write(result["text"])
'''
# Convert to an SRT file
# Load the model and run the transcription
# The prompt text reads "The following are sentences in Mandarin"; per the
# original author's note it is there to get Traditional Chinese output.
prompt = '以下是普通話的句子' # transcribe into Traditional Chinese
model = whisper.load_model("large")
# Earlier variant of the call that also pinned the language to "zh":
#result = model.transcribe(file_path, fp16=False, language="zh", initial_prompt = prompt)
# No language argument is passed here.
# NOTE(review): presumably Whisper then auto-detects the language - confirm
# against the Whisper documentation.
result = model.transcribe(file_path, fp16=False, initial_prompt = prompt)
# Full transcript text (not used again in the visible code) and the
# per-segment records that the SRT-building loop further down iterates over.
transcription_text = result["text"]
transcription_segments = result["segments"]
# Debug aid: dump the raw segments.
#print(transcription_segments)
# Convert a timestamp in seconds to the SRT time format
def to_srt_time(timestamp: float) -> str:
    """Format a time offset in seconds as an SRT timestamp.

    Args:
        timestamp: Offset from the start of the media, in seconds
            (may carry a fractional part).

    Returns:
        The offset formatted as ``HH:MM:SS,mmm``, rounded to the nearest
        millisecond.
    """
    # Work in whole milliseconds. Truncating the fractional part directly
    # lets float noise leak through (7.3 s would become ",299" instead of
    # ",300"), and rounding first also lets a fraction that rounds up to
    # 1000 ms carry into the seconds field.
    total_ms = int(round(timestamp * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
# Build the SRT subtitles
# Each cue is "<number>\n<start> --> <end>\n<text>" followed by a blank line.
srt_entries = []
for segment in transcription_segments:
    start_idx = segment.get("start")
    end_idx = segment.get("end")
    # Skip segments that are missing either timestamp.
    if start_idx is None or end_idx is None:
        continue
    start_time = to_srt_time(start_idx)
    end_time = to_srt_time(end_idx)
    # Drop surrounding whitespace so the cue text starts cleanly.
    subtitle_text = segment["text"].strip()
    # Number cues by how many were actually emitted, so a skipped segment
    # does not leave a gap in the SRT sequence numbers.
    cue_number = len(srt_entries) + 1
    srt_entries.append(
        f"{cue_number}\n{start_time} --> {end_time}\n{subtitle_text}\n\n"
    )
srt_content = "".join(srt_entries)

# Write the SRT content to a file
# Explicit UTF-8: the subtitles contain Chinese text, and the platform
# default encoding is not guaranteed to be able to represent it.
with open("result.srt", "w", encoding="utf-8") as file:
    file.write(srt_content)
print("SRT檔案已成功生成")