最近在用 Python 处理一些音频数据,特此小记。
主要用到了两个库:pydub 和 librosa。
Pydub
- 原repo中提及的基本使用:
from pydub import AudioSegment
# Read an audio file
song = AudioSegment.from_wav("xxx.wav")
song = AudioSegment.from_mp3("xxx.mp3")
song = AudioSegment.from_file("xxx.wav")
# Slice the audio (slice indices are in milliseconds)
first_ten_seconds = song[:10 * 1000]  # first ten seconds
# Set the sample rate
# set_frame_rate returns a new segment, so the result has to be assigned
song = song.set_frame_rate(16000)  # set sample rate to 16000
# Adjust the volume
song = song + 6  # boost volume by 6dB
# Concatenate audio (song1 and song2 are placeholders for two loaded segments)
song = song1 + song2
# Export the audio
song.export('test2.wav', format='wav')
- 以及在使用过程中和其他处发现的用法:
# 生成
# 会有损失的一种变速方法
# https://stackoverflow.com/questions/51434897/how-to-change-audio-playback-speed-using-pydub
from pydub import AudioSegment
def speed_change(sound, speed=1.0):
    """Return ``sound`` with its playback speed scaled by ``speed``.

    The raw samples are re-labelled with a scaled frame rate and the result
    is then converted back to the original frame rate.

    Args:
        sound: A pydub ``AudioSegment`` (any object exposing ``raw_data``,
            ``frame_rate``, ``_spawn`` and ``set_frame_rate`` works).
        speed: Playback-speed multiplier; values above 1.0 speed the audio
            up, values below 1.0 slow it down.

    Returns:
        The result of ``set_frame_rate`` on the re-labelled segment, i.e. the
        speed-changed audio at the original frame rate.

    Raises:
        ValueError: If ``speed`` yields a frame rate that is not positive.
    """
    altered_frame_rate = int(sound.frame_rate * speed)
    if altered_frame_rate <= 0:
        # A zero/negative rate (speed <= 0, or so small it truncates to 0)
        # cannot describe audio; fail early with a clear message.
        raise ValueError(
            "speed must yield a positive frame rate, got speed=%r" % (speed,)
        )

    # Manually override the frame_rate. This tells the computer how many
    # samples to play per second.
    sound_with_altered_frame_rate = sound._spawn(
        sound.raw_data,
        overrides={"frame_rate": altered_frame_rate},
    )

    # Convert the sound with altered frame rate to a standard frame rate
    # so that regular playback programs will work right. They often only
    # know how to play audio at standard frame rate (like 44.1k).
    return sound_with_altered_frame_rate.set_frame_rate(sound.frame_rate)
sound = AudioSegment.from_file('...')
# speed_change returns the converted segment, so keep the result
slower_sound = speed_change(sound, 0.9)