I want to create 32-bit float WAV files in Python (2.x). While "standard" WAV files usually use integer samples, many professional audio applications process (and save) audio data as floats. The standard wave module is not capable of doing this: http://bugs.python.org/issue16525 Has anyone achieved this without using patched modules? Thanks for any help.
问题:
回答1:
This sounded like fun (see my handle), so I hammered out something. Maybe you can use it. If your Python script generates a monophonic waveform of numerical values that fall in the range [-1.0 .. 1.0], pass that waveform in through sample_array and also specify sample_rate (e.g., 44100 or 48000). The function returns a byte string that you can write to disk as a .wav file.
I tested the resulting .wav output in Windows Media Player, Apple QuickTime Player, and VLC (all on Windows 7). They all played it.
def float32_wav_file(sample_array, sample_rate):
    """Build a mono 32-bit IEEE-float WAV file in memory.

    Args:
        sample_array: Iterable of numeric samples (nominally within
            [-1.0, 1.0]); each one is stored as a little-endian float32.
        sample_rate: Sampling rate in samples per second
            (e.g. 44100 or 48000).

    Returns:
        A byte string (``str`` on Python 2, ``bytes`` on Python 3) made of
        the 44-byte RIFF/WAVE header followed by the packed samples. Write
        it to a file opened in binary mode to obtain a playable .wav file.
    """
    samples = list(sample_array)
    byte_count = len(samples) * 4  # 32-bit floats

    # '4s' with bytes literals (instead of per-character 'c' fields) keeps the
    # header byte-identical while also working on Python 3, where struct
    # rejects text strings.
    header = struct.pack(
        '<4sI4s4sIHHIIHH4sI',
        b'RIFF',
        byte_count + 0x2c - 8,  # RIFF chunk size: file size minus the 8-byte RIFF preamble
        b'WAVE',
        b'fmt ',
        0x10,              # size of 'fmt ' header
        3,                 # format 3 = floating-point PCM
        1,                 # channels
        sample_rate,       # samples / second
        sample_rate * 4,   # bytes / second
        4,                 # block alignment
        32,                # bits / sample
        b'data',
        byte_count,
    )

    # Pack all samples in one call; appending to a string once per sample
    # is quadratic in the worst case.
    payload = struct.pack('<%df' % len(samples), *samples)
    return header + payload
回答2:
Here's my contribution... it includes arbitrary word size and an arbitrary number of channels. I've taken the liberty of changing float32_wav_file to include a file save for testing. Note that the multichannel data portion of the file structure is interleaved. That loop could be greatly pythonized, I'm sure.
# see http://stackoverflow.com/questions/15576798/create-32bit-float-wav-file-in-python
# see... http://blog.theroyweb.com/extracting-wav-file-header-information-using-a-python-script
import struct
def float32_wav_file(file_name, sample_array, sample_rate):
    """Write a 32-bit IEEE-float WAV file and return its bytes.

    Args:
        file_name: Path of the .wav file to create (overwritten if present).
        sample_array: Array-like of shape (channels, frames). A 1-D input is
            treated as a single channel.
        sample_rate: Sampling rate in frames per second.

    Returns:
        The complete file content as a byte string: the 44-byte RIFF/WAVE
        header followed by the channel-interleaved little-endian float32
        samples.
    """
    # Local import so this function does not depend on where the module's
    # numpy import sits relative to it.
    import numpy as np

    # atleast_2d lets a mono 1-D array through as shape (1, N).
    samples = np.atleast_2d(np.asarray(sample_array, dtype='<f4'))
    (M, N) = samples.shape
    byte_count = M * N * 4  # 32-bit floats

    header = struct.pack(
        '<4sI4s4sIHHIIHH4sI',
        b'RIFF',
        byte_count + 0x2c - 8,  # RIFF chunk size: file size minus the 8-byte RIFF preamble
        b'WAVE',
        b'fmt ',
        0x10,                 # size of 'fmt ' header
        3,                    # format 3 = floating-point PCM
        M,                    # channels
        sample_rate,          # samples / second
        sample_rate * M * 4,  # bytes / second (all channels)
        M * 4,                # block alignment: bytes per frame (all channels)
        32,                   # bits / sample
        b'data',
        byte_count,
    )

    print("packing...")
    # Transposing to (frames, channels) and dumping in C order interleaves the
    # channels frame by frame, i.e. the order sample_array[k, j] for each
    # frame j and then each channel k.
    wav_file = header + samples.T.tobytes()

    print("saving...")
    with open(file_name, 'wb') as fi:
        fi.write(wav_file)
    return wav_file
import numpy as np
def wav_file_read(filename):
    """Read a WAV file with the canonical 44-byte header into float64 samples.

    The parser assumes the fixed layout 'RIFF', 'WAVE', a 16-byte 'fmt '
    chunk and then the 'data' chunk starting at byte 36; files with extra
    chunks before the audio data are not handled. The header fields are
    printed to stdout.

    Integer PCM is scaled by the largest positive value of its word size
    (8-bit data is first re-centred around 128, since it is stored
    unsigned). Format 3 (IEEE float) data with 32 or 64 bits per sample is
    returned unscaled. Any other bit depth is treated as 32-bit integers.

    Args:
        filename: Path of the .wav file to read.

    Returns:
        A tuple ``(samples, SampleRate, NumChannels, BitsPerSample)``.
        ``samples`` is a 1-D float64 array for mono files and a
        (NumChannels, frames) float64 array otherwise.

    Raises:
        struct.error: If the file is shorter than the 44-byte header.
    """
    with open(filename, 'rb') as fi:
        data = fi.read()

    # One unpack for the whole header; sizes and rates are unsigned in RIFF.
    (_riff_tag, ChunkSize, _wave_tag, _fmt_tag, Subchunk1Size, AudioFormat,
     NumChannels, SampleRate, ByteRate, BlockAlign, BitsPerSample,
     data_tag, SubChunk2Size) = struct.unpack_from(
         '<4sI4s4sIHHIIHH4sI', data, 0)
    # bytearray yields ints on both Python 2 and 3, which '%c' accepts.
    A, B, C, D = bytearray(data_tag)

    # Honour the declared data size so trailing chunks are not decoded as audio.
    waveData = data[44:44 + SubChunk2Size]

    print("ChunkSize =%d\nSubchunk1Size =%d\nAudioFormat =%d\nNumChannels =%d\nSampleRate =%d\nByteRate =%d\nBlockAlign =%d\nBitsPerSample =%d\nA:%c, B:%c, C:%c, D:%c\nSubChunk2Size =%d" %
          (ChunkSize,
           Subchunk1Size,
           AudioFormat,
           NumChannels,
           SampleRate,
           ByteRate,
           BlockAlign,
           BitsPerSample,
           A, B, C, D,
           SubChunk2Size))

    if BitsPerSample == 8:
        print("Unpacking 8 bits on len(waveData)=%d" % len(waveData))
        d = np.frombuffer(waveData, dtype=np.uint8)
        # 8-bit WAV samples are unsigned with silence at 128.
        floatdata = (d.astype(np.float64) - 128.0) / 127.0
    elif BitsPerSample == 16:
        print("Unpacking 16 bits on len(waveData)=%d" % len(waveData))
        usable = len(waveData) - len(waveData) % 2  # drop a trailing partial sample
        d = np.frombuffer(waveData[:usable], dtype='<i2')
        floatdata = d.astype(np.float64) / 32767.0
    elif BitsPerSample == 24:
        print("Unpacking 24 bits on len(waveData)=%d" % len(waveData))
        usable = len(waveData) - len(waveData) % 3  # drop a trailing partial sample
        raw = np.frombuffer(waveData[:usable], dtype=np.uint8).reshape(-1, 3)
        # Place the three little-endian bytes in the upper 24 bits of an
        # int32 (low byte zero); casting the top byte to int8 carries the sign.
        d = ((raw[:, 0].astype(np.int32) << 8)
             | (raw[:, 1].astype(np.int32) << 16)
             | (raw[:, 2].astype(np.int8).astype(np.int32) << 24))
        floatdata = d.astype(np.float64) / 2147483647.0
    elif AudioFormat == 3 and BitsPerSample == 64:
        print("Unpacking 64 bits on len(waveData)=%d" % len(waveData))
        usable = len(waveData) - len(waveData) % 8
        floatdata = np.frombuffer(waveData[:usable], dtype='<f8').astype(np.float64)
    elif AudioFormat == 3:
        # Format 3 = IEEE float: the samples are already floating point.
        print("Unpacking 32 bits on len(waveData)=%d" % len(waveData))
        usable = len(waveData) - len(waveData) % 4
        floatdata = np.frombuffer(waveData[:usable], dtype='<f4').astype(np.float64)
    else:  # anything else will be considered 32 bits
        print("Unpacking 32 bits on len(waveData)=%d" % len(waveData))
        usable = len(waveData) - len(waveData) % 4
        d = np.frombuffer(waveData[:usable], dtype='<i4')
        floatdata = d.astype(np.float64) / 2147483647.0

    if NumChannels > 1:
        # Drop an incomplete trailing frame so every channel has equal length.
        frames = len(floatdata) // NumChannels
        floatdata = floatdata[:frames * NumChannels]

    # De-interleave: channel i owns every NumChannels-th sample from offset i.
    v = floatdata[0::NumChannels]
    if NumChannels > 1:
        v = np.vstack([floatdata[i::NumChannels] for i in range(NumChannels)])
    return (v, SampleRate, NumChannels, BitsPerSample)
if __name__ == "__main__":
    # Round-trip demo: decode an existing WAV file, then re-save its samples
    # as a 32-bit float WAV at the same sample rate.
    decoded = wav_file_read("my_input_file.wav")
    array, SampleRate, NumChannels, BitsPerSample = decoded
    wavefile = float32_wav_file("test_file.wav", array, SampleRate)