Oisin Mac Aodha
added bat code
9ace58a
"""
Module to read / write wav files using numpy arrays
Functions
---------
`read`: Return the sample rate (in samples/sec) and data from a WAV file.
`write`: Write a numpy array as a WAV file.
"""
from __future__ import division, print_function, absolute_import
import sys
import numpy
import struct
import warnings
import os
class WavFileWarning(UserWarning):
pass
_big_endian = False
WAVE_FORMAT_PCM = 0x0001
WAVE_FORMAT_IEEE_FLOAT = 0x0003
WAVE_FORMAT_EXTENSIBLE = 0xfffe
KNOWN_WAVE_FORMATS = (WAVE_FORMAT_PCM, WAVE_FORMAT_IEEE_FLOAT)
# assumes file pointer is immediately
# after the 'fmt ' id
def _read_fmt_chunk(fid):
if _big_endian:
fmt = '>'
else:
fmt = '<'
res = struct.unpack(fmt+'iHHIIHH',fid.read(20))
size, comp, noc, rate, sbytes, ba, bits = res
if comp not in KNOWN_WAVE_FORMATS or size > 16:
comp = WAVE_FORMAT_PCM
warnings.warn("Unknown wave file format", WavFileWarning)
if size > 16:
fid.read(size - 16)
return size, comp, noc, rate, sbytes, ba, bits
# assumes file pointer is immediately
# after the 'data' id
def _read_data_chunk(fid, comp, noc, bits, mmap=False):
if _big_endian:
fmt = '>i'
else:
fmt = '<i'
size = struct.unpack(fmt,fid.read(4))[0]
bytes = bits//8
if bits == 8:
dtype = 'u1'
else:
if _big_endian:
dtype = '>'
else:
dtype = '<'
if comp == 1:
dtype += 'i%d' % bytes
else:
dtype += 'f%d' % bytes
if not mmap:
data = numpy.fromstring(fid.read(size), dtype=dtype)
else:
start = fid.tell()
data = numpy.memmap(fid, dtype=dtype, mode='c', offset=start,
shape=(size//bytes,))
fid.seek(start + size)
if noc > 1:
data = data.reshape(-1,noc)
return data
def _skip_unknown_chunk(fid):
if _big_endian:
fmt = '>i'
else:
fmt = '<i'
data = fid.read(4)
size = struct.unpack(fmt, data)[0]
fid.seek(size, 1)
def _read_riff_chunk(fid):
global _big_endian
str1 = fid.read(4)
if str1 == b'RIFX':
_big_endian = True
elif str1 != b'RIFF':
raise ValueError("Not a WAV file.")
if _big_endian:
fmt = '>I'
else:
fmt = '<I'
fsize = struct.unpack(fmt, fid.read(4))[0] + 8
str2 = fid.read(4)
if (str2 != b'WAVE'):
raise ValueError("Not a WAV file.")
if str1 == b'RIFX':
_big_endian = True
return fsize
# open a wave-file
def read(filename, mmap=False):
"""
Return the sample rate (in samples/sec) and data from a WAV file
Parameters
----------
filename : string or open file handle
Input wav file.
mmap : bool, optional
Whether to read data as memory mapped.
Only to be used on real files (Default: False)
.. versionadded:: 0.12.0
Returns
-------
rate : int
Sample rate of wav file
data : numpy array
Data read from wav file
Notes
-----
* The file can be an open file or a filename.
* The returned sample rate is a Python integer
* The data is returned as a numpy array with a
data-type determined from the file.
"""
if hasattr(filename,'read'):
fid = filename
mmap = False
else:
fid = open(filename, 'rb')
try:
# some files seem to have the size recorded in the header greater than
# the actual file size.
fid.seek(0, os.SEEK_END)
actual_size = fid.tell()
fid.seek(0)
fsize = _read_riff_chunk(fid)
# the fsize should be identical to the actual size, if not
# the header information is wrong and we need to correct it.
if fsize != actual_size:
fsize = actual_size
noc = 1
bits = 8
comp = WAVE_FORMAT_PCM
while (fid.tell() < fsize):
# read the next chunk
chunk_id = fid.read(4)
if chunk_id == b'fmt ':
size, comp, noc, rate, sbytes, ba, bits = _read_fmt_chunk(fid)
elif chunk_id == b'fact':
_skip_unknown_chunk(fid)
elif chunk_id == b'data':
data = _read_data_chunk(fid, comp, noc, bits, mmap=mmap)
elif chunk_id == b'LIST':
# Someday this could be handled properly but for now skip it
_skip_unknown_chunk(fid)
# OMA warning - I've commented out the following lines
# else:
# warnings.warn("Chunk (non-data) not understood, skipping it.", WavFileWarning)
# _skip_unknown_chunk(fid)
finally:
if not hasattr(filename,'read'):
fid.close()
else:
fid.seek(0)
return rate, data
# Write a wave-file
# sample rate, data
def write(filename, rate, data):
"""
Write a numpy array as a WAV file
Parameters
----------
filename : string or open file handle
Output wav file
rate : int
The sample rate (in samples/sec).
data : ndarray
A 1-D or 2-D numpy array of either integer or float data-type.
Notes
-----
* The file can be an open file or a filename.
* Writes a simple uncompressed WAV file.
* The bits-per-sample will be determined by the data-type.
* To write multiple-channels, use a 2-D array of shape
(Nsamples, Nchannels).
"""
if hasattr(filename, 'write'):
fid = filename
else:
fid = open(filename, 'wb')
try:
# kind of numeric data in the numpy array
dkind = data.dtype.kind
if not (dkind == 'i' or dkind == 'f' or (dkind == 'u' and data.dtype.itemsize == 1)):
raise ValueError("Unsupported data type '%s'" % data.dtype)
# wav header stuff
# http://soundfile.sapp.org/doc/WaveFormat/
fid.write(b'RIFF')
# placeholder for chunk size (updated later)
fid.write(b'\x00\x00\x00\x00')
fid.write(b'WAVE')
# fmt chunk
fid.write(b'fmt ')
if dkind == 'f':
# comp stands for compression. PCM = 1
comp = 3
else:
comp = 1
# determine number of channels
if data.ndim == 1:
noc = 1
else:
noc = data.shape[1]
bits = data.dtype.itemsize * 8
# number of bytes per second, at the specified sampling rate rate,
# bits per sample and number of channels (just needed for wav header)
sbytes = rate*(bits // 8)*noc
# number of bytes per sample
ba = noc * (bits // 8)
# https://docs.python.org/3/library/struct.html#struct-format-strings
# Write the data (16, comp, noc, etc) in the correct binary format
# for the wav header. the string format (first arg) specifies how many bytes for each
# value.
fid.write(struct.pack('<ihHIIHH', 16, comp, noc, rate, sbytes, ba, bits))
# data chunk: the word 'data' followed by the size followed by the actual data
fid.write(b'data')
fid.write(struct.pack('<i', data.nbytes))
if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and sys.byteorder == 'big'):
data = data.byteswap()
_array_tofile(fid, data)
# Determine file size and place it in correct
# position at start of the file (replacing the 4 bytes of zeros)
size = fid.tell()
fid.seek(4)
fid.write(struct.pack('<i', size-8))
finally:
if not hasattr(filename,'write'):
fid.close()
else:
fid.seek(0)
if sys.version_info[0] >= 3:
def _array_tofile(fid, data):
# ravel gives a c-contiguous buffer
fid.write(data.ravel().view('b').data)
else:
def _array_tofile(fid, data):
fid.write(data.tostring())