Spaces:

macaodha
/

batdetect2

Running

batdetect2 / bat_detect /utils /wavfile.py

Oisin Mac Aodha

added bat code

9ace58a over 2 years ago

7.95 kB

	"""
	Module to read / write wav files using numpy arrays

	Functions
	---------
	`read`: Return the sample rate (in samples/sec) and data from a WAV file.

	`write`: Write a numpy array as a WAV file.

	"""
	from __future__ import division, print_function, absolute_import

	import sys
	import numpy
	import struct
	import warnings
	import os


	class WavFileWarning(UserWarning):
	pass

	_big_endian = False

	WAVE_FORMAT_PCM = 0x0001
	WAVE_FORMAT_IEEE_FLOAT = 0x0003
	WAVE_FORMAT_EXTENSIBLE = 0xfffe
	KNOWN_WAVE_FORMATS = (WAVE_FORMAT_PCM, WAVE_FORMAT_IEEE_FLOAT)

	# assumes file pointer is immediately
	# after the 'fmt ' id


	def _read_fmt_chunk(fid):
	if _big_endian:
	fmt = '>'
	else:
	fmt = '<'
	res = struct.unpack(fmt+'iHHIIHH',fid.read(20))
	size, comp, noc, rate, sbytes, ba, bits = res
	if comp not in KNOWN_WAVE_FORMATS or size > 16:
	comp = WAVE_FORMAT_PCM
	warnings.warn("Unknown wave file format", WavFileWarning)
	if size > 16:
	fid.read(size - 16)

	return size, comp, noc, rate, sbytes, ba, bits


	# assumes file pointer is immediately
	# after the 'data' id
	def _read_data_chunk(fid, comp, noc, bits, mmap=False):
	if _big_endian:
	fmt = '>i'
	else:
	fmt = '<i'
	size = struct.unpack(fmt,fid.read(4))[0]

	bytes = bits//8
	if bits == 8:
	dtype = 'u1'
	else:
	if _big_endian:
	dtype = '>'
	else:
	dtype = '<'
	if comp == 1:
	dtype += 'i%d' % bytes
	else:
	dtype += 'f%d' % bytes
	if not mmap:
	data = numpy.fromstring(fid.read(size), dtype=dtype)
	else:
	start = fid.tell()
	data = numpy.memmap(fid, dtype=dtype, mode='c', offset=start,
	shape=(size//bytes,))
	fid.seek(start + size)

	if noc > 1:
	data = data.reshape(-1,noc)
	return data


	def _skip_unknown_chunk(fid):
	if _big_endian:
	fmt = '>i'
	else:
	fmt = '<i'

	data = fid.read(4)
	size = struct.unpack(fmt, data)[0]
	fid.seek(size, 1)


	def _read_riff_chunk(fid):
	global _big_endian
	str1 = fid.read(4)
	if str1 == b'RIFX':
	_big_endian = True
	elif str1 != b'RIFF':
	raise ValueError("Not a WAV file.")
	if _big_endian:
	fmt = '>I'
	else:
	fmt = '<I'
	fsize = struct.unpack(fmt, fid.read(4))[0] + 8
	str2 = fid.read(4)
	if (str2 != b'WAVE'):
	raise ValueError("Not a WAV file.")
	if str1 == b'RIFX':
	_big_endian = True
	return fsize

	# open a wave-file


	def read(filename, mmap=False):
	"""
	Return the sample rate (in samples/sec) and data from a WAV file

	Parameters
	----------
	filename : string or open file handle
	Input wav file.
	mmap : bool, optional
	Whether to read data as memory mapped.
	Only to be used on real files (Default: False)

	.. versionadded:: 0.12.0

	Returns
	-------
	rate : int
	Sample rate of wav file
	data : numpy array
	Data read from wav file

	Notes
	-----

	* The file can be an open file or a filename.

	* The returned sample rate is a Python integer
	* The data is returned as a numpy array with a
	data-type determined from the file.

	"""
	if hasattr(filename,'read'):
	fid = filename
	mmap = False
	else:
	fid = open(filename, 'rb')

	try:

	# some files seem to have the size recorded in the header greater than
	# the actual file size.
	fid.seek(0, os.SEEK_END)
	actual_size = fid.tell()
	fid.seek(0)

	fsize = _read_riff_chunk(fid)

	# the fsize should be identical to the actual size, if not
	# the header information is wrong and we need to correct it.
	if fsize != actual_size:
	fsize = actual_size

	noc = 1
	bits = 8
	comp = WAVE_FORMAT_PCM
	while (fid.tell() < fsize):
	# read the next chunk
	chunk_id = fid.read(4)
	if chunk_id == b'fmt ':
	size, comp, noc, rate, sbytes, ba, bits = _read_fmt_chunk(fid)
	elif chunk_id == b'fact':
	_skip_unknown_chunk(fid)
	elif chunk_id == b'data':
	data = _read_data_chunk(fid, comp, noc, bits, mmap=mmap)
	elif chunk_id == b'LIST':
	# Someday this could be handled properly but for now skip it
	_skip_unknown_chunk(fid)

	# OMA warning - I've commented out the following lines
	# else:
	# warnings.warn("Chunk (non-data) not understood, skipping it.", WavFileWarning)
	# _skip_unknown_chunk(fid)
	finally:
	if not hasattr(filename,'read'):
	fid.close()
	else:
	fid.seek(0)

	return rate, data

	# Write a wave-file
	# sample rate, data


	def write(filename, rate, data):
	"""
	Write a numpy array as a WAV file

	Parameters
	----------
	filename : string or open file handle
	Output wav file
	rate : int
	The sample rate (in samples/sec).
	data : ndarray
	A 1-D or 2-D numpy array of either integer or float data-type.

	Notes
	-----
	* The file can be an open file or a filename.

	* Writes a simple uncompressed WAV file.
	* The bits-per-sample will be determined by the data-type.
	* To write multiple-channels, use a 2-D array of shape
	(Nsamples, Nchannels).

	"""
	if hasattr(filename, 'write'):
	fid = filename
	else:
	fid = open(filename, 'wb')

	try:
	# kind of numeric data in the numpy array
	dkind = data.dtype.kind
	if not (dkind == 'i' or dkind == 'f' or (dkind == 'u' and data.dtype.itemsize == 1)):
	raise ValueError("Unsupported data type '%s'" % data.dtype)

	# wav header stuff
	# http://soundfile.sapp.org/doc/WaveFormat/
	fid.write(b'RIFF')
	# placeholder for chunk size (updated later)
	fid.write(b'\x00\x00\x00\x00')
	fid.write(b'WAVE')
	# fmt chunk
	fid.write(b'fmt ')
	if dkind == 'f':
	# comp stands for compression. PCM = 1
	comp = 3
	else:
	comp = 1
	# determine number of channels
	if data.ndim == 1:
	noc = 1
	else:
	noc = data.shape[1]
	bits = data.dtype.itemsize * 8
	# number of bytes per second, at the specified sampling rate rate,
	# bits per sample and number of channels (just needed for wav header)
	sbytes = rate(bits // 8)noc
	# number of bytes per sample
	ba = noc * (bits // 8)

	# https://docs.python.org/3/library/struct.html#struct-format-strings
	# Write the data (16, comp, noc, etc) in the correct binary format
	# for the wav header. the string format (first arg) specifies how many bytes for each
	# value.
	fid.write(struct.pack('<ihHIIHH', 16, comp, noc, rate, sbytes, ba, bits))
	# data chunk: the word 'data' followed by the size followed by the actual data
	fid.write(b'data')
	fid.write(struct.pack('<i', data.nbytes))
	if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and sys.byteorder == 'big'):
	data = data.byteswap()
	_array_tofile(fid, data)

	# Determine file size and place it in correct
	# position at start of the file (replacing the 4 bytes of zeros)
	size = fid.tell()
	fid.seek(4)
	fid.write(struct.pack('<i', size-8))

	finally:
	if not hasattr(filename,'write'):
	fid.close()
	else:
	fid.seek(0)


	if sys.version_info[0] >= 3:
	def _array_tofile(fid, data):
	# ravel gives a c-contiguous buffer
	fid.write(data.ravel().view('b').data)
	else:
	def _array_tofile(fid, data):
	fid.write(data.tostring())