package audio

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"math"
	"os"
	"path/filepath"
)

/*
Layout of a RIFF/WAVE file.
Columns: field name, size in bytes, meaning.

-----RIFF-----
RIFF             4  header tag letters
CHUNK_SIZE       4  length of the whole RIFF file, not counting the RIFF and
                    CHUNK_SIZE fields themselves
FORMAT           4  format; "WAVE" marks a wav file, and the WAVE format
                    requires sub-chunks tagged "fmt " and "data"
-----FMT -----
SUB_CHUNK_ID     4  sub-chunk tag, here "fmt " (note the trailing space);
                    it carries the properties of the wav
SUB_CHUNK_SIZE   4  length of this sub-chunk, not counting the SUB_CHUNK_ID
                    and SUB_CHUNK_SIZE fields themselves
AUDIO_FORMAT     2  audio format, 1 for PCM
NUM_CHANNELS     2  number of channels; any n in theory, usually mono or
                    stereo
SAMPLE_RATE      4  samples per second, normally one of the fixed choices
                    (8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100,
                    48000)
BYTE_RATE        4  bytes played per second,
                    = SAMPLE_RATE*NUM_CHANNELS*BITS_PER_SAMPLE/8
                    (the original author notes it is unclear why this field
                    is needed)
BLOCK_ALIGN      2  block alignment, = BITS_PER_SAMPLE*NUM_CHANNELS/8
BITS_PER_SAMPLE  2  bits per sample, usually 8, 16 or 32
                    (8 means uint8, 16 means int16, 32 means float32)
-----DATA-----
SUB_CHUNK_ID     4  sub-chunk tag, here "data"
SUB_CHUNK_SIZE   4  length of this sub-chunk, not counting the SUB_CHUNK_ID
                    and SUB_CHUNK_SIZE fields themselves
DATA                the PCM data
-------------
*/

// MaxChannelNum is the maximum number of channels (only 2 are allowed here).
const MaxChannelNum = 2

// LeftChannel and RightChannel name the two channel slots, 0 and 1.
// Presumably they are used as the channel index of a sample; verify against
// callers.
const (
	LeftChannel = iota
	RightChannel
)

// tag is a four-byte chunk identifier.
type tag [4]byte

// Chunk identifiers used by this package.
var (
	tagRIFF = tag{'R', 'I', 'F', 'F'} // "RIFF"
	tagWAVE = tag{'W', 'A', 'V', 'E'} // "WAVE"
	tagFmt  = tag{'f', 'm', 't', ' '} // "fmt "
	tagData = tag{'d', 'a', 't', 'a'} // "data"
)

// WavHeaderType is the header shared by every chunk: a four-byte ID followed
// by the chunk size.
type WavHeaderType struct {
	ID   tag
	Size uint32
}

// String renders the header as "ID=<id>,Size=<size>".
func (h *WavHeaderType) String() string {
	return fmt.Sprintf("ID=%s,Size=%d", h.ID[:], h.Size)
}

// sizeHeader is the encoded size in bytes of a WavHeaderType.
var sizeHeader = binary.Size(WavHeaderType{})

// chunkLoc records a position and a size for one chunk of the file.
type chunkLoc struct {
	pos  int64
	size int64
}

// RiffType is the leading RIFF header: the common chunk header followed by
// the four-byte format tag.
type RiffType struct {
	WavHeaderType
	Fmt tag
}

// String renders the RIFF header as "ID=<id>,Size=<size>,Fmt=<format>".
func (r *RiffType) String() string {
	return fmt.Sprintf("ID=%s,Size=%d,Fmt=%s", r.ID[:], r.Size, r.Fmt[:])
}

// WavFmtType is the "fmt " chunk: the common chunk header followed by the
// audio properties listed in the layout comment at the top of the file.
type WavFmtType struct {
	WavHeaderType
	AudioFormat   uint16 // AUDIO_FORMAT
	Channels      uint16 // NUM_CHANNELS
	SampleRate    uint32 // SAMPLE_RATE
	BytesPerSec   uint32 // BYTE_RATE
	BytesPerBlock uint16 // BLOCK_ALIGN
	BitsPerSample uint16 // BITS_PER_SAMPLE
}

// String renders every field of the fmt chunk as a comma-separated list of
// key=value pairs.
func (wavFmt *WavFmtType) String() string { return fmt.Sprintf( "ID=%s,Size=%d,AudioFormat=%d,Channels=%d,SampleRate=%d,BytesPerSec=%d,BytesPerBlock=%d,BitsPerSample=%d", string(wavFmt.ID[:]), wavFmt.Size, wavFmt.AudioFormat, wavFmt.Channels, wavFmt.SampleRate, wavFmt.BytesPerSec, wavFmt.BytesPerBlock, wavFmt.BitsPerSample) } //SampleType 采样结构 type SampleType struct { val8s [MaxChannelNum]uint8 val16s [MaxChannelNum]int16 val32s [MaxChannelNum]float32 } //WavDataType wav整体结构(头部+采样数据结构) type WavDataType struct { WavHeaderType Sample []SampleType } //String ... func (wavData *WavDataType) String() string { blockNum := len(wavData.Sample) return fmt.Sprintf("ID=%s,Size=%d,BlockNum=%d", string(wavData.ID[:]), wavData.Size, blockNum) } //WavInfoType wav操作实例 type WavInfoType struct { Riff RiffType Fmt WavFmtType Data WavDataType //create info createMs int64 } //String ... func (wavInfo *WavInfoType) String() string { f := &wavInfo.Fmt blockNum := len(wavInfo.Data.Sample) return fmt.Sprintf("SampleRate=%d,BitsPerSample=%d,Channels=%d,BlockNum=%d", f.SampleRate, f.BitsPerSample, f.Channels, blockNum) } //SetCreateTs ... 
func (wavInfo *WavInfoType) SetCreateTs(timestampMs int64) { wavInfo.createMs = timestampMs } //CopyFormat 复制头部结构 func (wavInfo *WavInfoType) CopyFormat(w *WavInfoType) (err error) { wavInfo.Riff.ID = w.Riff.ID wavInfo.Riff.Size = w.Riff.Size wavInfo.Riff.Fmt = w.Riff.Fmt wavInfo.Fmt.ID = w.Fmt.ID wavInfo.Fmt.Size = w.Fmt.Size wavInfo.Fmt.AudioFormat = w.Fmt.AudioFormat wavInfo.Fmt.Channels = w.Fmt.Channels wavInfo.Fmt.SampleRate = w.Fmt.SampleRate wavInfo.Fmt.BytesPerSec = w.Fmt.BytesPerSec wavInfo.Fmt.BytesPerBlock = w.Fmt.BytesPerBlock wavInfo.Fmt.BitsPerSample = w.Fmt.BitsPerSample wavInfo.Data.ID = w.Data.ID wavInfo.Data.Size = w.Data.Size return } //ParseFromFile 从文件中导入 func (wavInfo *WavInfoType) ParseFromFile(absFile string) (err error) { absFile, err = filepath.Abs(absFile) //#nosec if err != nil { return err } fileHandler, err := os.Open(absFile) //#nosec if err != nil { return err } defer fileHandler.Close() _, err = fileHandler.Seek(0, os.SEEK_SET) if err != nil { return err } var pos int64 var ch WavHeaderType //----------------------------------------------------- // RIFF header err = binary.Read(fileHandler, binary.LittleEndian, &wavInfo.Riff) if err != nil { return err } pos += int64(sizeHeader) + int64(len(tagWAVE)) if wavInfo.Riff.ID != tagRIFF { return errors.New("File Format Not Riff") } if wavInfo.Riff.Fmt != tagWAVE { return errors.New("File Format Not Wave") } fileSize := int64(sizeHeader) + int64(wavInfo.Riff.Size) _ = fileSize //r := &wavInfo.Riff //----------------------------------------------------- // read all chunks var chunks = make(map[tag]*chunkLoc) for { err = binary.Read(fileHandler, binary.LittleEndian, &ch) if err != nil { if err == io.EOF { break } return err } pos += int64(sizeHeader) loc := chunkLoc{ pos: pos, size: int64(ch.Size), } _, err = fileHandler.Seek(loc.size, os.SEEK_CUR) if err != nil { return err } pos += loc.size // chunk data chunks[ch.ID] = &loc } // check fileHandler size if pos != fileSize { return 
errors.New("pos != fileSize") } //----------------------------------------------------- // chunk fmt_ loc, ok := chunks[tagFmt] if !ok { return errors.New("wav: has not chunk \"fmt \"") } _, err = fileHandler.Seek(loc.pos-int64(sizeHeader), os.SEEK_SET) if err != nil { return err } err = binary.Read(fileHandler, binary.LittleEndian, &wavInfo.Fmt) if err != nil { return err } //----------------------------------------------------- // chunk data loc, ok = chunks[tagData] if !ok { return errors.New("wav: has not chunk \"data\"") } _, err = fileHandler.Seek(loc.pos, os.SEEK_SET) if err != nil { return err } channel := wavInfo.Fmt.Channels bytePerSample := wavInfo.Fmt.BitsPerSample / 8 blockSize := channel * bytePerSample wavInfo.Data.ID = tagData wavInfo.Data.Size = uint32(loc.size) blockNum := wavInfo.Data.Size / uint32(blockSize) wavInfo.Data.Sample = make([]SampleType, blockNum) _, err = fileHandler.Seek(loc.pos, os.SEEK_SET) if err != nil { return err } blockIdx := 0 for i := 0; i < int(wavInfo.Data.Size); i += int(blockSize) { sample := &wavInfo.Data.Sample[blockIdx] blockIdx++ for c := 0; c < int(channel); c++ { switch bytePerSample { case 1: var val uint8 err = binary.Read(fileHandler, binary.LittleEndian, &val) //sample.val8s = append(sample.val8s,val) sample.val8s[c] = val case 2: var val int16 err = binary.Read(fileHandler, binary.LittleEndian, &val) //sample.val16s = append(sample.val16s,val) sample.val16s[c] = val //fmt.Printf("pos=%d,val=%d\n",i,sample.val16s) case 4: var val float32 err = binary.Read(fileHandler, binary.LittleEndian, &val) //sample.val32s = append(sample.val32s,val) sample.val32s[c] = val } if err != nil { return err } } //wavInfo.Data.Sample = append(wavInfo.Data.Sample,sample) } //for i:=0;i MaxUint8 { clip = MaxUint8 } //wavInfo.Data.Sample[i].val8s[c] = uint8((float32(val) * rateAsRaw)) wavInfo.Data.Sample[i].val8s[c] = uint8(clip) } case 2: for c := 0; c < int(channel); c++ { //val := wavInfo.Data.Sample[i].val16s[c] 
//wavInfo.Data.Sample[i].val16s[c] = int16((float32(val) * rateAsRaw)) val := wavInfo.Data.Sample[i].val16s[c] clip := float64(val) * float64(rateAsRaw) if clip < MinInt16 { clip = MinInt16 } if clip > MaxInt16 { clip = MaxInt16 } wavInfo.Data.Sample[i].val16s[c] = int16(clip) } case 4: for c := 0; c < int(channel); c++ { val := wavInfo.Data.Sample[i].val32s[c] wavInfo.Data.Sample[i].val32s[c] = val * rateAsRaw } } } } //Resample 重置采样率 func (wavInfo *WavInfoType) Resample(resampleRate uint32) { sampleRate := wavInfo.Fmt.SampleRate rate := float64(sampleRate) / float64(resampleRate) channel := wavInfo.Fmt.Channels bytePerSample := wavInfo.Fmt.BitsPerSample / 8 rawLen := len(wavInfo.Data.Sample) resampleData := make([]SampleType, 0) resampleIdx := 0 for { rawIdx := int(float64(resampleIdx) * rate) if rawIdx < rawLen { sample := SampleType{} for c := 0; c < int(channel); c++ { switch bytePerSample { case 1: val := wavInfo.Data.Sample[rawIdx].val8s[c] //sample.val8s = append(sample.val8s,val) sample.val8s[c] = val case 2: val := wavInfo.Data.Sample[rawIdx].val16s[c] //sample.val16s = append(sample.val16s,val) sample.val16s[c] = val case 4: val := wavInfo.Data.Sample[rawIdx].val32s[c] //sample.val32s = append(sample.val32s,val) sample.val32s[c] = val } } resampleData = append(resampleData, sample) } else { break } resampleIdx++ } wavInfo.Data.Sample = resampleData wavInfo.Fmt.SampleRate = resampleRate } //ConvertToFloat32 将采样值转换到 0 到 1 之间 func (wavInfo *WavInfoType) GetFloat32Samples(channel int, bytePerSample int) []float32 { //fmt.Println(wavInfo.cha) var floatSamples []float32 var point float32 for i := 0; i < len(wavInfo.Data.Sample); i++ { sample := &wavInfo.Data.Sample[i] switch bytePerSample { case 1: point = float32(sample.val8s[channel]) / (1 << 8) case 2: point = float32(sample.val16s[channel]) / (1 << 15) case 4: point = sample.val32s[channel] } floatSamples = append(floatSamples, point) } return floatSamples } //Trim 切头切尾 func (wavInfo *WavInfoType) 
Trim(dbPercent float32) { channel := wavInfo.Fmt.Channels bytePerSample := wavInfo.Fmt.BitsPerSample / 8 //blockSize := channel*bytePerSample const MaxUint8 = math.MaxUint8 const MinUint8 = 0 const MaxInt16 = math.MaxInt16 const MinInt16 = math.MinInt16 //trim head silenceHeadIdx := 0 //头部静音截止位置 done := false for i := 0; i < len(wavInfo.Data.Sample); i++ { switch bytePerSample { case 1: for c := 0; c < int(channel); c++ { val := wavInfo.Data.Sample[i].val8s[c] if float32(val) > MaxUint8*dbPercent { silenceHeadIdx = i done = true break } } case 2: for c := 0; c < int(channel); c++ { val := wavInfo.Data.Sample[i].val16s[c] if float32(val) > MaxInt16*dbPercent { silenceHeadIdx = i done = true break } if float32(val) < MinInt16*dbPercent { silenceHeadIdx = i done = true break } } case 4: for c := 0; c < int(channel); c++ { val := wavInfo.Data.Sample[i].val32s[c] if float32(val) > math.MaxFloat32*dbPercent { silenceHeadIdx = i done = true break } if val < 0 && (-val > math.MaxFloat32*dbPercent) { silenceHeadIdx = i done = true break } } } if done { break } } //trim tail,截断尾部 silenceTailIdx := len(wavInfo.Data.Sample) - 1 //尾部静音截止位置 done = false for i := len(wavInfo.Data.Sample) - 1; i >= 0; i-- { switch bytePerSample { case 1: for c := 0; c < int(channel); c++ { val := wavInfo.Data.Sample[i].val8s[c] if float32(val) > MaxUint8*dbPercent { silenceTailIdx = i done = true break } } case 2: for c := 0; c < int(channel); c++ { val := wavInfo.Data.Sample[i].val16s[c] if val >= 0 && float32(val) > float32(MaxInt16*dbPercent) { silenceTailIdx = i done = true break } if val < 0 && float32(val) < float32(MinInt16*dbPercent) { silenceTailIdx = i done = true break } } case 4: for c := 0; c < int(channel); c++ { val := wavInfo.Data.Sample[i].val32s[c] if float32(val) > math.MaxFloat32*dbPercent { silenceTailIdx = i done = true break } if val < 0 && (-val > math.MaxFloat32*dbPercent) { silenceTailIdx = i done = true break } } } if done { break } } wavInfo.Data.Sample = 
wavInfo.Data.Sample[:silenceTailIdx] wavInfo.Data.Sample = wavInfo.Data.Sample[silenceHeadIdx:] } func (wavInfo *WavInfoType) TrimFirstWithTime(milliseconds int64) error{ sampleRate := wavInfo.Fmt.SampleRate sizeToTrim := int64(sampleRate) * milliseconds / 1000 if int(sizeToTrim) >= len(wavInfo.Data.Sample) { return errors.New("check time err") } wavInfo.Data.Sample = wavInfo.Data.Sample[sizeToTrim:] return nil } func (wavInfo *WavInfoType) GetWavTime() int { return int(math.Ceil(float64(len(wavInfo.Data.Sample)) / float64(wavInfo.Fmt.SampleRate))) } //NewWavInfo 新建一个wav操作实例 func NewWavInfo() *WavInfoType { w := &WavInfoType{} return w }