Update README.md
README.md (CHANGED)
@@ -118,6 +118,125 @@ IPython.display.Audio(audio,rate=24000)

torchaudio.save("audio.wav", audio, sample_rate=24000)
```

### Simple News-Reader for Local languages

The example below scrapes a news article in a local language, splits it into short chunks, and reads it aloud with the `YarnGPT-local` model.

```python
# clone the YarnGPT repo to get access to the `audiotokenizer`
!git clone https://github.com/saheedniyi02/yarngpt.git

# install the required libraries
!pip install outetts uroman trafilatura pydub

# import the required packages
import os
import re
import json
import torch
import inflect
import random
import requests
import trafilatura
import uroman as ur
import numpy as np
import torchaudio
import IPython
from pydub import AudioSegment
from pydub.effects import normalize
from transformers import AutoModelForCausalLM, AutoTokenizer
from outetts.wav_tokenizer.decoder import WavTokenizer
from yarngpt.audiotokenizer import AudioTokenizer, AudioTokenizerForLocal

# download the `WavTokenizer` config and checkpoint
!wget https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml
!wget https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt

# paths to the model and the files downloaded above (a Colab-style /content working directory is assumed)
tokenizer_path = "saheedniyi/YarnGPT-local"
wav_tokenizer_config_path = "/content/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
wav_tokenizer_model_path = "/content/wavtokenizer_large_speech_320_24k.ckpt"

audio_tokenizer = AudioTokenizerForLocal(
    tokenizer_path, wav_tokenizer_model_path, wav_tokenizer_config_path
)

model = AutoModelForCausalLM.from_pretrained(tokenizer_path, torch_dtype="auto").to(audio_tokenizer.device)

# split text into sentences, then into chunks of at most `word_limit` words
def split_text_into_chunks(text, word_limit=25):
    sentences = [sentence.strip() for sentence in text.split(".") if sentence.strip()]
    chunks = []
    for sentence in sentences:
        chunks.append(".")
        sentence_splitted = sentence.split(" ")
        num_words = len(sentence_splitted)
        start_index = 0
        if num_words > word_limit:
            while start_index < num_words:
                end_index = min(num_words, start_index + word_limit)
                chunks.append(" ".join(sentence_splitted[start_index:end_index]))
                start_index = end_index
        else:
            chunks.append(sentence)
    return chunks

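# example: split_text_into_chunks("The rain fell. Roads were flooded.")
# returns ['.', 'The rain fell', '.', 'Roads were flooded'];
# the '.' entries become short pauses in the reading loop further down.
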
# reduce the speed of the audio; output for the local languages tends to be fast
def speed_change(sound, speed=0.9):
    # Manually override the frame_rate. This tells the computer how many
    # samples to play per second.
    sound_with_altered_frame_rate = sound._spawn(sound.raw_data, overrides={
        "frame_rate": int(sound.frame_rate * speed)
    })
    # Convert the sound with the altered frame rate back to a standard frame
    # rate so that regular playback programs work correctly; they often only
    # know how to play audio at standard frame rates (like 44.1 kHz).
    return sound_with_altered_frame_rate.set_frame_rate(sound.frame_rate)

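# with the numbers used here: a 24 kHz clip at speed=0.9 is first tagged as
# 21,600 Hz (24,000 * 0.9) and then resampled back to 24 kHz, so it plays
# about 11% slower (and slightly lower in pitch).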

# fetch a news article, extract its text and split it into chunks
page = requests.get("https://alaroye.org/a-maa-too-fo-ipinle-ogun-mo-omo-egbe-okunkun-meje-lowo-ti-te-bayii-omolola/")
content = trafilatura.extract(page.text)
chunks = split_text_into_chunks(content)

# generate the audio codes chunk by chunk
all_codes = []
for i, chunk in enumerate(chunks):
    print(i)
    print("\n")
    print(chunk)
    if chunk == ".":
        # add 0.5 seconds of silence whenever we encounter a full stop
        all_codes.extend([453] * 38)
    else:
        prompt = audio_tokenizer.create_prompt(chunk, lang="yoruba", speaker_name="igbo_female1")
        input_ids = audio_tokenizer.tokenize_prompt(prompt)
        output = model.generate(
            input_ids=input_ids,
            temperature=0.1,
            repetition_penalty=1.1,
            max_length=4000,
            num_beams=5,
        )
        codes = audio_tokenizer.get_codes(output)
        all_codes.extend(codes)

audio = audio_tokenizer.get_audio(all_codes)

# display the output
IPython.display.Audio(audio, rate=24000)

# save the audio
torchaudio.save("news1.wav", audio, sample_rate=24000)

# convert the file to an `AudioSegment` object for further processing
audio_dub = AudioSegment.from_file("news1.wav")

# reduce the audio speed
slowed_audio = speed_change(audio_dub, 0.9)
```
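
To keep the slowed-down read, the `AudioSegment` returned by `speed_change` (assigned to `slowed_audio` above) can be written back to disk with pydub and previewed like the other outputs. A minimal sketch, with `news1_slow.wav` as an example filename:

```python
# save the slowed-down audio and preview it in the notebook
slowed_audio.export("news1_slow.wav", format="wav")
IPython.display.Audio("news1_slow.wav")
```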

## Model Description

- **Developed by:** [Saheedniyi](https://linkedin.com/in/azeez-saheed)