SpireLab committed on
Commit
b28205b
·
verified ·
1 Parent(s): 37dc6fc

initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +23 -0
  2. Dockerfile +42 -0
  3. build_and_run.sh +51 -0
  4. data/input/reference_audio/phase3_assamese_female_48k.wav +3 -0
  5. data/input/reference_audio/phase3_assamese_male_48k.wav +3 -0
  6. data/input/reference_audio/phase3_bengali_male_48k.wav +3 -0
  7. data/input/reference_audio/phase3_bodo_female_48k.wav +3 -0
  8. data/input/reference_audio/phase3_bodo_male_48k.wav +3 -0
  9. data/input/reference_audio/phase3_gujarati_female_48k.wav +3 -0
  10. data/input/reference_audio/phase3_gujarati_male_48k.wav +3 -0
  11. data/input/reference_audio/phase3_hindi_female_48k.wav +3 -0
  12. data/input/reference_audio/phase3_hindi_male_48k.wav +3 -0
  13. data/input/reference_audio/phase3_manipuri_female_48k.wav +3 -0
  14. data/input/reference_audio/phase3_manipuri_male_48k.wav +3 -0
  15. data/input/reference_audio/phase3_nepali_female_48k.wav +3 -0
  16. data/input/reference_audio/phase3_nepali_male_48k.wav +3 -0
  17. data/input/reference_audio/phase3_punjabi_female_48k.wav +3 -0
  18. data/input/reference_audio/phase3_punjabi_male_48k.wav +3 -0
  19. data/input/reference_audio/phase3_sanskrit_female_48k.wav +3 -0
  20. data/input/reference_audio/phase3_sanskrit_male_48k.wav +3 -0
  21. data/input/reference_audio/phase3_tamil_female_48k.wav +3 -0
  22. data/input/reference_audio/phase3_tamil_male_48k.wav +3 -0
  23. data/input/reference_audio/phase3_telugu_female_48k.wav +3 -0
  24. data/input/reference_audio/phase3_telugu_male_48k.wav +3 -0
  25. data/input/texts.txt +2 -0
  26. data/output/1_te_phase3_telugu_male_48k.wav +3 -0
  27. data/output/2_mr_phase3_manipuri_male_48k.wav +3 -0
  28. infer.py +132 -0
  29. model_related/Bengali_Female/speakers.pth +3 -0
  30. model_related/Bengali_Male/speakers.pth +3 -0
  31. model_related/Bhojpuri_Female/speakers.pth +3 -0
  32. model_related/Bhojpuri_Male/speakers.pth +3 -0
  33. model_related/Chhattisgarhi_Female/speakers.pth +3 -0
  34. model_related/Chhattisgarhi_Male/speakers.pth +3 -0
  35. model_related/English_Female/speakers.pth +3 -0
  36. model_related/English_Male/speakers.pth +3 -0
  37. model_related/Gujarati_Female/speakers.pth +3 -0
  38. model_related/Gujarati_Male/speakers.pth +3 -0
  39. model_related/Hindi_Female/speakers.pth +3 -0
  40. model_related/Hindi_Male/speakers.pth +3 -0
  41. model_related/Kannada_Female/speakers.pth +3 -0
  42. model_related/Kannada_Male/speakers.pth +3 -0
  43. model_related/Magahi_Female/speakers.pth +3 -0
  44. model_related/Magahi_Male/speakers.pth +3 -0
  45. model_related/Maithili_Female/speakers.pth +3 -0
  46. model_related/Maithili_Male/speakers.pth +3 -0
  47. model_related/Marathi_Female/speakers.pth +3 -0
  48. model_related/Marathi_Male/speakers.pth +3 -0
  49. model_related/Telugu_Female/speakers.pth +3 -0
  50. model_related/Telugu_Male/speakers.pth +3 -0
.gitattributes CHANGED
@@ -33,3 +33,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/input/reference_audio/phase3_assamese_female_48k.wav filter=lfs diff=lfs merge=lfs -text
37
+ data/input/reference_audio/phase3_assamese_male_48k.wav filter=lfs diff=lfs merge=lfs -text
38
+ data/input/reference_audio/phase3_bengali_male_48k.wav filter=lfs diff=lfs merge=lfs -text
39
+ data/input/reference_audio/phase3_bodo_female_48k.wav filter=lfs diff=lfs merge=lfs -text
40
+ data/input/reference_audio/phase3_bodo_male_48k.wav filter=lfs diff=lfs merge=lfs -text
41
+ data/input/reference_audio/phase3_gujarati_female_48k.wav filter=lfs diff=lfs merge=lfs -text
42
+ data/input/reference_audio/phase3_gujarati_male_48k.wav filter=lfs diff=lfs merge=lfs -text
43
+ data/input/reference_audio/phase3_hindi_female_48k.wav filter=lfs diff=lfs merge=lfs -text
44
+ data/input/reference_audio/phase3_hindi_male_48k.wav filter=lfs diff=lfs merge=lfs -text
45
+ data/input/reference_audio/phase3_manipuri_female_48k.wav filter=lfs diff=lfs merge=lfs -text
46
+ data/input/reference_audio/phase3_manipuri_male_48k.wav filter=lfs diff=lfs merge=lfs -text
47
+ data/input/reference_audio/phase3_nepali_female_48k.wav filter=lfs diff=lfs merge=lfs -text
48
+ data/input/reference_audio/phase3_nepali_male_48k.wav filter=lfs diff=lfs merge=lfs -text
49
+ data/input/reference_audio/phase3_punjabi_female_48k.wav filter=lfs diff=lfs merge=lfs -text
50
+ data/input/reference_audio/phase3_punjabi_male_48k.wav filter=lfs diff=lfs merge=lfs -text
51
+ data/input/reference_audio/phase3_sanskrit_female_48k.wav filter=lfs diff=lfs merge=lfs -text
52
+ data/input/reference_audio/phase3_sanskrit_male_48k.wav filter=lfs diff=lfs merge=lfs -text
53
+ data/input/reference_audio/phase3_tamil_female_48k.wav filter=lfs diff=lfs merge=lfs -text
54
+ data/input/reference_audio/phase3_tamil_male_48k.wav filter=lfs diff=lfs merge=lfs -text
55
+ data/input/reference_audio/phase3_telugu_female_48k.wav filter=lfs diff=lfs merge=lfs -text
56
+ data/input/reference_audio/phase3_telugu_male_48k.wav filter=lfs diff=lfs merge=lfs -text
57
+ data/output/1_te_phase3_telugu_male_48k.wav filter=lfs diff=lfs merge=lfs -text
58
+ data/output/2_mr_phase3_manipuri_male_48k.wav filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Image that installs the LIMMITS-24-Coquiai TTS code on top of a CUDA-enabled
# PyTorch runtime and runs infer.py as its entrypoint.
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime

# Set working directory
WORKDIR /app

# Verify Python version meets requirements (>=3.9.0, <3.12).
# Printing the version alone verifies nothing, so assert the range and fail the
# build early if the base image ever ships an unsupported interpreter.
RUN python --version && \
    python -c "import sys; assert (3, 9) <= sys.version_info[:2] < (3, 12), sys.version"

# Install system dependencies
RUN apt-get update && apt-get install -y \
    git \
    libsndfile1 \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install Cython first (crucial for building extensions)
RUN pip install --no-cache-dir Cython packaging

# Clone the repository
RUN git clone https://github.com/PranavDBhat/LIMMITS-24-Coquiai.git /app/LIMMITS-24-Coquiai

# Install only the main requirements (not dev/notebooks requirements)
# This directly uses the requirements.txt file from the repository
RUN cd /app/LIMMITS-24-Coquiai && \
    pip install --no-cache-dir -r requirements.txt

# Install the package in development mode
# This will build the Cython extensions
# (--no-cache-dir keeps the pip cache out of the image, as in the steps above)
RUN cd /app/LIMMITS-24-Coquiai && \
    pip install --no-cache-dir -e .

# Create directories for models, input, and output
RUN mkdir -p /app/models /app/data/input /app/data/output

# Copy the inference script
COPY infer.py /app/

# Set the entrypoint to run the inference script
ENTRYPOINT ["python", "infer.py"]

# Default command (can be overridden)
CMD ["--help"]
build_and_run.sh ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
#
# Build the tts-baseline Docker image and run inference on the GPU.
#
# Run from the repository root: the script expects ./Dockerfile, ./models,
# ./data and ./model_related relative to the current directory.

# Abort on the first failing command, on unset variables, and on failures
# inside pipelines, so a broken step never cascades into the next one.
set -euo pipefail

# Ensure directories exist.
# model_related is bind-mounted below; creating it here stops Docker from
# auto-creating the missing mount source itself.
mkdir -p models data/input/reference_audio data/output model_related

# Check if model files exist
if [ ! -f "models/best_model_479919.pth" ] || [ ! -f "models/config.json" ]; then
    echo "ERROR: Model files not found in models/ directory!"
    echo "Please place model files in models/ directory:"
    echo "- models/best_model_479919.pth"
    echo "- models/config.json"
    exit 1
fi

# Check if sample inputs exist
if [ ! -f "data/input/texts.txt" ]; then
    echo "WARNING: No texts.txt found. Creating sample file..."
    # One tab-separated entry: id, language code, text, reference audio file.
    printf '%s\t%s\t%s\t%s\n' \
        "1" \
        "te" \
        "వడ్రంగి, క్షురక వృత్తులలో పెట్టుబడి ప్రధానమై ఇతరులు కూడా ఈ వృత్తిలో ప్రవేశించి వ్యాపారంగా మార్చేసార" \
        "speaker1.wav" > data/input/texts.txt
    echo "Please add reference audio files to data/input/reference_audio/"
fi

# Build the Docker image; stop here if the build fails instead of reporting
# success and launching a missing or stale image.
echo "Building Docker image (this may take some time)..."
if ! docker build -t tts-baseline .; then
    echo "ERROR: Docker build failed; not starting inference." >&2
    exit 1
fi

echo ""
echo "Build complete!"
echo ""

# GPU inference
docker run --gpus all \
    -v "$(pwd)/models:/app/models" \
    -v "$(pwd)/data/input:/app/data/input" \
    -v "$(pwd)/data/output:/app/data/output" \
    -v "$(pwd)/model_related:/app/model_related" \
    tts-baseline \
    --text_file /app/data/input/texts.txt \
    --ref_dir /app/data/input/reference_audio \
    --savedir /app/data/output \
    --device cuda

# For CPU inference
# docker run \
#     -v "$(pwd)/models:/app/models" \
#     -v "$(pwd)/data/input:/app/data/input" \
#     -v "$(pwd)/data/output:/app/data/output" \
#     -v "$(pwd)/model_related:/app/model_related" \
#     tts-baseline \
#     --text_file /app/data/input/texts.txt \
#     --ref_dir /app/data/input/reference_audio \
#     --savedir /app/data/output \
#     --device cpu
data/input/reference_audio/phase3_assamese_female_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cac46ed6d99adee425d8e56f26b94672632f77b8bb4e9149538327625bf8590f
3
+ size 878636
data/input/reference_audio/phase3_assamese_male_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ae4ba4cd789dc4a260d2ee96ac3b56250545d148bc1b8a5ad306c32fae87b64
3
+ size 622636
data/input/reference_audio/phase3_bengali_male_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5774878765083e411d9d45eb1e593b2b593114d469411082b40172c36990589b
3
+ size 645164
data/input/reference_audio/phase3_bodo_female_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aa6b50a42e585f8a5165c0d1364f7867b178eb3c77f28cb77cc5649ff1246b7
3
+ size 585772
data/input/reference_audio/phase3_bodo_male_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7cbbab9e5fdd8f0d213278a9055ee248ef25e23489d497036c049bba33d5462
3
+ size 528428
data/input/reference_audio/phase3_gujarati_female_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92bbae9ecf28b340056a68a0c41f1827a16b6ad90d04ef5fb1466044ce8342d3
3
+ size 438316
data/input/reference_audio/phase3_gujarati_male_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca7b082fc6ffa75a001b462b144881c67b3a574b6fc23ffb89b2c4e0d7a9a6db
3
+ size 452652
data/input/reference_audio/phase3_hindi_female_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84b8ca1f8cf8d14eab7f10887b5ea02d5fe91a135aef9b11b572d9fa2af1a289
3
+ size 520236
data/input/reference_audio/phase3_hindi_male_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdbda05aa4e2cbfa1947a16416cc9cca2af29a6a02a947204d41ebaf31470fbc
3
+ size 428076
data/input/reference_audio/phase3_manipuri_female_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebbddaa5903c08c85a190ba7dbbacfa2ed0f3e81f960de2bf35053eb5efad732
3
+ size 696364
data/input/reference_audio/phase3_manipuri_male_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3d91517ef59af3ad21592f7046c6c3f9ca20844804473caa578dd89c69f5fcd
3
+ size 688172
data/input/reference_audio/phase3_nepali_female_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:484f636799da0a1c7c5b24062aa53c55de5bed3ad0b7fe769b8b4dacc61e32ea
3
+ size 628780
data/input/reference_audio/phase3_nepali_male_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a21ef0dcf83df7a98c9522181bb0e6af121c58be58dedf989ad62b305c8fcc76
3
+ size 518188
data/input/reference_audio/phase3_punjabi_female_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9382ebca3054cec66e70d3264622d8896d17bcd46ec19ab17c8334fc3d0097ef
3
+ size 413740
data/input/reference_audio/phase3_punjabi_male_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb6c66455e0669f22a24f0b06ea4457337dd0a9f4bfc3e253d58a2f9a0953da8
3
+ size 378924
data/input/reference_audio/phase3_sanskrit_female_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:382581b53343e6bcad461cec7f24a687d9439b4388791396021b34473bd396dc
3
+ size 1557548
data/input/reference_audio/phase3_sanskrit_male_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8dda08b50cd05d50b20558ada30516c483464b8241fe81a33471411b841737f
3
+ size 1234988
data/input/reference_audio/phase3_tamil_female_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f9a67918b9466c725329998bdaed0b2279b12b9dfe43f88fd1613c8fe6411eb
3
+ size 2347052
data/input/reference_audio/phase3_tamil_male_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36990873db9fedce95bd28d78ae5689b02459ebdee9cbbd81e5f947da0be08af
3
+ size 2007084
data/input/reference_audio/phase3_telugu_female_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baa4775b307b9a5c80b7c6b61e302adcbef6b288890c4d54efa903f0a2f89a19
3
+ size 571436
data/input/reference_audio/phase3_telugu_male_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b72435776f2ee7ea0ac0b6fa14419436846cf303a5ad1343e23f071c6dc34f9
3
+ size 499756
data/input/texts.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ 1 te వడ్రంగి, క్షురక వృత్తులలో పెట్టుబడి ప్రధానమై ఇతరులు కూడా ఈ వృత్తిలో ప్రవేశించి వ్యాపారంగా మార్చేసార phase3_telugu_male_48k.wav
2
+ 2 mr जायकवाडी धरणातून तब्बल अडीच ते तीन लाख हेक्टर शेतीच्या सिंचनासाठी पाणी सोडलं जातं phase3_manipuri_male_48k.wav
data/output/1_te_phase3_telugu_male_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d92f6ba7245ff2a222132eb4bfb6cf23239ff15f3f71b5dcfad9e3addc5f1454
3
+ size 425036
data/output/2_mr_phase3_manipuri_male_48k.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73dd8edae221d33baaa2276bb364e9e836d1ee39dffc8a379668bc2256796c99
3
+ size 499276
infer.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from TTS.api import TTS
3
+ import os
4
+ from tqdm import tqdm
5
+ import argparse
6
+
def resolve_device(requested, cuda_available):
    """Choose the device string handed to the model's ``.to()`` call.

    Args:
        requested: Value of the ``--device`` argument.
        cuda_available: Whether CUDA can be used on this machine.

    Returns:
        ``"cpu"`` when CPU is requested, or when CUDA is requested but not
        available; the requested string for ``"cuda"`` / ``"cuda:N"`` when CUDA
        is available; otherwise an auto-detected ``"cuda"`` or ``"cpu"``.
    """
    if requested == "cpu":
        return "cpu"
    if requested == "cuda" or requested.startswith("cuda:"):
        if cuda_available:
            return requested
        # Fall back with a visible warning instead of failing later when the
        # model is moved to a device that does not exist.
        print(f"Warning: '{requested}' requested but CUDA is not available. Falling back to cpu.")
        return "cpu"
    # Unrecognized value: auto-detect.
    return "cuda" if cuda_available else "cpu"


def parse_entry(line):
    """Split one text-file line into ``(idx, lang, text, ref_file)``.

    Returns:
        A 4-tuple of strings, or ``None`` when the line does not contain
        exactly four tab-separated fields.
    """
    parts = line.strip().split('\t')
    if len(parts) != 4:
        return None
    return tuple(parts)


def main(argv=None):
    """Synthesize every entry of the text file using its reference audio.

    Args:
        argv: Optional argument list; defaults to ``sys.argv[1:]``.
    """
    # Parse arguments
    parser = argparse.ArgumentParser(description="Text-to-Speech Synthesis")
    parser.add_argument('-t', '--text_file', type=str, required=True,
                        help='Path to text file containing text and audio reference files')
    parser.add_argument('-r', '--ref_dir', type=str, required=True,
                        help='Root directory containing reference audio files')
    parser.add_argument('-s', '--savedir', type=str, required=True,
                        help='Directory to store synthesized audio files')
    parser.add_argument('-d', '--device', type=str, required=True,
                        help='Device to use for synthesis (cpu or cuda)')
    parser.add_argument('-m', '--model_path', type=str,
                        default="/app/models/best_model_479919.pth",
                        help='Path to the model file')
    parser.add_argument('-c', '--config_path', type=str,
                        default="/app/models/config.json",
                        help='Path to the config file')
    args = parser.parse_args(argv)

    # Get device
    device = resolve_device(args.device, torch.cuda.is_available())
    print(f"Using device: {device}")

    # Initialize TTS model
    print(f"Loading model from {args.model_path} with config {args.config_path}")
    tts = TTS(
        model_path=args.model_path,
        config_path=args.config_path,
        progress_bar=False,
    ).to(device)

    # Create output directory
    os.makedirs(args.savedir, exist_ok=True)
    print(f"Output directory: {args.savedir}")

    # Read the text file. The entries contain non-ASCII text, so decode as
    # UTF-8 explicitly rather than relying on the locale default; "utf-8-sig"
    # also tolerates a leading byte-order mark.
    print(f"Reading text file: {args.text_file}")
    with open(args.text_file, 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()

    # Process each line
    print(f"Processing {len(lines)} entries...")
    for i, line in enumerate(tqdm(lines)):
        # Blank lines (e.g. a trailing newline) are not malformed entries.
        if not line.strip():
            continue

        entry = parse_entry(line)
        if entry is None:
            print(f"Warning: Line {i+1} does not have 4 tab-separated parts. Skipping.")
            continue

        idx, lang, text, ref_file = entry
        ref_path = os.path.join(args.ref_dir, ref_file)
        save_path = os.path.join(args.savedir, f"{idx}_{lang}_{os.path.basename(ref_file)}")

        # A missing reference file should cost one entry, not the whole batch.
        if not os.path.isfile(ref_path):
            print(f"Warning: Line {i+1} reference audio not found: {ref_path}. Skipping.")
            continue

        print(f"Synthesizing: {text[:30]}... using reference {ref_path}")
        tts.tts_to_file(text=text, speaker_wav=ref_path, language=lang, file_path=save_path)
        print(f"Saved to: {save_path}")

    print("Synthesis complete!")


if __name__ == "__main__":
    main()
64
+
65
+ # import torch
66
+ # from TTS.api import TTS
67
+ # import os
68
+ # from tqdm import tqdm
69
+ # import argparse
70
+
71
+ # # Get device
72
+ # device = "cuda:3" if torch.cuda.is_available() else "cpu"
73
+
74
+
75
+ # sentences_dict = {
76
+ # "te": ["వడ్రంగి, క్షురక వృత్తులలో పెట్టుబడి ప్రధానమై ఇతరులు కూడా ఈ వృత్తిలో ప్రవేశించి వ్యాపారంగా మార్చేసార",
77
+ # "నేను ఈ రోజు నాకు ఇష్టమైన పుస్తకాన్ని చదివాను మరియు తరువాత నా స్నేహితుడితో సినిమాకు వెళ్ళాను",
78
+ # "ఈ వేసవి సెలవులలో నేను నా కుటుంబంతో కలిసి ఒక అందమైన బీచ్‌కి వెళ్ళాలని అనుకుంటున్నాను"],
79
+ # "mr": ["जायकवाडी धरणातून तब्बल अडीच ते तीन लाख हेक्टर शेतीच्या सिंचनासाठी पाणी सोडलं जातं",
80
+ # "मी आज माझ्या आवडत्या पुस्तकाचे वाचन केले आणि नंतर माझ्या मित्रासोबत चित्रपटाला गेलो",
81
+ # "या उन्हाळी सुट्टीत मी माझ्या कुटुंबासोबत एक सुंदर समुद्रकिनाऱ्यावर जाण्याचा विचार करतो"],
82
+ # "bho": ["बिहार के बक्सर जिला के बक्सर नगर निगम क्षेत्र में गंगा नदी पर बने बक्सर पुल का उद्घाटन आज प्रधानमंत्री नरेंद्र मोदी करेंगे",
83
+ # "एन्ट्रापी कंप्यूटिंग में एन्ट्रोपी ऊ ऑपरेटिंग सिस्टम ह जे पे सरा क्रिप्टोग्राफिक फंक्शन सब काम करे लें",
84
+ # "हमार मंडराये वाली जहाज़ सर्पमीनन से भरी है"],
85
+ # }
86
+
87
+ # tts = TTS(
88
+ # model_path="/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/yourtts_syspin_baseline-April-19-2025_10+55AM-0b13ea658/best_model_479919.pth",
89
+ # config_path="/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/yourtts_syspin_baseline-April-19-2025_10+55AM-0b13ea658/config.json",
90
+ # progress_bar=False,
91
+ # ).to(device)
92
+
93
+
94
+ # parser = argparse.ArgumentParser(description="Text-to-Speech Synthesis")
95
+ # parser.add_argument('-t', '--text_file', type=str, required=True,
96
+ # help='Path to text file containing text and audio reference files')
97
+ # parser.add_argument('-r', '--ref_dir', type=str, required=True,
98
+ # help='Root directory containing reference audio files')
99
+ # parser.add_argument('-s', '--savedir', type=str, required=True,
100
+ # help='Directory to store synthesized audio files')
101
+ # parser.add_argument('-d', '--device', type=str, required=True,
102
+ # help='Device to use for synthesis (cpu or cuda)')
103
+
104
+ # args = parser.parse_args()
105
+
106
+
107
+
108
+
109
+ # os.makedirs(args.savedir, exist_ok=True)
110
+
111
+ # # Read the text file
112
+ # with open(args.text_file, 'r') as f:
113
+ # lines = f.readlines()
114
+
115
+ # for line in lines:
116
+ # idx, lang, text, ref_file = line.strip().split('\t')
117
+ # ref_file = os.path.join(args.ref_dir, ref_file)
118
+ # save_path = os.path.join(args.savedir, f"{idx}_{lang}_{os.path.basename(ref_file)}")
119
+ # tts.tts_to_file(text=text, speaker_wav=ref_file, language=lang, file_path=save_path)
120
+
121
+
122
+ # # ref_files = [os.path.join("/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/test_samples/", x) for x in os.listdir("/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/test_samples/")]
123
+
124
+
125
+
126
+ # # for ref_file in ref_files:
127
+ # # for language_key in sentences_dict.keys():
128
+ # # for s_idx, sentence in enumerate(sentences_dict[language_key]):
129
+ # # save_path = os.path.join("/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/test_infers/", f"test_{language_key}_{s_idx}_{os.path.basename(ref_file)}")
130
+ # # tts.tts_to_file(text=sentence, speaker_wav=ref_file, language=language_key, file_path=save_path)
131
+
132
+ # # tts.tts_to_file(text="ಹಸ್ದೇವ್ ನದಿ, ರಿಹಂಡ್ ನದಿ ಮತ್ತು ಕನ್ಹರ್ ನದಿಗಳು ಸುರ್ಗುಜಾದ ಮುಖಜ ಭೂಮಿಯಲ್ಲಿ ಹರಿಯುತ್ತವೆ.", speaker_wav="/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/syspin_data/Chhattisgarhi_Male/wavs/IISc_SYSPINProject_chha_m_AGRI_00001.wav", language="kn", file_path="test_kn.wav")
model_related/Bengali_Female/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56f54ff1a806ebe607b0095b489f553546cce0b9904d38c531e78e4a86f09e67
3
+ size 100098080
model_related/Bengali_Male/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a860399950439f46083e8e6e7e5c057e10cd7994c6232a09b33fd85f0dd1c1cb
3
+ size 119368032
model_related/Bhojpuri_Female/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80b05743e5e5c2d8cc10ac96a7e66006f7ed046fc7c7663f83971607afed31e3
3
+ size 129451808
model_related/Bhojpuri_Male/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60b03f732526e59e5165fe17c243fdb3338dc69d6b4539cd94009e6152955511
3
+ size 123140960
model_related/Chhattisgarhi_Female/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ab3dc8e3a8946d1a170030f7da2a829441be4a70d9dd9f9689d03360d0bc876
3
+ size 130934816
model_related/Chhattisgarhi_Male/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:973c6233369aade6066051802d3d8165c9199be7bc0144ea0a4ffee18cb315cc
3
+ size 108560992
model_related/English_Female/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef4b8a8c9e23c9ab0f2e0c053ce85e55b0f3ea34e23dccf2b463502218b7ba1f
3
+ size 113899296
model_related/English_Male/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1a36e7af9cd38a35a70227b25ec4de0ea9cfdf171bf445bec5731abb65eded4
3
+ size 118719648
model_related/Gujarati_Female/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81e435824571f839e07984422a3528cb6243da654137560df67b03d2858a75b5
3
+ size 35408544
model_related/Gujarati_Male/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:315f8348df2662c0d04297df825a02190108ede342b3b2926fb9c83b22cbb3cc
3
+ size 35875360
model_related/Hindi_Female/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d36722d9cfa2714f7682d8cd932182b0ecf71663151131a12db01352c6d32672
3
+ size 103680032
model_related/Hindi_Male/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea4a7831a9fa5961f56bbab0d97d63df34a8189fad2d5294dee2effc5a4e2f24
3
+ size 116714528
model_related/Kannada_Female/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12ac883789029ddab4f2f6cf0340a0ad52210035513ba2a6676f21f6438a4ee3
3
+ size 80812384
model_related/Kannada_Male/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2af52780734b88535adff48246df32ca4e5b2cf20f0c9f7efcb849d8f024eba4
3
+ size 97633824
model_related/Magahi_Female/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8000b755a4ffd5e6b0a95e2a06dabd02d6f59dbae46776d9a662e214f7f93fb9
3
+ size 145807392
model_related/Magahi_Male/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7a9007a83759fb8f1cd4bcef79a5b083839d09e1703484caa9d727dd5a1bce8
3
+ size 153767712
model_related/Maithili_Female/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4f587283f8604bcc95cba0b6e2320041619b76a715b6b0c4253a584acc595a2
3
+ size 163039520
model_related/Maithili_Male/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7f910296937f346e330e06fc5c7c0afcad3f05f252afd3e941711f2557d197b
3
+ size 152221920
model_related/Marathi_Female/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e67881879aac91f6653e96f559af5129e023e2879f76fa7bc8e24b77a8236b3
3
+ size 103038496
model_related/Marathi_Male/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bde8d12edc5745f6c1151f7a0e53bce657a416ad16259a98da14480e330472d
3
+ size 98007520
model_related/Telugu_Female/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70be39a4e6967100c91ce142d617d1ee23fbba24bd84141bfc67cc71a3cd6ed8
3
+ size 103993632
model_related/Telugu_Male/speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b419a9b129289c861f6d2f6d8e0f176e9a245879b5a0c43e5b75ccf0ad9ed93d
3
+ size 101432480