UDface11jkj committed on
Commit
8139351
·
verified ·
1 Parent(s): 37c261a

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +10 -13
  2. data.py +99 -0
  3. requirements.txt +10 -0
README.md CHANGED
@@ -1,13 +1,10 @@
1
- ---
2
- title: Testing
3
- emoji: 🐒
4
- colorFrom: blue
5
- colorTo: blue
6
- sdk: streamlit
7
- sdk_version: 1.44.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: 'Text '
3
+ emoji: πŸ†
4
+ colorFrom: yellow
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
data.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import soundfile as sf
3
+ from dia.model import Dia
4
+ import os
5
+ import uuid
6
+ import torch
7
+
8
# Page-level configuration: browser-tab title, tab icon and a centered layout.
# The icon must be a real emoji; the previous literal was mis-decoded UTF-8
# (mojibake) rather than the studio-microphone emoji.
st.set_page_config(
    page_title="Dia Text-to-Speech Converter",
    page_icon="🎙️",
    layout="centered",
)

# Create the directory that generated WAV files are written to.
# exist_ok=True makes this a no-op on every rerun after the first.
os.makedirs("static/audio", exist_ok=True)
17
+
18
# Model loading.
#
# The heavy model object is cached at process level with st.cache_resource so
# that every browser session (and every script rerun) shares one copy instead
# of loading its own; st.session_state.model is still populated for the rest of
# the script, which reads the model from there.
@st.cache_resource(show_spinner=False)
def _load_dia_model(device):
    """Load the Dia-1.6B checkpoint for ``device`` and return the model object.

    Args:
        device: ``"cuda"`` or ``"cpu"``.

    Returns:
        The object returned by ``Dia.from_pretrained``.

    Raises:
        Whatever ``Dia.from_pretrained`` raises; the caller reports it in the UI.
    """
    # float16 is only requested on GPU. On CPU fall back to float32, since
    # half-precision CPU kernels are slow or unavailable for many operations.
    compute_dtype = "float16" if device == "cuda" else "float32"
    model = Dia.from_pretrained(
        "nari-labs/Dia-1.6B",
        compute_dtype=compute_dtype,
        device=device,
    )

    if device == "cpu":
        # NOTE(review): the object returned by Dia.from_pretrained looks like a
        # plain wrapper rather than a torch.nn.Module, in which case it has no
        # .eval() and an unguarded call raises AttributeError - confirm against
        # the dia package. Guarding keeps CPU start-up working either way.
        switch_to_eval = getattr(model, "eval", None)
        if callable(switch_to_eval):
            switch_to_eval()
        # Cap intra-op CPU threads (process-wide torch setting).
        torch.set_num_threads(4)

    return model


# Initialize session state for model
if 'model' not in st.session_state:
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        st.sidebar.info(f"Using device: {device}")

        with st.spinner("Loading Dia model..."):
            loaded_model = _load_dia_model(device)

        # Publish the model only after loading and setup fully succeeded, so a
        # failure never leaves a half-initialised model that later reruns
        # would mistake for a loaded one.
        st.session_state.model = loaded_model
        st.sidebar.success("Model loaded successfully with optimizations")
    except Exception as e:
        # Top-level UI boundary: show the problem and halt this script run.
        st.error(f"Error loading Dia model: {str(e)}")
        st.stop()
39
+
40
# Function to generate audio
def generate_audio(text):
    """Synthesize ``text`` with the session's Dia model and save it as a WAV file.

    Args:
        text: The script to convert to speech.

    Returns:
        The path of the written file (``static/audio/audio_<uuid>.wav``), or
        ``None`` when ``text`` is empty/blank or when generation or writing
        fails. Failures are reported through ``st.error`` instead of raised.
    """
    # Reject blank input up front: a whitespace-only script has nothing to
    # synthesize and should not be sent to the model.
    if not text or not text.strip():
        st.error("Text is required")
        return None

    try:
        output = st.session_state.model.generate(text)

        # A fresh UUID per call keeps successive generations from overwriting
        # each other.
        filename = f"audio_{uuid.uuid4()}.wav"
        filepath = f"static/audio/{filename}"
        # Re-create the directory in case it was removed after start-up.
        os.makedirs(os.path.dirname(filepath), exist_ok=True)

        # 44100 is the sample rate handed to soundfile; presumably the rate of
        # Dia's output audio - verify against the model documentation.
        sf.write(filepath, output, 44100)
        return filepath
    except Exception as e:
        # UI boundary: surface the error to the user and signal failure.
        st.error(f"Error generating audio: {str(e)}")
        return None
57
+
58
# UI: page heading and short usage hint. The heading emoji was previously
# mis-decoded UTF-8 (mojibake); it is the studio-microphone emoji.
st.title("🎙️ Dia - Text to Dialogue Demo")
st.markdown("Enter a multi-speaker script below using `[S1]`, `[S2]`, etc.")

# Text input, pre-filled with a sample two-speaker script.
text_input = st.text_area(
    "Script",
    value="[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face.",
    height=150,
)

# Action selection; only the first entry is implemented.
action = st.selectbox(
    "Choose Action",
    ["Convert to Audio", "Summarize (Coming Soon)"],
    index=0,
)

# Generate button
# NOTE(review): the player and download button are rendered only inside this
# branch, so they will likely disappear on the next rerun (for example after
# clicking the download button) - confirm, and persist the last result in
# st.session_state if that is not intended.
if st.button("Generate Audio", type="primary"):
    if action == "Convert to Audio":
        with st.spinner("Generating audio..."):
            audio_file = generate_audio(text_input)

        # generate_audio returns None on failure and has already shown the error.
        if audio_file:
            st.success("Audio generated successfully!")
            st.audio(audio_file)

            # Read the bytes while the file is open so the download button
            # never depends on a handle that is closed once the block exits.
            with open(audio_file, "rb") as file:
                audio_bytes = file.read()
            st.download_button(
                label="Download Audio",
                data=audio_bytes,
                file_name="generated_dialogue.wav",
                mime="audio/wav",
            )
    else:
        st.error("Summarization not implemented yet")

# Display info in sidebar
st.sidebar.markdown("---")
st.sidebar.markdown("Powered by Dia-1.6B AI Text-to-Dialogue Model")
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ dac==0.4.3
2
+ fastapi==0.115.12
3
+ huggingface_hub==0.30.2
4
+ numpy==1.21.5
5
+ pydantic==2.11.3
6
+ soundfile==0.13.1
7
+ torch==2.6.0
8
+ torchaudio==2.6.0
9
+ uvicorn
10
+