Spaces:
Sleeping
Sleeping
File size: 8,455 Bytes
a9209e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
import os
from datetime import datetime
from typing import Dict, Optional

import librosa
import soundfile as sf
from rich.console import Console
from rich.markup import escape
from rich.progress import Progress
from whisper import load_model  # Import directly from whisper package

import config
from content_generator import ContentGenerator
class AudioProcessor:
    """Transcribe audio with Whisper and optionally derive written content.

    An instance owns three collaborators:

    * ``console`` - a rich ``Console`` used for all status output,
    * ``model`` - the Whisper "tiny" model returned by ``load_model``,
    * ``content_generator`` - a ``ContentGenerator`` that turns a transcript
      into a news article or a blog post.
    """

    # Prompt given to Whisper for Turkish audio ("This is a news text.").
    _TURKISH_PROMPT = "Bu bir haber metnidir."
    # Values of ``language`` that select Turkish transcription.
    _TURKISH_CODES = ("tr", "turkish")
    # A news article with fewer non-blank lines than this is rejected.
    _MIN_NEWS_PARAGRAPHS = 3

    def __init__(self):
        """Create the console, load the Whisper model and the content generator.

        Raises:
            Exception: Whatever ``load_model`` raises; the error is printed
                to the console first and then re-raised unchanged.
        """
        self.console = Console()
        try:
            # Use tiny model instead of base for faster processing
            self.model = load_model("tiny")
            self.console.print("[green]Successfully loaded Whisper model (tiny)[/green]")
        except Exception as e:
            # Escape the message: brackets in it would otherwise be parsed
            # as rich markup and dropped (or raise a MarkupError).
            self.console.print(f"[red]Error loading Whisper model:[/red] {escape(str(e))}")
            raise
        self.content_generator = ContentGenerator()

    def process_audio(
        self,
        audio_path: str,
        language: str = config.DEFAULT_LANGUAGE,
        content_type: str = "news",  # "news" or "blog"
        generate_content: bool = True
    ) -> Dict:
        """Transcribe an audio file and optionally generate content from it.

        Args:
            audio_path (str): Path to the audio file.
            language (str): Language code for transcription and content
                generation. ``"tr"`` is passed to Whisper as ``"turkish"``.
            content_type (str): ``"news"`` selects the news generator; any
                other value selects the blog generator.
            generate_content (bool): Whether to generate content from the
                transcript.

        Returns:
            Dict: Keys ``transcript``, ``language``, ``date`` (``YYYY-MM-DD``),
            ``audio_file`` (base name of ``audio_path``) and ``content_type``.
            ``generated_content`` is present only when generation was
            requested and produced a truthy result.

        Raises:
            Exception: Any error raised during transcription or while
                assembling the result is printed and re-raised.
        """
        self.console.print(
            f"[yellow]Processing audio file:[/yellow] {escape(str(audio_path))}"
        )
        try:
            self.console.print("[yellow]Transcribing audio...[/yellow]")
            # The context prompt is Turkish, so only send it with Turkish
            # audio; for any other language it would bias the decoder.
            is_turkish = language in self._TURKISH_CODES
            result = self.model.transcribe(
                audio_path,
                language="turkish" if language == "tr" else language,
                fp16=False,
                beam_size=1,  # Minimum beam size for fastest processing
                best_of=1,  # Single candidate for fastest processing
                condition_on_previous_text=False,
                compression_ratio_threshold=2.4,
                logprob_threshold=-1.0,
                no_speech_threshold=0.6,
                initial_prompt=self._TURKISH_PROMPT if is_turkish else None,
            )
            transcript = result["text"]

            generated_content = None
            if generate_content:
                self.console.print(
                    f"[yellow]Generating {content_type} content from transcript...[/yellow]"
                )
                if content_type == "news":
                    generated_content = self._generate_news_from_transcript(transcript, language)
                else:
                    generated_content = self._generate_blog_from_transcript(transcript, language)

            output = {
                "transcript": transcript,
                "language": language,
                "date": datetime.now().strftime("%Y-%m-%d"),
                "audio_file": os.path.basename(audio_path),
                "content_type": content_type,
            }
            # The key is omitted entirely when nothing usable was generated.
            if generated_content:
                output["generated_content"] = generated_content
            return output
        except Exception as e:
            self.console.print(f"[red]Error processing audio:[/red] {escape(str(e))}")
            raise

    def _generate_news_from_transcript(
        self,
        transcript: str,
        language: str
    ) -> Optional[Dict]:
        """Generate a news article from the transcript.

        Returns:
            Optional[Dict]: The generator's result when it has a ``title``
            and its ``content`` holds at least three non-blank lines;
            ``None`` otherwise, including when generation or validation
            raises (the error is printed, not propagated).
        """
        try:
            news_content = self.content_generator.generate_content(
                topic=transcript,
                keywords=["news", "professional", "factual"],
                language=language
            )
            if not news_content or "title" not in news_content:
                return None
            # Count only non-blank lines: splitting on "\n" alone also
            # counts the empty separators between paragraphs, which let
            # two-paragraph articles pass the three-paragraph minimum.
            paragraphs = [
                line for line in news_content["content"].split('\n') if line.strip()
            ]
            if len(paragraphs) < self._MIN_NEWS_PARAGRAPHS:
                return None
            return news_content
        except Exception as e:
            self.console.print(f"[red]Error generating news article:[/red] {escape(str(e))}")
            return None

    def _generate_blog_from_transcript(
        self,
        transcript: str,
        language: str
    ) -> Optional[Dict]:
        """Generate a blog post from the transcript.

        Returns:
            Optional[Dict]: The generator's result unchanged, or ``None``
            when generation raises (the error is printed, not propagated).
        """
        try:
            return self.content_generator.generate_content(
                topic=transcript,
                keywords=["blog", "engaging", "informative"],
                language=language
            )
        except Exception as e:
            self.console.print(f"[red]Error generating blog post:[/red] {escape(str(e))}")
            return None

    @staticmethod
    def _format_news_markdown(generated: Dict, date_prefix: str) -> str:
        """Render a news article (title, optional subtitle, date, body) as Markdown."""
        full_text = generated["content"]
        lines = full_text.split('\n')
        parts = [f"# {generated['title']}\n\n"]

        # The first non-blank line becomes an italic subtitle unless it is
        # already emphasised or a heading; it is then removed from the body.
        body = full_text
        first_index = next((i for i, line in enumerate(lines) if line.strip()), None)
        if first_index is not None:
            subtitle = lines[first_index].strip()
            if not subtitle.startswith(('*', '#')):
                parts.append(f"*{subtitle}*\n\n")
                body = '\n'.join(lines[first_index + 1:])

        parts.append(f"**Tarih:** {date_prefix}\n\n")
        parts.append("---\n\n")  # Separator line
        parts.append(body)
        return "".join(parts)

    @staticmethod
    def _format_blog_markdown(generated: Dict, date_prefix: str) -> str:
        """Render a blog post (title, author line, date line, body) as Markdown."""
        return "".join([
            f"# {generated['title']}\n\n",
            "*Yazar: Mete*\n",
            f"*Tarih: {date_prefix}*\n\n",
            generated['content'],
        ])

    def save_results(
        self,
        results: Dict,
        output_dir: str = "data/transcripts"
    ) -> None:
        """Save the transcript and, when present, the generated content.

        The transcript is written to
        ``<date>-<audio base name>-transcript.txt`` and the generated content
        to ``<date>-<audio base name>-<content_type>.md`` inside
        ``output_dir``, which is created if missing.

        Args:
            results (Dict): Processing results; reads ``audio_file``,
                ``date``, ``transcript``, ``content_type`` and the optional
                ``generated_content`` (which needs ``title`` and ``content``).
            output_dir (str): Directory to save the output files.
        """
        os.makedirs(output_dir, exist_ok=True)

        # File names are derived from the audio file name without extension.
        base_name = os.path.splitext(results["audio_file"])[0]
        date_prefix = results["date"]

        transcript_file = os.path.join(
            output_dir,
            f"{date_prefix}-{base_name}-transcript.txt"
        )
        with open(transcript_file, "w", encoding="utf-8") as f:
            f.write(results["transcript"])

        generated = results.get("generated_content")
        if generated:
            content_type = results["content_type"]
            content_file = os.path.join(
                output_dir,
                f"{date_prefix}-{base_name}-{content_type}.md"
            )
            try:
                # Build the whole document before opening the file so that
                # malformed content cannot leave a truncated file behind.
                if content_type == "news":
                    document = self._format_news_markdown(generated, date_prefix)
                else:
                    document = self._format_blog_markdown(generated, date_prefix)
                with open(content_file, "w", encoding="utf-8") as f:
                    f.write(document)
                self.console.print(
                    f"[green]{escape(content_type.title())} content saved to:[/green] "
                    f"{escape(content_file)}"
                )
            except Exception as e:
                # Best effort: the transcript is already saved, so report
                # the failure instead of aborting.
                self.console.print(f"[red]Error saving content:[/red] {escape(str(e))}")
        elif results.get("content_type") == "news":
            self.console.print("[yellow]Warning:[/yellow] Could not generate news article from this audio content.")
        else:
            self.console.print("[yellow]Warning:[/yellow] Could not generate blog post from this audio content.")

        self.console.print(f"[green]Transcript saved to:[/green] {escape(transcript_file)}")