speech_recognize1

Sleeping

App Files Files Community

mr2along committed on Oct 11, 2024

Commit

d558c26

verified ·

1 Parent(s): 96b96fe

Update app.py

Browse files

Files changed (1) hide show

app.py +126 -142

app.py CHANGED Viewed

@@ -1,142 +1,126 @@
-import requests
-from bs4 import BeautifulSoup
-from concurrent.futures import ThreadPoolExecutor, as_completed
-import re
-import pypub
-import os
-import time  # Thư viện để tính thời gian
-import gradio as gr  # Thêm thư viện Gradio
-# Parse a story URL and derive the chapter-list API URL and the chapter base URL
-def parse_story_url(story_url):
-    """Extract the story name and ID from a truyenfull.tv story URL.
-
-    Args:
-        story_url: URL of the form ``https://truyenfull.tv/<name>.<id>/``.
-
-    Returns:
-        A tuple ``(story_name, story_id, api_url, base_url)``. ``base_url``
-        is the prefix to which a chapter number and ``.html`` are appended.
-
-    Raises:
-        ValueError: If the URL does not match the expected format.
-    """
-    match = re.search(r"https://truyenfull\.tv/([^/]+)(?:-f\d+)?\.(\d+)/", story_url)
-    if match is None:
-        raise ValueError("URL không hợp lệ")
-    story_name = match.group(1)
-    # The pattern has only two capturing groups ("(?:...)" does not capture),
-    # so the numeric ID is group 2; asking for group 3 raises IndexError.
-    story_id = match.group(2)
-    api_url = f"https://truyenfull.tv/api/chapters/{story_id}/"
-    base_url = f"https://truyenfull.tv/{story_name}/chuong-"
-    return story_name, story_id, api_url, base_url
-# Fetch the chapter list from the API
-def get_chapter_info(api_url):
-    """Return the ``items`` list from the JSON served at ``api_url``.
-
-    An empty list is returned when the payload has no ``items`` key. HTTP
-    error statuses are raised by ``raise_for_status``.
-    """
-    reply = requests.get(api_url)
-    reply.raise_for_status()  # surface HTTP errors instead of parsing an error page
-    return reply.json().get('items', [])
-# Fetch the text of one chapter, addressed by its chapter number
-def get_chapter_content(chapter_index, base_url):
-    """Download one chapter page and return the text of its content div.
-
-    Returns a placeholder message when the ``chapter-c`` div is missing, and
-    a different placeholder when the request or parsing raises.
-    """
-    chapter_url = "".join([base_url, str(chapter_index), ".html"])
-    try:
-        page = requests.get(chapter_url)
-        page.raise_for_status()
-        document = BeautifulSoup(page.content, 'html.parser')
-        body = document.find('div', id='chapter-c', class_='chapter-c')
-        if not body:
-            return "Không tìm thấy nội dung chương."
-        return body.get_text(separator='\n').strip()
-    except Exception as e:
-        print(f"Lỗi khi lấy nội dung chương {chapter_index}: {e}")
-        return "Không thể lấy nội dung."
-# Download a range of chapters, save them to a TXT file and build an EPUB
-def get_all_chapters_content(story_url, start_chapter, max_chapters):
-    """Download chapters of a story into ``<story_name>.txt`` and an EPUB.
-
-    Args:
-        story_url: Story URL accepted by ``parse_story_url``.
-        start_chapter: 1-based number of the first chapter to download.
-        max_chapters: Maximum number of chapters to download.
-
-    Returns:
-        The string "Không tìm thấy chương nào." when the API lists no
-        chapters; otherwise a two-item list ``[status_message, epub_file]``
-        where ``epub_file`` is whatever ``create_epub_from_chapters`` returns.
-    """
-    story_name, _, api_url, base_url = parse_story_url(story_url)
-    chapters = get_chapter_info(api_url)
-    if not chapters:
-        return "Không tìm thấy chương nào."
-    # Clamp so that a start below 1 cannot turn into a negative slice index.
-    first_index = max(start_chapter - 1, 0)
-    chapters_to_load = chapters[first_index:first_index + max_chapters]
-    chapter_contents = []  # (chapter number, text, title) tuples
-    # Time the whole download phase; timing after a future has completed
-    # would only measure the cost of reading an already-available result.
-    download_start = time.time()
-    with ThreadPoolExecutor(max_workers=10) as executor:
-        # Number chapters from the requested start, not from 1, so that the
-        # fetched page and the title both belong to the selected chapter.
-        future_to_chapter = {
-            executor.submit(get_chapter_content, first_index + offset + 1, base_url):
-                (first_index + offset + 1, chapter)
-            for offset, chapter in enumerate(chapters_to_load)
-        }
-        for future in as_completed(future_to_chapter):
-            chapter_index, chapter = future_to_chapter[future]
-            try:
-                content = future.result()
-                chapter_contents.append((chapter_index, content, chapter['chapter_name']))
-                print(f"Đã lưu chương {chapter_index}")
-            except Exception as e:
-                print(f"Lỗi khi lấy nội dung chương {chapter_index}: {e}")
-    total_time = time.time() - download_start
-    # Report what was actually downloaded, which may be fewer than requested.
-    loaded_count = len(chapter_contents)
-    avg_time_per_chapter = total_time / loaded_count if loaded_count else 0
-    print(f"Tổng thời gian tải {loaded_count} chương: {total_time:.2f} giây")
-    print(f"Thời gian trung bình cho mỗi chương: {avg_time_per_chapter:.2f} giây")
-    # Futures complete in arbitrary order; restore chapter order before writing.
-    chapter_contents.sort(key=lambda item: item[0])
-    output_file = f"{story_name}.txt"
-    with open(output_file, 'w', encoding='utf-8') as f:
-        for _, content, chapter_title in chapter_contents:
-            f.write(f"{chapter_title}\n\n")
-            f.write(f"{content}\n")
-            f.write("-" * 50 + "\n")
-    epubfile = create_epub_from_chapters(chapter_contents, story_name)
-    return [f"Đã tải thành công {loaded_count} chương. Tổng thời gian: {total_time:.2f} giây, Thời gian trung bình: {avg_time_per_chapter:.2f} giây. File TXT: {output_file}", epubfile]
-# Build an EPUB file from the downloaded chapters
-def create_epub_from_chapters(chapter_contents, story_name):
-    """Create ``./<story_name>.epub`` from ``(index, text, title)`` tuples.
-
-    Returns:
-        The value returned by ``Epub.create``, or ``None`` when EPUB creation
-        fails (the error is printed, not raised).
-    """
-    # Bind the result up front so the failure path returns None instead of
-    # raising UnboundLocalError at the final return.
-    epubfile = None
-    try:
-        my_epub = pypub.Epub(story_name)
-        for _, content, chapter_title in chapter_contents:
-            my_chapter = pypub.create_chapter_from_text(content, chapter_title)
-            my_epub.add_chapter(my_chapter)
-        output_directory = f"./{story_name}.epub"
-        epubfile = my_epub.create(output_directory)
-        print(f"Đã tạo file EPUB: {output_directory}")
-    except Exception as e:
-        print(f"Lỗi khi tạo file EPUB: {e}")
-    return epubfile
-# Gradio callback
-def gradio_interface(story_url, start_chapter, max_chapters):
-    """Run the download and return ``(status_message, epub_file)`` for Gradio.
-
-    Args:
-        story_url: Story URL entered by the user.
-        start_chapter: First chapter number, as text from the textbox.
-        max_chapters: Number of chapters to download, as text from the textbox.
-
-    Returns:
-        A ``(message, file)`` tuple matching the interface's text and file
-        outputs; ``file`` is ``None`` when no EPUB was produced.
-    """
-    start_total_time = time.time()
-    result = get_all_chapters_content(story_url, int(start_chapter), int(max_chapters))
-    total_process_time = time.time() - start_total_time
-    # The downloader returns a bare string when no chapters are found and a
-    # [message, file] list otherwise; normalise both to the same pair.
-    if isinstance(result, str):
-        message, epub_file = result, None
-    else:
-        message, epub_file = result
-    # Append to the message only: "list += str" would extend the list with
-    # one element per character.
-    message += f"\nTổng thời gian hoàn thành tất cả các chức năng: {total_process_time:.2f} giây"
-    return message, epub_file
-# Build the Gradio UI and start it
-_story_inputs = [
-    gr.Textbox(label="URL Truyện", placeholder="Nhập URL của truyện từ truyenfull.tv"),
-    gr.Textbox(label="Số chương bắt đầu", placeholder="Nhập số chương bắt đầu"),
-    gr.Textbox(label="Số chương muốn tải", placeholder="Nhập số chương muốn tải"),
-]
-gr.Interface(
-    fn=gradio_interface,
-    inputs=_story_inputs,
-    outputs=["text", "file"],  # status message and the generated EPUB
-    title="Truyện Full Downloader",
-    description="Công cụ tải truyện từ truyenfull.tv và tạo file EPUB.",
-).launch()

+import speech_recognition as sr
+import difflib
+import wave
+import pyaudio
+import gradio as gr
+# Step 1: Record audio
+def record_audio(filename, seconds=10):
+    """Record mono 16-bit audio at 44.1 kHz and save it as a WAV file.
+
+    Args:
+        filename: Path of the WAV file to write.
+        seconds: Length of the recording in seconds (default 10).
+    """
+    chunk = 1024  # samples read per call
+    sample_format = pyaudio.paInt16  # 16 bits per sample
+    channels = 1
+    fs = 44100  # samples per second
+    p = pyaudio.PyAudio()  # interface to PortAudio
+    try:
+        # Query the sample width while the PortAudio interface is still open.
+        sample_width = p.get_sample_size(sample_format)
+        print("Recording...")
+        stream = p.open(format=sample_format,
+                        channels=channels,
+                        rate=fs,
+                        frames_per_buffer=chunk,
+                        input=True)
+        try:
+            frames = [stream.read(chunk) for _ in range(int(fs / chunk * seconds))]
+        finally:
+            # Release the stream even when a read fails.
+            stream.stop_stream()
+            stream.close()
+    finally:
+        p.terminate()
+    # The context manager closes the file even if writing raises.
+    with wave.open(filename, 'wb') as wf:
+        wf.setnchannels(channels)
+        wf.setsampwidth(sample_width)
+        wf.setframerate(fs)
+        wf.writeframes(b''.join(frames))
+    print("Recording completed.")
+# Step 2: Transcribe the audio file
+def transcribe_audio(filename):
+    """Transcribe an audio file with the Google Web Speech API.
+
+    Returns the recognised text, or an empty string when the audio cannot be
+    understood or the recognition request fails (the reason is printed).
+    """
+    engine = sr.Recognizer()
+    # Load the whole file into memory first; recognition works on that copy.
+    with sr.AudioFile(filename) as wav_source:
+        captured = engine.record(wav_source)
+    print("Transcribing the audio...")
+    try:
+        text = engine.recognize_google(captured)
+    except sr.UnknownValueError:
+        print("Google Speech Recognition could not understand the audio")
+        return ""
+    except sr.RequestError as e:
+        print(f"Error with Google Speech Recognition service: {e}")
+        return ""
+    print("Transcription completed.")
+    return text
+# Step 3: Compare the transcribed text with the input paragraph
+def compare_texts(reference_text, transcribed_text):
+    """Score how closely a transcription matches the reference text.
+
+    Words are compared ignoring case and surrounding ASCII punctuation, and
+    are aligned with ``difflib`` so that one skipped or inserted word does
+    not shift every later word out of position.
+
+    Args:
+        reference_text: The paragraph the speaker was asked to read.
+        transcribed_text: The text returned by speech recognition.
+
+    Returns:
+        A dict whose ``text_score`` entry holds the overall ``quality_score``
+        (0-100), a ``fidelity_class`` of "CORRECT" when that score is above
+        50 and "INCORRECT" otherwise, and a ``word_score_list`` with one
+        entry per reference word: 100 if matched, 50 if a different word was
+        spoken in its place, 0 if it was missing.
+    """
+    import string
+
+    def normalize(word):
+        # A transcription usually lacks the reference's punctuation and casing.
+        return word.strip(string.punctuation).casefold()
+
+    reference_words = reference_text.split()
+    reference_keys = [normalize(word) for word in reference_words]
+    transcribed_keys = [normalize(word) for word in transcribed_text.split()]
+
+    # Overall similarity uses the same normalised text as the word scores.
+    overall = difflib.SequenceMatcher(
+        None, " ".join(reference_keys), " ".join(transcribed_keys)
+    )
+    similarity_score = round(overall.ratio() * 100, 2)
+
+    # Every reference word starts as missing (0) and is upgraded by alignment.
+    word_quality = [0] * len(reference_words)
+    aligner = difflib.SequenceMatcher(
+        None, reference_keys, transcribed_keys, autojunk=False
+    )
+    for tag, i1, i2, j1, j2 in aligner.get_opcodes():
+        if tag == "equal":
+            for i in range(i1, i2):
+                word_quality[i] = 100
+        elif tag == "replace":
+            # Only as many reference words as there are substitute words
+            # count as "wrong"; any surplus stays at 0 (missing).
+            for i in range(i1, min(i2, i1 + (j2 - j1))):
+                word_quality[i] = 50
+    word_scores = [
+        {"word": word, "quality_score": quality}
+        for word, quality in zip(reference_words, word_quality)
+    ]
+
+    fidelity_class = "CORRECT" if similarity_score > 50 else "INCORRECT"
+    output = {
+        "quota_remaining": -1,
+        "reference_text_from_application": reference_text,
+        "status": "success",
+        "text_score": {
+            "fidelity_class": fidelity_class,
+            "quality_score": similarity_score,
+            "text": reference_text,
+            "transcribedText": transcribed_text,
+            "word_score_list": word_scores
+        },
+        "version": "1.1"
+    }
+    return output
+# Gradio Interface Function
+def gradio_function(paragraph):
+    """Record audio, transcribe it and compare it with ``paragraph``.
+
+    Args:
+        paragraph: Reference text the speaker is expected to read.
+
+    Returns:
+        The comparison dict produced by ``compare_texts``.
+    """
+    import os
+    import tempfile
+
+    # Use a unique temporary file per call: a fixed name in the working
+    # directory would be overwritten by a concurrent request and left behind.
+    fd, audio_path = tempfile.mkstemp(suffix=".wav")
+    os.close(fd)  # the recorder reopens the path itself
+    try:
+        record_audio(audio_path)
+        transcribed_text = transcribe_audio(audio_path)
+    finally:
+        os.remove(audio_path)
+    return compare_texts(paragraph, transcribed_text)
+# Gradio Interface
+interface = gr.Interface(
+    fn=gradio_function,
+    # Use the top-level component: the gr.inputs namespace is deprecated and
+    # no longer exists in Gradio 4.
+    inputs=gr.Textbox(lines=5, label="Input Paragraph"),
+    outputs="json",
+    title="Speech Recognition Comparison",
+    description="Input a paragraph, record your audio, and compare the transcription to the original text."
+)
+# Launch Gradio app
+interface.launch()