Spaces:
Configuration error
Configuration error
Enrique Sanchez
committed on
Commit
•
52a3e7e
0
Parent(s):
First commit
Browse files- .gitignore +134 -0
- .vscode/settings.json +3 -0
- README.md +4 -0
- app.py +87 -0
- extra/generate_audio.sh +8 -0
- extra/script.aiff +0 -0
- extra/script.txt +4 -0
- poetry.lock +0 -0
- pyproject.toml +23 -0
- src/__init__.py +0 -0
- src/sentiment.py +18 -0
- src/transcribe.py +19 -0
- tests/__init__.py +0 -0
- tests/samplings/thisisatest.mp3 +0 -0
- tests/test_sentiment.py +33 -0
- tests/test_transcribe.py +18 -0
.gitignore
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
pip-wheel-metadata/
|
24 |
+
share/python-wheels/
|
25 |
+
*.egg-info/
|
26 |
+
.installed.cfg
|
27 |
+
*.egg
|
28 |
+
MANIFEST
|
29 |
+
|
30 |
+
# PyInstaller
|
31 |
+
# Usually these files are written by a python script from a template
|
32 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
33 |
+
*.manifest
|
34 |
+
*.spec
|
35 |
+
|
36 |
+
# Installer logs
|
37 |
+
pip-log.txt
|
38 |
+
pip-delete-this-directory.txt
|
39 |
+
|
40 |
+
# Unit test / coverage reports
|
41 |
+
htmlcov/
|
42 |
+
.tox/
|
43 |
+
.nox/
|
44 |
+
.coverage
|
45 |
+
.coverage.*
|
46 |
+
.cache
|
47 |
+
nosetests.xml
|
48 |
+
coverage.xml
|
49 |
+
*.cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
|
53 |
+
# Translations
|
54 |
+
*.mo
|
55 |
+
*.pot
|
56 |
+
|
57 |
+
# Django stuff:
|
58 |
+
*.log
|
59 |
+
local_settings.py
|
60 |
+
db.sqlite3
|
61 |
+
db.sqlite3-journal
|
62 |
+
|
63 |
+
# Flask stuff:
|
64 |
+
instance/
|
65 |
+
.webassets-cache
|
66 |
+
|
67 |
+
# Scrapy stuff:
|
68 |
+
.scrapy
|
69 |
+
|
70 |
+
# Sphinx documentation
|
71 |
+
docs/_build/
|
72 |
+
|
73 |
+
# PyBuilder
|
74 |
+
target/
|
75 |
+
|
76 |
+
# Jupyter Notebook
|
77 |
+
.ipynb_checkpoints
|
78 |
+
|
79 |
+
# IPython
|
80 |
+
profile_default/
|
81 |
+
ipython_config.py
|
82 |
+
|
83 |
+
# pyenv
|
84 |
+
.python-version
|
85 |
+
|
86 |
+
# pipenv
|
87 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
88 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
89 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
90 |
+
# install all needed dependencies.
|
91 |
+
#Pipfile.lock
|
92 |
+
|
93 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
94 |
+
__pypackages__/
|
95 |
+
|
96 |
+
# Celery stuff
|
97 |
+
celerybeat-schedule
|
98 |
+
celerybeat.pid
|
99 |
+
|
100 |
+
# SageMath parsed files
|
101 |
+
*.sage.py
|
102 |
+
|
103 |
+
# Environments
|
104 |
+
.env
|
105 |
+
.venv
|
106 |
+
env/
|
107 |
+
venv/
|
108 |
+
ENV/
|
109 |
+
env.bak/
|
110 |
+
venv.bak/
|
111 |
+
|
112 |
+
# Spyder project settings
|
113 |
+
.spyderproject
|
114 |
+
.spyproject
|
115 |
+
|
116 |
+
# Rope project settings
|
117 |
+
.ropeproject
|
118 |
+
|
119 |
+
# mkdocs documentation
|
120 |
+
/site
|
121 |
+
|
122 |
+
# mypy
|
123 |
+
.mypy_cache/
|
124 |
+
.dmypy.json
|
125 |
+
dmypy.json
|
126 |
+
|
127 |
+
# Pyre type checker
|
128 |
+
.pyre/
|
129 |
+
|
130 |
+
# pytype static type analyzer
|
131 |
+
.pytype/
|
132 |
+
|
133 |
+
# Cython debug symbols
|
134 |
+
cython_debug/
|
.vscode/settings.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"python.analysis.extraPaths": ["./src"]
|
3 |
+
}
|
README.md
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Sentiment Analysis testing
|
2 |
+
|
3 |
+
A simple repo to play with testing for an app to transcribe sentences and get the emotions from them.
|
4 |
+
|
app.py
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from src.sentiment import analyze_sentiment
|
3 |
+
from src.transcribe import transcribe_audio
|
4 |
+
|
5 |
+
TITLE = """<h1 align="center">🎤 Emotion Detection 💬</h1>"""
|
6 |
+
|
7 |
+
# Emoji displayed next to each emotion label. Keys are the label strings;
# insertion order is kept as originally written.
EMOJI_MAPPING = dict(
    disappointment="😞",
    sadness="😢",
    annoyance="😠",
    neutral="😐",
    disapproval="👎",
    realization="😮",
    nervousness="😬",
    approval="👍",
    joy="😄",
    anger="😡",
    embarrassment="😳",
    caring="🤗",
    remorse="😔",
    disgust="🤢",
    grief="😥",
    confusion="😕",
    relief="😌",
    desire="😍",
    admiration="😌",
    optimism="😊",
    fear="😨",
    love="❤️",
    excitement="🎉",
    curiosity="🤔",
    amusement="😄",
    surprise="😲",
    gratitude="🙏",
    pride="🦁",
)
|
37 |
+
|
38 |
+
|
39 |
+
def get_sentiment_emoji(sentiment: str) -> str:
    """Look up the emoji for an emotion label.

    Labels that are not present in ``EMOJI_MAPPING`` yield an empty string.
    """
    if sentiment in EMOJI_MAPPING:
        return EMOJI_MAPPING[sentiment]
    return ""
|
42 |
+
|
43 |
+
|
44 |
+
def display_sentiment_results(sentiment_results: dict) -> str:
    """Format emotion labels as one ``"<label> <emoji>"`` line each.

    Args:
        sentiment_results: Mapping whose keys are emotion labels. The values
            are not used for display.

    Returns:
        One newline-terminated line per label, in the mapping's iteration
        order; an empty string when the mapping is empty.
    """
    # Only the keys are needed, so iterate the mapping directly and build the
    # text in a single join instead of repeated string concatenation.
    return "".join(
        f"{sentiment} {get_sentiment_emoji(sentiment)}\n"
        for sentiment in sentiment_results
    )
|
51 |
+
|
52 |
+
|
53 |
+
def get_ouput(audio_file: str) -> tuple[str, str]:
    """Transcribe an audio file and describe the emotions found in the text.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        A ``(transcription, emotions)`` pair. If any step raises, the error
        is printed and ``("", "Error in transcription.")`` is returned.
    """
    try:
        text = transcribe_audio(audio_file)
        sentiment = analyze_sentiment(text)
        return text, display_sentiment_results(sentiment)
    except Exception as e:
        # This is the UI callback boundary: report the failure under this
        # function's own name (any of the three steps may have raised) and
        # hand the UI a pair of displayable strings instead of propagating.
        print(f"Error in get_ouput: {e}")
        return "", "Error in transcription."
|
62 |
+
|
63 |
+
|
64 |
+
def main():
    """Build the Gradio UI and launch it.

    The page shows the title banner, a microphone audio input and two text
    boxes (transcription and emotion analysis), wired to ``get_ouput``
    through a ``gr.Interface``.
    """
    block = gr.Blocks()
    with block:
        gr.HTML(TITLE)

        # NOTE(review): the nesting below is reconstructed; confirm that the
        # Interface belongs inside the Group rather than directly in the Blocks.
        with gr.Group():
            audio_input = gr.Audio(sources=["microphone"], type="filepath")
            output_text = gr.Textbox(label="Transcription")
            emotion_output = gr.Textbox(label="Emotion Analysis")

            gr.Interface(
                fn=get_ouput,
                inputs=audio_input,
                outputs=[output_text, emotion_output],
                title="Get the text and the sentiment",
                # A single literal: the previous backslash continuation
                # spliced the next line's leading whitespace into the text.
                # NOTE(review): the input is microphone-only, so "Upload an
                # audio file" may not match the UI -- confirm the wording.
                description="Upload an audio file and hit the 'Submit' button",
            )
    block.launch()


if __name__ == "__main__":
    main()
|
extra/generate_audio.sh
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Generate the spoken test sample from script.txt (macOS only).
#
# `say` renders script.txt to script.aiff, then `lame` encodes that as a mono
# MP3. Paths are resolved relative to this script's directory (extra/), so the
# script behaves the same regardless of the caller's working directory.

# Stop at the first failing command so `lame` never runs on a missing or
# partial AIFF file.
set -euo pipefail

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

if [[ "$(uname)" == "Darwin" ]]; then
    say -v Daniel -f "${script_dir}/script.txt" -o "${script_dir}/script.aiff"
    lame -m m "${script_dir}/script.aiff" "${script_dir}/thisisatest.mp3"
else
    # Report on stderr and fail so callers can detect that nothing was generated.
    echo "This script only works on Mac OS" >&2
    exit 1
fi
|
extra/script.aiff
ADDED
Binary file (296 kB). View file
|
|
extra/script.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[[ slnc 5000 ]]
|
2 |
+
[[rate 150]][[volm 0.9]]
|
3 |
+
This is a test
|
4 |
+
[[ slnc 1300 ]]
|
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "testing-sentiment"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "A simple repo to add testing to an AI app"
|
5 |
+
authors = ["Enrique Sanchez <je.sanchezb@gmail.com>"]
|
6 |
+
license = "MIT"
|
7 |
+
readme = "README.md"
|
8 |
+
|
9 |
+
[tool.poetry.dependencies]
|
10 |
+
python = ">=3.10,<3.12"
|
11 |
+
faster-whisper = "^0.10.0"
|
12 |
+
transformers = "^4.37.1"
|
13 |
+
gradio = "^4.16.0"
|
14 |
+
ruff = "^0.1.14"
|
15 |
+
torch = "^2.1.2"
|
16 |
+
|
17 |
+
|
18 |
+
[tool.poetry.group.dev.dependencies]
|
19 |
+
pytest = "^8.0.0"
|
20 |
+
|
21 |
+
[build-system]
|
22 |
+
requires = ["poetry-core"]
|
23 |
+
build-backend = "poetry.core.masonry.api"
|
src/__init__.py
ADDED
File without changes
|
src/sentiment.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Get sentiment analysis results for the given text
|
3 |
+
"""
|
4 |
+
|
5 |
+
from transformers import pipeline
|
6 |
+
|
7 |
+
sentiment_pipeline = pipeline(model="SamLowe/roberta-base-go_emotions")
|
8 |
+
|
9 |
+
|
10 |
+
def analyze_sentiment(text: str) -> dict:
    """Run the sentiment pipeline on *text* and map each label to its score.

    If anything raises, the error is printed and an empty dict is returned.
    """
    try:
        scores = {}
        for prediction in sentiment_pipeline(text):
            scores[prediction["label"]] = prediction["score"]
    except Exception as err:
        print(f"Error in analyze_sentiment: {err}")
        return {}
    return scores
|
src/transcribe.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
This module is used to transcribe the audio file and return the text
|
3 |
+
"""
|
4 |
+
|
5 |
+
from faster_whisper import WhisperModel
|
6 |
+
|
7 |
+
|
8 |
+
model = WhisperModel("small", device="cpu", compute_type="int8")
|
9 |
+
|
10 |
+
|
11 |
+
def transcribe_audio(audio_file: str) -> str:
    """Transcribe an audio file to text.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The text of every transcribed segment joined together, or ``""`` when
        no segment was produced or transcription failed (the error is
        printed).
    """
    try:
        segments, _ = model.transcribe(audio_file, beam_size=5)
        # Join all segments rather than taking only the first one, so longer
        # recordings are not truncated and audio with no segments simply
        # yields "". The iteration stays inside the try because errors may
        # surface while the segments are being consumed.
        return "".join(segment.text for segment in segments)
    except Exception as e:
        print(f"Error in transcribe_audio: {e}")
        return ""
|
tests/__init__.py
ADDED
File without changes
|
tests/samplings/thisisatest.mp3
ADDED
Binary file (26.7 kB). View file
|
|
tests/test_sentiment.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.sentiment import analyze_sentiment
|
2 |
+
|
3 |
+
|
4 |
+
def test_analyze_sentiment_positives():
    """A clearly affectionate sentence should be labelled "love"."""
    result = analyze_sentiment("I love this movie!")
    for expected_label in ["love"]:
        assert expected_label in result
|
10 |
+
|
11 |
+
|
12 |
+
def test_analyze_sentiment_negatives():
    """A hostile sentence should be labelled "anger"."""
    result = analyze_sentiment("I hate this product!")
    for expected_label in ["anger"]:
        assert expected_label in result
|
18 |
+
|
19 |
+
|
20 |
+
def test_analyze_sentiment_empty():
    """Empty input should be labelled "neutral"."""
    result = analyze_sentiment("")
    for expected_label in ["neutral"]:
        assert expected_label in result
|
26 |
+
|
27 |
+
|
28 |
+
def test_analyze_sentiment_neutral():
    """An everyday greeting should be labelled "neutral"."""
    result = analyze_sentiment("hello how are you")
    for expected_label in ["neutral"]:
        assert expected_label in result
|
tests/test_transcribe.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from src.transcribe import transcribe_audio
|
3 |
+
|
4 |
+
|
5 |
+
def test_transcribe_audio_success():
    """The bundled sample recording is transcribed as "This is a test"."""
    tests_dir = os.path.dirname(os.path.abspath(__file__))
    sample_path = os.path.join(tests_dir, "samplings/thisisatest.mp3")
    transcription = transcribe_audio(sample_path)
    # Surrounding whitespace and trailing periods are stripped before comparing.
    assert transcription.strip().rstrip(".") == "This is a test"
|
11 |
+
|
12 |
+
|
13 |
+
def test_transcribe_audio_failure():
    """A path that does not exist should yield an empty transcription."""
    missing_path = "samplings/nonexistent_audio.wav"
    assert transcribe_audio(missing_path) == ""
|