Enrique Sanchez committed on
Commit
52a3e7e
0 Parent(s):

First commit

Browse files
.gitignore ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+
53
+ # Translations
54
+ *.mo
55
+ *.pot
56
+
57
+ # Django stuff:
58
+ *.log
59
+ local_settings.py
60
+ db.sqlite3
61
+ db.sqlite3-journal
62
+
63
+ # Flask stuff:
64
+ instance/
65
+ .webassets-cache
66
+
67
+ # Scrapy stuff:
68
+ .scrapy
69
+
70
+ # Sphinx documentation
71
+ docs/_build/
72
+
73
+ # PyBuilder
74
+ target/
75
+
76
+ # Jupyter Notebook
77
+ .ipynb_checkpoints
78
+
79
+ # IPython
80
+ profile_default/
81
+ ipython_config.py
82
+
83
+ # pyenv
84
+ .python-version
85
+
86
+ # pipenv
87
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
88
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
89
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
90
+ # install all needed dependencies.
91
+ #Pipfile.lock
92
+
93
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
94
+ __pypackages__/
95
+
96
+ # Celery stuff
97
+ celerybeat-schedule
98
+ celerybeat.pid
99
+
100
+ # SageMath parsed files
101
+ *.sage.py
102
+
103
+ # Environments
104
+ .env
105
+ .venv
106
+ env/
107
+ venv/
108
+ ENV/
109
+ env.bak/
110
+ venv.bak/
111
+
112
+ # Spyder project settings
113
+ .spyderproject
114
+ .spyproject
115
+
116
+ # Rope project settings
117
+ .ropeproject
118
+
119
+ # mkdocs documentation
120
+ /site
121
+
122
+ # mypy
123
+ .mypy_cache/
124
+ .dmypy.json
125
+ dmypy.json
126
+
127
+ # Pyre type checker
128
+ .pyre/
129
+
130
+ # pytype static type analyzer
131
+ .pytype/
132
+
133
+ # Cython debug symbols
134
+ cython_debug/
.vscode/settings.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "python.analysis.extraPaths": ["./src"]
3
+ }
README.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Sentiment Analysis testing
2
+
3
+ A small repo for experimenting with tests for an app that transcribes spoken sentences and detects the emotions in them.
4
+
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from src.sentiment import analyze_sentiment
3
+ from src.transcribe import transcribe_audio
4
+
5
# HTML heading rendered at the top of the Gradio page.
TITLE = """<h1 align="center">🎤 Emotion Detection 💬</h1>"""

# Lookup table from an emotion label to the emoji displayed next to it.
# NOTE(review): the keys presumably mirror the labels emitted by the
# go_emotions model used in src/sentiment.py — confirm against the model card.
# Labels missing from this table are rendered without an emoji.
EMOJI_MAPPING = {
    "disappointment": "😞",
    "sadness": "😢",
    "annoyance": "😠",
    "neutral": "😐",
    "disapproval": "👎",
    "realization": "😮",
    "nervousness": "😬",
    "approval": "👍",
    "joy": "😄",
    "anger": "😡",
    "embarrassment": "😳",
    "caring": "🤗",
    "remorse": "😔",
    "disgust": "🤢",
    "grief": "😥",
    "confusion": "😕",
    "relief": "😌",
    "desire": "😍",
    "admiration": "😌",
    "optimism": "😊",
    "fear": "😨",
    "love": "❤️",
    "excitement": "🎉",
    "curiosity": "🤔",
    "amusement": "😄",
    "surprise": "😲",
    "gratitude": "🙏",
    "pride": "🦁",
}
37
+
38
+
39
def get_sentiment_emoji(sentiment: str) -> str:
    """Look up the emoji for an emotion label.

    Args:
        sentiment: Emotion label to look up in ``EMOJI_MAPPING``.

    Returns:
        The mapped emoji, or an empty string when the label has no entry.
    """
    try:
        return EMOJI_MAPPING[sentiment]
    except KeyError:
        # Unknown labels are shown without an emoji rather than failing.
        return ""
42
+
43
+
44
def display_sentiment_results(sentiment_results: dict) -> str:
    """Format sentiment analysis results as display text.

    Args:
        sentiment_results: Mapping whose keys are emotion labels. Only the
            keys are used; the values are ignored.

    Returns:
        One ``"<label> <emoji>\\n"`` line per label, in the mapping's
        iteration order. An empty mapping yields an empty string.
    """
    # Iterate the keys directly (the values are never shown) and build the
    # text in a single join instead of repeated string concatenation.
    return "".join(
        f"{sentiment} {get_sentiment_emoji(sentiment)}\n"
        for sentiment in sentiment_results
    )
51
+
52
+
53
def get_ouput(audio_file: str) -> tuple[str, str]:
    """Transcribe an audio file and describe the emotions found in the text.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        A ``(transcription, emotions)`` pair. When the transcription is empty
        or any step raises, the pair is ``("", "Error in transcription.")``.
    """
    try:
        text = transcribe_audio(audio_file)
        # transcribe_audio signals failure by returning "" rather than
        # raising, so an empty transcription must be reported here instead of
        # being passed on to sentiment analysis.
        if not text.strip():
            return "", "Error in transcription."
        sentiment = analyze_sentiment(text)
        return text, display_sentiment_results(sentiment)
    except Exception as e:
        # UI boundary: never let an exception escape into the Gradio callback.
        print(f"Error in get_ouput: {e}")
        return "", "Error in transcription."
62
+
63
+
64
def main():
    """Build the Gradio page and launch it.

    The page shows the title, a microphone audio input, and two text boxes
    (transcription and emotion analysis) wired to ``get_ouput`` through a
    ``gr.Interface``.
    """
    block = gr.Blocks()
    with block:
        gr.HTML(TITLE)

        with gr.Group():
            audio_input = gr.Audio(sources=["microphone"], type="filepath")
            output_text = gr.Textbox(label="Transcription")
            emotion_output = gr.Textbox(label="Emotion Analysis")

        gr.Interface(
            fn=get_ouput,
            inputs=audio_input,
            outputs=[output_text, emotion_output],
            title="Get the text and the sentiment",
            # Single literal: a backslash continuation inside the string would
            # embed the next line's indentation into the displayed text.
            description="Upload an audio file and hit the 'Submit' button",
        )
    block.launch()
84
+
85
+
86
# Start the UI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
extra/generate_audio.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Render extra/script.txt to speech and encode it as an MP3 sample.
# Paths are relative, so run this from the repository root.
# Requires the macOS `say` command and the `lame` encoder.

# Stop at the first failing command so `lame` never runs on a missing or
# partial AIFF file.
set -euo pipefail

if [[ "$(uname)" == "Darwin" ]]; then
    # Speak the script with the "Daniel" voice into an AIFF file.
    say -v Daniel -f extra/script.txt -o extra/script.aiff
    # Encode the AIFF as a mono (-m m) MP3.
    lame -m m extra/script.aiff extra/thisisatest.mp3
else
    # Report the unsupported platform on stderr with a failing exit status so
    # callers can detect that nothing was generated.
    echo "This script only works on Mac OS" >&2
    exit 1
fi
extra/script.aiff ADDED
Binary file (296 kB). View file
 
extra/script.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [[ slnc 5000 ]]
2
+ [[rate 150]][[volm 0.9]]
3
+ This is a test
4
+ [[ slnc 1300 ]]
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "testing-sentiment"
3
+ version = "0.1.0"
4
+ description = "A simple repo to add testing to an AI app"
5
+ authors = ["Enrique Sanchez <je.sanchezb@gmail.com>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+
9
+ [tool.poetry.dependencies]
10
+ python = ">=3.10,<3.12"
11
+ faster-whisper = "^0.10.0"
12
+ transformers = "^4.37.1"
13
+ gradio = "^4.16.0"
14
+ ruff = "^0.1.14"
15
+ torch = "^2.1.2"
16
+
17
+
18
+ [tool.poetry.group.dev.dependencies]
19
+ pytest = "^8.0.0"
20
+
21
+ [build-system]
22
+ requires = ["poetry-core"]
23
+ build-backend = "poetry.core.masonry.api"
src/__init__.py ADDED
File without changes
src/sentiment.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Get sentiment analysis results for the given text
3
+ """
4
+
5
+ from transformers import pipeline
6
+
7
# Module-level pipeline: created once when this module is imported and reused
# by every analyze_sentiment() call.
sentiment_pipeline = pipeline(model="SamLowe/roberta-base-go_emotions")
8
+
9
+
10
def analyze_sentiment(text: str) -> dict:
    """Run the sentiment pipeline on ``text`` and collect label scores.

    Args:
        text: The text to analyse.

    Returns:
        A dict mapping each returned ``label`` to its ``score``. If the
        pipeline call or result handling raises, the error is printed and an
        empty dict is returned.
    """
    try:
        scores_by_label = {}
        for prediction in sentiment_pipeline(text):
            scores_by_label[prediction["label"]] = prediction["score"]
    except Exception as e:
        # Best-effort: callers receive an empty result instead of an exception.
        print(f"Error in analyze_sentiment: {e}")
        return {}
    return scores_by_label
src/transcribe.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module is used to transcribe the audio file and return the text
3
+ """
4
+
5
+ from faster_whisper import WhisperModel
6
+
7
+
8
# Module-level Whisper model ("small", CPU, int8 compute type): created once
# when this module is imported and shared by every transcribe_audio() call.
model = WhisperModel("small", device="cpu", compute_type="int8")
9
+
10
+
11
def transcribe_audio(audio_file: str) -> str:
    """Transcribe an audio file to text.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The text of all transcribed segments concatenated in order, or an
        empty string when nothing was transcribed or an error occurred (the
        error is printed, not raised).
    """
    try:
        segments, _ = model.transcribe(audio_file, beam_size=5)
        # Join every segment: taking only the first one would truncate any
        # recording that is split into several segments. The iteration stays
        # inside the try so errors raised while consuming `segments` are
        # handled as well.
        return "".join(segment.text for segment in segments)
    except Exception as e:
        print(f"Error in transcribe_audio: {e}")
        return ""
tests/__init__.py ADDED
File without changes
tests/samplings/thisisatest.mp3 ADDED
Binary file (26.7 kB). View file
 
tests/test_sentiment.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.sentiment import analyze_sentiment
2
+
3
+
4
def test_analyze_sentiment_positives():
    """A clearly positive sentence should be labelled with "love"."""
    result = analyze_sentiment("I love this movie!")
    for expected_label in ["love"]:
        assert expected_label in result
10
+
11
+
12
def test_analyze_sentiment_negatives():
    """A clearly negative sentence should be labelled with "anger"."""
    result = analyze_sentiment("I hate this product!")
    for expected_label in ["anger"]:
        assert expected_label in result
18
+
19
+
20
def test_analyze_sentiment_empty():
    """An empty string should be classified as "neutral"."""
    # Empty input
    text = ""
    expected_keys = ["neutral"]
    result = analyze_sentiment(text)
    # Membership on the dict itself checks its keys; .keys() is redundant.
    assert all(key in result for key in expected_keys)
26
+
27
+
28
def test_analyze_sentiment_neutral():
    """A plain greeting should be classified as "neutral"."""
    # Neutral sentiment
    text = "hello how are you"
    expected_keys = ["neutral"]
    result = analyze_sentiment(text)
    # Membership on the dict itself checks its keys; .keys() is redundant.
    assert all(key in result for key in expected_keys)
tests/test_transcribe.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from src.transcribe import transcribe_audio
3
+
4
+
5
def test_transcribe_audio_success():
    """The bundled sample recording should transcribe to "This is a test"."""
    # Resolve the sample relative to this test file so the test does not
    # depend on the current working directory.
    tests_dir = os.path.dirname(os.path.abspath(__file__))
    sample_path = os.path.join(tests_dir, "samplings/thisisatest.mp3")
    transcript = transcribe_audio(sample_path)
    # Surrounding whitespace and trailing periods are ignored in the comparison.
    assert transcript.strip().rstrip(".") == "This is a test"
11
+
12
+
13
def test_transcribe_audio_failure():
    """A path that does not exist should yield an empty transcription."""
    missing_path = "samplings/nonexistent_audio.wav"
    transcript = transcribe_audio(missing_path)
    # transcribe_audio reports failure by returning "" instead of raising.
    assert transcript == ""