Spaces:
Configuration error
Configuration error
Enrique Sanchez
committed on
Commit
•
969ccb5
1
Parent(s):
0624de9
improve testing and fixes for the workflow
Browse files- .github/workflows/main.yml +4 -12
- poetry.lock +86 -1
- pyproject.toml +1 -0
- src/summarization.py +29 -3
- tests/test_sentiments_and_topics.py +0 -34
- tests/test_summarization.py +69 -0
.github/workflows/main.yml
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
# This workflow will install Python dependencies using Poetry, run tests and lint with a single version of Python using Ruff
|
2 |
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
|
3 |
|
4 |
-
name:
|
5 |
|
6 |
on:
|
7 |
push:
|
@@ -26,7 +26,7 @@ jobs:
|
|
26 |
ollama pull llama2
|
27 |
- uses: actions/checkout@v4
|
28 |
- name: Set up Python 3.10
|
29 |
-
uses: actions/setup-python@
|
30 |
with:
|
31 |
python-version: "3.10"
|
32 |
- name: Install Poetry
|
@@ -36,17 +36,9 @@ jobs:
|
|
36 |
- name: Install dependencies with Poetry
|
37 |
run: |
|
38 |
poetry install
|
39 |
-
lint:
|
40 |
-
needs: build
|
41 |
-
runs-on: ubuntu-latest
|
42 |
-
steps:
|
43 |
- name: Lint with ruff
|
44 |
run: |
|
45 |
-
poetry run ruff
|
46 |
-
test:
|
47 |
-
needs: lint
|
48 |
-
runs-on: ubuntu-latest
|
49 |
-
steps:
|
50 |
- name: Test with pytest
|
51 |
run: |
|
52 |
-
poetry run pytest --reruns 1 --reruns-delay 1
|
|
|
1 |
# This workflow will install Python dependencies using Poetry, run tests and lint with a single version of Python using Ruff
|
2 |
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
|
3 |
|
4 |
+
name: Sentiment Analysis for Voice
|
5 |
|
6 |
on:
|
7 |
push:
|
|
|
26 |
ollama pull llama2
|
27 |
- uses: actions/checkout@v4
|
28 |
- name: Set up Python 3.10
|
29 |
+
uses: actions/setup-python@v4
|
30 |
with:
|
31 |
python-version: "3.10"
|
32 |
- name: Install Poetry
|
|
|
36 |
- name: Install dependencies with Poetry
|
37 |
run: |
|
38 |
poetry install
|
|
|
|
|
|
|
|
|
39 |
- name: Lint with ruff
|
40 |
run: |
|
41 |
+
poetry run ruff --output-format=github .
|
|
|
|
|
|
|
|
|
42 |
- name: Test with pytest
|
43 |
run: |
|
44 |
+
poetry run pytest --reruns 1 --reruns-delay 1 --junitxml=junit/test-results.xml --cov=com --cov-report=xml --cov-report=html
|
poetry.lock
CHANGED
@@ -487,6 +487,73 @@ mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.6.1)", "types-Pill
|
|
487 |
test = ["Pillow", "contourpy[test-no-images]", "matplotlib"]
|
488 |
test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"]
|
489 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
490 |
[[package]]
|
491 |
name = "ctranslate2"
|
492 |
version = "3.24.0"
|
@@ -2461,6 +2528,24 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
|
|
2461 |
[package.extras]
|
2462 |
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
|
2463 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2464 |
[[package]]
|
2465 |
name = "pytest-rerunfailures"
|
2466 |
version = "13.0"
|
@@ -3819,4 +3904,4 @@ multidict = ">=4.0"
|
|
3819 |
[metadata]
|
3820 |
lock-version = "2.0"
|
3821 |
python-versions = ">=3.10,<3.12"
|
3822 |
-
content-hash = "
|
|
|
487 |
test = ["Pillow", "contourpy[test-no-images]", "matplotlib"]
|
488 |
test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"]
|
489 |
|
490 |
+
[[package]]
|
491 |
+
name = "coverage"
|
492 |
+
version = "7.4.1"
|
493 |
+
description = "Code coverage measurement for Python"
|
494 |
+
optional = false
|
495 |
+
python-versions = ">=3.8"
|
496 |
+
files = [
|
497 |
+
{file = "coverage-7.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:077d366e724f24fc02dbfe9d946534357fda71af9764ff99d73c3c596001bbd7"},
|
498 |
+
{file = "coverage-7.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0193657651f5399d433c92f8ae264aff31fc1d066deee4b831549526433f3f61"},
|
499 |
+
{file = "coverage-7.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d17bbc946f52ca67adf72a5ee783cd7cd3477f8f8796f59b4974a9b59cacc9ee"},
|
500 |
+
{file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3277f5fa7483c927fe3a7b017b39351610265308f5267ac6d4c2b64cc1d8d25"},
|
501 |
+
{file = "coverage-7.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dceb61d40cbfcf45f51e59933c784a50846dc03211054bd76b421a713dcdf19"},
|
502 |
+
{file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6008adeca04a445ea6ef31b2cbaf1d01d02986047606f7da266629afee982630"},
|
503 |
+
{file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c61f66d93d712f6e03369b6a7769233bfda880b12f417eefdd4f16d1deb2fc4c"},
|
504 |
+
{file = "coverage-7.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b9bb62fac84d5f2ff523304e59e5c439955fb3b7f44e3d7b2085184db74d733b"},
|
505 |
+
{file = "coverage-7.4.1-cp310-cp310-win32.whl", hash = "sha256:f86f368e1c7ce897bf2457b9eb61169a44e2ef797099fb5728482b8d69f3f016"},
|
506 |
+
{file = "coverage-7.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:869b5046d41abfea3e381dd143407b0d29b8282a904a19cb908fa24d090cc018"},
|
507 |
+
{file = "coverage-7.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8ffb498a83d7e0305968289441914154fb0ef5d8b3157df02a90c6695978295"},
|
508 |
+
{file = "coverage-7.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3cacfaefe6089d477264001f90f55b7881ba615953414999c46cc9713ff93c8c"},
|
509 |
+
{file = "coverage-7.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d6850e6e36e332d5511a48a251790ddc545e16e8beaf046c03985c69ccb2676"},
|
510 |
+
{file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18e961aa13b6d47f758cc5879383d27b5b3f3dcd9ce8cdbfdc2571fe86feb4dd"},
|
511 |
+
{file = "coverage-7.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfd1e1b9f0898817babf840b77ce9fe655ecbe8b1b327983df485b30df8cc011"},
|
512 |
+
{file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6b00e21f86598b6330f0019b40fb397e705135040dbedc2ca9a93c7441178e74"},
|
513 |
+
{file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:536d609c6963c50055bab766d9951b6c394759190d03311f3e9fcf194ca909e1"},
|
514 |
+
{file = "coverage-7.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7ac8f8eb153724f84885a1374999b7e45734bf93a87d8df1e7ce2146860edef6"},
|
515 |
+
{file = "coverage-7.4.1-cp311-cp311-win32.whl", hash = "sha256:f3771b23bb3675a06f5d885c3630b1d01ea6cac9e84a01aaf5508706dba546c5"},
|
516 |
+
{file = "coverage-7.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:9d2f9d4cc2a53b38cabc2d6d80f7f9b7e3da26b2f53d48f05876fef7956b6968"},
|
517 |
+
{file = "coverage-7.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f68ef3660677e6624c8cace943e4765545f8191313a07288a53d3da188bd8581"},
|
518 |
+
{file = "coverage-7.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23b27b8a698e749b61809fb637eb98ebf0e505710ec46a8aa6f1be7dc0dc43a6"},
|
519 |
+
{file = "coverage-7.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e3424c554391dc9ef4a92ad28665756566a28fecf47308f91841f6c49288e66"},
|
520 |
+
{file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0860a348bf7004c812c8368d1fc7f77fe8e4c095d661a579196a9533778e156"},
|
521 |
+
{file = "coverage-7.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe558371c1bdf3b8fa03e097c523fb9645b8730399c14fe7721ee9c9e2a545d3"},
|
522 |
+
{file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3468cc8720402af37b6c6e7e2a9cdb9f6c16c728638a2ebc768ba1ef6f26c3a1"},
|
523 |
+
{file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:02f2edb575d62172aa28fe00efe821ae31f25dc3d589055b3fb64d51e52e4ab1"},
|
524 |
+
{file = "coverage-7.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ca6e61dc52f601d1d224526360cdeab0d0712ec104a2ce6cc5ccef6ed9a233bc"},
|
525 |
+
{file = "coverage-7.4.1-cp312-cp312-win32.whl", hash = "sha256:ca7b26a5e456a843b9b6683eada193fc1f65c761b3a473941efe5a291f604c74"},
|
526 |
+
{file = "coverage-7.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:85ccc5fa54c2ed64bd91ed3b4a627b9cce04646a659512a051fa82a92c04a448"},
|
527 |
+
{file = "coverage-7.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8bdb0285a0202888d19ec6b6d23d5990410decb932b709f2b0dfe216d031d218"},
|
528 |
+
{file = "coverage-7.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:918440dea04521f499721c039863ef95433314b1db00ff826a02580c1f503e45"},
|
529 |
+
{file = "coverage-7.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:379d4c7abad5afbe9d88cc31ea8ca262296480a86af945b08214eb1a556a3e4d"},
|
530 |
+
{file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b094116f0b6155e36a304ff912f89bbb5067157aff5f94060ff20bbabdc8da06"},
|
531 |
+
{file = "coverage-7.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2f5968608b1fe2a1d00d01ad1017ee27efd99b3437e08b83ded9b7af3f6f766"},
|
532 |
+
{file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:10e88e7f41e6197ea0429ae18f21ff521d4f4490aa33048f6c6f94c6045a6a75"},
|
533 |
+
{file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a4a3907011d39dbc3e37bdc5df0a8c93853c369039b59efa33a7b6669de04c60"},
|
534 |
+
{file = "coverage-7.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6d224f0c4c9c98290a6990259073f496fcec1b5cc613eecbd22786d398ded3ad"},
|
535 |
+
{file = "coverage-7.4.1-cp38-cp38-win32.whl", hash = "sha256:23f5881362dcb0e1a92b84b3c2809bdc90db892332daab81ad8f642d8ed55042"},
|
536 |
+
{file = "coverage-7.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:a07f61fc452c43cd5328b392e52555f7d1952400a1ad09086c4a8addccbd138d"},
|
537 |
+
{file = "coverage-7.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8e738a492b6221f8dcf281b67129510835461132b03024830ac0e554311a5c54"},
|
538 |
+
{file = "coverage-7.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:46342fed0fff72efcda77040b14728049200cbba1279e0bf1188f1f2078c1d70"},
|
539 |
+
{file = "coverage-7.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9641e21670c68c7e57d2053ddf6c443e4f0a6e18e547e86af3fad0795414a628"},
|
540 |
+
{file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aeb2c2688ed93b027eb0d26aa188ada34acb22dceea256d76390eea135083950"},
|
541 |
+
{file = "coverage-7.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d12c923757de24e4e2110cf8832d83a886a4cf215c6e61ed506006872b43a6d1"},
|
542 |
+
{file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0491275c3b9971cdbd28a4595c2cb5838f08036bca31765bad5e17edf900b2c7"},
|
543 |
+
{file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8dfc5e195bbef80aabd81596ef52a1277ee7143fe419efc3c4d8ba2754671756"},
|
544 |
+
{file = "coverage-7.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1a78b656a4d12b0490ca72651fe4d9f5e07e3c6461063a9b6265ee45eb2bdd35"},
|
545 |
+
{file = "coverage-7.4.1-cp39-cp39-win32.whl", hash = "sha256:f90515974b39f4dea2f27c0959688621b46d96d5a626cf9c53dbc653a895c05c"},
|
546 |
+
{file = "coverage-7.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:64e723ca82a84053dd7bfcc986bdb34af8d9da83c521c19d6b472bc6880e191a"},
|
547 |
+
{file = "coverage-7.4.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:32a8d985462e37cfdab611a6f95b09d7c091d07668fdc26e47a725ee575fe166"},
|
548 |
+
{file = "coverage-7.4.1.tar.gz", hash = "sha256:1ed4b95480952b1a26d863e546fa5094564aa0065e1e5f0d4d0041f293251d04"},
|
549 |
+
]
|
550 |
+
|
551 |
+
[package.dependencies]
|
552 |
+
tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""}
|
553 |
+
|
554 |
+
[package.extras]
|
555 |
+
toml = ["tomli"]
|
556 |
+
|
557 |
[[package]]
|
558 |
name = "ctranslate2"
|
559 |
version = "3.24.0"
|
|
|
2528 |
[package.extras]
|
2529 |
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
|
2530 |
|
2531 |
+
[[package]]
|
2532 |
+
name = "pytest-cov"
|
2533 |
+
version = "4.1.0"
|
2534 |
+
description = "Pytest plugin for measuring coverage."
|
2535 |
+
optional = false
|
2536 |
+
python-versions = ">=3.7"
|
2537 |
+
files = [
|
2538 |
+
{file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"},
|
2539 |
+
{file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"},
|
2540 |
+
]
|
2541 |
+
|
2542 |
+
[package.dependencies]
|
2543 |
+
coverage = {version = ">=5.2.1", extras = ["toml"]}
|
2544 |
+
pytest = ">=4.6"
|
2545 |
+
|
2546 |
+
[package.extras]
|
2547 |
+
testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"]
|
2548 |
+
|
2549 |
[[package]]
|
2550 |
name = "pytest-rerunfailures"
|
2551 |
version = "13.0"
|
|
|
3904 |
[metadata]
|
3905 |
lock-version = "2.0"
|
3906 |
python-versions = ">=3.10,<3.12"
|
3907 |
+
content-hash = "29545ccc27b7856e223d0565b7fb4f22a4ff6cd3f8b1960b83e3875ab100a871"
|
pyproject.toml
CHANGED
@@ -22,6 +22,7 @@ pytest-rerunfailures = "^13.0"
|
|
22 |
|
23 |
[tool.poetry.group.dev.dependencies]
|
24 |
pytest = "^8.0.0"
|
|
|
25 |
|
26 |
[build-system]
|
27 |
requires = ["poetry-core"]
|
|
|
22 |
|
23 |
[tool.poetry.group.dev.dependencies]
|
24 |
pytest = "^8.0.0"
|
25 |
+
pytest-cov = "^4.1.0"
|
26 |
|
27 |
[build-system]
|
28 |
requires = ["poetry-core"]
|
src/summarization.py
CHANGED
@@ -13,7 +13,7 @@ def topics_for_text(file_conv: str, llm: str = "mistral") -> str:
|
|
13 |
Returns:
|
14 |
str: The topics for the text.
|
15 |
"""
|
16 |
-
prompt_template = """The next text is a conversation between two people about a topic. Your task is to summarize the conversation in only a list of words that describe the conversation. The list of words should be separated by a comma. The conversation is the following:
|
17 |
"{text}"
|
18 |
TOPICS:"""
|
19 |
|
@@ -21,10 +21,36 @@ def topics_for_text(file_conv: str, llm: str = "mistral") -> str:
|
|
21 |
llm = Ollama(model=llm)
|
22 |
|
23 |
loader = TextLoader(file_conv)
|
24 |
-
|
25 |
# Define StuffDocumentsChain
|
26 |
chain = load_summarize_chain(
|
27 |
llm, chain_type="stuff", prompt=prompt, input_key="text"
|
28 |
)
|
29 |
-
result = chain({"text":
|
30 |
return [element.strip().lower() for element in result["output_text"].split(", ")]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
Returns:
|
14 |
str: The topics for the text.
|
15 |
"""
|
16 |
+
prompt_template = """The next text is a conversation between two people about a topic. Your task is to summarize the conversation in only a list of words that describe the conversation. Keep the list short (max 5 items) and every topic has to be described by one word. The list of words should be separated by a comma. The conversation is the following:
|
17 |
"{text}"
|
18 |
TOPICS:"""
|
19 |
|
|
|
21 |
llm = Ollama(model=llm)
|
22 |
|
23 |
loader = TextLoader(file_conv)
|
24 |
+
doc = loader.load()
|
25 |
# Define StuffDocumentsChain
|
26 |
chain = load_summarize_chain(
|
27 |
llm, chain_type="stuff", prompt=prompt, input_key="text"
|
28 |
)
|
29 |
+
result = chain.invoke({"text": doc}, return_only_outputs=True)
|
30 |
return [element.strip().lower() for element in result["output_text"].split(", ")]
|
31 |
+
|
32 |
+
|
33 |
+
def summarize(file_conv: str, llm: str = "mistral") -> str:
|
34 |
+
"""Summarize a conversation.
|
35 |
+
|
36 |
+
Args:
|
37 |
+
file_conv (str): The file with the conversation.
|
38 |
+
|
39 |
+
Returns:
|
40 |
+
str: The summary of the conversation.
|
41 |
+
"""
|
42 |
+
prompt_template = """The next text is a conversation between two people about a topic. Your task is to summarize the conversation in one sentence. The conversation is the following:
|
43 |
+
"{text}"
|
44 |
+
SUMMARY:"""
|
45 |
+
|
46 |
+
prompt = PromptTemplate.from_template(prompt_template)
|
47 |
+
llm = Ollama(model=llm)
|
48 |
+
|
49 |
+
loader = TextLoader(file_conv)
|
50 |
+
doc = loader.load()
|
51 |
+
# Define StuffDocumentsChain
|
52 |
+
chain = load_summarize_chain(
|
53 |
+
llm, chain_type="stuff", prompt=prompt, input_key="text"
|
54 |
+
)
|
55 |
+
result = chain.invoke({"text": doc}, return_only_outputs=True)
|
56 |
+
return result["output_text"]
|
tests/test_sentiments_and_topics.py
DELETED
@@ -1,34 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import tempfile
|
3 |
-
|
4 |
-
import pytest
|
5 |
-
|
6 |
-
from src.generate_conversation import generate_conversation
|
7 |
-
from src.sentiment import analyze_sentiment
|
8 |
-
from src.summarization import topics_for_text
|
9 |
-
|
10 |
-
SENTIMENTS = ["neutral", "sadness", "neutral", "joy"]
|
11 |
-
TOPICS = ["ai", "soccer", "art", "science"]
|
12 |
-
|
13 |
-
|
14 |
-
@pytest.mark.parametrize("sentiment", SENTIMENTS)
|
15 |
-
@pytest.mark.parametrize("topic", TOPICS)
|
16 |
-
def test_generate_conversation(sentiment, topic):
|
17 |
-
# Call the function
|
18 |
-
conversation = generate_conversation(topic, sentiment, llm="llama2")
|
19 |
-
|
20 |
-
new_sentiment = analyze_sentiment(conversation)
|
21 |
-
# Assert that the conversation has a positive sentiment
|
22 |
-
|
23 |
-
assert (
|
24 |
-
sentiment in s for s in new_sentiment
|
25 |
-
), "Sentiment is not in the index of new_sentiment"
|
26 |
-
|
27 |
-
# Save the conversation to a temporary text file
|
28 |
-
with tempfile.NamedTemporaryFile(mode="w", delete=False) as file:
|
29 |
-
file.write(conversation)
|
30 |
-
temp_filepath = file.name
|
31 |
-
|
32 |
-
new_topics = topics_for_text(temp_filepath)
|
33 |
-
os.remove(temp_filepath)
|
34 |
-
assert any(topic in word for word in new_topics)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_summarization.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import tempfile
|
3 |
+
|
4 |
+
import pytest
|
5 |
+
from langchain_community.llms import Ollama
|
6 |
+
from langchain_core.output_parsers import JsonOutputParser
|
7 |
+
from langchain_core.prompts import PromptTemplate
|
8 |
+
|
9 |
+
from src.generate_conversation import generate_conversation
|
10 |
+
from src.summarization import summarize
|
11 |
+
from src.summarization import topics_for_text
|
12 |
+
|
13 |
+
# SENTIMENTS = ["neutral", "sadness", "joy"]
|
14 |
+
# TOPICS = ["ai", "soccer", "art"]
|
15 |
+
SENTIMENTS = ["joy"]
|
16 |
+
TOPICS = ["ai"]
|
17 |
+
|
18 |
+
|
19 |
+
@pytest.mark.parametrize("sentiment", SENTIMENTS)
|
20 |
+
@pytest.mark.parametrize("topic", TOPICS)
|
21 |
+
def test_topics_conversation(sentiment, topic):
|
22 |
+
# Call the function
|
23 |
+
conversation = generate_conversation(topic, sentiment, llm="llama2")
|
24 |
+
|
25 |
+
# Save the conversation to a temporary text file
|
26 |
+
with tempfile.NamedTemporaryFile(mode="w", delete=False) as file:
|
27 |
+
file.write(conversation)
|
28 |
+
temp_filepath = file.name
|
29 |
+
|
30 |
+
new_topics = topics_for_text(temp_filepath)
|
31 |
+
os.remove(temp_filepath)
|
32 |
+
assert any(topic in word for word in new_topics)
|
33 |
+
|
34 |
+
|
35 |
+
@pytest.mark.parametrize("sentiment", SENTIMENTS)
|
36 |
+
@pytest.mark.parametrize("topic", TOPICS)
|
37 |
+
def test_summary_conversation(sentiment, topic):
|
38 |
+
# Call the function
|
39 |
+
conversation = generate_conversation(topic, sentiment, llm="llama2")
|
40 |
+
|
41 |
+
# Save the conversation to a temporary text file
|
42 |
+
with tempfile.NamedTemporaryFile(mode="w", delete=False) as file:
|
43 |
+
file.write(conversation)
|
44 |
+
temp_filepath = file.name
|
45 |
+
|
46 |
+
summary = summarize(temp_filepath)
|
47 |
+
os.remove(temp_filepath)
|
48 |
+
|
49 |
+
assert summary != ""
|
50 |
+
|
51 |
+
model = Ollama(model="llama2")
|
52 |
+
prompt = PromptTemplate(
|
53 |
+
template="""You will read a summary of a conversation with a sentiment and a topic. Your task is to analyze the conversation and the summary and returns a json object where the key is summary, topic, sentiment and the value is True if the sentiment and the topic are correct and False otherwise. The conversation is the following: {conversation} The summary is the following: {summary}, the topic is {topic} and the sentiment is the following: {sentiment}
|
54 |
+
JSON:""",
|
55 |
+
input_variables=["conversation", "summary", "topic", "sentiment"],
|
56 |
+
)
|
57 |
+
output_parser = JsonOutputParser()
|
58 |
+
chain = prompt | model | output_parser
|
59 |
+
result = chain.invoke(
|
60 |
+
{
|
61 |
+
"conversation": conversation,
|
62 |
+
"topic": topic,
|
63 |
+
"summary": summary,
|
64 |
+
"sentiment": sentiment,
|
65 |
+
}
|
66 |
+
)
|
67 |
+
assert result["summary"], "The summary is not correct"
|
68 |
+
assert result["topic"], "The topic is not correct"
|
69 |
+
assert result["sentiment"], "The sentiment is not correct"
|